
I am trying to run this script which worked previously:

import csv
from selenium import webdriver
from time import sleep
from parsel import Selector
from selenium.webdriver.common.keys import Keys
from collections import defaultdict
from selenium.webdriver.support.select import Select

####### reading from the input file ##########
columns = defaultdict(list)  # each value in each column is appended to a list

# get the list of keywords from the csv file
with open('query.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile)  # read rows into a dictionary format
    for row in reader:  # read a row as {column1: value1, column2: value2,...}
        for (k, v) in row.items():  # go over each column name and value
            columns[k].append(v)  # append the value into the appropriate list

# the list containing all of the keywords
search_query_list = columns['Keyword']

########## start scraping ###############
rb_results = []

# create a driver and let it open Google Chrome
driver = webdriver.Chrome("chromedriver")
# open the Redbubble homepage
driver.get('https://www.redbubble.com/')
sleep(0.5)

for i in range(len(search_query_list)):
    next_query = search_query_list[i]
    # get RB website
    driver.get('https://www.redbubble.com/')
    # find the search bar by its name attribute
    search_bar = driver.find_element_by_name("query")
    sleep(0.5)
    # enter the query into the search bar
    search_bar.send_keys(next_query)
    # press enter
    search_bar.send_keys(Keys.RETURN)
    sleep(1)
    # from parsel's Selector get the page source
    sel1 = Selector(text=driver.page_source)
    sleep(0.5)
    # first t-shirt
    continue_link = driver.find_element_by_class_name('shared-components-ShopSearchSkeleton-ShopSearchSkeleton__composedComponentWrapper--1s_CI').click()
    sleep(1)
    sel2 = Selector(text=driver.page_source)
    sleep(0.5)

    ################## get TAGS ###############
    # check tags for all products
    try:
        # get the tags for the search query
        tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").text
        tags_rb = str(tags_rb)
        # if tags were found, store them; otherwise store "0"
        if tags_rb == None:
            rb_results.append("0")
        else:
            # rb_results = str(tags_rb)
            rb_results.append(tags_rb)
    except ValueError:
        continue
    # except:
    #     rb_results.append("errore")

###### writing part ########
with open("rb_results.csv", "w", newline='') as resultFile:
    writer = csv.DictWriter(resultFile, fieldnames=["Rb Results"], delimiter='\t')
    writer.writeheader()
    writer.writerows({'Rb Results': item} for item in rb_results)
resultFile.close()

When I run this script I come across this error:

Traceback (most recent call last):
  File "rb-spider.py", line 18, in <module>
    for row in reader:  # read a row as {column1: value1, column2: value2,...}
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/csv.py", line 111, in __next__
    self.fieldnames
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/csv.py", line 98, in fieldnames
    self._fieldnames = next(self.reader)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/codecs.py", line 321, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 3362: invalid continuation byte

I can't figure out why I get this error. Any idea?

  • If it helps: when I reduce the number of queries in the list (from 30 to fewer than 10), the script sometimes works. However, I would like to run it with a list of 100 queries, and then it unfortunately fails. Commented Jul 19, 2018 at 14:05

1 Answer


Try using the encoding argument of open().

# get the list of keywords from the csv file
with open('query.csv', 'r', encoding='utf-8') as csvfile:
    ...
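For example, the reading block could look like the sketch below. One caveat: the 0xd5 byte in your traceback is not valid UTF-8, so if encoding='utf-8' still raises the same error, the file was probably saved in some other encoding. The 'latin-1' in this sketch is only an assumption; check how query.csv was actually exported (Excel on Windows often produces 'cp1252', older Mac tools 'mac_roman').

import csv
from collections import defaultdict

columns = defaultdict(list)

# 'latin-1' is an assumed encoding for illustration only; replace it with
# whatever encoding query.csv was really saved in ('utf-8', 'cp1252', ...).
with open('query.csv', 'r', encoding='latin-1', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        for k, v in row.items():
            columns[k].append(v)

search_query_list = columns['Keyword']

Note that 'latin-1' maps every possible byte to some character, so it never raises a decode error, but it can silently produce wrong characters if the file's real encoding is different.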

