I need cheap GPUs, so I decided to write a little eBay scraper to make sure I can buy them at low prices. It works: I can search for any product and scrape the title, price and shipping cost of each listing.
But I feel like I haven't done it in a generic way, hence the need to post it here. So my question is: could this have been done more generically?
Code
from urllib.parse import quote_plus
from urllib.request import urlopen


def find_next_page(soup):
    '''
    Finds next page

    Looks up the pagination anchor with class "gspr next" in the parsed page.

    Returns:
        Next page link if exists
        None if next page does not exists (no anchor, or anchor without href)
    '''
    next_page = soup.find('a', {'class': 'gspr next'})
    if next_page is None:
        return None
    # .get() yields None for an anchor without an href instead of raising,
    # which replaces the former bare `except:` that hid every other error.
    return next_page.get('href')


def scrape_page(soup, idx):
    '''
    Scrape page for products and save them as a dictionary

    Args:
        soup: parsed result page (anything offering find_all like a
            BeautifulSoup document).
        idx: key to assign to the first product found on this page.

    Returns:
        A tuple (products, next_idx): products maps consecutive integer
        keys, starting at idx, to dicts holding 'Title' and, when present on
        the page, 'Price' and 'Shipping'; next_idx is the first unused key.
    '''
    products = {}
    prod_idx = idx
    for ultag in soup.find_all('ul', {'id': 'ListViewInner'}):
        for litag in ultag.find_all('li'):
            title = litag.find('a', {'class': 'vip'})
            if title is None:
                # Not a listing entry (no title link): nothing to record.
                continue

            # Drop the "New listing" badge text and surrounding whitespace.
            product = {'Title': title.text.replace('New listing', '').strip()}

            price_list = litag.find('ul', {'class': 'lvprices left space-zero'})
            if price_list is not None:
                for li in price_list.find_all('li'):
                    text = li.text
                    if 'shipping' in text.lower():
                        product['Shipping'] = text.strip()
                    elif '$' in text:
                        # First whitespace-separated token is the amount.
                        product['Price'] = text.split()[0]

            products[prod_idx] = product
            prod_idx += 1
    return products, prod_idx


def table_print(products):
    '''
    Prints products in nice human-readable format

    Products that lack a title, price or shipping entry are skipped.
    '''
    row = '{:<8} {:<100} {:<15} {:<30}'
    print(row.format('Key', 'Product', 'Price', 'Shipping'))
    for key, product in products.items():
        # Look the fields up by name: relying on dict insertion order would
        # swap the columns whenever shipping was parsed before the price.
        try:
            title = product['Title']
            price = product['Price']
            shipping = product['Shipping']
        except KeyError:
            continue
        print(row.format(key, title, price, shipping))


def scrape_product(ebay_page):
    '''
    Main scraper

    Follows the "next page" links starting at ebay_page and collects the
    products of every page into a single dictionary.

    Returns:
        A dictionary of all products found, keyed by a running index.
    '''
    # Imported here because this is the only function that needs bs4; the
    # parsing helpers above stay importable without the third-party package.
    from bs4 import BeautifulSoup

    products = {}
    idx = 0
    while ebay_page is not None:
        # The context manager closes the HTTP response once it is parsed.
        with urlopen(ebay_page) as html_doc:
            soup = BeautifulSoup(html_doc, 'html.parser')

        prod, idx = scrape_page(soup, idx)
        products.update(prod)

        ebay_page = find_next_page(soup)
    return products


def make_search_string(keywords):
    '''
    Make ebay search products string

    Args:
        keywords: iterable of search words.

    Returns:
        The search URL, with the keywords URL-encoded and joined by '+'.
    '''
    base = 'https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw='
    end = '&_sacat=0'
    # quote_plus turns spaces into '+' and escapes characters such as '&'
    # or '+' that would otherwise corrupt the query string.
    return '{0}{1}{2}'.format(base, quote_plus(' '.join(keywords)), end)


def main():
    '''Ask for search keywords, scrape the results and print them.'''
    print('Project I Need a Cheap GPU (Ebay-Scraper in Python3) \n@Ludisposed \n')
    keywords = input('What do you want to search ebay for? ').split()

    # Make search string
    ebay_page = make_search_string(keywords)

    # Find all products
    products = scrape_product(ebay_page)

    # Print all products
    table_print(products)


if __name__ == '__main__':
    main()

# Example
Project I Need a Cheap GPU (Ebay-Scraper in Python3)
@Ludisposed

What do you want to search ebay for? amd 580
Key      Product                                                                            Price           Shipping
0        XFX AMD Radeon RX 580 8GB GDDR5 GTR Black Edition PCI Express 3.0 New Sealed       $305.00         +$37.83 shipping
1        MSI AMD Radeon RX 580 GAMING X 4G GDDR5 DVI/2HDMI/2Displayport PCI-Express Video   $336.99         +$46.19 shipping
etc....