I'm using the code below to go through a CSV of business names and website domains (about 10,000 rows), scrape each site for "mailto:" links, and save the results to a CSV whenever a mailto link is found. Occasionally, though, I run into "temporary DNS lookup failure" and "connection timed out" errors.
I need help figuring out how to make the script skip a row when the request throws one of these errors (or any error) and just continue down the list.
import csv
import requests
from bs4 import BeautifulSoup
import numpy as np

results = []
agency_names = ['Agency Name']
agency_websites = ['Website']
agency_emails = ['Email Address']

with open('agencies_clean.csv') as csvfile:
    reader = csv.reader(csvfile)
    count = 0
    for row in reader:  # each row is a list
        # skip the header row
        if count != 0:
            if row[1] != "":
                print("working on " + row[1] + "...")
                page = requests.get('http://' + row[1])
                soup = BeautifulSoup(page.content, "html.parser")
                mailtos = soup.select('a[href^=mailto]')
                if mailtos:
                    agency_names.append(row[0])
                    agency_websites.append(row[1])
                    agency_emails.append(mailtos[0].text)
                    print('Completed[x] Company: ' + row[0] + ' Email: ' + mailtos[0].text)
        count = count + 1

# include the collected email addresses in the output rows
np.savetxt('scrapes/agencies_w_emails.csv',
           [p for p in zip(agency_names, agency_websites, agency_emails)],
           delimiter=',', fmt='%s')
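For reference, here is a minimal sketch of the skip-on-error behavior described above: the requests.get call is wrapped in a try/except that catches requests.exceptions.RequestException (the base class for DNS failures, connection errors, and timeouts) and returns None so the loop can move on. The fetch_page helper name and the 10-second timeout are assumptions for illustration, not part of the original script.

import requests

def fetch_page(url):
    # Hypothetical helper: return the response, or None if the request
    # fails for any reason (DNS lookup failure, connection timeout, etc.).
    try:
        # The 10-second timeout is an assumed value; adjust as needed.
        return requests.get(url, timeout=10)
    except requests.exceptions.RequestException as exc:
        print("skipping " + url + " (" + str(exc) + ")")
        return None

Inside the row loop, the call would then look something like:

page = fetch_page('http://' + row[1])
if page is None:
    continue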