So I have code that spins up 4 Selenium Chrome drivers and scrapes data from an element on each page. Simplified, the code looks something like this:
```python
import json
import math
import multiprocessing as mp

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait


class scraper():
    def __init__(self, list_of_urls, process_num):
        self.urls = list_of_urls
        self.process_num = process_num

    def scrape_urls(self):
        driver = webdriver.Chrome(driver_dir)  # driver_dir (chromedriver path) is defined elsewhere
        data = []
        for url in self.urls:
            driver.get(url)
            element = WebDriverWait(driver, timeout=7).until(
                lambda d: d.find_element(by=By.CLASS_NAME, value="InterestingData")
            )
            data.append(element.text)
            print("Scraper #", self.process_num, "got data from:", url)
        driver.quit()  # close the browser once this worker's list is done
        return data


if __name__ == '__main__':
    with open('array_of_urls', 'r') as infile:
        urls = json.load(infile)

    number_of_processes = 4
    length_of_urls = len(urls)
    partition_into = math.ceil(length_of_urls / number_of_processes)
    scrapers = []
    start = 0
    end = start + partition_into
    for num in range(number_of_processes):
        new_scraper = scraper(urls[start:end], num)
        scrapers.append(new_scraper)
        start = end
        end = start + partition_into
        if end > length_of_urls:
            end = length_of_urls  # clamping to length_of_urls - 1 would drop the last URL

    with mp.Pool(processes=number_of_processes) as pool:
        result_array = []
        for num in range(number_of_processes):
            result_array.append(pool.apply_async(scrapers[num].scrape_urls))
        pool.close()
        pool.join()
        # .get() re-raises any exception that happened inside a worker;
        # without it, worker errors are silently swallowed
        results = [r.get() for r in result_array]
```

The problem I'm running into is that after 5-10 minutes one of the scrapers just stops; the only thing that wakes it back up is manually refreshing the page in its browser window. If I leave it for an hour or so, 3 of the 4 have stopped and only one is still running. They don't error out or print anything, they just stop running. I've tried this on 2 different laptops and both have the same issue. I've also tried running `scrape_urls` in 4 separate `mp.Process()` workers instead of a pool (a sketch of that variant is below), and that does the same thing. Has anyone else run into this issue, or am I doing something wrong here?
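For completeness, the `mp.Process()` variant I mentioned looks roughly like this. It's a minimal sketch that reuses the same `scraper` class and imports from the listing above; the return value of `scrape_urls` is discarded here, since I only cared about whether the workers kept running:

```python
# Sketch of the mp.Process() variant: same scraper class, json/math/mp imports,
# and URL file as above, just four explicit processes instead of a Pool.
if __name__ == '__main__':
    with open('array_of_urls', 'r') as infile:
        urls = json.load(infile)

    number_of_processes = 4
    partition_into = math.ceil(len(urls) / number_of_processes)

    processes = []
    for num in range(number_of_processes):
        # slicing past the end of the list is safe in Python, so no clamping needed
        chunk = urls[num * partition_into:(num + 1) * partition_into]
        worker = scraper(chunk, num)
        p = mp.Process(target=worker.scrape_urls)
        processes.append(p)
        p.start()

    for p in processes:
        p.join()
```

The slice-based chunking here is just a tidier version of the start/end arithmetic in the pool version; the stalling behavior is identical either way.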