I'm importing data from an API using multithreading, as follows:
def importdata(data, auth_token): # # --- Get data from Keros API -- hed = {'Authorization': 'Bearer ' + auth_token, 'Accept': 'application/json'} urlApi = 'http://...&offset=0&limit=1' responsedata = requests.get(urlApi, data=data, headers=hed, verify=False) if responsedata.ok: num_of_records = int(math.ceil(responsedata.json()['total'])) value_limit = 249 # Number of records per page. num_of_pages = num_of_records / value_limit print num_of_records print num_of_pages pages = [i for i in range(0, num_of_pages-1)] #pages = [i for i in range(0, 3)] datarALL = [] with ThreadPoolExecutor(max_workers=num_of_pages) as executor: futh = [(executor.submit(getdata, page, hed, value_limit)) for page in pages] for data in as_completed(futh): datarALL = datarALL + data.result() return datarALL else: return None def getdata(page,hed,limit): is_valid = True value_offset = page * limit value_limit = limit #limit of records allowed per page datarALL = [] url = 'http://...&offset={0}&limit={1}'.format(value_offset,value_limit) responsedata = requests.get(url, data=data, headers=hed, verify=False) if responsedata.status_code == 200: #200 for successful call responsedata = responsedata.text jsondata = json.loads(responsedata) if "results" in jsondata: if jsondata["results"]: datarALL = datarALL + jsondata["results"] print "page {} finished".format(page) return datarALL When I set:
pages = [i for i in range(0, 3)] it works with no problems.
But when I try
pages = [i for i in range(0, num_of_pages-1)] it generates this error:
page 317 finished page 240 finished page 15 finished page 12 finished page 350 finished page 16 finished page 288 finished page 18 finished page 17 finished Traceback (most recent call last): File "/home/ubuntu/scripts/import.py", line 84, in importdata datarALL = datarALL + data.result() File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/_base.py", line 455, in result return self.__get_result() File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/thread.py", line 63, in run result = self.fn(*self.args, **self.kwargs) File "/home/ubuntu/scripts/import.py", line 54, in getdata responsedata = requests.get(url, data=data, headers=hed, verify=False) File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 72, in get return request('get', url, params=params, **kwargs) File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 58, in request return session.request(method=method, url=url, **kwargs) File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 512, in request resp = self.send(prep, **send_kwargs) File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 662, in send r.content File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 827, in content self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b'' File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 752, in generate raise ChunkedEncodingError(e) requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer')) I'm not sure why this happens and why it gives me connection broken? Any idea why it doesn't work?
Is there a way to actually get details about what broke — for example, which specific URL caused the problem?