@@ -236,7 +236,8 @@ def parallel_bulk(client, actions, thread_count=4, chunk_size=500,
236236 pool .close ()
237237 pool .join ()
238238
239- def scan (client , query = None , scroll = '5m' , raise_on_error = True , preserve_order = False , ** kwargs ):
239+ def scan (client , query = None , scroll = '5m' , raise_on_error = True ,
240+ preserve_order = False , size = 1000 , ** kwargs ):
240241 """
241242 Simple abstraction on top of the
242243 :meth:`~elasticsearch.Elasticsearch.scroll` api - a simple iterator that
@@ -258,6 +259,7 @@ def scan(client, query=None, scroll='5m', raise_on_error=True, preserve_order=Fa
258259 cause the scroll to paginate with preserving the order. Note that this
259260 can be an extremely expensive operation and can easily lead to
260261 unpredictable results, use with caution.
262+ :arg size: size (per shard) of the batch sent at each iteration.
261263
262264 Any additional keyword arguments will be passed to the initial
263265 :meth:`~elasticsearch.Elasticsearch.search` call::
@@ -273,7 +275,7 @@ def scan(client, query=None, scroll='5m', raise_on_error=True, preserve_order=Fa
273275 body = query .copy () if query else {}
274276 body ["sort" ] = "_doc"
275277 # initial search
276- resp = client .search (body = query , scroll = scroll , ** kwargs )
278+ resp = client .search (body = query , scroll = scroll , size = size , ** kwargs )
277279
278280 scroll_id = resp .get ('_scroll_id' )
279281 if scroll_id is None :
0 commit comments