gchiam
diff --git a/elasticsearch/helpers/__init__.py b/elasticsearch/helpers/__init__.py
Lines changed: 4 additions & 2 deletions
@@ -236,7 +236,8 @@ def parallel_bulk(client, actions, thread_count=4, chunk_size=500,
  pool.close()
  pool.join()
 
-def scan(client, query=None, scroll='5m', raise_on_error=True, preserve_order=False, **kwargs):
+def scan(client, query=None, scroll='5m', raise_on_error=True,
+ preserve_order=False, size=1000, **kwargs):
  """
  Simple abstraction on top of the
  :meth:`~elasticsearch.Elasticsearch.scroll` api - a simple iterator that
@@ -258,6 +259,7 @@ def scan(client, query=None, scroll='5m', raise_on_error=True, preserve_order=Fa
  cause the scroll to paginate with preserving the order. Note that this
  can be an extremely expensive operation and can easily lead to
  unpredictable results, use with caution.
+ :arg size: size (per shard) of the batch sent at each iteration.
 
  Any additional keyword arguments will be passed to the initial
  :meth:`~elasticsearch.Elasticsearch.search` call::
@@ -273,7 +275,7 @@ def scan(client, query=None, scroll='5m', raise_on_error=True, preserve_order=Fa
  body = query.copy() if query else {}
  body["sort"] = "_doc"
  # initial search
- resp = client.search(body=query, scroll=scroll, **kwargs)
+ resp = client.search(body=query, scroll=scroll, size=size, **kwargs)
 
  scroll_id = resp.get('_scroll_id')
  if scroll_id is None: