Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
cdec057
feat: add `allow_large_results` option to `read_gbq_query`
tswast Jul 24, 2025
3b78da7
add system test
tswast Jul 24, 2025
ad11cc8
Merge remote-tracking branch 'origin/main' into allow_large_results
tswast Jul 24, 2025
68f1a10
add to pandas module
tswast Jul 24, 2025
b1e31e3
Merge branch 'main' into allow_large_results
tswast Aug 20, 2025
3b770ae
default to global option
tswast Aug 20, 2025
d01cdcb
Merge remote-tracking branch 'origin/allow_large_results' into allow_…
tswast Aug 20, 2025
10a8302
fix unit test
tswast Aug 20, 2025
938fb89
tweak imports so I can manually run doctest
tswast Aug 21, 2025
78d29f0
support index_col and columns
tswast Aug 21, 2025
435c602
add system tests and fix pandas warning
tswast Aug 21, 2025
7199fee
Merge remote-tracking branch 'origin/main' into allow_large_results
tswast Aug 21, 2025
778746f
use global option in remaining functions
tswast Aug 21, 2025
766640a
Merge remote-tracking branch 'origin/main' into allow_large_results
tswast Aug 21, 2025
95b2fdf
supply allow_large_results=False when max_results is set
tswast Aug 21, 2025
05c145c
fix last? failing test
tswast Aug 21, 2025
e87b548
Merge branch 'main' into allow_large_results
tswast Aug 21, 2025
58f45f8
fix vector search tests
tswast Aug 21, 2025
d611337
Merge remote-tracking branch 'origin/allow_large_results' into allow_…
tswast Aug 21, 2025
6d0fe48
fix vector search tests again
tswast Aug 21, 2025
8c27c2c
fix arima tests
tswast Aug 21, 2025
5ab21a9
fix more tests
tswast Aug 21, 2025
2ca9e9e
try again
tswast Aug 21, 2025
0c8b88c
and again
tswast Aug 21, 2025
2a29309
Merge branch 'main' into allow_large_results
tswast Sep 2, 2025
9362838
exclude ML results from jobless query path
tswast Sep 2, 2025
d3997ba
fix unit tests
tswast Sep 2, 2025
7cb3d55
add parameter for vector_search too
tswast Sep 2, 2025
0a70173
fix doctest
tswast Sep 2, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bigframes/pandas/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def read_gbq( # type: ignore[overload-overlap]
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> bigframes.dataframe.DataFrame:
...

Expand All @@ -200,6 +201,7 @@ def read_gbq(
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

Expand All @@ -215,6 +217,7 @@ def read_gbq(
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
dry_run: bool = False,
allow_large_results: bool = True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should allow_large_results also default to None? This would allow it to inherit its value from ComputeOptions.allow_large_results.

Copy link
Collaborator Author

@tswast tswast Aug 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we do so, it'd technically be a breaking change. Users with query results > 10 GB would have to set this option to True.

Might be worth it for the consistency with other places, though?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

) -> bigframes.dataframe.DataFrame | pandas.Series:
_set_default_session_location_if_possible(query_or_table)
return global_session.with_default_session(
Expand All @@ -228,6 +231,7 @@ def read_gbq(
use_cache=use_cache,
col_order=col_order,
dry_run=dry_run,
allow_large_results=allow_large_results,
)


Expand Down Expand Up @@ -391,6 +395,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
col_order: Iterable[str] = ...,
filters: vendored_pandas_gbq.FiltersType = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> bigframes.dataframe.DataFrame:
...

Expand All @@ -407,6 +412,7 @@ def read_gbq_query(
col_order: Iterable[str] = ...,
filters: vendored_pandas_gbq.FiltersType = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

Expand All @@ -422,6 +428,7 @@ def read_gbq_query(
col_order: Iterable[str] = (),
filters: vendored_pandas_gbq.FiltersType = (),
dry_run: bool = False,
allow_large_results: bool = True,
) -> bigframes.dataframe.DataFrame | pandas.Series:
_set_default_session_location_if_possible(query)
return global_session.with_default_session(
Expand All @@ -435,6 +442,7 @@ def read_gbq_query(
col_order=col_order,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)


Expand Down
89 changes: 83 additions & 6 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ def read_gbq( # type: ignore[overload-overlap]
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> dataframe.DataFrame:
...

Expand All @@ -410,6 +411,7 @@ def read_gbq(
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

Expand All @@ -424,8 +426,8 @@ def read_gbq(
filters: third_party_pandas_gbq.FiltersType = (),
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
dry_run: bool = False
# Add a verify index argument that fails if the index is not unique.
dry_run: bool = False,
allow_large_results: bool = True,
) -> dataframe.DataFrame | pandas.Series:
# TODO(b/281571214): Generate prompt to show the progress of read_gbq.
if columns and col_order:
Expand All @@ -445,6 +447,7 @@ def read_gbq(
use_cache=use_cache,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)
else:
if configuration is not None:
Expand Down Expand Up @@ -551,6 +554,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
col_order: Iterable[str] = ...,
filters: third_party_pandas_gbq.FiltersType = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> dataframe.DataFrame:
...

Expand All @@ -567,6 +571,7 @@ def read_gbq_query(
col_order: Iterable[str] = ...,
filters: third_party_pandas_gbq.FiltersType = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

Expand All @@ -582,6 +587,7 @@ def read_gbq_query(
col_order: Iterable[str] = (),
filters: third_party_pandas_gbq.FiltersType = (),
dry_run: bool = False,
allow_large_results: bool = True,
) -> dataframe.DataFrame | pandas.Series:
"""Turn a SQL query into a DataFrame.

Expand Down Expand Up @@ -631,9 +637,48 @@ def read_gbq_query(

See also: :meth:`Session.read_gbq`.

Args:
query (str):
A SQL query to execute.
index_col (Iterable[str] or str, optional):
The column(s) to use as the index for the DataFrame. This can be
a single column name or a list of column names. If not provided,
a default index will be used.
columns (Iterable[str], optional):
The columns to read from the query result. If not
specified, all columns will be read.
configuration (dict, optional):
A dictionary of query job configuration options. See the
BigQuery REST API documentation for a list of available options:
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
max_results (int, optional):
The maximum number of rows to retrieve from the query
result. If not specified, all rows will be loaded.
use_cache (bool, optional):
Whether to use cached results for the query. Defaults to ``True``.
Setting this to ``False`` will force a re-execution of the query.
col_order (Iterable[str], optional):
The desired order of columns in the resulting DataFrame. This
parameter is deprecated and will be removed in a future version.
Use ``columns`` instead.
filters (list[tuple], optional):
A list of filters to apply to the data. Filters are specified
as a list of tuples, where each tuple contains a column name,
an operator (e.g., '==', '!='), and a value.
dry_run (bool, optional):
If ``True``, the function will not actually execute the query but
will instead return statistics about the query. Defaults to
``False``.
allow_large_results (bool, optional):
Whether to allow large query results. If ``True``, the query
results can be larger than the maximum response size.
Defaults to ``True``.

Returns:
bigframes.pandas.DataFrame:
A DataFrame representing results of the query or table.
bigframes.pandas.DataFrame or pandas.Series:
A DataFrame representing the result of the query. If ``dry_run``
is ``True``, a ``pandas.Series`` containing query statistics is
returned.

Raises:
ValueError:
Expand All @@ -657,6 +702,7 @@ def read_gbq_query(
use_cache=use_cache,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)

@overload
Expand Down Expand Up @@ -714,9 +760,40 @@ def read_gbq_table(

See also: :meth:`Session.read_gbq`.

Args:
table_id (str):
The identifier of the BigQuery table to read.
index_col (Iterable[str] or str, optional):
The column(s) to use as the index for the DataFrame. This can be
a single column name or a list of column names. If not provided,
a default index will be used.
columns (Iterable[str], optional):
The columns to read from the table. If not specified, all
columns will be read.
max_results (int, optional):
The maximum number of rows to retrieve from the table. If not
specified, all rows will be loaded.
filters (list[tuple], optional):
A list of filters to apply to the data. Filters are specified
as a list of tuples, where each tuple contains a column name,
an operator (e.g., '==', '!='), and a value.
use_cache (bool, optional):
Whether to use cached results for the query. Defaults to ``True``.
Setting this to ``False`` will force a re-execution of the query.
col_order (Iterable[str], optional):
The desired order of columns in the resulting DataFrame. This
parameter is deprecated and will be removed in a future version.
Use ``columns`` instead.
dry_run (bool, optional):
If ``True``, the function will not actually execute the query but
will instead return statistics about the table. Defaults to
``False``.

Returns:
bigframes.pandas.DataFrame:
A DataFrame representing results of the query or table.
bigframes.pandas.DataFrame or pandas.Series:
A DataFrame representing the contents of the table. If
``dry_run`` is ``True``, a ``pandas.Series`` containing table
statistics is returned.

Raises:
ValueError:
Expand Down
32 changes: 32 additions & 0 deletions tests/system/small/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import pytest

import bigframes
import bigframes.core.nodes as nodes
import bigframes.dataframe
import bigframes.dtypes
import bigframes.ml.linear_model
Expand Down Expand Up @@ -640,6 +641,37 @@ def test_read_gbq_with_configuration(
assert df.shape == (9, 3)


def test_read_gbq_query_w_allow_large_results(session: bigframes.Session):
    """System test: ``allow_large_results`` controls how query results are materialized.

    With ``allow_large_results=False`` the small result should be downloaded and
    wrapped in a local (in-memory) node; with ``allow_large_results=True`` the
    result should be backed by a BigQuery destination table instead.
    """
    # The jobless (stateless) query path this test exercises is only exposed by
    # newer google-cloud-bigquery clients — skip rather than fail on older ones.
    if not hasattr(session.bqclient, "default_job_creation_mode"):
        pytest.skip("Jobless query only available on newer google-cloud-bigquery.")

    query = "SELECT 1"

    # Make sure we don't get a cached table.
    configuration = {"query": {"useQueryCache": False}}

    # Very small results should wrap a local node.
    df_false = session.read_gbq(
        query,
        configuration=configuration,
        allow_large_results=False,
    )
    assert df_false.shape == (1, 1)
    # Inspect the expression tree leaves to confirm the data source kind.
    # NOTE(review): relies on private internals (_get_block) — brittle if the
    # block/expr API changes.
    roots_false = df_false._get_block().expr.node.roots
    assert any(isinstance(node, nodes.ReadLocalNode) for node in roots_false)
    assert not any(isinstance(node, nodes.ReadTableNode) for node in roots_false)

    # Large results allowed should wrap a table.
    df_true = session.read_gbq(
        query,
        configuration=configuration,
        allow_large_results=True,
    )
    assert df_true.shape == (1, 1)
    roots_true = df_true._get_block().expr.node.roots
    assert any(isinstance(node, nodes.ReadTableNode) for node in roots_true)


def test_read_gbq_with_custom_global_labels(
session: bigframes.Session, scalars_table_id: str
):
Expand Down
6 changes: 6 additions & 0 deletions third_party/bigframes_vendored/pandas/io/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def read_gbq(
filters: FiltersType = (),
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
allow_large_results: bool = True,
):
"""Loads a DataFrame from BigQuery.

Expand Down Expand Up @@ -156,6 +157,11 @@ def read_gbq(
`configuration` to avoid conflicts.
col_order (Iterable[str]):
Alias for columns, retained for backwards compatibility.
allow_large_results (bool, optional):
Whether to allow large query results. If ``True``, the query
results can be larger than the maximum response size. This
option is only applicable when ``query_or_table`` is a query.
Defaults to ``True``.

Raises:
bigframes.exceptions.DefaultIndexWarning:
Expand Down