googleapis
diff --git a/‎bigframes/dataframe.py‎
Lines changed: 24 additions & 0 deletions b/‎bigframes/dataframe.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎bigframes/series.py‎
Lines changed: 25 additions & 0 deletions b/‎bigframes/series.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎tests/system/small/test_dataframe.py‎
Lines changed: 1 addition & 19 deletions b/‎tests/system/small/test_dataframe.py‎
Lines changed: 1 addition & 19 deletions
diff --git a/‎tests/system/small/test_dataframe_io.py‎
Lines changed: 56 additions & 8 deletions b/‎tests/system/small/test_dataframe_io.py‎
Lines changed: 56 additions & 8 deletions
diff --git a/‎tests/system/small/test_index.py‎
Lines changed: 0 additions & 8 deletions b/‎tests/system/small/test_index.py‎
Lines changed: 0 additions & 8 deletions
diff --git a/‎tests/system/small/test_index_io.py‎
Lines changed: 8 additions & 0 deletions b/‎tests/system/small/test_index_io.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/system/small/test_series_io.py‎
Lines changed: 33 additions & 0 deletions b/‎tests/system/small/test_series_io.py‎
Lines changed: 33 additions & 0 deletions
@@ -1669,17 +1669,27 @@ def to_pandas(
 
  Args:
  max_download_size (int, default None):
+ .. deprecated:: 2.0.0
+ `max_download_size` parameter is deprecated. Please use `to_pandas_batch()` method
+ instead.
+
  Download size threshold in MB. If max_download_size is exceeded when downloading data
  (e.g., to_pandas()), the data will be downsampled if
  bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be
  raised. If set to a value other than None, this will supersede the global config.
  sampling_method (str, default None):
+ .. deprecated:: 2.0.0
+ `sampling_method` parameter is deprecated. Please use `sample()` method instead.
+
  Downsampling algorithms to be chosen from, the choices are: "head": This algorithm
  returns a portion of the data from the beginning. It is fast and requires minimal
  computations to perform the downsampling; "uniform": This algorithm returns uniform
  random samples of the data. If set to a value other than None, this will supersede
  the global config.
  random_state (int, default None):
+ .. deprecated:: 2.0.0
+ `random_state` parameter is deprecated. Please use `sample()` method instead.
+
  The seed for the uniform downsampling algorithm. If provided, the uniform method may
  take longer to execute and require more computation. If set to a value other than
  None, this will supersede the global config.
@@ -1699,6 +1709,20 @@ def to_pandas(
  downsampled rows and all columns of this DataFrame. If dry_run is set, a pandas
  Series containing dry run statistics will be returned.
  """
+        # Warn about deprecated per-call download/sampling overrides. Both warnings
+        # use stacklevel=2 so they are attributed to the code calling to_pandas()
+        # rather than to this module.
+        if max_download_size is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `max_download_size` parameter for `DataFrame.to_pandas()` "
+                "is deprecated and will be removed soon. Please use `DataFrame.to_pandas_batch()`."
+            )
+            warnings.warn(msg, category=UserWarning, stacklevel=2)
+        if sampling_method is not None or random_state is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `sampling_method` and `random_state` parameters for "
+                "`DataFrame.to_pandas()` are deprecated and will be removed soon. "
+                "Please use `DataFrame.sample().to_pandas()` instead for sampling."
+            )
+            warnings.warn(msg, category=UserWarning, stacklevel=2)
+
  if dry_run:
  dry_run_stats, dry_run_job = self._block._compute_dry_run(
  max_download_size=max_download_size,
 
@@ -24,6 +24,7 @@
 import textwrap
 import typing
 from typing import Any, cast, List, Literal, Mapping, Optional, Sequence, Tuple, Union
+import warnings
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.series as vendored_pandas_series
@@ -49,6 +50,7 @@
 import bigframes.core.window_spec as windows
 import bigframes.dataframe
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 import bigframes.formatting_helpers as formatter
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
@@ -420,17 +422,27 @@ def to_pandas(
 
  Args:
  max_download_size (int, default None):
+ .. deprecated:: 2.0.0
+ `max_download_size` parameter is deprecated. Please use `to_pandas_batch()` method
+ instead.
+
  Download size threshold in MB. If max_download_size is exceeded when downloading data
  (e.g., to_pandas()), the data will be downsampled if
  bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be
  raised. If set to a value other than None, this will supersede the global config.
  sampling_method (str, default None):
+ .. deprecated:: 2.0.0
+ `sampling_method` parameter is deprecated. Please use `sample()` method instead.
+
  Downsampling algorithms to be chosen from, the choices are: "head": This algorithm
  returns a portion of the data from the beginning. It is fast and requires minimal
  computations to perform the downsampling; "uniform": This algorithm returns uniform
  random samples of the data. If set to a value other than None, this will supersede
  the global config.
  random_state (int, default None):
+ .. deprecated:: 2.0.0
+ `random_state` parameter is deprecated. Please use `sample()` method instead.
+
  The seed for the uniform downsampling algorithm. If provided, the uniform method may
  take longer to execute and require more computation. If set to a value other than
  None, this will supersede the global config.
@@ -449,6 +461,19 @@ def to_pandas(
  is not exceeded; otherwise, a pandas Series with downsampled rows of the DataFrame. If dry_run
  is set to True, a pandas Series containing dry run statistics will be returned.
  """
+        # Warn about deprecated per-call download/sampling overrides. stacklevel=2
+        # attributes each warning to the code calling to_pandas() rather than to
+        # this module, so users can locate the offending call site.
+        if max_download_size is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `max_download_size` parameter for `Series.to_pandas()` "
+                "is deprecated and will be removed soon. Please use `Series.to_pandas_batch()`."
+            )
+            warnings.warn(msg, category=UserWarning, stacklevel=2)
+        if sampling_method is not None or random_state is not None:
+            msg = bfe.format_message(
+                "DEPRECATED: The `sampling_method` and `random_state` parameters for "
+                "`Series.to_pandas()` are deprecated and will be removed soon. "
+                "Please use `Series.sample().to_pandas()` instead for sampling."
+            )
+            warnings.warn(msg, category=UserWarning, stacklevel=2)
 
  if dry_run:
  dry_run_stats, dry_run_job = self._block._compute_dry_run(
 
@@ -5203,9 +5203,7 @@ def test_query_complexity_repeated_subtrees(
  # See: https://github.com/python/cpython/issues/112282
  reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
 )
-def test_query_complexity_repeated_analytic(
- scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
-):
+def test_query_complexity_repeated_analytic(scalars_df_index, scalars_pandas_df_index):
  bf_df = scalars_df_index[["int64_col", "int64_too"]]
  pd_df = scalars_pandas_df_index[["int64_col", "int64_too"]]
  # Uses LAG analytic operator, each in a new SELECT
@@ -5217,22 +5215,6 @@ def test_query_complexity_repeated_analytic(
  assert_pandas_df_equal(bf_result, pd_result)
 
 
-def test_to_pandas_downsampling_option_override(session):
- df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting")
- download_size = 1
-
- # limits only apply for allow_large_result=True
- df = df.to_pandas(
- max_download_size=download_size,
- sampling_method="head",
- allow_large_results=True,
- )
-
- total_memory_bytes = df.memory_usage(deep=True).sum()
- total_memory_mb = total_memory_bytes / (1024 * 1024)
- assert total_memory_mb == pytest.approx(download_size, rel=0.5)
-
-
 def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created):
  dataset_id = dataset_id_not_created
  destination_table = f"{dataset_id}.scalars_df"
 
@@ -266,6 +266,62 @@ def test_to_pandas_override_global_option(scalars_df_index):
  assert scalars_df_index._query_job.destination.table_id == table_id
 
 
+def test_to_pandas_downsampling_option_override(session):
+ """Per-call `max_download_size` / `sampling_method` overrides limit the download.
+
+ Expects a UserWarning saying the data size exceeds the maximum download limit,
+ and a result whose in-memory size is roughly `download_size` MB.
+ """
+ df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting")
+ download_size = 1
+
+ with pytest.warns(
+ UserWarning, match="The data size .* exceeds the maximum download limit"
+ ):
+ # limits only apply for allow_large_results=True
+ df = df.to_pandas(
+ max_download_size=download_size,
+ sampling_method="head",
+ allow_large_results=True,
+ )
+
+ # `df` now holds the to_pandas() result; measure its footprint in MB.
+ total_memory_bytes = df.memory_usage(deep=True).sum()
+ total_memory_mb = total_memory_bytes / (1024 * 1024)
+ # rel=0.5 accepts any size within 50% of the requested limit.
+ assert total_memory_mb == pytest.approx(download_size, rel=0.5)
+
+
+@pytest.mark.parametrize(
+ ("kwargs", "message"),
+ [
+ pytest.param(
+ {"sampling_method": "head"},
+ r"DEPRECATED[\S\s]*sampling_method[\S\s]*DataFrame",
+ id="sampling_method",
+ ),
+ pytest.param(
+ {"random_state": 10},
+ r"DEPRECATED[\S\s]*random_state[\S\s]*DataFrame",
+ id="random_state",
+ ),
+ pytest.param(
+ {"max_download_size": 10},
+ r"DEPRECATED[\S\s]*max_download_size[\S\s]*DataFrame",
+ id="max_download_size",
+ ),
+ ],
+)
+def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message):
+ """Each deprecated `to_pandas()` keyword emits a matching UserWarning.
+
+ `message` is a regex requiring "DEPRECATED", the parameter name, and
+ "DataFrame" to appear in that order; `[\\S\\s]*` lets the match span newlines.
+ """
+ with pytest.warns(UserWarning, match=message):
+ scalars_df_index.to_pandas(
+ # limits only apply for allow_large_results=True
+ allow_large_results=True,
+ **kwargs,
+ )
+
+
+def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):
+ """`DataFrame.to_pandas(dry_run=True)` returns a result with 14 entries."""
+ bf_df = session.read_pandas(scalars_pandas_df_multi_index)
+
+ result = bf_df.to_pandas(dry_run=True)
+
+ # NOTE(review): 14 is presumably the number of dry-run statistics fields — confirm
+ # and update if the set of reported statistics changes.
+ assert len(result) == 14
+
+
 def test_to_arrow_override_global_option(scalars_df_index):
  # Direct call to_arrow uses global default setting (allow_large_results=True),
  with bigframes.option_context("bigquery.allow_large_results", True):
@@ -813,11 +869,3 @@ def test_to_sql_query_named_index_excluded(
  utils.assert_pandas_df_equal(
  roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
  )
-
-
-def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):
- bf_df = session.read_pandas(scalars_pandas_df_multi_index)
-
- result = bf_df.to_pandas(dry_run=True)
-
- assert len(result) == 14
 
@@ -426,11 +426,3 @@ def test_multiindex_repr_includes_all_names(session):
  )
  index = session.read_pandas(df).set_index(["A", "B"]).index
  assert "names=['A', 'B']" in repr(index)
-
-
-def test_to_pandas_dry_run(scalars_df_index):
- index = scalars_df_index.index
-
- result = index.to_pandas(dry_run=True)
-
- assert len(result) == 14
@@ -30,6 +30,14 @@ def test_to_pandas_override_global_option(scalars_df_index):
  assert bf_index._query_job.destination.table_id == table_id
 
 
+def test_to_pandas_dry_run(scalars_df_index):
+ """`Index.to_pandas(dry_run=True)` returns a result with 14 entries."""
+ index = scalars_df_index.index
+
+ result = index.to_pandas(dry_run=True)
+
+ # NOTE(review): 14 is presumably the number of dry-run statistics fields — confirm
+ # and update if the set of reported statistics changes.
+ assert len(result) == 14
+
+
 def test_to_numpy_override_global_option(scalars_df_index):
  with bigframes.option_context("bigquery.allow_large_results", True):
 
 
@@ -11,7 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import pytest
+
 import bigframes
+import bigframes.series
 
 
 def test_to_pandas_override_global_option(scalars_df_index):
@@ -32,3 +35,33 @@ def test_to_pandas_override_global_option(scalars_df_index):
  bf_series.to_pandas(allow_large_results=False)
  assert bf_series._query_job.destination.table_id == table_id
  assert session._metrics.execution_count - execution_count == 1
+
+
+@pytest.mark.parametrize(
+ ("kwargs", "message"),
+ [
+ pytest.param(
+ {"sampling_method": "head"},
+ r"DEPRECATED[\S\s]*sampling_method[\S\s]*Series",
+ id="sampling_method",
+ ),
+ pytest.param(
+ {"random_state": 10},
+ r"DEPRECATED[\S\s]*random_state[\S\s]*Series",
+ id="random_state",
+ ),
+ pytest.param(
+ {"max_download_size": 10},
+ r"DEPRECATED[\S\s]*max_download_size[\S\s]*Series",
+ id="max_download_size",
+ ),
+ ],
+)
+def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message):
+ """Each deprecated `Series.to_pandas()` keyword emits a matching UserWarning.
+
+ `message` is a regex requiring "DEPRECATED", the parameter name, and
+ "Series" to appear in that order; `[\\S\\s]*` lets the match span newlines.
+ """
+ # The annotation is the only use of the `bigframes.series` import in this test.
+ s: bigframes.series.Series = scalars_df_index["int64_col"]
+ with pytest.warns(UserWarning, match=message):
+ s.to_pandas(
+ # limits only apply for allow_large_results=True
+ allow_large_results=True,
+ **kwargs,
+ )