Skip to content

Commit 91aa237

Browse files
committed
feat: warn deprecated parameters
1 parent 40c55a0 commit 91aa237

File tree

7 files changed

+147
-35
lines changed

7 files changed

+147
-35
lines changed

bigframes/dataframe.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,17 +1669,27 @@ def to_pandas(
16691669
16701670
Args:
16711671
max_download_size (int, default None):
1672+
.. deprecated:: 2.0.0
1673+
`max_download_size` parameter is deprecated. Please use `to_pandas_batches()` method
1674+
instead.
1675+
16721676
Download size threshold in MB. If max_download_size is exceeded when downloading data
16731677
(e.g., to_pandas()), the data will be downsampled if
16741678
bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be
16751679
raised. If set to a value other than None, this will supersede the global config.
16761680
sampling_method (str, default None):
1681+
.. deprecated:: 2.0.0
1682+
`sampling_method` parameter is deprecated. Please use `sample()` method instead.
1683+
16771684
Downsampling algorithms to be chosen from, the choices are: "head": This algorithm
16781685
returns a portion of the data from the beginning. It is fast and requires minimal
16791686
computations to perform the downsampling; "uniform": This algorithm returns uniform
16801687
random samples of the data. If set to a value other than None, this will supersede
16811688
the global config.
16821689
random_state (int, default None):
1690+
.. deprecated:: 2.0.0
1691+
`random_state` parameter is deprecated. Please use `sample()` method instead.
1692+
16831693
The seed for the uniform downsampling algorithm. If provided, the uniform method may
16841694
take longer to execute and require more computation. If set to a value other than
16851695
None, this will supersede the global config.
@@ -1699,6 +1709,20 @@ def to_pandas(
16991709
downsampled rows and all columns of this DataFrame. If dry_run is set, a pandas
17001710
Series containing dry run statistics will be returned.
17011711
"""
1712+
if max_download_size is not None:
1713+
msg = bfe.format_message(
1714+
"DEPRECATED: The `max_download_size` parameters for `DataFrame.to_pandas()` "
1715+
"are deprecated and will be removed soon. Please use `DataFrame.to_pandas_batch()`."
1716+
)
1717+
warnings.warn(msg, category=UserWarning)
1718+
if sampling_method is not None or random_state is not None:
1719+
msg = bfe.format_message(
1720+
"DEPRECATED: The `sampling_method` and `random_state` parameters for "
1721+
"`DataFrame.to_pandas()` are deprecated and will be removed soon. "
1722+
"Please use `DataFrame.sample().to_pandas()` instead for sampling."
1723+
)
1724+
warnings.warn(msg, category=UserWarning, stacklevel=2)
1725+
17021726
if dry_run:
17031727
dry_run_stats, dry_run_job = self._block._compute_dry_run(
17041728
max_download_size=max_download_size,

bigframes/series.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import textwrap
2525
import typing
2626
from typing import Any, cast, List, Literal, Mapping, Optional, Sequence, Tuple, Union
27+
import warnings
2728

2829
import bigframes_vendored.constants as constants
2930
import bigframes_vendored.pandas.core.series as vendored_pandas_series
@@ -49,6 +50,7 @@
4950
import bigframes.core.window_spec as windows
5051
import bigframes.dataframe
5152
import bigframes.dtypes
53+
import bigframes.exceptions as bfe
5254
import bigframes.formatting_helpers as formatter
5355
import bigframes.operations as ops
5456
import bigframes.operations.aggregations as agg_ops
@@ -420,17 +422,27 @@ def to_pandas(
420422
421423
Args:
422424
max_download_size (int, default None):
425+
.. deprecated:: 2.0.0
426+
`max_download_size` parameter is deprecated. Please use `to_pandas_batches()` method
427+
instead.
428+
423429
Download size threshold in MB. If max_download_size is exceeded when downloading data
424430
(e.g., to_pandas()), the data will be downsampled if
425431
bigframes.options.sampling.enable_downsampling is True, otherwise, an error will be
426432
raised. If set to a value other than None, this will supersede the global config.
427433
sampling_method (str, default None):
434+
.. deprecated:: 2.0.0
435+
`sampling_method` parameter is deprecated. Please use `sample()` method instead.
436+
428437
Downsampling algorithms to be chosen from, the choices are: "head": This algorithm
429438
returns a portion of the data from the beginning. It is fast and requires minimal
430439
computations to perform the downsampling; "uniform": This algorithm returns uniform
431440
random samples of the data. If set to a value other than None, this will supersede
432441
the global config.
433442
random_state (int, default None):
443+
.. deprecated:: 2.0.0
444+
`random_state` parameter is deprecated. Please use `sample()` method instead.
445+
434446
The seed for the uniform downsampling algorithm. If provided, the uniform method may
435447
take longer to execute and require more computation. If set to a value other than
436448
None, this will supersede the global config.
@@ -449,6 +461,19 @@ def to_pandas(
449461
is not exceeded; otherwise, a pandas Series with downsampled rows of the DataFrame. If dry_run
450462
is set to True, a pandas Series containing dry run statistics will be returned.
451463
"""
464+
if max_download_size is not None:
465+
msg = bfe.format_message(
466+
"DEPRECATED: The `max_download_size` parameters for `Series.to_pandas()` "
467+
"are deprecated and will be removed soon. Please use `Series.to_pandas_batch()`."
468+
)
469+
warnings.warn(msg, category=UserWarning)
470+
if sampling_method is not None or random_state is not None:
471+
msg = bfe.format_message(
472+
"DEPRECATED: The `sampling_method` and `random_state` parameters for "
473+
"`Series.to_pandas()` are deprecated and will be removed soon. "
474+
"Please use `Series.sample().to_pandas()` instead for sampling."
475+
)
476+
warnings.warn(msg, category=UserWarning)
452477

453478
if dry_run:
454479
dry_run_stats, dry_run_job = self._block._compute_dry_run(

tests/system/small/test_dataframe.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5203,9 +5203,7 @@ def test_query_complexity_repeated_subtrees(
52035203
# See: https://github.com/python/cpython/issues/112282
52045204
reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
52055205
)
5206-
def test_query_complexity_repeated_analytic(
5207-
scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
5208-
):
5206+
def test_query_complexity_repeated_analytic(scalars_df_index, scalars_pandas_df_index):
52095207
bf_df = scalars_df_index[["int64_col", "int64_too"]]
52105208
pd_df = scalars_pandas_df_index[["int64_col", "int64_too"]]
52115209
# Uses LAG analytic operator, each in a new SELECT
@@ -5217,22 +5215,6 @@ def test_query_complexity_repeated_analytic(
52175215
assert_pandas_df_equal(bf_result, pd_result)
52185216

52195217

5220-
def test_to_pandas_downsampling_option_override(session):
5221-
df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting")
5222-
download_size = 1
5223-
5224-
# limits only apply for allow_large_result=True
5225-
df = df.to_pandas(
5226-
max_download_size=download_size,
5227-
sampling_method="head",
5228-
allow_large_results=True,
5229-
)
5230-
5231-
total_memory_bytes = df.memory_usage(deep=True).sum()
5232-
total_memory_mb = total_memory_bytes / (1024 * 1024)
5233-
assert total_memory_mb == pytest.approx(download_size, rel=0.5)
5234-
5235-
52365218
def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created):
52375219
dataset_id = dataset_id_not_created
52385220
destination_table = f"{dataset_id}.scalars_df"

tests/system/small/test_dataframe_io.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,62 @@ def test_to_pandas_override_global_option(scalars_df_index):
266266
assert scalars_df_index._query_job.destination.table_id == table_id
267267

268268

269+
def test_to_pandas_downsampling_option_override(session):
270+
df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting")
271+
download_size = 1
272+
273+
with pytest.warns(
274+
UserWarning, match="The data size .* exceeds the maximum download limit"
275+
):
276+
# limits only apply for allow_large_results=True
277+
df = df.to_pandas(
278+
max_download_size=download_size,
279+
sampling_method="head",
280+
allow_large_results=True,
281+
)
282+
283+
total_memory_bytes = df.memory_usage(deep=True).sum()
284+
total_memory_mb = total_memory_bytes / (1024 * 1024)
285+
assert total_memory_mb == pytest.approx(download_size, rel=0.5)
286+
287+
288+
@pytest.mark.parametrize(
289+
("kwargs", "message"),
290+
[
291+
pytest.param(
292+
{"sampling_method": "head"},
293+
r"DEPRECATED[\S\s]*sampling_method[\S\s]*DataFrame",
294+
id="sampling_method",
295+
),
296+
pytest.param(
297+
{"random_state": 10},
298+
r"DEPRECATED[\S\s]*random_state[\S\s]*DataFrame",
299+
id="random_state",
300+
),
301+
pytest.param(
302+
{"max_download_size": 10},
303+
r"DEPRECATED[\S\s]*max_download_size[\S\s]*DataFrame",
304+
id="max_download_size",
305+
),
306+
],
307+
)
308+
def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message):
309+
with pytest.warns(UserWarning, match=message):
310+
scalars_df_index.to_pandas(
311+
# limits only apply for allow_large_results=True
312+
allow_large_results=True,
313+
**kwargs,
314+
)
315+
316+
317+
def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):
318+
bf_df = session.read_pandas(scalars_pandas_df_multi_index)
319+
320+
result = bf_df.to_pandas(dry_run=True)
321+
322+
assert len(result) == 14
323+
324+
269325
def test_to_arrow_override_global_option(scalars_df_index):
270326
# Direct call to_arrow uses global default setting (allow_large_results=True),
271327
with bigframes.option_context("bigquery.allow_large_results", True):
@@ -813,11 +869,3 @@ def test_to_sql_query_named_index_excluded(
813869
utils.assert_pandas_df_equal(
814870
roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
815871
)
816-
817-
818-
def test_to_pandas_dry_run(session, scalars_pandas_df_multi_index):
819-
bf_df = session.read_pandas(scalars_pandas_df_multi_index)
820-
821-
result = bf_df.to_pandas(dry_run=True)
822-
823-
assert len(result) == 14

tests/system/small/test_index.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -426,11 +426,3 @@ def test_multiindex_repr_includes_all_names(session):
426426
)
427427
index = session.read_pandas(df).set_index(["A", "B"]).index
428428
assert "names=['A', 'B']" in repr(index)
429-
430-
431-
def test_to_pandas_dry_run(scalars_df_index):
432-
index = scalars_df_index.index
433-
434-
result = index.to_pandas(dry_run=True)
435-
436-
assert len(result) == 14

tests/system/small/test_index_io.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ def test_to_pandas_override_global_option(scalars_df_index):
3030
assert bf_index._query_job.destination.table_id == table_id
3131

3232

33+
def test_to_pandas_dry_run(scalars_df_index):
34+
index = scalars_df_index.index
35+
36+
result = index.to_pandas(dry_run=True)
37+
38+
assert len(result) == 14
39+
40+
3341
def test_to_numpy_override_global_option(scalars_df_index):
3442
with bigframes.option_context("bigquery.allow_large_results", True):
3543

tests/system/small/test_series_io.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import pytest
15+
1416
import bigframes
17+
import bigframes.series
1518

1619

1720
def test_to_pandas_override_global_option(scalars_df_index):
@@ -32,3 +35,33 @@ def test_to_pandas_override_global_option(scalars_df_index):
3235
bf_series.to_pandas(allow_large_results=False)
3336
assert bf_series._query_job.destination.table_id == table_id
3437
assert session._metrics.execution_count - execution_count == 1
38+
39+
40+
@pytest.mark.parametrize(
41+
("kwargs", "message"),
42+
[
43+
pytest.param(
44+
{"sampling_method": "head"},
45+
r"DEPRECATED[\S\s]*sampling_method[\S\s]*Series",
46+
id="sampling_method",
47+
),
48+
pytest.param(
49+
{"random_state": 10},
50+
r"DEPRECATED[\S\s]*random_state[\S\s]*Series",
51+
id="random_state",
52+
),
53+
pytest.param(
54+
{"max_download_size": 10},
55+
r"DEPRECATED[\S\s]*max_download_size[\S\s]*Series",
56+
id="max_download_size",
57+
),
58+
],
59+
)
60+
def test_to_pandas_warns_deprecated_parameters(scalars_df_index, kwargs, message):
61+
s: bigframes.series.Series = scalars_df_index["int64_col"]
62+
with pytest.warns(UserWarning, match=message):
63+
s.to_pandas(
64+
# limits only apply for allow_large_results=True
65+
allow_large_results=True,
66+
**kwargs,
67+
)

0 commit comments

Comments
 (0)