|
19 | 19 | from tests.system.utils import assert_pandas_df_equal |
20 | 20 |
|
21 | 21 |
|
| 22 | +# ================= |
| 23 | +# DataFrame.groupby |
| 24 | +# ================= |
| 25 | + |
22 | 26 | @pytest.mark.parametrize( |
23 | 27 | ("operator"), |
24 | 28 | [ |
@@ -250,21 +254,26 @@ def test_dataframe_groupby_analytic( |
250 | 254 | pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) |
251 | 255 |
|
252 | 256 |
|
253 | | -def test_series_groupby_skew(scalars_df_index, scalars_pandas_df_index): |
254 | | - bf_result = scalars_df_index.groupby("bool_col")["int64_too"].skew().to_pandas() |
255 | | - pd_result = scalars_pandas_df_index.groupby("bool_col")["int64_too"].skew() |
| 257 | +def test_dataframe_groupby_size_as_index_false( |
| 258 | + scalars_df_index, scalars_pandas_df_index |
| 259 | +): |
| 260 | + bf_result = scalars_df_index.groupby("string_col", as_index=False).size() |
| 261 | + bf_result_computed = bf_result.to_pandas() |
| 262 | + pd_result = scalars_pandas_df_index.groupby("string_col", as_index=False).size() |
256 | 263 |
|
257 | | - pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) |
| 264 | + pd.testing.assert_frame_equal( |
| 265 | + pd_result, bf_result_computed, check_dtype=False, check_index_type=False |
| 266 | + ) |
258 | 267 |
|
259 | 268 |
|
260 | | -def test_series_groupby_kurt(scalars_df_index, scalars_pandas_df_index): |
261 | | - bf_result = scalars_df_index.groupby("bool_col")["int64_too"].kurt().to_pandas() |
262 | | - # Pandas doesn't have groupby.kurt yet: https://github.com/pandas-dev/pandas/issues/40139 |
263 | | - pd_result = scalars_pandas_df_index.groupby("bool_col")["int64_too"].apply( |
264 | | - pd.Series.kurt |
265 | | - ) |
| 269 | +def test_dataframe_groupby_size_as_index_true( |
| 270 | + scalars_df_index, scalars_pandas_df_index |
| 271 | +): |
| 272 | + bf_result = scalars_df_index.groupby("string_col", as_index=True).size() |
| 273 | + pd_result = scalars_pandas_df_index.groupby("string_col", as_index=True).size() |
| 274 | + bf_result_computed = bf_result.to_pandas() |
266 | 275 |
|
267 | | - pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) |
| 276 | + pd.testing.assert_series_equal(pd_result, bf_result_computed, check_dtype=False) |
268 | 277 |
|
269 | 278 |
|
270 | 279 | def test_dataframe_groupby_skew(scalars_df_index, scalars_pandas_df_index): |
@@ -337,6 +346,26 @@ def test_dataframe_groupby_getitem_list( |
337 | 346 | pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) |
338 | 347 |
|
339 | 348 |
|
| 349 | +def test_dataframe_groupby_nonnumeric_with_mean(): |
| 350 | + df = pd.DataFrame( |
| 351 | + { |
| 352 | + "key1": ["a", "a", "a", "b"], |
| 353 | + "key2": ["a", "a", "c", "c"], |
| 354 | + "key3": [1, 2, 3, 4], |
| 355 | + "key4": [1.6, 2, 3, 4], |
| 356 | + } |
| 357 | + ) |
| 358 | + pd_result = df.groupby(["key1", "key2"]).mean() |
| 359 | + bf_result = bpd.DataFrame(df).groupby(["key1", "key2"]).mean().to_pandas() |
| 360 | + |
| 361 | + pd.testing.assert_frame_equal( |
| 362 | + pd_result, bf_result, check_index_type=False, check_dtype=False |
| 363 | + ) |
| 364 | + |
| 365 | +# ============== |
| 366 | +# Series.groupby |
| 367 | +# ============== |
| 368 | + |
340 | 369 | def test_series_groupby_agg_string(scalars_df_index, scalars_pandas_df_index): |
341 | 370 | bf_result = ( |
342 | 371 | scalars_df_index["int64_col"] |
@@ -373,18 +402,46 @@ def test_series_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): |
373 | 402 | ) |
374 | 403 |
|
375 | 404 |
|
376 | | -def test_dataframe_groupby_nonnumeric_with_mean(): |
377 | | - df = pd.DataFrame( |
378 | | - { |
379 | | - "key1": ["a", "a", "a", "b"], |
380 | | - "key2": ["a", "a", "c", "c"], |
381 | | - "key3": [1, 2, 3, 4], |
382 | | - "key4": [1.6, 2, 3, 4], |
383 | | - } |
| 405 | +def test_series_groupby_kurt(scalars_df_index, scalars_pandas_df_index): |
| 406 | + bf_result = ( |
| 407 | + scalars_df_index["int64_too"] |
| 408 | + .groupby(scalars_df_index["bool_col"]) |
| 409 | + .kurt() |
| 410 | + .to_pandas() |
| 411 | + ) |
| 412 | + # Pandas doesn't have groupby.kurt yet: https://github.com/pandas-dev/pandas/issues/40139 |
| 413 | + pd_result = scalars_pandas_df_index.groupby("bool_col")["int64_too"].apply( |
| 414 | + pd.Series.kurt |
384 | 415 | ) |
385 | | - pd_result = df.groupby(["key1", "key2"]).mean() |
386 | | - bf_result = bpd.DataFrame(df).groupby(["key1", "key2"]).mean().to_pandas() |
387 | 416 |
|
388 | | - pd.testing.assert_frame_equal( |
389 | | - pd_result, bf_result, check_index_type=False, check_dtype=False |
| 417 | + pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) |
| 418 | + |
| 419 | + |
| 420 | +def test_series_groupby_size(scalars_df_index, scalars_pandas_df_index): |
| 421 | + bf_result = ( |
| 422 | + scalars_df_index["int64_too"].groupby(scalars_df_index["bool_col"]).size() |
390 | 423 | ) |
| 424 | + pd_result = ( |
| 425 | + scalars_pandas_df_index["int64_too"] |
| 426 | + .groupby(scalars_pandas_df_index["bool_col"]) |
| 427 | + .size() |
| 428 | + ) |
| 429 | + bf_result_computed = bf_result.to_pandas() |
| 430 | + |
| 431 | + pd.testing.assert_series_equal(pd_result, bf_result_computed, check_dtype=False) |
| 432 | + |
| 433 | + |
| 434 | +def test_series_groupby_skew(scalars_df_index, scalars_pandas_df_index): |
| 435 | + bf_result = ( |
| 436 | + scalars_df_index["int64_too"] |
| 437 | + .groupby(scalars_df_index["bool_col"]) |
| 438 | + .skew() |
| 439 | + .to_pandas() |
| 440 | + ) |
| 441 | + pd_result = ( |
| 442 | + scalars_pandas_df_index["int64_too"] |
| 443 | + .groupby(scalars_pandas_df_index["bool_col"]) |
| 444 | + .skew() |
| 445 | + ) |
| 446 | + |
| 447 | + pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) |
0 commit comments