-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
In [5]: df = pd.DataFrame({'a': [1,1,2], 'b': [4,5,6], 'i': [0,1,2]}, dtype='Int64')[::-1]

In [6]: df.groupby('a')['b'].cummax()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[6], line 1
----> 1 df.groupby('a')['b'].cummax()

File ~/pandas-dev/pandas/core/groupby/groupby.py:4945, in GroupBy.cummax(self, numeric_only, **kwargs)
   4886 """
   4887 Cumulative max for each group.
   4888
   (...)
   4942 bull 6 9
   4943 """
   4944 skipna = kwargs.get("skipna", True)
-> 4945 return self._cython_transform(
   4946     "cummax", numeric_only=numeric_only, skipna=skipna
   4947 )

File ~/pandas-dev/pandas/core/groupby/generic.py:684, in SeriesGroupBy._cython_transform(self, how, numeric_only, **kwargs)
    681 obj = self._obj_with_exclusions
    683 try:
--> 684     result = self._grouper._cython_operation(
    685         "transform", obj._values, how, 0, **kwargs
    686     )
    687 except NotImplementedError as err:
    688     # e.g. test_groupby_raises_string
    689     raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err

File ~/pandas-dev/pandas/core/groupby/ops.py:932, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    928 assert kind in ["transform", "aggregate"]
    930 cy_op = WrappedCythonOp(kind=kind, how=how, has_dropped_na=self.has_dropped_na)
--> 932 return cy_op.cython_operation(
    933     values=values,
    934     axis=axis,
    935     min_count=min_count,
    936     comp_ids=self.ids,
    937     ngroups=self.ngroups,
    938     **kwargs,
    939 )

File ~/pandas-dev/pandas/core/groupby/ops.py:546, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
    542 self._validate_axis(axis, values)
    544 if not isinstance(values, np.ndarray):
    545     # i.e. ExtensionArray
--> 546     return values._groupby_op(
    547         how=self.how,
    548         has_dropped_na=self.has_dropped_na,
    549         min_count=min_count,
    550         ngroups=ngroups,
    551         ids=comp_ids,
    552         **kwargs,
    553     )
    555 return self._cython_op_ndim_compat(
    556     values,
    557     min_count=min_count,
   (...)
    561     **kwargs,
    562 )

File ~/pandas-dev/pandas/core/arrays/masked.py:1602, in BaseMaskedArray._groupby_op(self, how, has_dropped_na, min_count, ngroups, ids, **kwargs)
   1599 if how == "rank" and kwargs.get("na_option") in ["top", "bottom"]:
   1600     result_mask[:] = False
-> 1602 res_values = op._cython_op_ndim_compat(
   1603     self._data,
   1604     min_count=min_count,
   1605     ngroups=ngroups,
   1606     comp_ids=ids,
   1607     mask=mask,
   1608     result_mask=result_mask,
   1609     **kwargs,
   1610 )
   1612 if op.how == "ohlc":
   1613     arity = op._cython_arity.get(op.how, 1)

File ~/pandas-dev/pandas/core/groupby/ops.py:331, in WrappedCythonOp._cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    329 if result_mask is not None:
    330     result_mask = result_mask[None, :]
--> 331 res = self._call_cython_op(
    332     values2d,
    333     min_count=min_count,
    334     ngroups=ngroups,
    335     comp_ids=comp_ids,
    336     mask=mask,
    337     result_mask=result_mask,
    338     **kwargs,
    339 )
    340 if res.shape[0] == 1:
    341     return res[0]

File ~/pandas-dev/pandas/core/groupby/ops.py:477, in WrappedCythonOp._call_cython_op(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    474 if self.how != "rank":
    475     # TODO: should rank take result_mask?
    476     kwargs["result_mask"] = result_mask
--> 477 func(
    478     out=result,
    479     values=values,
    480     labels=comp_ids,
    481     ngroups=ngroups,
    482     is_datetimelike=is_datetimelike,
    483     mask=mask,
    484     **kwargs,
    485 )
    487 if self.kind == "aggregate" and self.how not in ["idxmin", "idxmax"]:
    488     # i.e. counts is defined. Locations where count<min_count
    489     # need to have the result set to np.nan, which may require casting,
    490     # see GH#40767. For idxmin/idxmax is handled specially via post-processing
    491     if result.dtype.kind in "iu" and not is_datetimelike:
    492         # if the op keeps the int dtypes, we have to use 0

File groupby.pyx:2287, in pandas._libs.groupby.group_cummax()

File <stringsource>:663, in View.MemoryView.memoryview_cwrapper()

File <stringsource>:353, in View.MemoryView.memoryview.__cinit__()

ValueError: ndarray is not C-contiguous

Issue Description
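`df.groupby('a')['b'].cummax()` raises `ValueError: ndarray is not C-contiguous` for nullable integer (`Int64`) data in this case, where the DataFrame has been reversed with `[::-1]` before grouping.
Note that the same operation works fine for pyarrow-backed integers.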
Expected Behavior
2    6
1    5
0    5
Name: b, dtype: Int64

Installed Versions
INSTALLED VERSIONS
commit : b552dc9
python : 3.10.12
python-bits : 64
OS : Linux
OS-release : 5.15.167.4-microsoft-standard-WSL2
Version : #1 SMP Tue Nov 5 00:21:55 UTC 2024
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 3.0.0.dev0+618.gb552dc95c9
numpy : 1.26.4
dateutil : 2.9.0.post0
pip : 25.0
Cython : 3.0.11
sphinx : 8.1.3
IPython : 8.32.0
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.13.3
blosc : None
bottleneck : 1.4.2
fastparquet : 2024.11.0
fsspec : 2025.2.0
html5lib : 1.1
hypothesis : 6.125.2
gcsfs : 2025.2.0
jinja2 : 3.1.5
lxml.etree : 5.3.0
matplotlib : 3.10.0
numba : 0.61.0
numexpr : 2.10.2
odfpy : None
openpyxl : 3.1.5
psycopg2 : 2.9.10
pymysql : 1.4.6
pyarrow : 19.0.0
pyreadstat : 1.2.8
pytest : 8.3.4
python-calamine : None
pytz : 2025.1
pyxlsb : 1.0.10
s3fs : 2025.2.0
scipy : 1.15.1
sqlalchemy : 2.0.37
tables : 3.10.1
tabulate : 0.9.0
xarray : 2024.9.0
xlrd : 2.0.1
xlsxwriter : 3.2.2
zstandard : 0.23.0
tzdata : 2025.1
qtpy : None
pyqt5 : None
Spotted in Narwhals