Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ Sparse
^^^^^^
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
- Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly reading a scipy sparse matrix (:issue:`31991`)
-
- Bug in :meth:`Series.sum` with ``SparseArray`` raising ``TypeError`` (:issue:`25777`)

ExtensionArray
^^^^^^^^^^^^^^
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1220,7 +1220,7 @@ def any(self, axis=0, *args, **kwargs):

return values.any().item()

def sum(self, axis=0, *args, **kwargs):
def sum(self, axis=0, min_count=0, *args, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add to the doc-string

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and type args if you can

"""
Sum of non-NA/null values

Expand All @@ -1230,11 +1230,17 @@ def sum(self, axis=0, *args, **kwargs):
"""
nv.validate_sum(args, kwargs)
valid_vals = self._valid_sp_values
count = len(valid_vals)
sp_sum = valid_vals.sum()
if self._null_fill_value:
if count < min_count:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you use array_ops/check_below_min_count

return na_value_for_dtype(self.dtype.subtype, compat=False)
return sp_sum
else:
nsparse = self.sp_index.ngaps
count += nsparse
if count < min_count:
return na_value_for_dtype(self.dtype.subtype, compat=False)
return sp_sum + self.fill_value * nsparse

def cumsum(self, axis=0, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,9 @@ def na_value_for_dtype(dtype, compat: bool = True):
return 0
return np.nan
elif is_bool_dtype(dtype):
return False
if compat:
return False
return np.nan
return np.nan


Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,25 @@ def test_sum(self):
out = SparseArray(data, fill_value=np.nan).sum()
assert out == 40.0

@pytest.mark.parametrize(
    "arr",
    [
        np.array([0, 1, np.nan, 1]),
        np.array([0, 1, 1]),
        np.array([True, True, False]),
    ],
)
@pytest.mark.parametrize("fill_value", [0, 1, np.nan, True, False])
@pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
def test_sum_min_count(self, arr, fill_value, min_count, expected):
    """Check ``SparseArray.sum`` against ``expected`` for each ``min_count``.

    A NaN ``expected`` means the sum must itself be NaN; any other value
    must compare equal to the sum.
    """
    # https://github.com/pandas-dev/pandas/issues/25777
    total = SparseArray(arr, fill_value=fill_value).sum(min_count=min_count)
    if not np.isnan(expected):
        assert total == expected
        return
    assert np.isnan(total)

def test_numpy_sum(self):
data = np.arange(10).astype(float)
out = np.sum(SparseArray(data))
Expand Down