Merged
13 changes: 13 additions & 0 deletions asv_bench/benchmarks/sparse.py
@@ -195,4 +195,17 @@ def time_take(self, indices, allow_fill):
self.sp_arr.take(indices, allow_fill=allow_fill)


class GetItem:
def setup(self):
N = 1_000_000
arr = make_array(N, 1e-5, np.nan, np.float64)
self.sp_arr = SparseArray(arr)

def time_integer_indexing(self):
self.sp_arr[78]

def time_slice(self):
self.sp_arr[1:]


from .pandas_vb_common import setup # noqa: F401 isort:skip
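The new GetItem benchmark above indexes a million-element array with roughly 1e-5 density. For a rough sense of what it measures outside asv, here is a standalone sketch (not part of the PR: make_array is approximated with plain numpy, and only public pandas APIs are used):

import timeit

import numpy as np
from pandas.arrays import SparseArray

# Approximation of make_array(N, 1e-5, np.nan, np.float64): mostly-NaN
# data with about N * 1e-5 stored values.
N = 1_000_000
rng = np.random.default_rng(0)
dense = np.full(N, np.nan)
dense[rng.choice(N, size=int(N * 1e-5), replace=False)] = 1.0
sp_arr = SparseArray(dense)

# With the fast path, sp_arr[1:] works on sp_values/sp_index directly
# instead of materializing a dense intermediate first.
print(timeit.timeit(lambda: sp_arr[1:], number=100))
print(timeit.timeit(lambda: sp_arr[78], number=100))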
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -357,8 +357,10 @@ Performance improvements
- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`)
- Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
- Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`)
-

.. ---------------------------------------------------------------------------

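A minimal demonstration of the new whatsnew entry, using only the public constructor: a step-1 slice now returns a SparseArray built straight from the stored values, so the sparse dtype is preserved without a dense intermediate.

import numpy as np
from pandas.arrays import SparseArray

sp = SparseArray([np.nan, np.nan, 1.0, np.nan, 2.0])
out = sp[1:4]              # contiguous slice, step=1 -> new fast path
print(type(out).__name__)  # SparseArray
print(out.dtype)           # Sparse[float64, nan]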
44 changes: 35 additions & 9 deletions pandas/core/arrays/sparse/array.py
@@ -892,13 +892,39 @@ def __getitem__(
elif isinstance(key, tuple):
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
# special case to preserve dtypes
if key == slice(None):
return self.copy()
# TODO: this logic is surely elsewhere
# TODO: this could be more efficient
indices = np.arange(len(self), dtype=np.int32)[key]
return self.take(indices)

# Avoid densifying when handling contiguous slices
if key.step is None or key.step == 1:
start = 0 if key.start is None else key.start
if start < 0:
start += len(self)

end = len(self) if key.stop is None else key.stop
if end < 0:
end += len(self)

indices = self.sp_index.to_int_index().indices
keep_inds = np.flatnonzero((indices >= start) & (indices < end))
sp_vals = self.sp_values[keep_inds]

sp_index = indices[keep_inds].copy()

# If we've sliced to not include the start of the array, all our indices
# should be shifted. NB: here we are careful to also not shift by a
# negative value for a case like [0, 1][-100:] where the start index
# should be treated like 0
if start > 0:
sp_index -= start

# Length of our result should match applying this slice to a range
# of the length of our original array
new_len = len(range(len(self))[key])
new_sp_index = make_sparse_index(new_len, sp_index, self.kind)
return type(self)._simple_new(sp_vals, new_sp_index, self.dtype)
else:
indices = np.arange(len(self), dtype=np.int32)[key]
return self.take(indices)

else:
# TODO: I think we can avoid densifying when masking a
# boolean SparseArray with another. Need to look at the
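The new branch above never densifies for step-1 slices: it filters the stored positions down to the [start, end) window, shifts them to be relative to the slice, and rebuilds the index at the sliced length. A hedged standalone sketch of the same logic on plain numpy arrays (slice_sparse is a hypothetical helper, not pandas API):

import numpy as np

def slice_sparse(length, sp_values, sp_indices, key):
    """Apply a step-1 slice without building a dense array.

    sp_values  -- the stored (non-fill) values
    sp_indices -- their positions in the dense equivalent
    """
    assert key.step in (None, 1)

    start = 0 if key.start is None else key.start
    if start < 0:
        start += length

    end = length if key.stop is None else key.stop
    if end < 0:
        end += length

    # Keep only stored values whose position falls inside [start, end)
    keep = np.flatnonzero((sp_indices >= start) & (sp_indices < end))
    new_vals = sp_values[keep]
    new_inds = sp_indices[keep]  # fancy indexing already yields a copy

    # Shift positions so they are relative to the slice; skip negative
    # shifts so a case like [0, 1][-100:] is treated as starting at 0
    if start > 0:
        new_inds -= start

    # The result length matches slicing a range of the original length
    new_len = len(range(length)[key])
    return new_len, new_vals, new_inds

# dense [nan, nan, 7, nan, 8][1:4] keeps the 7 at relative position 1
print(slice_sparse(5, np.array([7.0, 8.0]), np.array([2, 4]), np.s_[1:4]))
# -> (3, array([7.]), array([1]))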
@@ -1745,10 +1771,10 @@ def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntIndex

def make_sparse_index(length: int, indices, kind: SparseIndexKind) -> SparseIndex:
index: SparseIndex
if kind == "block" or isinstance(kind, BlockIndex):
if kind == "block":
locs, lens = splib.get_blocks(indices)
index = BlockIndex(length, locs, lens)
elif kind == "integer" or isinstance(kind, IntIndex):
elif kind == "integer":
index = IntIndex(length, indices)
else: # pragma: no cover
raise ValueError("must be block or integer type")
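The make_sparse_index cleanup above drops the now-unreachable isinstance checks, so kind is strictly the string "block" or "integer". The two index layouts can be compared through the public constructor; a short sketch:

import numpy as np
from pandas.arrays import SparseArray

dense = np.array([0.0, 1.0, 2.0, 0.0, 3.0])

int_arr = SparseArray(dense, fill_value=0.0, kind="integer")
blk_arr = SparseArray(dense, fill_value=0.0, kind="block")

print(int_arr.sp_index)  # IntIndex: stored positions [1, 2, 4]
print(blk_arr.sp_index)  # BlockIndex: block starts [1, 4], lengths [2, 1]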
48 changes: 31 additions & 17 deletions pandas/tests/arrays/sparse/test_array.py
@@ -679,23 +679,37 @@ def test_getitem_arraylike_mask(self):
expected = SparseArray([0, 2])
tm.assert_sp_array_equal(result, expected)

def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.to_dense()[:-3])
tm.assert_sp_array_equal(result, exp)

result = self.arr[-4:]
exp = SparseArray(self.arr.to_dense()[-4:])
tm.assert_sp_array_equal(result, exp)

# two corner cases from Series
result = self.arr[-12:]
exp = SparseArray(self.arr)
tm.assert_sp_array_equal(result, exp)

result = self.arr[:-12]
exp = SparseArray(self.arr.to_dense()[:0])
tm.assert_sp_array_equal(result, exp)
@pytest.mark.parametrize(
"slc",
[
np.s_[:],
np.s_[1:10],
np.s_[1:100],
np.s_[10:1],
np.s_[:-3],
np.s_[-5:-4],
np.s_[:-12],
np.s_[-12:],
np.s_[2:],
np.s_[2::3],
np.s_[::2],
np.s_[::-1],
np.s_[::-2],
np.s_[1:6:2],
np.s_[:-6:-2],
],
)
@pytest.mark.parametrize(
"as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
)
def test_getslice(self, slc, as_dense):
as_dense = np.array(as_dense)
arr = SparseArray(as_dense)

result = arr[slc]
expected = SparseArray(as_dense[slc])

tm.assert_sp_array_equal(result, expected)

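The rewritten test relies on np.s_, numpy's slice-construction helper: indexing it simply returns the slice object, which lets slices be passed as parametrize values. For example:

import numpy as np

assert np.s_[1:10] == slice(1, 10, None)
assert np.s_[::-2] == slice(None, None, -2)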
def test_getslice_tuple(self):
dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])