pandas-dev · jreback · May 29, 2019 · Mar 15, 2019 · Mar 12, 2019 · May 14, 2019
diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -299,14 +299,39 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+Sparse Subclasses
+^^^^^^^^^^^^^^^^^
+
+The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better provided
+by a ``Series`` or ``DataFrame`` with sparse values.
+
+**Previous Way**
+
+.. ipython:: python
+ :okwarning:
+
+ df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
+ df.dtypes
+
+**New Way**
+
+.. ipython:: python
+
+ df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
+ df.dtypes
+
+The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+
 - The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
 - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
 - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
  the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
 - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
 - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
 
-
 .. _whatsnew_0250.prior_deprecations:
 
 Removal of prior version deprecations/changes

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -2014,9 +2014,9 @@ def from_coo(cls, A, dense_index=False):
  from pandas.core.sparse.scipy_sparse import _coo_to_sparse_series
  from pandas import Series
 
- result = _coo_to_sparse_series(A, dense_index=dense_index)
- # SparseSeries -> Series[sparse]
- result = Series(result.values, index=result.index, copy=False)
+ result = _coo_to_sparse_series(A, dense_index=dense_index,
+  sparse_series=False)
+ result = Series(result.array, index=result.index, copy=False)
 
  return result
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1930,13 +1930,13 @@ def to_sparse(self, fill_value=None, kind='block'):
  >>> type(df)
  <class 'pandas.core.frame.DataFrame'>
 
- >>> sdf = df.to_sparse()
- >>> sdf
+ >>> sdf = df.to_sparse() # doctest: +SKIP
+ >>> sdf # doctest: +SKIP
  0 1
  0 NaN NaN
  1 1.0 NaN
  2 NaN 1.0
- >>> type(sdf)
+ >>> type(sdf) # doctest: +SKIP
  <class 'pandas.core.sparse.frame.SparseDataFrame'>
  """
  from pandas.core.sparse.api import SparseDataFrame

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5589,7 +5589,7 @@ def ftypes(self):
  3 float64:dense
  dtype: object
 
- >>> pd.SparseDataFrame(arr).ftypes
+ >>> pd.SparseDataFrame(arr).ftypes # doctest: +SKIP
  0 float64:sparse
  1 float64:sparse
  2 float64:sparse

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1586,7 +1586,6 @@ def to_sparse(self, kind='block', fill_value=None):
  SparseSeries
  Sparse representation of the Series.
  """
- # TODO: deprecate
  from pandas.core.sparse.series import SparseSeries
 
  values = SparseArray(self, kind=kind, fill_value=fill_value)

diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -28,13 +28,24 @@
 from pandas.core.sparse.series import SparseSeries
 
 _shared_doc_kwargs = dict(klass='SparseDataFrame')
+depr_msg = """\
+SparseDataFrame is deprecated and will be removed in a future version.
+Use a regular DataFrame whose columns are SparseArrays instead.
+
+See http://pandas.pydata.org/pandas-docs/stable/\
+user_guide/sparse.html#migrating for more.
+"""
 
 
 class SparseDataFrame(DataFrame):
  """
  DataFrame containing sparse floating point data in the form of SparseSeries
  objects
 
+ .. deprecated:: 0.25.0
+
+ Use a DataFrame with sparse values instead.
+
  Parameters
  ----------
  data : same types as can be passed to DataFrame or scipy.sparse.spmatrix
@@ -56,6 +67,7 @@ class SparseDataFrame(DataFrame):
  def __init__(self, data=None, index=None, columns=None, default_kind=None,
  default_fill_value=None, dtype=None, copy=False):
 
+ warnings.warn(depr_msg, FutureWarning, stacklevel=2)
  # pick up the defaults from the Sparse structures
  if isinstance(data, SparseDataFrame):
  if index is None:

diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py
@@ -116,14 +116,32 @@ def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
  return sparse_matrix, rows, columns
 
 
-def _coo_to_sparse_series(A, dense_index=False):
+def _coo_to_sparse_series(A, dense_index: bool = False,
+ sparse_series: bool = True):
  """
  Convert a scipy.sparse.coo_matrix to a SparseSeries.
- Use the defaults given in the SparseSeries constructor.
+
+ Parameters
+ ----------
+ A : scipy.sparse.coo.coo_matrix
+ dense_index : bool, default False
+ sparse_series : bool, default True
+
+ Returns
+ -------
+ Series or SparseSeries
  """
+ from pandas import SparseDtype
+
  s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
  s = s.sort_index()
- s = s.to_sparse() # TODO: specify kind?
+ if sparse_series:
+ # TODO(SparseSeries): remove this and the sparse_series keyword.
+ # This is just here to avoid a FutureWarning when
+ # _coo_to_sparse_series is called via Series.sparse.from_coo
+ s = s.to_sparse() # TODO: specify kind?
+ else:
+ s = s.astype(SparseDtype(s.dtype))
  if dense_index:
  # is there a better constructor method to use here?
  i = range(A.shape[0])

diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
@@ -32,9 +32,24 @@
  optional_labels='', optional_axis='')
 
 
+depr_msg = """\
+SparseSeries is deprecated and will be removed in a future version.
+Use a Series with sparse values instead.
+
+ >>> series = pd.Series(pd.SparseArray(...))
+
+See http://pandas.pydata.org/pandas-docs/stable/\
+user_guide/sparse.html#migrating for more.
+"""
+
+
 class SparseSeries(Series):
  """Data structure for labeled, sparse floating point data
 
+ .. deprecated:: 0.25.0
+
+ Use a Series with sparse values instead.
+
  Parameters
  ----------
  data : {array-like, Series, SparseSeries, dict}
@@ -60,6 +75,7 @@ class SparseSeries(Series):
  def __init__(self, data=None, index=None, sparse_index=None, kind='block',
  fill_value=None, name=None, dtype=None, copy=False,
  fastpath=False):
+ warnings.warn(depr_msg, FutureWarning, stacklevel=2)
  # TODO: Most of this should be refactored and shared with Series
  # 1. BlockManager -> array
  # 2. Series.index, Series.name, index, name reconciliation

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
@@ -101,3 +101,21 @@ def test_density(self):
  res = df.sparse.density
  expected = 0.75
  assert res == expected
+
+ @pytest.mark.parametrize("dtype", ['int64', 'float64'])
+ @pytest.mark.parametrize("dense_index", [True, False])
+ @td.skip_if_no_scipy
+ def test_series_from_coo(self, dtype, dense_index):
+ import scipy.sparse
+
+ A = scipy.sparse.eye(3, format='coo', dtype=dtype)
+ result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
+ index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
+ expected = pd.Series(pd.SparseArray(np.array([1, 1, 1], dtype=dtype)),
+ index=index)
+ if dense_index:
+ expected = expected.reindex(
+ pd.MultiIndex.from_product(index.levels)
+ )
+
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py
@@ -8,6 +8,7 @@
 import pandas.util.testing as tm
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestSparseArrayArithmetics:
 
  _base = np.array

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -215,6 +215,7 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype):
  assert exp.dtype == dtype
 
  @pytest.mark.parametrize("fill", [1, np.nan, 0])
+ @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
  def test_sparse_series_round_trip(self, kind, fill):
  # see gh-13999
  arr = SparseArray([np.nan, 1, np.nan, 2, 3],
@@ -231,6 +232,7 @@ def test_sparse_series_round_trip(self, kind, fill):
  tm.assert_sp_array_equal(arr, res)
 
  @pytest.mark.parametrize("fill", [True, False, np.nan])
+ @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
  def test_sparse_series_round_trip2(self, kind, fill):
  # see gh-13999
  arr = SparseArray([True, False, True, True], dtype=np.bool,
@@ -1098,6 +1100,7 @@ def test_npoints(self):
  assert arr.npoints == 1
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestAccessor:
 
  @pytest.mark.parametrize('attr', [

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -15,6 +15,10 @@
 from pandas.core.sparse.api import SparseDtype
 import pandas.util.testing as tm
 
+ignore_sparse_warning = pytest.mark.filterwarnings(
+ "ignore:Sparse:FutureWarning"
+)
+
 
 # EA & Actual Dtypes
 def to_ea_dtypes(dtypes):
@@ -146,6 +150,7 @@ def test_is_object():
 @pytest.mark.parametrize("check_scipy", [
  False, pytest.param(True, marks=td.skip_if_no_scipy)
 ])
+@ignore_sparse_warning
 def test_is_sparse(check_scipy):
  assert com.is_sparse(pd.SparseArray([1, 2, 3]))
  assert com.is_sparse(pd.SparseSeries([1, 2, 3]))
@@ -158,6 +163,7 @@ def test_is_sparse(check_scipy):
 
 
 @td.skip_if_no_scipy
+@ignore_sparse_warning
 def test_is_scipy_sparse():
  from scipy.sparse import bsr_matrix
  assert com.is_scipy_sparse(bsr_matrix([1, 2, 3]))
@@ -529,6 +535,7 @@ def test_is_bool_dtype():
 @pytest.mark.parametrize("check_scipy", [
  False, pytest.param(True, marks=td.skip_if_no_scipy)
 ])
+@ignore_sparse_warning
 def test_is_extension_type(check_scipy):
  assert not com.is_extension_type([1, 2, 3])
  assert not com.is_extension_type(np.array([1, 2, 3]))
@@ -595,8 +602,6 @@ def test_is_offsetlike():
  (pd.DatetimeIndex([1, 2]).dtype, np.dtype('=M8[ns]')),
  ('<M8[ns]', np.dtype('<M8[ns]')),
  ('datetime64[ns, Europe/London]', DatetimeTZDtype('ns', 'Europe/London')),
- (pd.SparseSeries([1, 2], dtype='int32'), SparseDtype('int32')),
- (pd.SparseSeries([1, 2], dtype='int32').dtype, SparseDtype('int32')),
  (PeriodDtype(freq='D'), PeriodDtype(freq='D')),
  ('period[D]', PeriodDtype(freq='D')),
  (IntervalDtype(), IntervalDtype()),
@@ -605,6 +610,14 @@ def test__get_dtype(input_param, result):
  assert com._get_dtype(input_param) == result
 
 
+@ignore_sparse_warning
+def test__get_dtype_sparse():
+ ser = pd.SparseSeries([1, 2], dtype='int32')
+ expected = SparseDtype('int32')
+ assert com._get_dtype(ser) == expected
+ assert com._get_dtype(ser.dtype) == expected
+
+
 @pytest.mark.parametrize('input_param,expected_error_message', [
  (None, "Cannot deduce dtype from null object"),
  (1, "data type not understood"),
@@ -640,8 +653,7 @@ def test__get_dtype_fails(input_param, expected_error_message):
  (pd.DatetimeIndex(['2000'], tz='Europe/London').dtype,
  pd.Timestamp),
  ('datetime64[ns, Europe/London]', pd.Timestamp),
- (pd.SparseSeries([1, 2], dtype='int32'), np.int32),
- (pd.SparseSeries([1, 2], dtype='int32').dtype, np.int32),
+
  (PeriodDtype(freq='D'), pd.Period),
  ('period[D]', pd.Period),
  (IntervalDtype(), pd.Interval),
@@ -652,3 +664,11 @@ def test__get_dtype_fails(input_param, expected_error_message):
 ])
 def test__is_dtype_type(input_param, result):
  assert com._is_dtype_type(input_param, lambda tipo: tipo == result)
+
+
+@ignore_sparse_warning
+def test__is_dtype_type_sparse():
+ ser = pd.SparseSeries([1, 2], dtype='int32')
+ result = np.dtype('int32')
+ assert com._is_dtype_type(ser, lambda tipo: tipo == result)
+ assert com._is_dtype_type(ser.dtype, lambda tipo: tipo == result)
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -870,7 +870,6 @@ def test_registry_find(dtype, expected):
  (pd.Series([1, 2]), False),
  (np.array([True, False]), True),
  (pd.Series([True, False]), True),
- (pd.SparseSeries([True, False]), True),
  (pd.SparseArray([True, False]), True),
  (SparseDtype(bool), True)
 ])
@@ -879,6 +878,12 @@ def test_is_bool_dtype(dtype, expected):
  assert result is expected
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+def test_is_bool_dtype_sparse():
+ result = is_bool_dtype(pd.SparseSeries([True, False]))
+ assert result is True
+
+
 @pytest.mark.parametrize("check", [
  is_categorical_dtype,
  is_datetime64tz_dtype,

diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py
@@ -1,4 +1,4 @@
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
 
 import numpy as np
 
@@ -17,9 +17,12 @@ class TestABCClasses:
  categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1])
  categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical)
  df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index)
- sparse_series = pd.Series([1, 2, 3]).to_sparse()
+ with catch_warnings():
+ simplefilter('ignore', FutureWarning)
+ sparse_series = pd.Series([1, 2, 3]).to_sparse()
+ sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
+
  sparse_array = pd.SparseArray(np.random.randn(10))
- sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
  datetime_array = pd.core.arrays.DatetimeArray(datetime_index)
  timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index)
 

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -13,6 +13,7 @@
 import pandas.util.testing as tm
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestDataFrameAlterAxes:
 
  def test_set_index_directly(self, float_string_frame):
@@ -1376,6 +1377,7 @@ def test_droplevel(self):
  tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 class TestIntervalIndex:
 
  def test_setitem(self):

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -2073,6 +2073,7 @@ def test_loc_duplicates(self):
  df.loc[trange[bool_idx], "A"] += 6
  tm.assert_frame_equal(df, expected)
 
+ @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
  def test_iloc_sparse_propegate_fill_value(self):
  from pandas.core.sparse.api import SparseDataFrame
  df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)