pandas-dev · jreback · Mar 22, 2020 · Mar 19, 2020 · Mar 19, 2020 · Mar 19, 2020
diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
@@ -45,8 +45,7 @@ def time_sparse_array(self, dense_proportion, fill_value, dtype):
 class SparseDataFrameConstructor:
  def setup(self):
  N = 1000
- self.arr = np.arange(N)
- self.sparse = scipy.sparse.rand(N, N, 0.005)
+ self.sparse = scipy.sparse.rand(N, N, 0.005, random_state=0)
 
  def time_from_scipy(self):
  pd.DataFrame.sparse.from_spmatrix(self.sparse)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -224,6 +224,9 @@ Performance improvements
 - The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
  avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
  existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`)
+- Performance improvement when creating a :class:`DataFrame` with sparse values
+ from ``scipy.sparse`` matrices using the
+ :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32196`).
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
@@ -34,18 +34,21 @@ cdef class IntIndex(SparseIndex):
  length : integer
  indices : array-like
  Contains integers corresponding to the indices.
+ check_integrity : bool, default True
+ Whether to check the integrity of the input.
  """
 
  cdef readonly:
  Py_ssize_t length, npoints
  ndarray indices
 
- def __init__(self, Py_ssize_t length, indices):
+ def __init__(self, Py_ssize_t length, indices, check_integrity=True):
  self.length = length
  self.indices = np.ascontiguousarray(indices, dtype=np.int32)
  self.npoints = len(self.indices)
 
- self.check_integrity()
+ if check_integrity:
+ self.check_integrity()
 
  def __reduce__(self):
  args = (self.length, self.indices)

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -227,15 +227,24 @@ def from_spmatrix(cls, data, index=None, columns=None):
  1 0.0 1.0 0.0
  2 0.0 0.0 1.0
  """
- from pandas import DataFrame
+ from pandas import DataFrame, SparseDtype
+ from . import IntIndex, SparseArray
 
  data = data.tocsc()
  index, columns = cls._prep_index(data, index, columns)
- sparrays = [SparseArray.from_spmatrix(data[:, i]) for i in range(data.shape[1])]
- data = dict(enumerate(sparrays))
- result = DataFrame(data, index=index)
- result.columns = columns
- return result
+ n_rows, n_columns = data.shape
+ data.sort_indices()
+ indices = data.indices
+ indptr = data.indptr
+ data = data.data
+ dtype = SparseDtype(data.dtype, 0)
+ arrays = []
+ for i in range(n_columns):
+ sl = slice(indptr[i], indptr[i + 1])
+ idx = IntIndex(n_rows, indices[sl], check_integrity=False)
+ arr = SparseArray._simple_new(data[sl], idx, dtype)
+ arrays.append(arr)
+ return DataFrame._from_arrays(arrays, columns=columns, index=index)
 
  def to_dense(self):
  """