Skip to content
102 changes: 3 additions & 99 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas._typing import (
ArrayLike,
npt,
)

from pandas.core.dtypes.common import is_sparse
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)

from pandas.core.nanops import nanpercentile

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray


def quantile_compat(
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
Expand All @@ -40,23 +34,12 @@ def quantile_compat(
if isinstance(values, np.ndarray):
fill_value = na_value_for_dtype(values.dtype, compat=False)
mask = isna(values)
return _quantile_with_mask(values, mask, fill_value, qs, interpolation)
return quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
# In general we don't want to import from arrays here;
# this is temporary pending discussion in GH#41428
from pandas.core.arrays import BaseMaskedArray

if isinstance(values, BaseMaskedArray):
# e.g. IntegerArray, does not implement _from_factorized
out = _quantile_ea_fallback(values, qs, interpolation)

else:
out = _quantile_ea_compat(values, qs, interpolation)
return values._quantile(qs, interpolation)

return out


def _quantile_with_mask(
def quantile_with_mask(
values: np.ndarray,
mask: np.ndarray,
fill_value,
Expand Down Expand Up @@ -114,82 +97,3 @@ def _quantile_with_mask(
result = result.T

return result


def _quantile_ea_compat(
    values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArray:
    """
    Run _quantile_with_mask on an ExtensionArray by treating it as 2D.

    A 1D ExtensionArray of shape (N,) is presented to the ndarray code
    path as if it had shape (1, N); the result is converted back with
    ``_from_factorized`` unless the dtype is sparse.

    Parameters
    ----------
    values : ExtensionArray
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # np.asarray on the NA mask is needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    factorized, fill_value = values._values_for_factorize()
    arr_2d = np.atleast_2d(factorized)

    result = _quantile_with_mask(arr_2d, na_mask, fill_value, qs, interpolation)

    if is_sparse(values.dtype):
        # Sparse input: hand back the raw result without re-wrapping.
        # error: Incompatible return value type
        # (got "ndarray", expected "ExtensionArray")
        return result  # type: ignore[return-value]

    ea_type = type(values)
    if values.ndim == 2:
        # i.e. DatetimeArray
        return ea_type._from_factorized(result, values)

    # shape[0] should be 1 as long as EAs are 1D
    assert result.shape == (1, len(qs)), result.shape
    return ea_type._from_factorized(result[0], values)


def _quantile_ea_fallback(
    values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses lacking `_from_factorized`,
    e.g. IntegerArray.

    The values and their NA mask are converted to 2D ndarrays, passed to
    _quantile_with_mask, and the single result row is rebuilt with
    ``_from_sequence``. If that raises TypeError, a 2D float64 ndarray is
    returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask_2d = np.atleast_2d(np.asarray(values.isna()))
    values_2d = np.atleast_2d(np.asarray(values))
    na_value = values.dtype.na_value

    quantiles = _quantile_with_mask(
        values_2d,
        mask=na_mask_2d,
        fill_value=na_value,
        qs=qs,
        interpolation=interpolation,
    )
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    try:
        return type(values)._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.atleast_2d(np.asarray(row, dtype=np.float64))
25 changes: 25 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
unique,
value_counts,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -463,6 +464,30 @@ def value_counts(self, dropna: bool = True):
index = Index(index_arr, name=result.index.name)
return Series(result._values, index=index, name=result.name)

def _quantile(
    self: NDArrayBackedExtensionArrayT,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> NDArrayBackedExtensionArrayT:
    """
    Compute the quantiles in `qs` from the backing ndarray.

    The backing array and NA mask are promoted to 2D, passed to
    quantile_with_mask, and the result is re-wrapped with
    ``_from_factorized``. For 1D input the single result row is returned.
    """
    # TODO: disable for Categorical if not ordered?

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(self.isna()))
    values_2d = np.atleast_2d(self._ndarray)

    # TODO: something NDArrayBacked-specific instead of _values_for_factorize[1]?
    fill_value = self._values_for_factorize()[1]

    quantiles = quantile_with_mask(values_2d, na_mask, fill_value, qs, interpolation)
    wrapped = type(self)._from_factorized(quantiles, self)

    if self.ndim != 1:
        return wrapped

    # 1D input was promoted to a single row; unwrap it again.
    assert wrapped.shape == (1, len(qs)), wrapped.shape
    return wrapped[0]

# ------------------------------------------------------------------------
# numpy-like methods

Expand Down
36 changes: 36 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
isin,
unique,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.sorting import (
nargminmax,
nargsort,
Expand Down Expand Up @@ -1494,6 +1495,41 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
)
return result

def _quantile(
    self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArrayT:
    """
    Compute the quantiles of self for each quantile in `qs`.

    The array and its NA mask are converted to 2D ndarrays, passed to
    quantile_with_mask with NaN as the fill value, and the result is
    rebuilt with ``_from_sequence``.

    Parameters
    ----------
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    same type as self
    """
    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(self.isna()))
    values_2d = np.atleast_2d(np.asarray(self))

    quantiles = quantile_with_mask(values_2d, na_mask, np.nan, qs, interpolation)

    if self.ndim == 2:
        # i.e. DatetimeArray
        return type(self)._from_sequence(quantiles)

    # shape[0] should be 1 as long as EAs are 1D
    assert quantiles.shape == (1, len(qs)), quantiles.shape
    return type(self)._from_sequence(quantiles[0])

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if any(
isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
take,
)
from pandas.core.array_algos import masked_reductions
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.indexers import check_array_indexer
Expand Down Expand Up @@ -692,6 +693,38 @@ def equals(self, other) -> bool:
right = other._data[~other._mask]
return array_equivalent(left, right, dtype_equal=True)

def _quantile(
    self: BaseMaskedArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArrayT:
    """
    Dispatch to quantile_with_mask, needed because we do not have
    _from_factorized.

    The single result row is rebuilt with ``_from_sequence`` using our
    dtype; if that raises TypeError, a float64 ndarray is returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask_2d = np.atleast_2d(np.asarray(self.isna()))
    values_2d = np.atleast_2d(np.asarray(self))
    na_value = self.dtype.na_value

    quantiles = quantile_with_mask(
        values_2d,
        mask=na_mask_2d,
        fill_value=na_value,
        qs=qs,
        interpolation=interpolation,
    )
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    try:
        return type(self)._from_sequence(row, dtype=self.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.asarray(row, dtype=np.float64)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,12 @@ def value_counts(self, dropna: bool = True) -> Series:
keys = Index(keys)
return Series(counts, index=keys)

def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str):
    """
    Compute quantiles with the parent implementation and return an ndarray.

    Special case: the returned array isn't _really_ sparse, so we don't
    wrap it in a SparseArray.
    """
    parent_result = super()._quantile(qs, interpolation)
    return np.asarray(parent_result)

# --------
# Indexing
# --------
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,9 @@ def quantile(
assert is_list_like(qs) # caller is responsible for this

result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
# ensure_block_shape needed for cases where we start with EA and result
# is ndarray, e.g. IntegerArray, SparseArray
result = ensure_block_shape(result, ndim=2)
return new_block_2d(result, placement=self._mgr_locs)


Expand Down