Skip to content
Next Next commit
REF: move MaskedArray quantile logic to MaskedArray
  • Loading branch information
jbrockmendel committed Nov 12, 2021
commit a5e31f2ec01dc383197ccbada38c3a22915f7628
43 changes: 5 additions & 38 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,23 @@ def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> Ar
if isinstance(values, np.ndarray):
fill_value = na_value_for_dtype(values.dtype, compat=False)
mask = isna(values)
return _quantile_with_mask(values, mask, fill_value, qs, interpolation)
return quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
# In general we don't want to import from arrays here;
# this is temporary pending discussion in GH#41428
from pandas.core.arrays import BaseMaskedArray

if isinstance(values, BaseMaskedArray):
# e.g. IntegerArray, does not implement _from_factorized
out = _quantile_ea_fallback(values, qs, interpolation)
return values._quantile(qs, interpolation)

else:
out = _quantile_ea_compat(values, qs, interpolation)

return out


def _quantile_with_mask(
def quantile_with_mask(
values: np.ndarray,
mask: np.ndarray,
fill_value,
Expand Down Expand Up @@ -115,7 +115,7 @@ def _quantile_ea_compat(
values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
"""
ExtensionArray compatibility layer for _quantile_with_mask.
ExtensionArray compatibility layer for quantile_with_mask.

We pretend that an ExtensionArray with shape (N,) is actually (1, N,)
for compatibility with non-EA code.
Expand All @@ -140,7 +140,7 @@ def _quantile_ea_compat(
arr, fill_value = values._values_for_factorize()
arr = np.atleast_2d(arr)

result = _quantile_with_mask(arr, mask, fill_value, qs, interpolation)
result = quantile_with_mask(arr, mask, fill_value, qs, interpolation)

if not is_sparse(orig.dtype):
# shape[0] should be 1 as long as EAs are 1D
Expand All @@ -155,36 +155,3 @@ def _quantile_ea_compat(

# error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
return result # type: ignore[return-value]


def _quantile_ea_fallback(
    values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
    """
    Quantile fallback for ExtensionArray subclasses lacking
    `_from_factorized` (IntegerArray is one example).

    The array and its NA mask are promoted to 2D, handed to
    `_quantile_with_mask`, and the single resulting row is rebuilt as
    ``type(values)`` with the original dtype.  If `_from_sequence` raises
    TypeError, a 2D float64 ndarray is returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    # Both inputs are lifted to at least 2D before the call.
    na_mask_2d = np.atleast_2d(np.asarray(values.isna()))
    data_2d = np.atleast_2d(np.asarray(values))

    quantiles_2d = _quantile_with_mask(
        data_2d,
        mask=na_mask_2d,
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )

    # Exactly one row is expected back; unwrap it.
    assert quantiles_2d.ndim == 2
    assert quantiles_2d.shape[0] == 1
    row = quantiles_2d[0]

    array_type = type(values)
    try:
        return array_type._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.atleast_2d(np.asarray(row, dtype=np.float64))
33 changes: 33 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
take,
)
from pandas.core.array_algos import masked_reductions
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.indexers import check_array_indexer
Expand Down Expand Up @@ -627,6 +628,38 @@ def value_counts(self, dropna: bool = True) -> Series:

return Series(counts, index=index)

def _quantile(
    values: BaseMaskedArrayT, qs: npt.NDArray[np.float64], interpolation: str
) -> BaseMaskedArrayT:
    """
    Compute quantiles by delegating to quantile_with_mask.

    This path is needed because we do not have _from_factorized.

    Parameters
    ----------
    values : BaseMaskedArrayT
        The array whose quantiles are computed (the bound instance).
    qs : np.ndarray
        Passed through unchanged to quantile_with_mask.
    interpolation : str
        Passed through unchanged to quantile_with_mask.

    Returns
    -------
    An array built with ``type(values)._from_sequence`` using the original
    dtype; if that raises TypeError, a float64 ndarray instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    # Both inputs are lifted to at least 2D before the call.
    na_mask_2d = np.atleast_2d(np.asarray(values.isna()))
    data_2d = np.atleast_2d(np.asarray(values))

    quantiles_2d = quantile_with_mask(
        data_2d,
        mask=na_mask_2d,
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )

    # Exactly one row is expected back; unwrap it.
    assert quantiles_2d.ndim == 2
    assert quantiles_2d.shape[0] == 1
    row = quantiles_2d[0]

    array_type = type(values)
    try:
        return array_type._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.asarray(row, dtype=np.float64)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,9 @@ def quantile(
assert is_list_like(qs) # caller is responsible for this

result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
# ensure_block_shape needed for cases where we start with EA and result
# is ndarray, e.g. IntegerArray, SparseArray
result = ensure_block_shape(result, ndim=2)
return new_block_2d(result, placement=self._mgr_locs)


Expand Down