Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
intended for public consumption
"""
from textwrap import dedent
from typing import Dict
from typing import Dict, Iterable, Union
from warnings import catch_warnings, simplefilter, warn

import numpy as np
Expand Down Expand Up @@ -1105,6 +1105,22 @@ def _get_score(at):
return result


def check_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would this make more sense in validators?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's reasonable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I move it to util/_validators.py?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea let's do that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would renaming it to validate_percentile be better?

"""
Validate percentiles (used by describe and quantile).
"""

msg = "percentiles should all be in the interval [0, 1]. " "Try {0} instead."
q_arr = np.asarray(q)
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg.format(q_arr / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q_arr):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a large array this is going to be much less performant than the vectorized check ((0 <= q_arr) & (q_arr <= 1)).all()

raise ValueError(msg.format(q / 100.0))
return q_arr


# --------------- #
# select n #
# --------------- #
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8206,7 +8206,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
C 1 days 12:00:00
Name: 0.5, dtype: object
"""
self._check_percentile(q)
algorithms.check_percentile(q)

data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
Expand Down
17 changes: 1 addition & 16 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10157,7 +10157,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
percentiles = list(percentiles)

# get them all to be in [0, 1]
self._check_percentile(percentiles)
algos.check_percentile(percentiles)

# median should always be included
if 0.5 not in percentiles:
Expand Down Expand Up @@ -10261,21 +10261,6 @@ def describe_1d(data):
d.columns = data.columns.copy()
return d

def _check_percentile(self, q):
    """
    Validate percentiles (used by describe and quantile).

    Parameters
    ----------
    q : float or array-like of float
        Candidate percentile(s), expressed as fractions.

    Returns
    -------
    numpy.ndarray
        ``q`` converted with ``np.asarray``.

    Raises
    ------
    ValueError
        If any value lies outside the closed interval [0, 1].
    """
    msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
    q = np.asarray(q)
    # One vectorized comparison covers 0-dim scalars and arrays of any
    # dimensionality, and avoids a Python-level loop over the elements
    # (which also failed with an unrelated "ambiguous truth value" error
    # on multi-dimensional input).
    if not ((0 <= q) & (q <= 1)).all():
        raise ValueError(msg.format(q / 100.0))
    return q

_shared_docs[
"pct_change"
] = """
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2354,7 +2354,7 @@ def quantile(self, q=0.5, interpolation="linear"):
dtype: float64
"""

self._check_percentile(q)
algorithms.check_percentile(q)

# We dispatch to DataFrame so that core.internals only has to worry
# about 2D cases.
Expand Down