44Method NDFrame.describe() delegates actual execution to function describe_ndframe().
55"""
66
7- from typing import TYPE_CHECKING , List , Optional , Sequence , Union
7+ from typing import TYPE_CHECKING , List , Optional , Sequence , Union , cast
88import warnings
99
1010import numpy as np
@@ -62,32 +62,14 @@ def describe_ndframe(
6262 if obj .ndim == 2 and obj .columns .size == 0 :
6363 raise ValueError ("Cannot describe a DataFrame without columns" )
6464
65- if percentiles is not None :
66- # explicit conversion of `percentiles` to list
67- percentiles = list (percentiles )
68-
69- # get them all to be in [0, 1]
70- validate_percentile (percentiles )
71-
72- # median should always be included
73- if 0.5 not in percentiles :
74- percentiles .append (0.5 )
75- percentiles = np .asarray (percentiles )
76- else :
77- percentiles = np .array ([0.25 , 0.5 , 0.75 ])
78-
79- # sort and check for duplicates
80- unique_pcts = np .unique (percentiles )
81- assert percentiles is not None
82- if len (unique_pcts ) < len (percentiles ):
83- raise ValueError ("percentiles cannot contain duplicates" )
84- percentiles = unique_pcts
65+ percentiles = _refine_percentiles (percentiles )
8566
8667 if obj .ndim == 1 :
68+ series = cast ("Series" , obj )
8769 # Incompatible return value type
8870 # (got "Series", expected "FrameOrSeries") [return-value]
8971 return describe_1d (
90- obj ,
72+ series ,
9173 percentiles ,
9274 datetime_is_numeric ,
9375 is_series = True ,
@@ -125,14 +107,14 @@ def describe_ndframe(
125107 return d
126108
127109
128- def describe_numeric_1d (series , percentiles ) -> "Series" :
110+ def describe_numeric_1d (series : "Series" , percentiles : Sequence [ float ] ) -> "Series" :
129111 """Describe series containing numerical data.
130112
131113 Parameters
132114 ----------
133115 series : Series
134116 Series to be described.
135- percentiles : list-like of numbers, optional
117+ percentiles : list-like of numbers
136118 The percentiles to include in the output.
137119 """
138120 from pandas import Series
@@ -148,7 +130,7 @@ def describe_numeric_1d(series, percentiles) -> "Series":
148130 return Series (d , index = stat_index , name = series .name )
149131
150132
151- def describe_categorical_1d (data , is_series ) -> "Series" :
133+ def describe_categorical_1d (data : "Series" , is_series : bool ) -> "Series" :
152134 """Describe series containing categorical data.
153135
154136 Parameters
@@ -210,14 +192,14 @@ def describe_categorical_1d(data, is_series) -> "Series":
210192 return Series (result , index = names , name = data .name , dtype = dtype )
211193
212194
213- def describe_timestamp_1d (data , percentiles ) -> "Series" :
195+ def describe_timestamp_1d (data : "Series" , percentiles : Sequence [ float ] ) -> "Series" :
214196 """Describe series containing datetime64 dtype.
215197
216198 Parameters
217199 ----------
218200 data : Series
219201 Series to be described.
220- percentiles : list-like of numbers, optional
202+ percentiles : list-like of numbers
221203 The percentiles to include in the output.
222204 """
223205 # GH-30164
@@ -234,14 +216,20 @@ def describe_timestamp_1d(data, percentiles) -> "Series":
234216 return Series (d , index = stat_index , name = data .name )
235217
236218
237- def describe_1d (data , percentiles , datetime_is_numeric , * , is_series ) -> "Series" :
219+ def describe_1d (
220+ data : "Series" ,
221+ percentiles : Sequence [float ],
222+ datetime_is_numeric : bool ,
223+ * ,
224+ is_series : bool ,
225+ ) -> "Series" :
238226 """Describe series.
239227
240228 Parameters
241229 ----------
242230 data : Series
243231 Series to be described.
244- percentiles : list-like of numbers, optional
232+ percentiles : list-like of numbers
245233 The percentiles to include in the output.
246234 datetime_is_numeric : bool, default False
247235 Whether to treat datetime dtypes as numeric.
@@ -263,3 +251,35 @@ def describe_1d(data, percentiles, datetime_is_numeric, *, is_series) -> "Series
263251 return describe_numeric_1d (data , percentiles )
264252 else :
265253 return describe_categorical_1d (data , is_series )
254+
255+
def _refine_percentiles(percentiles: Optional[Sequence[float]]) -> Sequence[float]:
    """Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. When None, the default
        quartiles ``[0.25, 0.5, 0.75]`` are returned.

    Returns
    -------
    numpy.ndarray
        Sorted percentiles without repeats. The median (0.5) is always
        present: it is appended when the caller did not supply it.

    Raises
    ------
    ValueError
        If the percentiles (after the median has been added) contain
        duplicates. ``validate_percentile`` (defined elsewhere) may raise
        as well for values it rejects.
    """
    if percentiles is None:
        return np.array([0.25, 0.5, 0.75])

    # Work on a private list so the append below never mutates the
    # object handed in by the caller.
    percentiles = list(percentiles)

    # get them all to be in [0, 1]
    validate_percentile(percentiles)

    # median should always be included
    if 0.5 not in percentiles:
        percentiles.append(0.5)

    # np.unique returns the sorted distinct values, so a shorter result
    # than the input means at least one percentile was repeated.
    unique_pcts = np.unique(percentiles)
    if len(unique_pcts) < len(percentiles):
        raise ValueError("percentiles cannot contain duplicates")

    return unique_pcts
0 commit comments