Skip to content

Commit c91ba28

Browse files
Donk23 authored and TomAugspurger committed
DOC: update the pandas.Series.dropna and pandas.DataFrame.dropna docstring (pandas-dev#20204)
1 parent 552a39f commit c91ba28

File tree

2 files changed

+137
-46
lines changed

2 files changed

+137
-46
lines changed

pandas/core/frame.py

Lines changed: 72 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3885,71 +3885,101 @@ def notnull(self):
38853885
def dropna(self, axis=0, how='any', thresh=None, subset=None,
38863886
inplace=False):
38873887
"""
3888-
Return object with labels on given axis omitted where alternately any
3889-
or all of the data are missing
3888+
Remove missing values.
3889+
3890+
See the :ref:`User Guide <missing_data>` for more on which values are
3891+
considered missing, and how to work with missing data.
38903892
38913893
Parameters
38923894
----------
38933895
axis : {0 or 'index', 1 or 'columns'}, or tuple/list thereof, default 0
3894-
Pass tuple or list to drop on multiple axes
3895-
how : {'any', 'all'}
3896-
* any : if any NA values are present, drop that label
3897-
* all : if all values are NA, drop that label
3898-
thresh : int, default None
3899-
int value : require that many non-NA values
3900-
subset : array-like
3896+
Determine if rows or columns which contain missing values are
3897+
removed.
3898+
3899+
* 0, or 'index' : Drop rows which contain missing values.
3900+
* 1, or 'columns' : Drop columns which contain missing values.
3901+
3902+
Pass tuple or list to drop on multiple axes.
3903+
how : {'any', 'all'}, default 'any'
3904+
Determine if row or column is removed from DataFrame, when we have
3905+
at least one NA or all NA.
3906+
3907+
* 'any' : If any NA values are present, drop that row or column.
3908+
* 'all' : If all values are NA, drop that row or column.
3909+
thresh : int, optional
3910+
Require that many non-NA values.
3911+
subset : array-like, optional
39013912
Labels along other axis to consider, e.g. if you are dropping rows
3902-
these would be a list of columns to include
3903-
inplace : boolean, default False
3913+
these would be a list of columns to include.
3914+
inplace : bool, default False
39043915
If True, do operation inplace and return None.
39053916
39063917
Returns
39073918
-------
3908-
dropped : DataFrame
3919+
DataFrame
3920+
DataFrame with NA entries dropped from it.
3921+
3922+
See Also
3923+
--------
3924+
DataFrame.isna : Indicate missing values.
3925+
DataFrame.notna : Indicate existing (non-missing) values.
3926+
DataFrame.fillna : Replace missing values.
3927+
Series.dropna : Drop missing values.
3928+
Index.dropna : Drop missing indices.
39093929
39103930
Examples
39113931
--------
3912-
>>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], [3, 4, np.nan, 1],
3913-
... [np.nan, np.nan, np.nan, 5]],
3914-
... columns=list('ABCD'))
3932+
>>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
3933+
... "toy": [np.nan, 'Batmobile', 'Bullwhip'],
3934+
... "born": [pd.NaT, pd.Timestamp("1940-04-25"),
3935+
... pd.NaT]})
39153936
>>> df
3916-
A B C D
3917-
0 NaN 2.0 NaN 0
3918-
1 3.0 4.0 NaN 1
3919-
2 NaN NaN NaN 5
3937+
name toy born
3938+
0 Alfred NaN NaT
3939+
1 Batman Batmobile 1940-04-25
3940+
2 Catwoman Bullwhip NaT
39203941
3921-
Drop the columns where all elements are nan:
3942+
Drop the rows where at least one element is missing.
39223943
3923-
>>> df.dropna(axis=1, how='all')
3924-
A B D
3925-
0 NaN 2.0 0
3926-
1 3.0 4.0 1
3927-
2 NaN NaN 5
3944+
>>> df.dropna()
3945+
name toy born
3946+
1 Batman Batmobile 1940-04-25
39283947
3929-
Drop the columns where any of the elements is nan
3948+
Drop the columns where at least one element is missing.
39303949
3931-
>>> df.dropna(axis=1, how='any')
3932-
D
3933-
0 0
3934-
1 1
3935-
2 5
3950+
>>> df.dropna(axis='columns')
3951+
name
3952+
0 Alfred
3953+
1 Batman
3954+
2 Catwoman
39363955
3937-
Drop the rows where all of the elements are nan
3938-
(there is no row to drop, so df stays the same):
3956+
Drop the rows where all elements are missing.
39393957
3940-
>>> df.dropna(axis=0, how='all')
3941-
A B C D
3942-
0 NaN 2.0 NaN 0
3943-
1 3.0 4.0 NaN 1
3944-
2 NaN NaN NaN 5
3958+
>>> df.dropna(how='all')
3959+
name toy born
3960+
0 Alfred NaN NaT
3961+
1 Batman Batmobile 1940-04-25
3962+
2 Catwoman Bullwhip NaT
39453963
3946-
Keep only the rows with at least 2 non-na values:
3964+
Keep only the rows with at least 2 non-NA values.
39473965
39483966
>>> df.dropna(thresh=2)
3949-
A B C D
3950-
0 NaN 2.0 NaN 0
3951-
1 3.0 4.0 NaN 1
3967+
name toy born
3968+
1 Batman Batmobile 1940-04-25
3969+
2 Catwoman Bullwhip NaT
3970+
3971+
Define in which columns to look for missing values.
39523972
3973+
>>> df.dropna(subset=['name', 'born'])
3974+
name toy born
3975+
1 Batman Batmobile 1940-04-25
3976+
3977+
Keep the DataFrame with valid entries in the same variable.
3978+
3979+
>>> df.dropna(inplace=True)
3980+
>>> df
3981+
name toy born
3982+
1 Batman Batmobile 1940-04-25
39533983
"""
39543984
inplace = validate_bool_kwarg(inplace, 'inplace')
39553985
if isinstance(axis, (tuple, list)):

pandas/core/series.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,13 +3683,74 @@ def notnull(self):
36833683

36843684
def dropna(self, axis=0, inplace=False, **kwargs):
36853685
"""
3686-
Return Series without null values
3686+
Return a new Series with missing values removed.
3687+
3688+
See the :ref:`User Guide <missing_data>` for more on which values are
3689+
considered missing, and how to work with missing data.
3690+
3691+
Parameters
3692+
----------
3693+
axis : {0 or 'index'}, default 0
3694+
There is only one axis to drop values from.
3695+
inplace : bool, default False
3696+
If True, do operation inplace and return None.
3697+
**kwargs
3698+
Not in use.
36873699
36883700
Returns
36893701
-------
3690-
valid : Series
3691-
inplace : boolean, default False
3692-
Do operation in place.
3702+
Series
3703+
Series with NA entries dropped from it.
3704+
3705+
See Also
3706+
--------
3707+
Series.isna : Indicate missing values.
3708+
Series.notna : Indicate existing (non-missing) values.
3709+
Series.fillna : Replace missing values.
3710+
DataFrame.dropna : Drop rows or columns which contain NA values.
3711+
Index.dropna : Drop missing indices.
3712+
3713+
Examples
3714+
--------
3715+
>>> ser = pd.Series([1., 2., np.nan])
3716+
>>> ser
3717+
0 1.0
3718+
1 2.0
3719+
2 NaN
3720+
dtype: float64
3721+
3722+
Drop NA values from a Series.
3723+
3724+
>>> ser.dropna()
3725+
0 1.0
3726+
1 2.0
3727+
dtype: float64
3728+
3729+
Keep the Series with valid entries in the same variable.
3730+
3731+
>>> ser.dropna(inplace=True)
3732+
>>> ser
3733+
0 1.0
3734+
1 2.0
3735+
dtype: float64
3736+
3737+
Empty strings are not considered NA values. ``None`` is considered an
3738+
NA value.
3739+
3740+
>>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay'])
3741+
>>> ser
3742+
0 NaN
3743+
1 2
3744+
2 NaT
3745+
3
3746+
4 None
3747+
5 I stay
3748+
dtype: object
3749+
>>> ser.dropna()
3750+
1 2
3751+
3
3752+
5 I stay
3753+
dtype: object
36933754
"""
36943755
inplace = validate_bool_kwarg(inplace, 'inplace')
36953756
kwargs.pop('how', None)

0 commit comments

Comments
 (0)