Skip to content
18 changes: 3 additions & 15 deletions asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
import numpy as np

try:
from pandas.compat import np_version_under1p20
except ImportError:
from pandas.compat.numpy import _np_version_under1p20 as np_version_under1p20

from pandas import (
Categorical,
NaT,
Expand Down Expand Up @@ -283,10 +278,6 @@ class IsInLongSeriesLookUpDominates:
def setup(self, dtype, MaxNumber, series_type):
N = 10 ** 7

# https://github.com/pandas-dev/pandas/issues/39844
if not np_version_under1p20 and dtype in ("Int64", "Float64"):
raise NotImplementedError

if series_type == "random_hits":
array = np.random.randint(0, MaxNumber, N)
if series_type == "random_misses":
Expand All @@ -297,7 +288,8 @@ def setup(self, dtype, MaxNumber, series_type):
array = np.arange(N) + MaxNumber

self.series = Series(array).astype(dtype)
self.values = np.arange(MaxNumber).astype(dtype)

self.values = np.arange(MaxNumber).astype(dtype.lower())

def time_isin(self, dtypes, MaxNumber, series_type):
    """Time ``isin`` on the series and lookup values prepared by ``setup``.

    The benchmark parameters are accepted but not read here; only
    ``self.series`` and ``self.values`` are used.
    """
    lookup = self.values
    self.series.isin(lookup)
Expand All @@ -313,16 +305,12 @@ class IsInLongSeriesValuesDominate:
def setup(self, dtype, series_type):
N = 10 ** 7

# https://github.com/pandas-dev/pandas/issues/39844
if not np_version_under1p20 and dtype in ("Int64", "Float64"):
raise NotImplementedError

if series_type == "random":
vals = np.random.randint(0, 10 * N, N)
if series_type == "monotone":
vals = np.arange(N)

self.values = vals.astype(dtype)
self.values = vals.astype(dtype.lower())
M = 10 ** 6 + 1
self.series = Series(np.arange(M)).astype(dtype)

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Fixed regressions
- Performance regression in :class:`DataFrame` in reduction operations requiring casting such as :meth:`DataFrame.mean` on integer data (:issue:`38592`)
- Performance regression in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when the ``orient`` argument is one of "records", "dict", or "split" (:issue:`42352`)
- Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`)
- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
-

.. ---------------------------------------------------------------------------
Expand Down
18 changes: 13 additions & 5 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,20 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]

from pandas.core.arrays import BooleanArray

result = isin(self._data, values)
# algorithms.isin will eventually convert values to an ndarray, so no extra
# cost to doing it here first
values_arr = np.asarray(values)
result = isin(self._data, values_arr)

if self._hasna:
if libmissing.NA in values:
result += self._mask
else:
result *= np.invert(self._mask)
values_have_NA = is_object_dtype(values_arr.dtype) and any(
val is self.dtype.na_value for val in values_arr
)

# For now, NA does not propagate so set result according to presence of NA,
# see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
result[self._mask] = values_have_NA
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line should be equivalent to the deleted if/else AFAICT, but I think it makes the logic easier to follow.


mask = np.zeros_like(self, dtype=bool)
return BooleanArray(result, mask, copy=False)

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,27 @@ def test_isin_float_in_int_series(self, values):
expected = Series([True, False])
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
@pytest.mark.parametrize(
    "data,values,expected",
    [
        # no missing values on either side
        ([0, 1, 0], [1], [False, True, False]),
        # NA only in the lookup values
        ([0, 1, 0], [1, pd.NA], [False, True, False]),
        # NA only in the data: the NA position is False
        ([0, pd.NA, 0], [1, 0], [True, False, True]),
        # NA on both sides: the NA position is True
        ([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
        # np.nan in the lookup values does not match NA in the data
        ([0, 1, pd.NA], [1, np.nan], [False, True, False]),
        # np.nan, NaT and None in the lookup values do not match NA either
        ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
    ],
)
def test_isin_masked_types(self, dtype, data, values, expected):
    """``Series.isin`` on a masked dtype yields the expected "boolean" Series.

    Regression test for GH#42405.
    """
    masked = Series(data, dtype=dtype)

    got = masked.isin(values)
    want = Series(expected, dtype="boolean")

    tm.assert_series_equal(got, want)


@pytest.mark.slow
def test_isin_large_series_mixed_dtypes_and_nan():
Expand Down