Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added kite-installer
Binary file not shown.
24 changes: 16 additions & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,15 +627,23 @@ def comp(s, regex=False):
for b in rb:
m = masks[i][b.mgr_locs.indexer]
convert = i == src_len # only convert once at the end
result = b._replace_coerce(
mask=m,
to_replace=s,
value=d,
inplace=inplace,
convert=convert,
regex=regex,
)

def f(mask, values, idx):
if idx is not None:
block = b.getitem_block(slice(idx, idx + 1))
else:
block = b
return block._replace_coerce(
mask=mask.reshape(block.shape),
to_replace=s,
value=d,
inplace=inplace,
convert=convert,
regex=regex,
)

if m.any() or convert:
result = b.split_and_operate(m, f, inplace)
new_rb = _extend_blocks(result, new_rb)
else:
new_rb.append(b)
Expand Down
64 changes: 64 additions & 0 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,61 @@ def test_replace_invalid_to_replace(self):
with pytest.raises(TypeError, match=msg):
df.replace(lambda x: x.strip())

def test_replace_real_dont_change_other_columns(self, any_real_dtype):
# GH30512
dtype = any_real_dtype
value_a = 2
value_b = 3
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_string_dont_change_other_columns(self, string_dtype):
# GH30512
dtype = string_dtype
value_a = "a"
value_b = "b"
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_datetime64_dont_change_other_columns(self, datetime64_dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you parameterize all of these cases into a single test

Copy link
Member Author

@MarcoGorelli MarcoGorelli Jan 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that, but these test functions take fixtures as arguments (e.g. datetime64_dtype is defined as a fixture in pandas.conftest.py). Is it possible to parametrize over fixtures? I tried looking around for a way but couldn't find one

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to parametrize over fixtures?

I think tests.io.excel.test_readers.engine_and_read_ext might be doing something similar to what you need

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or tests.arrays.test_integer.all_data might be a better fit

Copy link
Member Author

@MarcoGorelli MarcoGorelli Feb 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel thanks for the tip, have updated the test - is this what you had in mind?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update: what I've done is not correct, it generates nearly 90,000 tests (when I made commit 61b8b2c there were only 101) in pandas/tests/frame/methods/test_replace.py

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update: what I've done is not correct, it generates nearly 90,000 tests (when I made commit 61b8b2c there were only 101) in pandas/tests/frame/methods/test_replace.py

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update: what I've done is not correct, it generates nearly 90,000 tests (when I made commit 61b8b2c there were only 101) in pandas/tests/frame/methods/test_replace.py

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel @jreback if I write

@pytest.fixture( params=[ "any_real_dtype", "string_dtype", "datetime64_dtype", "timedelta64_dtype", "bool", "complex_dtype", "bytes_dtype", "tz_aware_fixture", ] ) def replace_value_a_value_b( request, any_real_dtype, string_dtype, datetime64_dtype, timedelta64_dtype, complex_dtype, bytes_dtype, tz_aware_fixture, ): if request.param == "any_real_dtype": return any_real_dtype, 2, 3 elif request.param == "string_dtype": return string_dtype, "a", "b" elif request.param == "datetime64_dtype": return datetime64_dtype, pd.Timestamp("2020-01-24"), pd.Timestamp("2020-01-25") elif request.param == "timedelta64_dtype": return timedelta64_dtype, pd.Timedelta(3), pd.Timedelta(4) elif request.param == "bool": return bool, 1, 0 elif request.param == "complex_dtype": return complex_dtype, 1 + 2j, 1 + 3j elif request.param == "bytes_dtype": return bytes_dtype, b"a", b"b" elif request.param == "tz_aware_fixture": return ( None, Timestamp("20130102", tz=tz_aware_fixture), Timestamp("20130103", tz=tz_aware_fixture), ) 

then nearly 90,000 tests are collected.

An alternative would involve importing the types in here (e.g. ALL_REAL_DTYPES) and iterating over them but, as I understand, importing from conftest is discouraged.

Any other tips on how to do this? It seems parametrisation over fixtures has been proposed in pytest, but it's not there yet - https://docs.pytest.org/en/latest/proposals/parametrize_with_fixtures.html

# GH30512
dtype = datetime64_dtype
value_a = pd.Timestamp("2020-01-24")
value_b = pd.Timestamp("2020-01-25")
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_timedelta64_dont_change_other_columns(self, timedelta64_dtype):
# GH30512
dtype = timedelta64_dtype
value_a = pd.Timedelta(3)
value_b = pd.Timedelta(4)
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_bool_dont_change_other_columns(self):
# GH30512
dtype = bool
value_a = 1
value_b = 0
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_complex_dont_change_other_columns(self, complex_dtype):
# GH30512
dtype = complex_dtype
value_a = 1 + 2j
value_b = 1 + 3j
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_bytes_dont_change_other_columns(self, bytes_dtype):
# GH30512
dtype = bytes_dtype
value_a = b"a"
value_b = b"b"
_test_replace_doesnt_change_other_columns(value_a, value_b, dtype)

def test_replace_tz_aware_dont_change_other_columns(self, tz_aware_fixture):
# GH30512
value_a = Timestamp("20130102", tz=tz_aware_fixture)
value_b = Timestamp("20130103", tz=tz_aware_fixture)
_test_replace_doesnt_change_other_columns(value_a, value_b, None)

@pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"])
@pytest.mark.parametrize("value", [np.nan, pd.NA])
def test_replace_no_replacement_dtypes(self, dtype, value):
Expand All @@ -1403,3 +1458,12 @@ def test_replace_with_duplicate_columns(self, replacement):
result["B"] = result["B"].replace(7, replacement)

tm.assert_frame_equal(result, expected)


def _test_replace_doesnt_change_other_columns(value_a, value_b, dtype):
# GH30512
df = pd.DataFrame({"A": [value_a], "B": [value_b]}, dtype=dtype)
expected = df.copy()
expected["A"] = expected["A"].replace(to_replace={value_a: None})
result = df.replace(to_replace={value_a: None})
tm.assert_frame_equal(result, expected)