-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
BUG: Bug in loc did not change dtype when complete column was assigned #37749
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
6450a2c 1599c5c 4d39612 f9f37cb 5cf355b 8d203f9 e35e009 4c391da 71fbf9f babcd38 caa6046 8b95236 3b98ee0 f9b8a59 4bef38e 27ea3e2 f94277b 279e812 d5f6150 706dc6a 66d4b4e fa25075 3c06ba6 a33659c 0f556c4 181e62a b759ac9 a353930 d28e1e1 1aa8522 1bc0d46 61aab16 14fe5a8 26b5d6f 913ffea e6e22f3 23f6f3b 99b87c9 f97a252 700ce6c File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -289,6 +289,27 @@ def test_setitem_periodindex(self): | |
| assert isinstance(rs.index, PeriodIndex) | ||
| tm.assert_index_equal(rs.index, rng) | ||
| | ||
| @pytest.mark.parametrize("klass", [list, np.array]) | ||
| def test_iloc_setitem_bool_indexer(self, klass): | ||
| ||
| # GH: 36741 | ||
| df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) | ||
| indexer = klass([True, False, False]) | ||
| df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 | ||
| expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| def test_setitem_scalar_dtype_change(self): | ||
| # GH#27583 | ||
| df = DataFrame({"a": [0.0], "b": [0.0]}) | ||
| df[["a", "b"]] = 0 | ||
| expected = DataFrame({"a": [0], "b": [0]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| df = DataFrame({"a": [0.0], "b": [0.0]}) | ||
| df["b"] = 0 | ||
| expected = DataFrame({"a": [0.0], "b": [0]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| | ||
| class TestDataFrameSetItemSlicing: | ||
| def test_setitem_slice_position(self): | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -17,6 +17,7 @@ | |
| concat, | ||
| date_range, | ||
| isna, | ||
| to_datetime, | ||
| ) | ||
| import pandas._testing as tm | ||
| from pandas.api.types import is_scalar | ||
| | @@ -834,6 +835,32 @@ def test_iloc_setitem_dictionary_value(self): | |
| expected = DataFrame({"x": [1, 9], "y": [2, 99]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| def test_iloc_setitem_conversion_to_datetime(self): | ||
| # GH#20511 | ||
| df = DataFrame( | ||
| [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], | ||
| columns=["date0", "date1"], | ||
| ) | ||
| df.iloc[:, [0]] = df.iloc[:, [0]].apply( | ||
| lambda x: to_datetime(x, errors="coerce") | ||
| ) | ||
| expected = DataFrame( | ||
| { | ||
| "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], | ||
| ||
| "date1": ["2016-01-01", "2015-01-01"], | ||
| } | ||
| ) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| def test_iloc_conversion_to_float_32_for_columns_list(self): | ||
| # GH#33198 | ||
| arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64) | ||
| df = DataFrame(arr) | ||
| df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32) | ||
| result = df.dtypes.value_counts() | ||
| expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")]) | ||
| tm.assert_series_equal(result, expected) | ||
| | ||
| | ||
| class TestILocErrors: | ||
| # NB: this test should work for _any_ Series we can pass as | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -1127,6 +1127,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): | |
| | ||
| tm.assert_frame_equal(expected, df) | ||
| | ||
| def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self): | ||
| # GH#20635 | ||
| df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) | ||
| df.loc[:, "C"] = df["C"].astype("int64") | ||
| expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| @pytest.mark.parametrize("dtype", ["int64", "Int64"]) | ||
| def test_loc_setitem_null_slice_different_dtypes(self, dtype): | ||
| # GH#20635 | ||
| df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) | ||
| rhs = df[["B", "C"]].astype("int64").astype(dtype) | ||
| df.loc[:, ["B", "C"]] = rhs | ||
| expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) | ||
| expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) | ||
| tm.assert_frame_equal(df, expected) | ||
| | ||
| | ||
| class TestLocWithMultiIndex: | ||
| @pytest.mark.parametrize( | ||
| | @@ -2000,6 +2017,14 @@ def test_loc_setitem_dt64tz_values(self): | |
| result = s2["a"] | ||
| assert result == expected | ||
| | ||
| @pytest.mark.parametrize("dtype", ["int64", "Int64"]) | ||
| def test_setitem_series_null_slice_different_dtypes(self, dtype): | ||
| ||
| # GH: 20635 | ||
| ser = Series(["3", "4"], name="A") | ||
| ser.loc[:] = ser.astype("int64").astype(dtype) | ||
| expected = Series([3, 4], name="A", dtype=dtype) | ||
| Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is doing the opposite of #39163. Did we decide to revert part or all of that? Member Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since yours is significantly newer, I am fine with "closing" this. I would check whether some of the issues are fixed. | ||
| tm.assert_series_equal(ser, expected) | ||
| | ||
| | ||
| @pytest.mark.parametrize("value", [1, 1.5]) | ||
| def test_loc_int_in_object_index(frame_or_series, value): | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be the else condition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, value can be anything from an int or a float to a numpy array. I think this check is only necessary if we have a Series or DataFrame. Maybe also for an array?