pandas-dev · phofl · Nov 10, 2020 · Nov 10, 2020 · Nov 10, 2020 · Nov 11, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -612,6 +612,7 @@ Indexing
 - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
 - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
+- Bug in :meth:`DataFrame.loc` not preserving the dtype of new values when complete columns were assigned (:issue:`20635`, :issue:`20511`, :issue:`27583`)
 
 Missing
 ^^^^^^^

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -11,8 +11,10 @@
 from pandas.errors import AbstractMethodError, InvalidIndexError
 from pandas.util._decorators import doc
 
+from pandas.core.dtypes.cast import infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
  is_array_like,
+ is_dtype_equal,
  is_hashable,
  is_integer,
  is_iterator,
@@ -1542,6 +1544,17 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
  val = list(value.values()) if isinstance(value, dict) else value
  blk = self.obj._mgr.blocks[0]
  take_split_path = not blk._can_hold_element(val)
+ if not take_split_path:
+ if is_scalar(value):
+ dtype, _ = infer_dtype_from_scalar(value)
+ take_split_path = not is_dtype_equal(dtype, blk.dtype)
+ elif isinstance(value, ABCSeries):
+ take_split_path = not (is_dtype_equal(value.dtype, blk.dtype))
+ elif isinstance(value, ABCDataFrame):
+ dtypes = list(value.dtypes.unique())
+ take_split_path = not (
+ len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype)
+ )
 
  # if we have any multi-indexes that have non-trivial slices
  # (not null slices) then we must take the split path, xref

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1471,7 +1471,7 @@ def test_at_time_between_time_datetimeindex(self):
  result.loc[akey] = 0
  result = result.loc[akey]
  expected = df.loc[akey].copy()
- expected.loc[:] = 0
+ expected.loc[:] = 0.0
  tm.assert_frame_equal(result, expected)
 
  result = df.copy()
@@ -1483,7 +1483,7 @@ def test_at_time_between_time_datetimeindex(self):
  result.loc[bkey] = 0
  result = result.loc[bkey]
  expected = df.loc[bkey].copy()
- expected.loc[:] = 0
+ expected.loc[:] = 0.0
  tm.assert_frame_equal(result, expected)
 
  result = df.copy()

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -289,6 +289,27 @@ def test_setitem_periodindex(self):
  assert isinstance(rs.index, PeriodIndex)
  tm.assert_index_equal(rs.index, rng)
 
+ @pytest.mark.parametrize("klass", [list, np.array])
+ def test_iloc_setitem_bool_indexer(self, klass):
+ # GH: 36741
+ df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
+ indexer = klass([True, False, False])
+ df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
+ expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
+ tm.assert_frame_equal(df, expected)
+
+ def test_setitem_scalar_dtype_change(self):
+ # GH#27583
+ df = DataFrame({"a": [0.0], "b": [0.0]})
+ df[["a", "b"]] = 0
+ expected = DataFrame({"a": [0], "b": [0]})
+ tm.assert_frame_equal(df, expected)
+
+ df = DataFrame({"a": [0.0], "b": [0.0]})
+ df["b"] = 0
+ expected = DataFrame({"a": [0.0], "b": [0]})
+ tm.assert_frame_equal(df, expected)
+
 
 class TestDataFrameSetItemSlicing:
  def test_setitem_slice_position(self):

diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py
@@ -120,7 +120,7 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
  exp["A"].loc[2000, 4].values[:] = 1
  tm.assert_frame_equal(df, exp)
 
- df.loc[2000] = 5
+ df.loc[2000] = 5.0
  exp.loc[2000].values[:] = 5
  tm.assert_frame_equal(df, exp)
 

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -17,6 +17,7 @@
  concat,
  date_range,
  isna,
+ to_datetime,
 )
 import pandas._testing as tm
 from pandas.api.types import is_scalar
@@ -834,6 +835,32 @@ def test_iloc_setitem_dictionary_value(self):
  expected = DataFrame({"x": [1, 9], "y": [2, 99]})
  tm.assert_frame_equal(df, expected)
 
+ def test_iloc_setitem_conversion_to_datetime(self):
+ # GH#20511
+ df = DataFrame(
+ [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]],
+ columns=["date0", "date1"],
+ )
+ df.iloc[:, [0]] = df.iloc[:, [0]].apply(
+ lambda x: to_datetime(x, errors="coerce")
+ )
+ expected = DataFrame(
+ {
+ "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")],
+ "date1": ["2016-01-01", "2015-01-01"],
+ }
+ )
+ tm.assert_frame_equal(df, expected)
+
+ def test_iloc_conversion_to_float_32_for_columns_list(self):
+ # GH#33198
+ arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64)
+ df = DataFrame(arr)
+ df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32)
+ result = df.dtypes.value_counts()
+ expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")])
+ tm.assert_series_equal(result, expected)
+
 
 class TestILocErrors:
  # NB: this test should work for _any_ Series we can pass as

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1127,6 +1127,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
 
  tm.assert_frame_equal(expected, df)
 
+ def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self):
+ # GH#20635
+ df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]})
+ df.loc[:, "C"] = df["C"].astype("int64")
+ expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]})
+ tm.assert_frame_equal(df, expected)
+
+ @pytest.mark.parametrize("dtype", ["int64", "Int64"])
+ def test_loc_setitem_null_slice_different_dtypes(self, dtype):
+ # GH#20635
+ df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]})
+ rhs = df[["B", "C"]].astype("int64").astype(dtype)
+ df.loc[:, ["B", "C"]] = rhs
+ expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]})
+ expected[["B", "C"]] = expected[["B", "C"]].astype(dtype)
+ tm.assert_frame_equal(df, expected)
+
 
 class TestLocWithMultiIndex:
  @pytest.mark.parametrize(
@@ -2000,6 +2017,14 @@ def test_loc_setitem_dt64tz_values(self):
  result = s2["a"]
  assert result == expected
 
+ @pytest.mark.parametrize("dtype", ["int64", "Int64"])
+ def test_setitem_series_null_slice_different_dtypes(self, dtype):
+ # GH: 20635
+ ser = Series(["3", "4"], name="A")
+ ser.loc[:] = ser.astype("int64").astype(dtype)
+ expected = Series([3, 4], name="A", dtype=dtype)
+ tm.assert_series_equal(ser, expected)
+
 
 @pytest.mark.parametrize("value", [1, 1.5])
 def test_loc_int_in_object_index(frame_or_series, value):