From a5ae33a77290509a83bfe34f37fe33aa50513d31 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Jul 2020 22:23:27 +0200 Subject: [PATCH] REGR: setting column with setitem should not modify existing array inplace --- pandas/core/indexing.py | 12 +++++++++--- pandas/core/internals/blocks.py | 2 +- pandas/tests/indexing/test_iloc.py | 9 +++++++++ pandas/tests/indexing/test_indexing.py | 19 +++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 04d1dbceb3342..ffa7a03535e59 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1684,15 +1684,21 @@ def isetter(loc, v): com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) for idx in pi ): - ser = v + if ser._mgr.any_extension_types: + # avoid `iset` for extension arrays, as this doesn't + # change the underlying values inplace (GH33457) + ser._mgr = ser._mgr.setitem(indexer=pi, value=v) + else: + ser = v + self.obj._iset_item(loc, ser) else: # set the item, possibly having a dtype change ser = ser.copy() ser._mgr = ser._mgr.setitem(indexer=pi, value=v) ser._maybe_update_cacher(clear=True) - # reset the sliced object if unique - self.obj._iset_item(loc, ser) + # reset the sliced object if unique + self.obj._iset_item(loc, ser) # we need an iterable, with a ndim of at least 1 # eg. 
don't pass through np.array(0) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6a4b3318d3aa7..16268be12118e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1589,7 +1589,7 @@ def should_store(self, value: ArrayLike) -> bool: def set(self, locs, values): assert locs.tolist() == [0] - self.values[:] = values + self.values = values def putmask( self, mask, new, inplace: bool = False, axis: int = 0, transpose: bool = False, diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c5f40102874dd..40cbc5fc7e136 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -705,6 +705,15 @@ def test_iloc_setitem_categorical_updates_inplace(self): expected = pd.Categorical(["C", "B", "A"]) tm.assert_categorical_equal(cat, expected) + # __setitem__ on the other hand does not work in-place + cat = pd.Categorical(["A", "B", "C"]) + df = pd.DataFrame({1: cat, 2: [1, 2, 3]}) + + df["cat"] = cat[::-1] + + expected = pd.Categorical(["A", "B", "C"]) + tm.assert_categorical_equal(cat, expected) + def test_iloc_with_boolean_operation(self): # GH 20627 result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ced70069dd955..e799048fefa49 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1100,3 +1100,22 @@ def test_long_text_missing_labels_inside_loc_error_message_limited(): error_message_regex = "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" with pytest.raises(KeyError, match=error_message_regex): s.loc[["a", "c"] + missing_labels] + + +def test_setitem_EA_column_update(): + # https://github.com/pandas-dev/pandas/issues/33457 + + df = pd.DataFrame( + { + "int": [1, 2, 3], + "int2": [3, 4, 5], + "float": [0.1, 0.2, 0.3], + "EA": pd.array([1, 2, None], dtype="Int64"), + } + ) + 
original_arr = df.EA.array + + # overwrite column with new array + df["EA"] = pd.array([1, 2, 3], dtype="Int64") + assert original_arr is not df.EA.array + tm.assert_extension_array_equal(original_arr, pd.array([1, 2, None], dtype="Int64"))