pandas-dev · jbrockmendel · Jan 8, 2021 · Jan 12, 2021 · Jan 13, 2021 · Jan 15, 2021
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -506,14 +506,31 @@ def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
         return array_equivalent(left, right, dtype_equal=True)
 
 
-def infer_fill_value(val):
+def infer_fill_value(val, length: int):
     """
     infer the fill value for the nan/NaT from the provided
     scalar/ndarray/list-like if we are a NaT, return the correct dtyped
     element to provide proper block construction
     """
     if not is_list_like(val):
         val = [val]
+
+    if type(val).__name__ == "PandasArray":
+        # for test_numpy test where we patch PandasArray._typ
+        val = val.to_numpy()
+
+    if is_extension_array_dtype(val):
+        # We cannot use dtype._na_value bc pd.NA/pd.NaT do not preserve dtype
+        if len(val) == length:
+            # TODO: in this case see if we can avoid making a copy later on
+            return val
+        if length == 0:
+            return val[:0].copy()
+
+        dtype = val.dtype
+        cls = dtype.construct_array_type()
+        return cls._from_sequence([dtype._na_value], dtype=dtype).repeat(length)
+
     val = np.array(val, copy=False)
     if needs_i8_conversion(val.dtype):
         return np.array("NaT", dtype=val.dtype)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1601,7 +1601,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
                             # We are setting an entire column
                             self.obj[key] = value
                         else:
-                            self.obj[key] = infer_fill_value(value)
+                            self.obj[key] = infer_fill_value(value, len(self.obj))
 
                         new_indexer = convert_from_missing_indexer_tuple(
                             indexer, self.obj.axes

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -65,8 +65,11 @@
     Categorical,
     DatetimeArray,
     ExtensionArray,
+    FloatingArray,
+    IntegerArray,
     PandasArray,
     PandasDtype,
+    StringArray,
     TimedeltaArray,
 )
 from pandas.core.base import PandasObject
@@ -621,10 +624,17 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
             )
             raise TypeError(msg)
 
+        values = self.values
         dtype = pandas_dtype(dtype)
+        if isinstance(dtype, ExtensionDtype) and self.values.ndim == 2:
+            # TODO(EA2D): kludge not needed with 2D EAs (astype_nansafe would raise)
+            # note DataFrame.astype has special handling to avoid getting here
+            if self.shape[0] != 1:
+                raise NotImplementedError("Need 2D EAs!")
+            values = values[0]
 
         try:
-            new_values = self._astype(dtype, copy=copy)
+            new_values = self._astype(values, dtype, copy=copy)
         except (ValueError, TypeError):
             # e.g. astype_nansafe can fail on object-dtype of strings
             #  trying to convert to float
@@ -643,8 +653,8 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
                 )
         return newb
 
-    def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
-        values = self.values
+    @staticmethod
+    def _astype(values, dtype: DtypeObj, copy: bool) -> ArrayLike:
 
         if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
             return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)
@@ -936,6 +946,16 @@ def setitem(self, indexer, value):
 
             return self.astype(dtype).setitem(indexer, value)
 
+        value = extract_array(value, extract_numpy=True)
+        if isinstance(value, PandasArray) and not isinstance(value, StringArray):
+            # this is redundant with extract_array except for PandasArray tests
+            #  that patch _typ
+            value = value.to_numpy()
+
+        if isinstance(value, (IntegerArray, FloatingArray)) and not value._mask.any():
+            # GH#38896
+            value = value.to_numpy(value.dtype.numpy_dtype)
+
         # value must be storable at this moment
         if is_extension_array_dtype(getattr(value, "dtype", None)):
             # We need to be careful not to allow through strings that
@@ -1910,6 +1930,10 @@ class FloatBlock(NumericBlock):
     def _can_hold_element(self, element: Any) -> bool:
         tipo = maybe_infer_dtype_type(element)
         if tipo is not None:
+            if isinstance(element, (IntegerArray, FloatingArray)):
+                # GH#38896
+                if element._mask.any():
+                    return False
             return issubclass(tipo.type, (np.floating, np.integer)) and not issubclass(
                 tipo.type, np.timedelta64
             )
@@ -1975,11 +1999,16 @@ class IntBlock(NumericBlock):
     def _can_hold_element(self, element: Any) -> bool:
         tipo = maybe_infer_dtype_type(element)
         if tipo is not None:
-            return (
-                issubclass(tipo.type, np.integer)
-                and not issubclass(tipo.type, np.timedelta64)
-                and self.dtype.itemsize >= tipo.itemsize
-            )
+            if issubclass(tipo.type, np.integer):
+                if isinstance(element, IntegerArray):
+                    # GH#38896
+                    if element._mask.any():
+                        return False
+
+                return (
+                    not issubclass(tipo.type, np.timedelta64)
+                    and self.dtype.itemsize >= tipo.itemsize
+                )
         # We have not inferred an integer from the dtype
         # check if we have a builtin int or a float equal to an int
         return is_integer(element) or (is_float(element) and element.is_integer())

diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
@@ -24,12 +24,22 @@
 from pandas.tests.extension import base
 
 
-def make_data():
+def make_data(with_nas: bool = True):
+    if with_nas:
+        return (
+            list(np.arange(0.1, 0.9, 0.1))
+            + [pd.NA]
+            + list(np.arange(1, 9.8, 0.1))
+            + [pd.NA]
+            + [9.9, 10.0]
+        )
+    # case without pd.NA that can be cast to floating ndarray losslessly
+    # TODO: get cases with np.nan instead of pd.NA GH#39039
     return (
         list(np.arange(0.1, 0.9, 0.1))
-        + [pd.NA]
+        + [np.nan]
         + list(np.arange(1, 9.8, 0.1))
-        + [pd.NA]
+        + [np.nan]
         + [9.9, 10.0]
     )
 
@@ -39,9 +49,10 @@ def dtype(request):
     return request.param()
 
 
-@pytest.fixture
-def data(dtype):
-    return pd.array(make_data(), dtype=dtype)
+@pytest.fixture(params=[True, False])
+def data(dtype, request):
+    with_nas = request.param
+    return pd.array(make_data(with_nas), dtype=dtype)
 
 
 @pytest.fixture

diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -33,8 +33,11 @@
 from pandas.tests.extension import base
 
 
-def make_data():
-    return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100]
+def make_data(with_nas: bool = True):
+    if with_nas:
+        return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100]
+
+    return list(range(1, 101))
 
 
 @pytest.fixture(
@@ -53,9 +56,10 @@ def dtype(request):
     return request.param()
 
 
-@pytest.fixture
-def data(dtype):
-    return pd.array(make_data(), dtype=dtype)
+@pytest.fixture(params=[True, False])
+def data(dtype, request):
+    with_nas = request.param
+    return pd.array(make_data(with_nas), dtype=dtype)
 
 
 @pytest.fixture
@@ -198,7 +202,20 @@ class TestGetitem(base.BaseGetitemTests):
 
 
 class TestSetitem(base.BaseSetitemTests):
-    pass
+    def test_setitem_series(self, data, full_indexer):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        ser = expected = pd.Series(data, name="data")
+        result = pd.Series(index=ser.index, dtype=object, name="data")
+
+        key = full_indexer(ser)
+        result.loc[key] = ser
+
+        if not data._mask.any():
+            # GH#38896 like we do with ndarray, we set the values inplace
+            #  but cast to the new numpy dtype
+            expected = pd.Series(data.to_numpy(data.dtype.numpy_dtype), name="data")
+
+        self.assert_series_equal(result, expected)
 
 
 class TestMissing(base.BaseMissingTests):

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -501,6 +501,39 @@ def test_setitem_slice(self, data, box_in_series):
     def test_setitem_loc_iloc_slice(self, data):
         super().test_setitem_loc_iloc_slice(data)
 
+    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        df = pd.DataFrame({"data": pd.Series(data)})
+        result = pd.DataFrame(index=df.index)
+
+        key = full_indexer(df)
+        result.loc[key, "data"] = df["data"]._values
+
+        # For PandasArray we expect to get unboxed to numpy
+        expected = pd.DataFrame({"data": data.to_numpy()})
+        if isinstance(key, slice) and (
+            key == slice(None) or data.dtype.numpy_dtype == object
+        ):
+            mark = pytest.mark.xfail(
+                reason="This case goes through a different code path"
+            )
+            # Other cases go through Block.setitem
+            request.node.add_marker(mark)
+
+        self.assert_frame_equal(result, expected)
+
+    def test_setitem_series(self, data, full_indexer):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        ser = pd.Series(data, name="data")
+        result = pd.Series(index=ser.index, dtype=object, name="data")
+
+        key = full_indexer(ser)
+        result.loc[key] = ser
+
+        # For PandasArray we expect to get unboxed to numpy
+        expected = pd.Series(data.to_numpy(), name="data")
+        self.assert_series_equal(result, expected)
+
 
 @skip_nested
 class TestParsing(BaseNumPyTests, base.BaseParsingTests):