pandas-dev · jreback · Feb 1, 2021 · Jan 28, 2021 · Jan 28, 2021 · Jan 28, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -411,7 +411,7 @@ Reshaping
 - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
 - :meth:`Series.value_counts` and :meth:`Series.mode` return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`)
 - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
--
+- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
@@ -11,7 +11,7 @@
 from pandas._typing import ArrayLike, DtypeObj, Manager, Shape
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.cast import maybe_promote
+from pandas.core.dtypes.cast import find_common_type, maybe_promote
 from pandas.core.dtypes.common import (
     get_dtype,
     is_categorical_dtype,
@@ -393,7 +393,11 @@ def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, A
     if _is_uniform_reindex(join_units):
         # FIXME: integrate property
         empty_dtype = join_units[0].block.dtype
-        upcasted_na = join_units[0].block.fill_value
+        if is_extension_array_dtype(empty_dtype):
+            # for dt64tz we need this to get NaT instead of np.datetime64("NaT")
+            upcasted_na = empty_dtype.na_value
+        else:
+            upcasted_na = join_units[0].block.fill_value
         return empty_dtype, upcasted_na
 
     has_none_blocks = False
@@ -404,25 +408,29 @@ def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, A
         else:
             dtypes[i] = unit.dtype
 
+    filtered_dtypes = [
+        unit.dtype for unit in join_units if unit.block is not None and not unit.is_na
+    ]
+    if not len(filtered_dtypes):
+        filtered_dtypes = [unit.dtype for unit in join_units if unit.block is not None]
+    dtype_alt = find_common_type(filtered_dtypes)
+
     upcast_classes = _get_upcast_classes(join_units, dtypes)
 
+    if is_extension_array_dtype(dtype_alt):
+        return dtype_alt, dtype_alt.na_value
+    elif dtype_alt == object:
+        return dtype_alt, np.nan
+
     # TODO: de-duplicate with maybe_promote?
     # create the result
     if "extension" in upcast_classes:
-        if len(upcast_classes) == 1:
-            cls = upcast_classes["extension"][0]
-            return cls, cls.na_value
-        else:
-            return np.dtype("object"), np.nan
-    elif "object" in upcast_classes:
-        return np.dtype(np.object_), np.nan
+        return np.dtype("object"), np.nan
     elif "bool" in upcast_classes:
         if has_none_blocks:
             return np.dtype(np.object_), np.nan
         else:
             return np.dtype(np.bool_), None
-    elif "category" in upcast_classes:
-        return np.dtype(np.object_), np.nan
     elif "datetimetz" in upcast_classes:
         # GH-25014. We use NaT instead of iNaT, since this eventually
         # ends up in DatetimeArray.take, which does not allow iNaT.
@@ -480,7 +488,7 @@ def _get_upcast_classes(
 def _select_upcast_cls_from_dtype(dtype: DtypeObj) -> str:
     """Select upcast class name based on dtype."""
     if is_categorical_dtype(dtype):
-        return "category"
+        return "extension"
     elif is_datetime64tz_dtype(dtype):
         return "datetimetz"
     elif is_extension_array_dtype(dtype):

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -117,9 +117,7 @@ class TestConstructors(base.BaseConstructorsTests):
 
 
 class TestReshaping(base.BaseReshapingTests):
-    @pytest.mark.xfail(reason="Deliberately upcast to object?")
-    def test_concat_with_reindex(self, data):
-        super().test_concat_with_reindex(data)
+    pass
 
 
 class TestGetitem(base.BaseGetitemTests):

diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py
@@ -365,13 +365,25 @@ def test_append_empty_tz_frame_with_datetime64ns(self):
 
         # pd.NaT gets inferred as tz-naive, so append result is tz-naive
         result = df.append({"a": pd.NaT}, ignore_index=True)
-        expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
+        expected = DataFrame({"a": [pd.NaT]}).astype(object)
         tm.assert_frame_equal(result, expected)
 
         # also test with typed value to append
         df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
-        result = df.append(
-            Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True
-        )
-        expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
+        other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
+        result = df.append(other, ignore_index=True)
+        expected = DataFrame({"a": [pd.NaT]}).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
+    )
+    def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str):
+        # https://github.com/pandas-dev/pandas/issues/35460
+        df = DataFrame(columns=["a"]).astype(dtype_str)
+
+        other = DataFrame({"a": [np.timedelta64("NaT", "ns")]})
+        result = df.append(other, ignore_index=True)
+
+        expected = other.astype(object)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py
@@ -42,6 +42,7 @@ def test_categorical_concat(self, sort):
                 "h": [None] * 6 + cat_values,
             }
         )
+        exp["h"] = exp["h"].astype(df2["h"].dtype)
         tm.assert_frame_equal(res, exp)
 
     def test_categorical_concat_dtypes(self):