From 63292d45a3e1d5347a822c54a747b1b96573e7b4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 7 Apr 2023 16:57:10 -0700 Subject: [PATCH 01/12] DEPR: concat with empty objects --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/dtypes/concat.py | 22 ++++++++ pandas/core/internals/concat.py | 55 +++++++++++++++++-- pandas/tests/dtypes/test_concat.py | 7 ++- pandas/tests/groupby/test_groupby.py | 10 +++- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/reshape/concat/test_append.py | 4 +- .../reshape/concat/test_append_common.py | 21 ++++--- pandas/tests/reshape/concat/test_concat.py | 10 +++- pandas/tests/reshape/concat/test_datetimes.py | 4 +- pandas/tests/reshape/concat/test_empty.py | 12 ++-- pandas/tests/reshape/concat/test_series.py | 4 +- pandas/tests/reshape/merge/test_merge.py | 6 +- .../series/methods/test_combine_first.py | 8 ++- 14 files changed, 137 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a037e50593737..c4d3f6af1e007 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -152,6 +152,7 @@ Deprecations - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`) - Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`) +- Deprecated :func:`concat` behavior when any of the objects being concatenated have length 0; in the past the dtypes of empty objects were ignored when determining the resulting dtype, in a future version they will not (:issue:`39122`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b55c8cd31c110..e33b8682b0d2e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,10 +8,12 @@ Sequence, cast, ) +import warnings import numpy as np from pandas._libs import lib +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( @@ -42,6 +44,9 @@ ) +_dtype_obj = np.dtype(object) + + def _is_nonempty(x, axis) -> bool: # filter empty arrays # 1-d dtypes always are included here @@ -104,6 +109,23 @@ def concat_compat( non_empties = [x for x in to_concat if _is_nonempty(x, axis)] if non_empties and axis == 0 and not ea_compat_axis: # ea_compat_axis see GH#39574 + if len(non_empties) < len(to_concat) and not any( + obj.dtype == _dtype_obj for obj in non_empties + ): + # Check for object dtype is an imperfect proxy for checking if + # the result dtype is going to change once the deprecation is + # enforced. + # GH#39122 + warnings.warn( + "The behavior of array concatenation with empty entries is " + "deprecated. 
In a future version, this will no longer exclude " + "empty items when determining the result dtype. To opt in to " + "the future behavior, set pd.set_option('future.concat_empty', True). " + "To retain the old behavior, exclude the empty entries before " + "the concat operation.", + FutureWarning, + stacklevel=find_stack_level(), + ) to_concat = non_empties dtypes = {obj.dtype for obj in to_concat} diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index fda445af5c0c4..18d63c28f1b26 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -7,6 +7,7 @@ Sequence, cast, ) +import warnings import numpy as np @@ -16,6 +17,7 @@ ) from pandas._libs.missing import NA from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( @@ -470,7 +472,9 @@ def is_na(self) -> bool: values = blk.values if values.size == 0: + # GH#39122 this case will return False once deprecation is enforced return True + if isinstance(values.dtype, SparseDtype): return False @@ -488,6 +492,19 @@ def is_na(self) -> bool: return False return all(isna_all(row) for row in values) + @cache_readonly + def is_na_after_size_deprecation(self) -> bool: + """ + Will self.is_na be True after values.size == 0 deprecation is enforced? + """ + blk = self.block + if blk.dtype.kind == "V": + return True + + if not blk._can_hold_na: + return False + return self.is_na and blk.values.size != 0 + def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: values: ArrayLike @@ -578,7 +595,7 @@ def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike """ Concatenate values from several join units along axis=1. """ - empty_dtype = _get_empty_dtype(join_units) + empty_dtype, empty_dtype_future = _get_empty_dtype(join_units) has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks) @@ -617,6 +634,18 @@ def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike else: concat_values = concat_compat(to_concat, axis=1) + if empty_dtype != empty_dtype_future: + if empty_dtype == concat_values.dtype: + # GH#39122 + warnings.warn( + "The behavior of DataFrame concatenation with empty entries is " + "deprecated. In a future version, this will no longer exclude " + "empty frames when determining the result dtypes. " + "To retain the old behavior, exclude the empty entries before " + "the concat operation.", + FutureWarning, + stacklevel=find_stack_level(), + ) return concat_values @@ -643,7 +672,7 @@ def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool): raise NotImplementedError -def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: +def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj]: """ Return dtype and N/A values to use when concatenating specified units. 
@@ -655,11 +684,11 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: """ if len(join_units) == 1: blk = join_units[0].block - return blk.dtype + return blk.dtype, blk.dtype if _is_uniform_reindex(join_units): empty_dtype = join_units[0].block.dtype - return empty_dtype + return empty_dtype, empty_dtype has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units) @@ -670,7 +699,23 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: dtype = find_common_type(dtypes) if has_none_blocks: dtype = ensure_dtype_can_hold_na(dtype) - return dtype + + dtype_future = dtype + if len(dtypes) != len(join_units): + dtypes_future = [ + unit.dtype for unit in join_units if not unit.is_na_after_size_deprecation + ] + if not len(dtypes_future): + dtypes_future = [ + unit.dtype for unit in join_units if unit.block.dtype.kind != "V" + ] + + if len(dtypes) != len(dtypes_future): + dtype_future = find_common_type(dtypes_future) + if has_none_blocks: + dtype_future = ensure_dtype_can_hold_na(dtype_future) + + return dtype, dtype_future def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index 772dfdfe8fb03..97718386dabb7 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -12,8 +12,11 @@ def test_concat_mismatched_categoricals_with_empty(): ser1 = Series(["a", "b", "c"], dtype="category") ser2 = Series([], dtype="category") - result = _concat.concat_compat([ser1._values, ser2._values]) - expected = pd.concat([ser1, ser2])._values + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = _concat.concat_compat([ser1._values, ser2._values]) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = pd.concat([ser1, ser2])._values tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4388913511be2..168dce0c6f3eb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -362,8 +362,11 @@ def f3(x): df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)}) # correct result - result1 = df.groupby("a").apply(f1) - result2 = df2.groupby("a").apply(f1) + depr_msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result1 = df.groupby("a").apply(f1) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result2 = df2.groupby("a").apply(f1) tm.assert_frame_equal(result1, result2) # should fail (not the same number of levels) @@ -377,7 +380,8 @@ def f3(x): with pytest.raises(AssertionError, match=msg): df.groupby("a").apply(f3) with pytest.raises(AssertionError, match=msg): - df2.groupby("a").apply(f3) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + df2.groupby("a").apply(f3) def test_attr_wrapper(ts): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index e681223933abb..4c97d07f39bbf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -616,7 +616,9 @@ def test_append_empty_preserve_name(self, name, expected): left = Index([], name="foo") right = Index([1, 2, 3], name=name) - result = left.append(right) + msg = "The behavior of array concatenation with empty entries is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + result = left.append(right) assert result.name == expected @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index b540cd514c0b5..e794f5abf4b21 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -162,7 +162,9 @@ def test_append_preserve_index_name(self): df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) df2 = df2.set_index(["A"]) - result = df1._append(df2) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df1._append(df2) assert result.index.name == "A" indexes_can_append = [ diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 2d84de8145111..e5d42d9cb1bfb 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -693,11 +693,14 @@ def test_concat_categorical_empty(self): s1 = Series([], dtype="category") s2 = Series([1, 2], dtype="category") - tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) - tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) - tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) + with tm.assert_produces_warning(FutureWarning, match=msg): + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) s1 = Series([], dtype="category") s2 = Series([], dtype="category") @@ -719,11 +722,13 @@ def test_concat_categorical_empty(self): # empty Series is ignored exp = Series([np.nan, np.nan]) - tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + with tm.assert_produces_warning(FutureWarning, match=msg): + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) - tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + with tm.assert_produces_warning(FutureWarning, match=msg): + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) def test_categorical_concat_append(self): cat = Categorical(["a", "b"], categories=["a", "b"]) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 244fe6a7927fe..d9fa3232db964 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -747,7 +747,15 @@ def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): # https://github.com/pandas-dev/pandas/issues/45637 df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype) - result = concat([empty, df]) + + msg = "The behavior of DataFrame concatenation with empty entries is deprecated" + warn = None + if df_dtype == "datetime64[ns]" 
or ( + df_dtype == "float64" and empty_dtype != "float64" + ): + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + result = concat([empty, df]) expected = df if df_dtype == "int64": # TODO what exact behaviour do we want for integer eventually? diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 43c6bb03b6a9a..950b90e581060 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -567,7 +567,9 @@ def test_concat_float_datetime64(using_array_manager): if not using_array_manager: expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) - result = concat([df_time, df_float.iloc[:0]]) + msg = "The behavior of DataFrame concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) else: expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype( diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 919bcb8b2e577..6ef54b907cf34 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -58,7 +58,9 @@ def test_concat_empty_series(self): s1 = Series([1, 2, 3], name="x") s2 = Series(name="y", dtype="float64") - res = concat([s1, s2], axis=0) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = concat([s1, s2], axis=0) # name will be reset exp = Series([1, 2, 3]) tm.assert_series_equal(res, exp) @@ -238,9 +240,11 @@ def test_concat_inner_join_empty(self): df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64") - for how, expected in [("inner", df_expected), ("outer", df_a)]: - result = concat([df_a, df_empty], axis=1, join=how) - tm.assert_frame_equal(result, expected) + result = concat([df_a, df_empty], axis=1, join="inner") + tm.assert_frame_equal(result, df_expected) + + result = concat([df_a, df_empty], axis=1, join="outer") + tm.assert_frame_equal(result, df_a) def test_empty_dtype_coerce(self): # xref to #12411 diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index c5d3a8a7c74d1..2711b6a34c62c 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -40,7 +40,9 @@ def test_concat_empty_and_non_empty_series_regression(self): s2 = Series([], dtype=object) expected = s1 - result = concat([s1, s2]) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = concat([s1, s2]) tm.assert_series_equal(result, expected) def test_concat_series_axis1(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 3a822c8134eb4..96d268b7841fd 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -682,8 +682,10 @@ def test_join_append_timedeltas(self, using_array_manager): {"d": [datetime(2013, 11, 5, 5, 56)], "t": [timedelta(0, 22500)]} ) df = DataFrame(columns=list("dt")) - df = concat([df, d], ignore_index=True) - result = concat([df, d], ignore_index=True) + msg = "The behavior of DataFrame concatenation with empty entries is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + df = concat([df, d], ignore_index=True) + result = concat([df, d], ignore_index=True) expected = DataFrame( { "d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)], diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index 46af5f509d6ab..fb6f7e386d5d5 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -63,7 +63,9 @@ def test_combine_first(self): # corner case ser = Series([1.0, 2, 3], index=[0, 1, 2]) empty = Series([], index=[], dtype=object) - result = ser.combine_first(empty) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.combine_first(empty) ser.index = ser.index.astype("O") tm.assert_series_equal(ser, result) @@ -110,7 +112,9 @@ def test_combine_first_timezone_series_with_empty_series(self): ) s1 = Series(range(10), index=time_index) s2 = Series(index=time_index) - result = s1.combine_first(s2) + msg = "The behavior of array concatenation with empty entries is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s1.combine_first(s2) tm.assert_series_equal(result, s1) def test_combine_first_preserves_dtype(self): From 2ace79c60ff9f5423d5ce6aa436dc436053ab80e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 7 Apr 2023 18:25:29 -0700 Subject: [PATCH 02/12] xfail on 32bit --- pandas/core/dtypes/concat.py | 3 +-- pandas/tests/io/formats/test_info.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index e33b8682b0d2e..f53f74e69da46 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -119,8 +119,7 @@ def concat_compat( warnings.warn( "The behavior of array concatenation with empty entries is " "deprecated. In a future version, this will no longer exclude " - "empty items when determining the result dtype. To opt in to " - "the future behavior, set pd.set_option('future.concat_empty', True). " + "empty items when determining the result dtype. 
" "To retain the old behavior, exclude the empty entries before " "the concat operation.", FutureWarning, diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index e79e135208995..fdb467cd2d1b0 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -495,6 +495,7 @@ def test_info_int_columns(): assert result == expected +@pytest.mark.xfail(not IS64) def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) From 6258adf6f14099c5b2997c4b67689a1876a6c1b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 Apr 2023 08:49:21 -0700 Subject: [PATCH 03/12] missing reason --- pandas/tests/io/formats/test_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index fdb467cd2d1b0..42aebb3d9ac1c 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -495,7 +495,7 @@ def test_info_int_columns(): assert result == expected -@pytest.mark.xfail(not IS64) +@pytest.mark.xfail(not IS64, reason="concat will cast to int64") def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) From 51e6d360b32b8e3c422021551fcf591e6cfc4c70 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Apr 2023 14:11:31 -0700 Subject: [PATCH 04/12] Fix AM build --- pandas/tests/reshape/merge/test_merge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 96d268b7841fd..491795a87d15a 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -683,7 +683,10 @@ def test_join_append_timedeltas(self, using_array_manager): ) df = DataFrame(columns=list("dt")) msg = "The behavior of DataFrame concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + warn = FutureWarning + if using_array_manager: + warn = None + with tm.assert_produces_warning(warn, match=msg): df = concat([df, d], ignore_index=True) result = concat([df, d], ignore_index=True) expected = DataFrame( From 52ce0d7732586a5ffeb6f9c9e7b8999fbfa90f43 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Apr 2023 15:56:35 -0700 Subject: [PATCH 05/12] post-merge fixup --- pandas/core/internals/concat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 7ccb5b434a058..826378d60a506 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -653,11 +653,13 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj dtype_future = dtype if len(dtypes) != len(join_units): dtypes_future = [ - unit.dtype for unit in join_units if not unit.is_na_after_size_deprecation + unit.block.dtype + for unit in join_units + if not unit.is_na_after_size_deprecation ] if not len(dtypes_future): dtypes_future = [ - unit.dtype for unit in join_units if unit.block.dtype.kind != "V" + unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V" ] if len(dtypes) != len(dtypes_future): From 163bf8a9e62c75b5d416c03fb51990ee0063c13b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 Apr 2023 16:56:56 -0700 Subject: [PATCH 06/12] catch more specifically --- pandas/core/dtypes/concat.py | 128 +++++++++++------------------------ 1 file changed, 39 insertions(+), 89 deletions(-) diff --git 
a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f53f74e69da46..c2e059952abb5 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -21,14 +21,9 @@ find_common_type, ) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, - DatetimeTZDtype, - ExtensionDtype, -) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.generic import ( ABCCategoricalIndex, - ABCExtensionArray, ABCSeries, ) @@ -106,15 +101,17 @@ def concat_compat( # Creating an empty array directly is tempting, but the winnings would be # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. + orig = to_concat non_empties = [x for x in to_concat if _is_nonempty(x, axis)] if non_empties and axis == 0 and not ea_compat_axis: # ea_compat_axis see GH#39574 - if len(non_empties) < len(to_concat) and not any( - obj.dtype == _dtype_obj for obj in non_empties - ): - # Check for object dtype is an imperfect proxy for checking if - # the result dtype is going to change once the deprecation is - # enforced. + to_concat = non_empties + + any_ea, kinds, target_dtype = _get_result_dtype(to_concat, non_empties) + + if len(to_concat) < len(orig): + _, _, alt_dtype = _get_result_dtype(orig, non_empties) + if alt_dtype != target_dtype: # GH#39122 warnings.warn( "The behavior of array concatenation with empty entries is " @@ -125,42 +122,42 @@ def concat_compat( FutureWarning, stacklevel=find_stack_level(), ) - to_concat = non_empties - dtypes = {obj.dtype for obj in to_concat} - kinds = {obj.dtype.kind for obj in to_concat} - contains_datetime = any( - isinstance(dtype, (np.dtype, DatetimeTZDtype)) and dtype.kind in "mM" - for dtype in dtypes - ) or any(isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat) + if target_dtype is not None: + to_concat = [astype_array(arr, target_dtype, copy=False) for arr in to_concat] - all_empty = not len(non_empties) - single_dtype = len(dtypes) == 1 - any_ea = any(isinstance(x, ExtensionDtype) for x in dtypes) + if not isinstance(to_concat[0], np.ndarray): + # i.e. isinstance(to_concat[0], ExtensionArray) + to_concat_eas = cast("Sequence[ExtensionArray]", to_concat) + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat_eas) + else: + to_concat_arrs = cast("Sequence[np.ndarray]", to_concat) + result = np.concatenate(to_concat_arrs, axis=axis) + + if not any_ea and "b" in kinds and result.dtype.kind in "iuf": + # GH#39817 cast to object instead of casting bools to numeric + result = result.astype(object, copy=False) + return result - if contains_datetime: - return _concat_datetime(to_concat, axis=axis) +def _get_result_dtype(to_concat: Sequence[ArrayLike], non_empties: Sequence[ArrayLike]): + target_dtype = None + + dtypes = {obj.dtype for obj in to_concat} + kinds = {obj.dtype.kind for obj in to_concat} + + any_ea = any(not isinstance(x, np.ndarray) for x in to_concat) if any_ea: + # i.e. any ExtensionArrays + # we ignore axis here, as internally concatting with EAs is always # for axis=0 - if not single_dtype: + if len(dtypes) != 1: target_dtype = find_common_type([x.dtype for x in to_concat]) target_dtype = common_dtype_categorical_compat(to_concat, target_dtype) - to_concat = [ - astype_array(arr, target_dtype, copy=False) for arr in to_concat - ] - - if isinstance(to_concat[0], ABCExtensionArray): - # TODO: what about EA-backed Index? 
- to_concat_eas = cast("Sequence[ExtensionArray]", to_concat) - cls = type(to_concat[0]) - return cls._concat_same_type(to_concat_eas) - else: - to_concat_arrs = cast("Sequence[np.ndarray]", to_concat) - return np.concatenate(to_concat_arrs) - elif all_empty: + elif not len(non_empties): # we have all empties, but may need to coerce the result dtype to # object if we have non-numeric type operands (numpy would otherwise # cast this to float) @@ -170,17 +167,12 @@ def concat_compat( pass else: # coerce to object - to_concat = [x.astype("object") for x in to_concat] + target_dtype = np.dtype(object) kinds = {"o"} + else: + target_dtype = np.find_common_type(list(dtypes), []) - # error: Argument 1 to "concatenate" has incompatible type - # "Sequence[Union[ExtensionArray, ndarray[Any, Any]]]"; expected - # "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]]]" - result: np.ndarray = np.concatenate(to_concat, axis=axis) # type: ignore[arg-type] - if "b" in kinds and result.dtype.kind in "iuf": - # GH#39817 cast to object instead of casting bools to numeric - result = result.astype(object, copy=False) - return result + return any_ea, kinds, target_dtype def union_categoricals( @@ -347,45 +339,3 @@ def _maybe_unwrap(x): dtype = CategoricalDtype(categories=categories, ordered=ordered) return Categorical._simple_new(new_codes, dtype=dtype) - - -def _concatenate_2d(to_concat: Sequence[np.ndarray], axis: AxisInt) -> np.ndarray: - # coerce to 2d if needed & concatenate - if axis == 1: - to_concat = [np.atleast_2d(x) for x in to_concat] - return np.concatenate(to_concat, axis=axis) - - -def _concat_datetime(to_concat: Sequence[ArrayLike], axis: AxisInt = 0) -> ArrayLike: - """ - provide concatenation of an datetimelike array of arrays each of which is a - single M8[ns], datetime64[ns, tz] or m8[ns] dtype - - Parameters - ---------- - to_concat : sequence of arrays - axis : axis to provide concatenation - - Returns - ------- - a single array, preserving the combined dtypes - """ - from pandas.core.construction import ensure_wrapped_if_datetimelike - - to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] - - single_dtype = lib.dtypes_all_equal([x.dtype for x in to_concat]) - - # multiple types, need to coerce to object - if not single_dtype: - # ensure_wrapped_if_datetimelike ensures that astype(object) wraps - # in Timestamp/Timedelta - return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) - - # error: Unexpected keyword argument "axis" for "_concat_same_type" of - # "ExtensionArray" - to_concat_eas = cast("list[ExtensionArray]", to_concat) - result = type(to_concat_eas[0])._concat_same_type( # type: ignore[call-arg] - to_concat_eas, axis=axis - ) - return result From 03a06412e09fc5135dba9c35d9e9ed625559332a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 Apr 2023 19:56:18 -0700 Subject: [PATCH 07/12] un-xfail --- pandas/tests/io/formats/test_info.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 42aebb3d9ac1c..e79e135208995 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -495,7 +495,6 @@ def test_info_int_columns(): assert result == expected -@pytest.mark.xfail(not IS64, reason="concat will cast to int64") def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) From 7e2e995e4272f208f8e00bd98704fb10f02c8bce Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Apr 2023 07:17:29 -0700 
Subject: [PATCH 08/12] mypy fixup --- pandas/core/dtypes/concat.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index c2e059952abb5..64cef1ba52c97 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -170,7 +170,11 @@ def _get_result_dtype(to_concat: Sequence[ArrayLike], non_empties: Sequence[Arra target_dtype = np.dtype(object) kinds = {"o"} else: - target_dtype = np.find_common_type(list(dtypes), []) + # Argument 1 to "list" has incompatible type "Set[Union[ExtensionDtype, + # Any]]"; expected "Iterable[Union[dtype[Any], None, Type[Any], + # _SupportsDType[dtype[Any]], str, Tuple[Any, Union[SupportsIndex, + # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" + target_dtype = np.find_common_type(list(dtypes), []) # type: ignore[arg-type] return any_ea, kinds, target_dtype From 75d5041dd4b7dc0818f8ad7fa209a2e826b48971 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 Apr 2023 14:21:38 -0700 Subject: [PATCH 09/12] update test --- pandas/tests/reshape/concat/test_datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 950b90e581060..ce4a2abc82a55 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -567,7 +567,7 @@ def test_concat_float_datetime64(using_array_manager): if not using_array_manager: expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) - msg = "The behavior of DataFrame concatenation with empty entries is deprecated" + msg = "The behavior of DataFrame concatenation with empty or all-NA entries" with tm.assert_produces_warning(FutureWarning, match=msg): result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) From 392b40ab0b5e07faa539e94658abf64121e99fce Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 May 2023 21:44:07 -0700 Subject: [PATCH 10/12] Fix broken test --- pandas/tests/groupby/test_groupby.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index daf2d76548f11..9ade9c9b3b111 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -374,9 +374,13 @@ def f3(x): df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + depr_msg = "The behavior of array concatenation with empty entries is deprecated" + # correct result - result1 = df.groupby("a").apply(f1) - result2 = df2.groupby("a").apply(f1) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result1 = df.groupby("a").apply(f1) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result2 = df2.groupby("a").apply(f1) tm.assert_frame_equal(result1, result2) # should fail (not the same number of levels) @@ -387,7 +391,6 @@ def f3(x): df2.groupby("a").apply(f2) # should fail (incorrect shape) - depr_msg = "The behavior of array concatenation with empty entries is deprecated" with pytest.raises(AssertionError, match=msg): df.groupby("a").apply(f3) with pytest.raises(AssertionError, match=msg): From 3666bca065000a6261582574e780cfc9c392295d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 16 May 2023 08:28:44 -0700 Subject: [PATCH 11/12] remove duplicate whatsnew entries --- doc/source/whatsnew/v2.1.0.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 94abccdda638b..9176773172f04 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -225,23 +225,17 @@ Deprecations - Deprecated :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`) - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`) - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`) -- Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`) - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`) - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) -- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) -- Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`) - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`) - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) -- Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) -- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) -- Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying 
:class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) - Deprecated :class:`.DataFrameGroupBy` with ``as_index=False`` not including groupings in the result when they are not columns of the DataFrame (:issue:`49519`) - Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`) From e696c539ab2816eeda00bea2fef7bab2ffa85e39 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 May 2023 13:04:42 -0700 Subject: [PATCH 12/12] remove unused --- pandas/core/dtypes/concat.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 7de76157143e6..35ebd9a4f4f52 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -40,9 +40,6 @@ ) -_dtype_obj = np.dtype(object) - - def _is_nonempty(x, axis) -> bool: # filter empty arrays # 1-d dtypes always are included here
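
For reference, a minimal sketch of the array-level deprecation this series introduces. It is an editorial illustration, not part of the patches: the variable names (ser1, ser2, result) are made up, and it assumes a pandas build with this series applied. It mirrors the updated test_concat_empty_series case above (int64 values concatenated with an empty float64 Series).

import pandas as pd

ser1 = pd.Series([1, 2, 3], dtype="int64")
ser2 = pd.Series([], dtype="float64")

# Today the empty float64 entry is excluded before the result dtype is chosen,
# so the result is int64 and the new FutureWarning ("The behavior of array
# concatenation with empty entries is deprecated...") is emitted; once the
# deprecation is enforced, the empty entry will participate in dtype
# resolution and the result will be float64.
result = pd.concat([ser1, ser2], ignore_index=True)

# To keep the current result and silence the warning, exclude the empty
# entries before concatenating, as the warning message suggests.
result = pd.concat([obj for obj in (ser1, ser2) if not obj.empty], ignore_index=True)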
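
A corresponding sketch of the frame-level warning raised from _concatenate_join_units, again an illustration under the assumption that the patches are applied (df_time, df_float, result are hypothetical names); it loosely follows test_concat_float_datetime64.

import pandas as pd

df_time = pd.DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")})
df_float = pd.DataFrame({"A": [1.0]})  # float64 column

# The empty float64 frame is currently ignored when the result dtype is
# determined, so column "A" stays datetime64[ns]; the FutureWarning ("The
# behavior of DataFrame concatenation with empty entries is deprecated...")
# fires because the result dtype would differ (object, per the array-manager
# branch of the same test) once empty entries are no longer excluded.
result = pd.concat([df_time, df_float.iloc[:0]])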