From a83184f826d905357ea4feebfc26f1e82fbcd439 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 6 Nov 2024 19:36:22 +0100 Subject: [PATCH] [backport 2.3.x] TST (string dtype): update all tests in tests/frame/indexing (#60193) (#60212) * TST (string dtype): update all tests in tests/frame/indexing (#60193) (cherry picked from commit bec2dbca274a4f983790d069279a4b3aec184f49) * update for 2.3.x --- pandas/tests/frame/indexing/test_coercion.py | 24 +++++++++++++----- pandas/tests/frame/indexing/test_indexing.py | 18 +++++--------- pandas/tests/frame/indexing/test_insert.py | 6 ++--- pandas/tests/frame/indexing/test_setitem.py | 26 +++++++++----------- pandas/tests/frame/indexing/test_where.py | 18 +++++++++----- pandas/tests/frame/indexing/test_xs.py | 5 +--- 6 files changed, 51 insertions(+), 46 deletions(-) diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index 9d20821ae8bc6..f7f7b2c7c872a 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ b/pandas/tests/frame/indexing/test_coercion.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -99,28 +97,42 @@ def test_6942(indexer_al): assert df.iloc[0, 0] == t2 -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_26395(indexer_al): # .at case fixed by GH#45121 (best guess) df = DataFrame(index=["A", "B", "C"]) df["D"] = 0 indexer_al(df)["C", "D"] = 2 - expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) + expected = DataFrame( + {"D": [0, 0, 2]}, + index=["A", "B", "C"], + columns=pd.Index(["D"], dtype=object), + dtype=np.int64, + ) tm.assert_frame_equal(df, expected) with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype" ): indexer_al(df)["C", "D"] = 44.5 - expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64) + expected = DataFrame( + {"D": [0, 0, 44.5]}, + index=["A", "B", "C"], + columns=pd.Index(["D"], dtype=object), + dtype=np.float64, + ) tm.assert_frame_equal(df, expected) with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype" ): indexer_al(df)["C", "D"] = "hello" - expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object) + expected = DataFrame( + {"D": [0, 0, "hello"]}, + index=["A", "B", "C"], + columns=pd.Index(["D"], dtype=object), + dtype=object, + ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4094f14c50608..1721fe2c0eb8b 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -12,7 +12,6 @@ from pandas._config import using_string_dtype from pandas._libs import iNaT -from pandas.compat import HAS_PYARROW from pandas.errors import ( InvalidIndexError, PerformanceWarning, @@ -518,18 +517,17 @@ def test_setitem_ambig(self, using_infer_string): else: assert dm[2].dtype == np.object_ - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) - def test_setitem_None(self, float_frame, using_infer_string): + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + def test_setitem_None(self, float_frame): # GH #766 float_frame[None] = float_frame["A"] - key = None if not using_infer_string else np.nan tm.assert_series_equal( float_frame.iloc[:, -1], float_frame["A"], check_names=False ) tm.assert_series_equal( - float_frame.loc[:, key], float_frame["A"], check_names=False + float_frame.loc[:, None], float_frame["A"], check_names=False ) - tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False) + tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False) def test_loc_setitem_boolean_mask_allfalse(self): # GH 9596 @@ -1191,7 +1189,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_loc_setitem_datetimelike_with_inference(self): # GH 7592 # assignment of timedeltas with NaT @@ -1210,13 +1207,10 @@ def test_loc_setitem_datetimelike_with_inference(self): result = df.dtypes expected = Series( [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, - index=list("ABCDEFGH"), + index=Index(list("ABCDEFGH"), dtype=object), ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail( - using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" - ) def test_getitem_boolean_indexing_mixed(self): df = DataFrame( { @@ -1258,7 +1252,7 @@ def test_getitem_boolean_indexing_mixed(self): tm.assert_frame_equal(df2, expected) df["foo"] = "test" - msg = "not supported between instances|unorderable types" + msg = "not supported between instances|unorderable types|Invalid comparison" with pytest.raises(TypeError, match=msg): df[df > 0.3] = 1 diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 82b75459f08d0..4cf297b4c037d 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import PerformanceWarning from pandas import ( @@ -62,7 +60,6 @@ def test_insert_column_bug_4032(self): expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_insert_with_columns_dups(self): # GH#14291 df = DataFrame() @@ -70,7 +67,8 @@ def test_insert_with_columns_dups(self): df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) exp = DataFrame( - [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], + columns=Index(["A", "A", "A"], dtype=object), ) tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index bce3cb5dacabe..190218a82d231 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td from pandas.core.dtypes.base import _registry as ea_registry @@ -148,13 +146,16 @@ def test_setitem_different_dtype(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_empty_columns(self): # GH 13522 df = DataFrame(index=["A", "B", "C"]) df["X"] = df.index df["X"] = ["x", "y", "z"] - exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) + exp = DataFrame( + data={"X": ["x", "y", "z"]}, + index=["A", "B", "C"], + columns=Index(["X"], dtype=object), + ) tm.assert_frame_equal(df, exp) def test_setitem_dt64_index_empty_columns(self): @@ -164,14 +165,15 @@ def test_setitem_dt64_index_empty_columns(self): df["A"] = rng assert df["A"].dtype == np.dtype("M8[ns]") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_timestamp_empty_columns(self): # GH#19843 df = DataFrame(index=range(3)) df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns") expected = DataFrame( - [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"] + [[Timestamp("20130101", tz="UTC")]] * 3, + index=range(3), + columns=Index(["now"], dtype=object), ) tm.assert_frame_equal(df, expected) @@ -204,14 +206,13 @@ def test_setitem_with_unaligned_sparse_value(self): expected = Series(SparseArray([1, 0, 0]), name="new_column") tm.assert_series_equal(df["new_column"], expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_period_preserves_dtype(self): # GH: 26861 data = [Period("2003-12", "D")] result = DataFrame([]) result["a"] = data - expected = DataFrame({"a": data}) + expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object)) tm.assert_frame_equal(result, expected) @@ -677,11 +678,10 @@ def test_setitem_iloc_two_dimensional_generator(self): expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_dtypes_bytes_type_to_object(self): # GH 20734 index = Series(name="id", dtype="S24") - df = DataFrame(index=index) + df = DataFrame(index=index, columns=Index([], dtype="str")) df["a"] = Series(name="a", index=index, dtype=np.uint32) df["b"] = Series(name="b", index=index, dtype="S64") df["c"] = Series(name="c", index=index, dtype="S64") @@ -712,7 +712,6 @@ def test_setitem_ea_dtype_rhs_series(self): # TODO(ArrayManager) set column with 2d column array, see #44788 @td.skip_array_manager_not_yet_implemented - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_npmatrix_2d(self): # GH#42376 # for use-case df["x"] = sparse.random((10, 10)).mean(axis=1) @@ -721,7 +720,7 @@ def test_setitem_npmatrix_2d(self): ) a = np.ones((10, 1)) - df = DataFrame(index=np.arange(10)) + df = DataFrame(index=np.arange(10), columns=Index([], dtype="str")) df["np-array"] = a # Instantiation of `np.matrix` gives PendingDeprecationWarning @@ -936,12 +935,11 @@ def test_setitem_with_expansion_categorical_dtype(self): ser.name = "E" tm.assert_series_equal(result2.sort_index(), ser.sort_index()) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_setitem_scalars_no_index(self): # GH#16823 / GH#17894 df = DataFrame() df["foo"] = 1 - expected = DataFrame(columns=["foo"]).astype(np.int64) + expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64) tm.assert_frame_equal(df, expected) def test_setitem_newcol_tuple_key(self, float_frame): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 0517485888b38..861147f5b58dd 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -46,7 +46,6 @@ def is_ok(s): class TestDataFrameIndexingWhere: - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_get(self, where_frame, float_string_frame): def _check_get(df, cond, check_dtypes=True): other1 = _safe_add(df) @@ -64,7 +63,10 @@ def _check_get(df, cond, check_dtypes=True): # check getting df = where_frame if df is float_string_frame: - msg = "'>' not supported between instances of 'str' and 'int'" + msg = ( + "'>' not supported between instances of 'str' and 'int'" + "|Invalid comparison" + ) with pytest.raises(TypeError, match=msg): df > 0 return @@ -98,7 +100,6 @@ def test_where_upcasting(self): tm.assert_series_equal(result, expected) @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_alignment(self, where_frame, float_string_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): @@ -130,7 +131,10 @@ def _check_align(df, cond, other, check_dtypes=True): df = where_frame if df is float_string_frame: - msg = "'>' not supported between instances of 'str' and 'int'" + msg = ( + "'>' not supported between instances of 'str' and 'int'" + "|Invalid comparison" + ) with pytest.raises(TypeError, match=msg): df > 0 return @@ -174,7 +178,6 @@ def test_where_invalid(self): df.mask(0) @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_where_set(self, where_frame, float_string_frame, mixed_int_frame): # where inplace @@ -196,7 +199,10 @@ def _check_set(df, cond, check_dtypes=True): df = where_frame if df is float_string_frame: - msg = "'>' not supported between instances of 'str' and 'int'" + msg = ( + "'>' not supported between instances of 'str' and 'int'" + "|Invalid comparison" + ) with pytest.raises(TypeError, match=msg): df > 0 return diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 4ca435fa5acc5..2aa27d1d6a548 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import SettingWithCopyError from pandas import ( @@ -79,10 +77,9 @@ def test_xs( else: assert (expected == 5).all() - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_xs_corner(self): # pathological mixed-type reordering case - df = DataFrame(index=[0]) + df = DataFrame(index=[0], columns=Index([], dtype="str")) df["A"] = 1.0 df["B"] = "foo" df["C"] = 2.0