From dbeeb1f05bca199b3c1aed979e6ae72074a82243 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Nov 2024 02:38:45 -0800 Subject: [PATCH] TST (string dtype): un-xfail string tests specific to object dtype (#59433) Co-authored-by: Joris Van den Bossche --- pandas/tests/copy_view/test_interp_fillna.py | 8 ++---- pandas/tests/copy_view/test_replace.py | 3 +-- pandas/tests/test_algos.py | 28 +++++++++++++------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index fc57178b897b9..6bcda0ef2c35a 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - from pandas import ( NA, DataFrame, @@ -114,18 +110,18 @@ def test_interp_fill_functions_inplace(func, dtype): assert view._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_interpolate_cannot_with_object_dtype(): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) + df["a"] = df["a"].astype(object) msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.interpolate() -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) + df["a"] = df["a"].astype(object) arr_a = get_array(df, "a") # Now CoW makes a copy, it should not! diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index a8acd446ff5f5..e57514bffdf1e 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -259,10 +259,9 @@ def test_replace_empty_list(): assert not df2._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(value): - df = DataFrame({"a": ["a", "b", "c"]}) + df = DataFrame({"a": ["a", "b", "c"]}, dtype=object) arr = get_array(df, "a") df.replace(["c"], value, inplace=True) assert np.shares_memory(arr, get_array(df, "a")) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index dac74a0e32a42..3d1177c23c612 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs import ( algos as libalgos, hashtable as ht, @@ -1684,12 +1682,17 @@ def test_unique_complex_numbers(self, array, expected): class TestHashTable: - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1697,7 +1700,7 @@ class TestHashTable: ) def test_hashtable_unique(self, htable, data, writable): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan @@ -1724,12 +1727,17 @@ def test_hashtable_unique(self, htable, data, writable): reconstr = result_unique[result_inverse] tm.assert_numpy_array_equal(reconstr, s_duplicated.values) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "htable, data", [ - (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]), - (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]), + ( + ht.PyObjectHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), + ( + ht.StringHashTable, + np.array([f"foo_{i}" for i in range(1000)], dtype=object), + ), (ht.Float64HashTable, np.arange(1000, dtype=np.float64)), (ht.Int64HashTable, np.arange(1000, dtype=np.int64)), (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)), @@ -1737,7 +1745,7 @@ def test_hashtable_unique(self, htable, data, writable): ) def test_hashtable_factorize(self, htable, writable, data): # output of maker has guaranteed unique elements - s = Series(data) + s = Series(data, dtype=data.dtype) if htable == ht.Float64HashTable: # add NaN for float column s.loc[500] = np.nan