From 3ff4ecc80c64f0043f9f7b5318085b596fcf43c4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 30 Oct 2024 11:28:35 +0100 Subject: [PATCH 1/3] TST (string dtype): remove xfails in extension tests + fix categorical/string dispatch --- pandas/core/arrays/string_.py | 2 +- pandas/core/ops/array_ops.py | 1 - pandas/tests/extension/base/ops.py | 26 ---------------------- pandas/tests/extension/test_categorical.py | 2 -- pandas/tests/extension/test_numpy.py | 7 ------ 5 files changed, 1 insertion(+), 37 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f20c4c8625475..231564259fec8 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -890,7 +890,7 @@ def _cmp_method(self, other, op): if not is_array_like(other): other = np.asarray(other) other = other[valid] - other = np.asarray(other) + # other = np.asarray(other) if op.__name__ in ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 983a3df57e369..301dced370aa4 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -261,7 +261,6 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # and `maybe_prepare_scalar_for_op` has already been called on `right` # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) - if ( should_extension_dispatch(left, right) or isinstance(right, (Timedelta, BaseOffset, Timestamp)) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 547114ecfddd0..222ff42d45052 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -5,10 +5,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - from pandas.core.dtypes.common import is_string_dtype import pandas as pd @@ -134,12 +130,6 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError - # TODO(infer_string) need to remove import of pyarrow - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -149,11 +139,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): ser = pd.Series(data) self.check_opname(ser, op_name, ser.iloc[0]) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -163,22 +148,12 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): df = pd.DataFrame({"A": data}) self.check_opname(df, op_name, data[0]) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, - reason="TODO(infer_string)", - strict=False, - ) def test_divmod(self, data): ser = pd.Series(data) self._check_divmod_op(ser, divmod, 1) @@ -194,7 +169,6 @@ def test_divmod_series_array(self, data, data_for_twos): other = pd.Series(other) self._check_divmod_op(other, ops.rdivmod, ser) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_add_series_with_extension_array(self, data): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index c3d4b83f731a3..8f8af607585df 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -140,7 +140,6 @@ def test_map(self, data, na_action): result = data.map(lambda x: x, na_action=na_action) tm.assert_extension_array_equal(result, data) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): # frame & scalar op_name = all_arithmetic_operators @@ -152,7 +151,6 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) super().test_arith_frame_with_scalar(data, op_name) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators if op_name == "__rmod__": diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 1b251a5118681..79cfb736941d6 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -19,8 +19,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd @@ -257,7 +255,6 @@ def test_insert_invalid(self, data, invalid_scalar): frame_scalar_exc = None series_array_exc = None - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod(self, data): divmod_exc = None if data.dtype.kind == "O": @@ -265,7 +262,6 @@ def test_divmod(self, data): self.divmod_exc = divmod_exc super().test_divmod(data) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod_series_array(self, data): ser = pd.Series(data) exc = None @@ -274,7 +270,6 @@ def test_divmod_series_array(self, data): self.divmod_exc = exc self._check_divmod_op(ser, divmod, data) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators series_scalar_exc = None @@ -288,7 +283,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) self.series_scalar_exc = series_scalar_exc super().test_arith_series_with_scalar(data, all_arithmetic_operators) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_array(self, data, all_arithmetic_operators): opname = all_arithmetic_operators series_array_exc = None @@ -297,7 +291,6 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): self.series_array_exc = series_array_exc super().test_arith_series_with_array(data, all_arithmetic_operators) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators frame_scalar_exc = None From 16a850105e70ef417e7f9d9821705219961be583 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Nov 2024 07:52:56 +0100 Subject: [PATCH 2/3] Update pandas/core/arrays/string_.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/arrays/string_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 231564259fec8..490ca93212f3a 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -890,7 +890,6 @@ def _cmp_method(self, other, op): if not is_array_like(other): other = np.asarray(other) other = other[valid] - # other = np.asarray(other) if op.__name__ in ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") From 0458834cc495f2c172d39d1b24e703e8019a17c1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Nov 2024 07:53:47 +0100 Subject: [PATCH 3/3] fixup --- pandas/core/ops/array_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 301dced370aa4..983a3df57e369 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -261,6 +261,7 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # and `maybe_prepare_scalar_for_op` has already been called on `right` # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) + if ( should_extension_dispatch(left, right) or isinstance(right, (Timedelta, BaseOffset, Timestamp))