diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 512e6e6cbb391c..92efb225682b7c 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -423,7 +423,7 @@ Deprecations - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) - Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`) -- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`) +- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`) - Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`) - Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3a2c2d7124963f..090ae266906f35 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -227,8 +227,10 @@ def transform(self) -> FrameOrSeriesUnion: func = cast(AggFuncTypeBase, func) try: result = self.transform_str_or_callable(func) - except Exception: - raise ValueError("Transform function failed") + except TypeError: + raise + except Exception as err: + raise ValueError("Transform function failed") from err # Functions that transform may return empty Series/DataFrame # when the dtype is not appropriate @@ -265,6 +267,7 @@ def transform_dict_like(self, func): results: Dict[Hashable, FrameOrSeriesUnion] = {} failed_names = [] + all_type_errors = True for name, how in func.items(): colg = obj._gotitem(name, ndim=1) try: @@ -275,16 +278,18 @@ def transform_dict_like(self, func): "No transform functions were provided", }: raise err - else: + elif not isinstance(err, TypeError): + all_type_errors = False failed_names.append(name) # combine results if not results: - raise ValueError("Transform function failed") + klass = TypeError if all_type_errors else ValueError + raise klass("Transform function failed") if len(failed_names) > 0: warnings.warn( - f"{failed_names} did not transform successfully. " - f"Allowing for partial failure is deprecated, this will raise " - f"a ValueError in a future version of pandas." + f"{failed_names} did not transform successfully and did not raise " + f"a TypeError. If any error is raised except for TypeError, " + f"this will raise in a future version of pandas. " f"Drop these columns/ops to avoid this warning.", FutureWarning, stacklevel=4, diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 6a3634ca8b62a3..d56c8c1e83ab49 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -156,35 +156,68 @@ def test_transform_method_name(method): @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) -def test_transform_bad_dtype(op, frame_or_series): +def test_transform_bad_dtype(op, frame_or_series, request): # GH 35964 + if op == "rank": + request.node.add_marker( + pytest.mark.xfail(reason="GH 40418: rank does not raise a TypeError") + ) + obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms if frame_or_series is not DataFrame: obj = obj["A"] - msg = "Transform function failed" - # tshift is deprecated warn = None if op != "tshift" else FutureWarning with tm.assert_produces_warning(warn): - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="unsupported operand|not supported"): obj.transform(op) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform([op]) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform({"A": op}) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform({"A": [op]}) @pytest.mark.parametrize("op", frame_kernels_raise) -def test_transform_partial_failure(op): - # GH 35964 & GH 40211 - match = "Allowing for partial failure is deprecated" +def test_transform_partial_failure_typeerror(op): + # GH 35964 + + if op == "rank": + pytest.skip("GH 40418: rank does not raise a TypeError") # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + expected = df[["B"]].transform([op]) + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + result = df.transform({"A": op, "B": op}) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully and did not raise a TypeError" + + def op(x): + if np.sum(np.sum(x)) < 10: + raise ValueError + return x + + df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + expected = df[["B"]].transform([op]) with tm.assert_produces_warning(FutureWarning, match=match): result = df.transform([op]) @@ -200,6 +233,11 @@ def test_transform_partial_failure(op): result = df.transform({"A": [op], "B": [op]}) tm.assert_equal(result, expected) + expected = df.transform({"A": ["shift"], "B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + @pytest.mark.parametrize("use_apply", [True, False]) def test_transform_passes_args(use_apply, frame_or_series): diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 690d6bed0cb9bd..5a8f238cd56053 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -276,7 +276,7 @@ def test_transform_none_to_type(): # GH#34377 df = DataFrame({"a": [None]}) msg = "Transform function failed" - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): df.transform({"a": int}) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index dcb5de29da320b..6722fc43aa75e3 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -259,29 +259,64 @@ def test_transform(string_series): @pytest.mark.parametrize("op", series_transform_kernels) def test_transform_partial_failure(op, request): - # GH 35964 & GH 40211 + # GH 35964 if op in ("ffill", "bfill", "pad", "backfill", "shift"): request.node.add_marker( pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) - match = "Allowing for partial failure is deprecated" + if op in ("rank", "fillna"): + pytest.skip(f"{op} doesn't raise TypeError on object") # Using object makes most transform kernels fail ser = Series(3 * [object]) expected = ser.transform(["shift"]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([op, "shift"]) + result = ser.transform([op, "shift"]) tm.assert_equal(result, expected) expected = ser.transform({"B": "shift"}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": op, "B": "shift"}) + result = ser.transform({"A": op, "B": "shift"}) tm.assert_equal(result, expected) expected = ser.transform({"B": ["shift"]}) + result = ser.transform({"A": [op], "B": ["shift"]}) + tm.assert_equal(result, expected) + + expected = ser.transform({"A": ["shift"], "B": [op]}) + result = ser.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully and did not raise a TypeError" + + def noop(x): + return x + + def raising_op(_): + raise ValueError + + ser = Series(3 * [object]) + + expected = ser.transform([noop]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([noop, raising_op]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": noop}) with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op], "B": ["shift"]}) + result = ser.transform({"A": raising_op, "B": noop}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + + expected = ser.transform({"A": [noop], "B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): + result = ser.transform({"A": [noop, raising_op], "B": [noop]}) tm.assert_equal(result, expected)