Skip to content

Commit

Permalink
DEPR: Series/DataFrame.transform partial failure except TypeError (pa…
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored and vladu committed Apr 5, 2021
1 parent 1b3198f commit d231fb5
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ Deprecations
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`)
- Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
- Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)

Expand Down
19 changes: 12 additions & 7 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,10 @@ def transform(self) -> FrameOrSeriesUnion:
func = cast(AggFuncTypeBase, func)
try:
result = self.transform_str_or_callable(func)
except Exception:
raise ValueError("Transform function failed")
except TypeError:
raise
except Exception as err:
raise ValueError("Transform function failed") from err

# Functions that transform may return empty Series/DataFrame
# when the dtype is not appropriate
Expand Down Expand Up @@ -265,6 +267,7 @@ def transform_dict_like(self, func):

results: Dict[Hashable, FrameOrSeriesUnion] = {}
failed_names = []
all_type_errors = True
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
Expand All @@ -275,16 +278,18 @@ def transform_dict_like(self, func):
"No transform functions were provided",
}:
raise err
else:
elif not isinstance(err, TypeError):
all_type_errors = False
failed_names.append(name)
# combine results
if not results:
raise ValueError("Transform function failed")
klass = TypeError if all_type_errors else ValueError
raise klass("Transform function failed")
if len(failed_names) > 0:
warnings.warn(
f"{failed_names} did not transform successfully. "
f"Allowing for partial failure is deprecated, this will raise "
f"a ValueError in a future version of pandas."
f"{failed_names} did not transform successfully and did not raise "
f"a TypeError. If any error is raised except for TypeError, "
f"this will raise in a future version of pandas. "
f"Drop these columns/ops to avoid this warning.",
FutureWarning,
stacklevel=4,
Expand Down
58 changes: 48 additions & 10 deletions pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,35 +156,68 @@ def test_transform_method_name(method):


@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series):
def test_transform_bad_dtype(op, frame_or_series, request):
# GH 35964
if op == "rank":
request.node.add_marker(
pytest.mark.xfail(reason="GH 40418: rank does not raise a TypeError")
)

obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms
if frame_or_series is not DataFrame:
obj = obj["A"]

msg = "Transform function failed"

# tshift is deprecated
warn = None if op != "tshift" else FutureWarning
with tm.assert_produces_warning(warn):
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="unsupported operand|not supported"):
obj.transform(op)
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform([op])
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform({"A": op})
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform({"A": [op]})


@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure(op):
# GH 35964 & GH 40211
match = "Allowing for partial failure is deprecated"
def test_transform_partial_failure_typeerror(op):
# GH 35964

if op == "rank":
pytest.skip("GH 40418: rank does not raise a TypeError")

# Using object makes most transform kernels fail
df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

expected = df[["B"]].transform([op])
result = df.transform([op])
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": op})
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": [op]})
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)

expected = df.transform({"A": ["shift"], "B": [op]})
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully and did not raise a TypeError"

def op(x):
if np.sum(np.sum(x)) < 10:
raise ValueError
return x

df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})

expected = df[["B"]].transform([op])
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
Expand All @@ -200,6 +233,11 @@ def test_transform_partial_failure(op):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)

expected = df.transform({"A": ["shift"], "B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False):
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def test_transform_none_to_type():
# GH#34377
df = DataFrame({"a": [None]})
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
df.transform({"a": int})


Expand Down
49 changes: 42 additions & 7 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,29 +259,64 @@ def test_transform(string_series):

@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
# GH 35964 & GH 40211
# GH 35964
if op in ("ffill", "bfill", "pad", "backfill", "shift"):
request.node.add_marker(
pytest.mark.xfail(reason=f"{op} is successful on any dtype")
)
match = "Allowing for partial failure is deprecated"
if op in ("rank", "fillna"):
pytest.skip(f"{op} doesn't raise TypeError on object")

# Using object makes most transform kernels fail
ser = Series(3 * [object])

expected = ser.transform(["shift"])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([op, "shift"])
result = ser.transform([op, "shift"])
tm.assert_equal(result, expected)

expected = ser.transform({"B": "shift"})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": op, "B": "shift"})
result = ser.transform({"A": op, "B": "shift"})
tm.assert_equal(result, expected)

expected = ser.transform({"B": ["shift"]})
result = ser.transform({"A": [op], "B": ["shift"]})
tm.assert_equal(result, expected)

expected = ser.transform({"A": ["shift"], "B": [op]})
result = ser.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully and did not raise a TypeError"

def noop(x):
return x

def raising_op(_):
raise ValueError

ser = Series(3 * [object])

expected = ser.transform([noop])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([noop, raising_op])
tm.assert_equal(result, expected)

expected = ser.transform({"B": noop})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [op], "B": ["shift"]})
result = ser.transform({"A": raising_op, "B": noop})
tm.assert_equal(result, expected)

expected = ser.transform({"B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [raising_op], "B": [noop]})
tm.assert_equal(result, expected)

expected = ser.transform({"A": [noop], "B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False):
result = ser.transform({"A": [noop, raising_op], "B": [noop]})
tm.assert_equal(result, expected)


Expand Down

0 comments on commit d231fb5

Please sign in to comment.