Skip to content

Commit

Permalink
BUG: fix treatment of NaNs when .apply() function is used on categorical columns. (#59966)
Browse files Browse the repository at this point in the history

* remove action=ignore for .apply() on cat dtype

* add PR reference in comments

* fix pytest linting

* refac failing test_series_apply.py

* Trigger CI

* changes post review

* rephrase change log
  • Loading branch information
saldanhad authored Oct 4, 2024
1 parent 4ad6c7a commit 58de332
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 17 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ Bug fixes

Categorical
^^^^^^^^^^^
-
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
-

Datetimelike
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@
is_numeric_dtype,
is_sequence,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
ExtensionDtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCNDFrame,
Expand Down Expand Up @@ -1465,14 +1462,7 @@ def curried(x):

else:
curried = func

# row-wise access
# apply doesn't have a `na_action` keyword and for backward compat reasons
# we need to give `na_action="ignore"` for categorical data.
# TODO: remove the `na_action="ignore"` when that default has been changed in
# Categorical (GH51645).
action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
mapped = obj._map_values(mapper=curried, na_action=action)
mapped = obj._map_values(mapper=curried)

if len(mapped) and isinstance(mapped[0], ABCSeries):
# GH#43986 Need to do list(mapped) in order to get treated as nested
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,8 +741,9 @@ def test_apply_category_equalness(val):

result = df.a.apply(lambda x: x == val)
expected = Series(
[np.nan if pd.isnull(x) else x == val for x in df_values], name="a"
[False if pd.isnull(x) else x == val for x in df_values], name="a"
)
# NaN entries now yield False: apply no longer skips NaN for categorical dtype (GH 59966)
tm.assert_series_equal(result, expected)


Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,10 @@ def test_apply_categorical_with_nan_values(series, by_row):
with pytest.raises(AttributeError, match=msg):
s.apply(lambda x: x.split("-")[0], by_row=by_row)
return

result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
# NaN is now passed to the applied function for categorical dtype (GH 59966)
result = s.apply(lambda x: x.split("-")[0] if pd.notna(x) else False, by_row=by_row)
result = result.astype(object)
expected = Series(["1", "1", np.nan], dtype="category")
expected = Series(["1", "1", False], dtype="category")
expected = expected.astype(object)
tm.assert_series_equal(result, expected)

Expand Down

0 comments on commit 58de332

Please sign in to comment.