Skip to content

Commit

Permalink
Deprecate non-keyword arguments for drop_duplicates. (pandas-dev#41500)
Browse files Browse the repository at this point in the history
* ENH: Deprecate non-keyword arguments for drop_duplicates.

* leave newline

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* ENH: Deprecate non-keyword arguments for drop_duplicates.

* remove redundant line

* ENH: Deprecate non-keyword arguments for drop_duplicates.

Co-authored-by: Marco Gorelli <marcogorelli@protonmail.com>
  • Loading branch information
2 people authored and TLouf committed Jun 1, 2021
1 parent 3bba8b2 commit c5490cf
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,7 @@ Deprecations
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates`, :meth:`Index.drop_duplicates` and :meth:`MultiIndex.drop_duplicates` (:issue:`41485`)
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)

Expand Down
1 change: 1 addition & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6007,6 +6007,7 @@ def dropna(
else:
return result

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"])
def drop_duplicates(
self,
subset: Hashable | Sequence[Hashable] | None = None,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from pandas.util._decorators import (
Appender,
cache_readonly,
deprecate_nonkeyword_arguments,
doc,
)

Expand Down Expand Up @@ -2651,7 +2652,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
result = super().unique()
return self._shallow_copy(result)

@final
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
"""
Return Index with duplicate values removed.
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from pandas.util._decorators import (
Appender,
cache_readonly,
deprecate_nonkeyword_arguments,
doc,
)

Expand Down Expand Up @@ -3775,6 +3776,10 @@ def isin(self, values, level=None) -> np.ndarray:
return np.zeros(len(levs), dtype=np.bool_)
return levs.isin(values)

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex:
    # Thin override: the de-duplication itself is done by the parent class.
    # Only ``self`` is listed in ``allowed_args``, so ``keep`` is expected to
    # be passed by keyword; it is forwarded by keyword here as well.
    deduplicated = super().drop_duplicates(keep=keep)
    return deduplicated

# ---------------------------------------------------------------
# Arithmetic/Numeric Methods - Disabled

Expand Down
1 change: 1 addition & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,6 +2057,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None:
def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None:
...

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def drop_duplicates(self, keep="first", inplace=False) -> Series | None:
"""
Return Series with duplicate values removed.
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,17 @@ def test_drop_duplicates_non_boolean_ignore_index(arg):
msg = '^For argument "ignore_index" expected type bool, received type .*.$'
with pytest.raises(ValueError, match=msg):
df.drop_duplicates(ignore_index=arg)


def test_drop_duplicates_pos_args_deprecation():
    """Check the FutureWarning for positional args in DataFrame.drop_duplicates."""
    # GH#41485
    frame = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]})
    expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2])
    msg = (
        "In a future version of pandas all arguments of "
        "DataFrame.drop_duplicates except for the argument 'subset' "
        "will be keyword-only"
    )

    # ``subset`` is allowed positionally; the extra positional "last" is the
    # argument that should trigger the deprecation warning.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = frame.drop_duplicates(["b", "c"], "last")

    # The call must still return the de-duplicated frame despite the warning.
    tm.assert_frame_equal(expected, result)
13 changes: 13 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,16 @@ def test_duplicated_drop_duplicates():
assert duplicated.dtype == bool
expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)


def test_multi_drop_duplicates_pos_args_deprecation():
    """Check the FutureWarning for positional args in MultiIndex.drop_duplicates."""
    # GH#41485
    mi = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])
    expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
    msg = (
        "In a future version of pandas all arguments of "
        "MultiIndex.drop_duplicates will be keyword-only"
    )

    # Passing "last" positionally should warn while still being honoured.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = mi.drop_duplicates("last")

    tm.assert_index_equal(expected, result)
14 changes: 14 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,3 +1738,17 @@ def test_construct_from_memoryview(klass, extra_kwargs):
result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs)
expected = klass(range(2000, 2005), **extra_kwargs)
tm.assert_index_equal(result, expected)


def test_drop_duplicates_pos_args_deprecation():
    """Check the FutureWarning for positional args in Index.drop_duplicates.

    The deprecated positional call is made exactly once, inside the warning
    context, and its return value is checked afterwards. This mirrors the
    DataFrame, Series and MultiIndex variants of this test and ensures no
    deprecated call is made whose warning goes unasserted.
    """
    # GH#41485
    idx = Index([1, 2, 3, 1])
    msg = (
        "In a future version of pandas all arguments of "
        "Index.drop_duplicates will be keyword-only"
    )
    # Single call: capture the result while asserting the warning, instead of
    # calling once for the warning and a second time for the value.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = idx.drop_duplicates("last")
    expected = Index([2, 3, 1])
    tm.assert_index_equal(expected, result)
13 changes: 13 additions & 0 deletions pandas/tests/series/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,16 @@ def test_drop_duplicates_categorical_bool(self, ordered):
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])


def test_drop_duplicates_pos_args_deprecation():
    """Check the FutureWarning for positional args in Series.drop_duplicates."""
    # GH#41485
    ser = Series(["a", "b", "c", "b"])
    expected = Series(["a", "c", "b"], index=[0, 2, 3])
    msg = (
        "In a future version of pandas all arguments of "
        "Series.drop_duplicates will be keyword-only"
    )

    # Passing "last" positionally should warn while still being honoured.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.drop_duplicates("last")

    tm.assert_series_equal(expected, result)

0 comments on commit c5490cf

Please sign in to comment.