diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a4e2a81512f83..338187a6b189d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -680,6 +680,7 @@ Deprecations - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) +- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates`, :meth:`Index.drop_duplicates` and :meth:`MultiIndex.drop_duplicates` (:issue:`41485`) - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`) - Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. 
Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`,:issue:`33401`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b564d55a342c..f89860e37f876 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6007,6 +6007,7 @@ def dropna( else: return result + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 37af2074e5150..2a6f044288fea 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -54,6 +54,7 @@ from pandas.util._decorators import ( Appender, cache_readonly, + deprecate_nonkeyword_arguments, doc, ) @@ -2651,7 +2652,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: result = super().unique() return self._shallow_copy(result) - @final + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: """ Return Index with duplicate values removed. 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b50c741b123e2..ec226223f078f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -41,6 +41,7 @@ from pandas.util._decorators import ( Appender, cache_readonly, + deprecate_nonkeyword_arguments, doc, ) @@ -3775,6 +3776,10 @@ def isin(self, values, level=None) -> np.ndarray: return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex: + return super().drop_duplicates(keep=keep) + # --------------------------------------------------------------- # Arithmetic/Numeric Methods - Disabled diff --git a/pandas/core/series.py b/pandas/core/series.py index 4eba0db7e98ec..36b9b452c6b85 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2057,6 +2057,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None: def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None: ... + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def drop_duplicates(self, keep="first", inplace=False) -> Series | None: """ Return Series with duplicate values removed. 
diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 10c1f37f4c9ba..8cbf7bbfe0368 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -471,3 +471,17 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): msg = '^For argument "ignore_index" expected type bool, received type .*.$' with pytest.raises(ValueError, match=msg): df.drop_duplicates(ignore_index=arg) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) + msg = ( + "In a future version of pandas all arguments of " + "DataFrame.drop_duplicates except for the argument 'subset' " + "will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.drop_duplicates(["b", "c"], "last") + expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index ea59d55989f8b..c2b3647379234 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -306,3 +306,16 @@ def test_duplicated_drop_duplicates(): assert duplicated.dtype == bool expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + +def test_multi_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) + msg = ( + "In a future version of pandas all arguments of " + "MultiIndex.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.drop_duplicates("last") + expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) + tm.assert_index_equal(expected, result) diff --git 
a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 47657fff56ceb..f41c79bd09f67 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1738,3 +1738,17 @@ def test_construct_from_memoryview(klass, extra_kwargs): result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs) expected = klass(range(2000, 2005), **extra_kwargs) tm.assert_index_equal(result, expected) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 2, 3, 1]) + msg = ( + "In a future version of pandas all arguments of " + "Index.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.drop_duplicates("last") + result = idx.drop_duplicates("last") + expected = Index([2, 3, 1]) + tm.assert_index_equal(expected, result) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index dae1bbcd86e81..7eb51f8037792 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -223,3 +223,16 @@ def test_drop_duplicates_categorical_bool(self, ordered): return_value = sc.drop_duplicates(keep=False, inplace=True) assert return_value is None tm.assert_series_equal(sc, tc[~expected]) + + +def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + s = Series(["a", "b", "c", "b"]) + msg = ( + "In a future version of pandas all arguments of " + "Series.drop_duplicates will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.drop_duplicates("last") + expected = Series(["a", "c", "b"], index=[0, 2, 3]) + tm.assert_series_equal(expected, result)