pandas-dev · mroeschke · Aug 8, 2022 · Jul 23, 2022 · Jul 25, 2022 · Jul 25, 2022
diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
@@ -680,12 +680,12 @@ Converting the raw grades to a categorical data type:
     df["grade"] = df["raw_grade"].astype("category")
     df["grade"]
 
-Rename the categories to more meaningful names (assigning to
-:meth:`Series.cat.categories` is in place!):
+Rename the categories to more meaningful names:
 
 .. ipython:: python
 
-    df["grade"].cat.categories = ["very good", "good", "very bad"]
+    new_categories = ["very good", "good", "very bad"]
+    df["grade"] = df["grade"].cat.rename_categories(new_categories)
 
 Reorder the categories and simultaneously add the missing categories (methods under :meth:`Series.cat` return a new :class:`Series` by default):
 

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -334,18 +334,16 @@ It's also possible to pass in the categories in a specific order:
 Renaming categories
 ~~~~~~~~~~~~~~~~~~~
 
-Renaming categories is done by assigning new values to the
-``Series.cat.categories`` property or by using the
+Renaming categories is done by using the
 :meth:`~pandas.Categorical.rename_categories` method:
 
 
 .. ipython:: python
 
     s = pd.Series(["a", "b", "c", "a"], dtype="category")
     s
-    s.cat.categories = ["Group %s" % g for g in s.cat.categories]
-    s
-    s = s.cat.rename_categories([1, 2, 3])
+    s.cat.rename_categories(["Group %s" % g for g in s.cat.categories])
+    s = s.cat.rename_categories([1, 2, 3])
     s
     # You can also pass a dict-like object to map the renaming
     s = s.cat.rename_categories({1: "x", 2: "y", 3: "z"})
@@ -365,7 +363,7 @@ Categories must be unique or a ``ValueError`` is raised:
 .. ipython:: python
 
     try:
-        s.cat.categories = [1, 1, 1]
+        s = s.cat.rename_categories([1, 1, 1])
     except ValueError as e:
         print("ValueError:", str(e))
 
@@ -374,7 +372,7 @@ Categories must also not be ``NaN`` or a ``ValueError`` is raised:
 .. ipython:: python
 
     try:
-        s.cat.categories = [1, 2, np.nan]
+        s = s.cat.rename_categories([1, 2, np.nan])
     except ValueError as e:
         print("ValueError:", str(e))
 
@@ -702,7 +700,7 @@ of length "1".
 .. ipython:: python
 
     df.iat[0, 0]
-    df["cats"].cat.categories = ["x", "y", "z"]
+    df["cats"] = df["cats"].cat.rename_categories(["x", "y", "z"])
     df.at["h", "cats"]  # returns a string
 
 .. note::
@@ -960,7 +958,7 @@ relevant columns back to ``category`` and assign the right categories and catego
 
     s = pd.Series(pd.Categorical(["a", "b", "b", "a", "a", "d"]))
     # rename the categories
-    s.cat.categories = ["very good", "good", "bad"]
+    s = s.cat.rename_categories(["very good", "good", "bad"])
     # reorder the categories and add missing categories
     s = s.cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
     df = pd.DataFrame({"cats": s, "vals": [1, 2, 3, 4, 5, 6]})
@@ -1164,6 +1162,7 @@ Constructing a ``Series`` from a ``Categorical`` will not copy the input
 change the original ``Categorical``:
 
 .. ipython:: python
+    :okwarning:
 
     cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
     s = pd.Series(cat, name="cat")

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -558,7 +558,8 @@ This matches the behavior of :meth:`Categorical.set_categories`.
       df = pd.read_csv(StringIO(data), dtype="category")
       df.dtypes
       df["col3"]
-      df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories)
+      new_categories = pd.to_numeric(df["col3"].cat.categories)
+      df["col3"] = df["col3"].cat.rename_categories(new_categories)
       df["col3"]
 
 

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -271,6 +271,7 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification
    such as :func:`to_datetime`.
 
    .. ipython:: python
+      :okwarning:
 
       df = pd.read_csv(StringIO(data), dtype="category")
       df.dtypes

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -775,6 +775,8 @@ Other Deprecations
 - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
 - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`)
 - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)
+- Deprecated the ``inplace`` keyword in :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, and :meth:`Categorical.as_unordered` (:issue:`37643`)
+- Deprecated setting a categorical's categories with ``cat.categories = ['a', 'b', 'c']``; use :meth:`Categorical.rename_categories` instead (:issue:`37643`)
 - Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
 - Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -745,15 +745,14 @@ def categories(self) -> Index:
 
     @categories.setter
     def categories(self, categories) -> None:
-        new_dtype = CategoricalDtype(categories, ordered=self.ordered)
-        if self.dtype.categories is not None and len(self.dtype.categories) != len(
-            new_dtype.categories
-        ):
-            raise ValueError(
-                "new categories need to have the same number of "
-                "items as the old categories!"
-            )
-        super().__init__(self._ndarray, new_dtype)
+        warn(
+            "Setting categories in-place is deprecated and will raise in a "
+            "future version. Use rename_categories instead.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
+        self._set_categories(categories)
 
     @property
     def ordered(self) -> Ordered:
@@ -814,7 +813,7 @@ def _set_categories(self, categories, fastpath=False):
         ):
             raise ValueError(
                 "new categories need to have the same number of "
-                "items than the old categories!"
+                "items as the old categories!"
             )
 
         super().__init__(self._ndarray, new_dtype)
@@ -836,7 +835,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Categorical:
         return type(self)(codes, dtype=dtype, fastpath=True)
 
     @overload
-    def set_ordered(self, value, *, inplace: Literal[False] = ...) -> Categorical:
+    def set_ordered(
+        self, value, *, inplace: NoDefault | Literal[False] = ...
+    ) -> Categorical:
         ...
 
     @overload
@@ -848,7 +849,9 @@ def set_ordered(self, value, *, inplace: bool) -> Categorical | None:
         ...
 
     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"])
-    def set_ordered(self, value, inplace: bool = False) -> Categorical | None:
+    def set_ordered(
+        self, value, inplace: bool | NoDefault = no_default
+    ) -> Categorical | None:
         """
         Set the ordered attribute to the boolean value.
 
@@ -859,7 +862,22 @@ def set_ordered(self, value, inplace: bool = False) -> Categorical | None:
         inplace : bool, default False
            Whether or not to set the ordered attribute in-place or return
            a copy of this categorical with ordered set to the value.
+
+           .. deprecated:: 1.5.0
+
         """
+        if inplace is not no_default:
+            warn(
+                "The `inplace` parameter in pandas.Categorical."
+                "set_ordered is deprecated and will be removed in "
+                "a future version. setting ordered-ness on categories will always "
+                "return a new Categorical object.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        else:
+            inplace = False
+
         inplace = validate_bool_kwarg(inplace, "inplace")
         new_dtype = CategoricalDtype(self.categories, ordered=value)
         cat = self if inplace else self.copy()
@@ -869,15 +887,15 @@ def set_ordered(self, value, inplace: bool = False) -> Categorical | None:
         return None
 
     @overload
-    def as_ordered(self, *, inplace: Literal[False] = ...) -> Categorical:
+    def as_ordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical:
         ...
 
     @overload
     def as_ordered(self, *, inplace: Literal[True]) -> None:
         ...
 
     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
-    def as_ordered(self, inplace: bool = False) -> Categorical | None:
+    def as_ordered(self, inplace: bool | NoDefault = no_default) -> Categorical | None:
         """
         Set the Categorical to be ordered.
 
@@ -887,24 +905,29 @@ def as_ordered(self, inplace: bool = False) -> Categorical | None:
            Whether or not to set the ordered attribute in-place or return
            a copy of this categorical with ordered set to True.
 
+           .. deprecated:: 1.5.0
+
         Returns
         -------
         Categorical or None
             Ordered Categorical or None if ``inplace=True``.
         """
-        inplace = validate_bool_kwarg(inplace, "inplace")
+        if inplace is not no_default:
+            inplace = validate_bool_kwarg(inplace, "inplace")
         return self.set_ordered(True, inplace=inplace)
 
     @overload
-    def as_unordered(self, *, inplace: Literal[False] = ...) -> Categorical:
+    def as_unordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical:
         ...
 
     @overload
     def as_unordered(self, *, inplace: Literal[True]) -> None:
         ...
 
     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
-    def as_unordered(self, inplace: bool = False) -> Categorical | None:
+    def as_unordered(
+        self, inplace: bool | NoDefault = no_default
+    ) -> Categorical | None:
         """
         Set the Categorical to be unordered.
 
@@ -914,12 +937,15 @@ def as_unordered(self, inplace: bool = False) -> Categorical | None:
            Whether or not to set the ordered attribute in-place or return
            a copy of this categorical with ordered set to False.
 
+           .. deprecated:: 1.5.0
+
         Returns
         -------
         Categorical or None
             Unordered Categorical or None if ``inplace=True``.
         """
-        inplace = validate_bool_kwarg(inplace, "inplace")
+        if inplace is not no_default:
+            inplace = validate_bool_kwarg(inplace, "inplace")
         return self.set_ordered(False, inplace=inplace)
 
     def set_categories(
@@ -1108,11 +1134,11 @@ def rename_categories(
         cat = self if inplace else self.copy()
 
         if is_dict_like(new_categories):
-            cat.categories = [new_categories.get(item, item) for item in cat.categories]
+            new_categories = [new_categories.get(item, item) for item in cat.categories]
         elif callable(new_categories):
-            cat.categories = [new_categories(item) for item in cat.categories]
-        else:
-            cat.categories = new_categories
+            new_categories = [new_categories(item) for item in cat.categories]
+
+        cat._set_categories(new_categories)
         if not inplace:
             return cat
         return None

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -1931,9 +1931,8 @@ def _do_convert_categoricals(
                     categories = list(vl.values())
                 try:
                     # Try to catch duplicate categories
-                    # error: Incompatible types in assignment (expression has
-                    # type "List[str]", variable has type "Index")
-                    cat_data.categories = categories  # type: ignore[assignment]
+                    # TODO: if we get a non-copying rename_categories, use that
+                    cat_data = cat_data.rename_categories(categories)
                 except ValueError as err:
                     vc = Series(categories).value_counts()
                     repeated_cats = list(vc.index[vc > 1])

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
@@ -323,13 +323,22 @@ def test_validate_inplace_raises(self, value):
             f"received type {type(value).__name__}"
         )
         with pytest.raises(ValueError, match=msg):
-            cat.set_ordered(value=True, inplace=value)
+            with tm.assert_produces_warning(
+                FutureWarning, match="Use rename_categories"
+            ):
+                cat.set_ordered(value=True, inplace=value)
 
         with pytest.raises(ValueError, match=msg):
-            cat.as_ordered(inplace=value)
+            with tm.assert_produces_warning(
+                FutureWarning, match="Use rename_categories"
+            ):
+                cat.as_ordered(inplace=value)
 
         with pytest.raises(ValueError, match=msg):
-            cat.as_unordered(inplace=value)
+            with tm.assert_produces_warning(
+                FutureWarning, match="Use rename_categories"
+            ):
+                cat.as_unordered(inplace=value)
 
         with pytest.raises(ValueError, match=msg):
             with tm.assert_produces_warning(FutureWarning):

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
@@ -34,22 +34,30 @@ def test_ordered_api(self):
         assert cat4.ordered
 
     def test_set_ordered(self):
-
+        msg = (
+            "The `inplace` parameter in pandas.Categorical.set_ordered is "
+            "deprecated and will be removed in a future version. setting "
+            "ordered-ness on categories will always return a new Categorical object"
+        )
         cat = Categorical(["a", "b", "c", "a"], ordered=True)
         cat2 = cat.as_unordered()
         assert not cat2.ordered
         cat2 = cat.as_ordered()
         assert cat2.ordered
-        cat2.as_unordered(inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            cat2.as_unordered(inplace=True)
         assert not cat2.ordered
-        cat2.as_ordered(inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            cat2.as_ordered(inplace=True)
         assert cat2.ordered
 
         assert cat2.set_ordered(True).ordered
         assert not cat2.set_ordered(False).ordered
-        cat2.set_ordered(True, inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            cat2.set_ordered(True, inplace=True)
         assert cat2.ordered
-        cat2.set_ordered(False, inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            cat2.set_ordered(False, inplace=True)
         assert not cat2.ordered
 
         # removed in 0.19.0

diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
@@ -194,7 +194,8 @@ def test_periodindex(self):
     def test_categories_assignments(self):
         cat = Categorical(["a", "b", "c", "a"])
         exp = np.array([1, 2, 3, 1], dtype=np.int64)
-        cat.categories = [1, 2, 3]
+        with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"):
+            cat.categories = [1, 2, 3]
         tm.assert_numpy_array_equal(cat.__array__(), exp)
         tm.assert_index_equal(cat.categories, Index([1, 2, 3]))
 
@@ -216,8 +217,9 @@ def test_categories_assignments_wrong_length_raises(self, new_categories):
             "new categories need to have the same number of items "
             "as the old categories!"
         )
-        with pytest.raises(ValueError, match=msg):
-            cat.categories = new_categories
+        with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"):
+            with pytest.raises(ValueError, match=msg):
+                cat.categories = new_categories
 
     # Combinations of sorted/unique:
     @pytest.mark.parametrize(

diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py
@@ -110,7 +110,8 @@ def test_categorical_delegations(self):
         ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
         exp_categories = Index(["a", "b", "c"])
         tm.assert_index_equal(ser.cat.categories, exp_categories)
-        ser.cat.categories = [1, 2, 3]
+        with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"):
+            ser.cat.categories = [1, 2, 3]
         exp_categories = Index([1, 2, 3])
         tm.assert_index_equal(ser.cat.categories, exp_categories)
 
@@ -120,7 +121,8 @@ def test_categorical_delegations(self):
         assert ser.cat.ordered
         ser = ser.cat.as_unordered()
         assert not ser.cat.ordered
-        return_value = ser.cat.as_ordered(inplace=True)
+        with tm.assert_produces_warning(FutureWarning, match="The `inplace`"):
+            return_value = ser.cat.as_ordered(inplace=True)
         assert return_value is None
         assert ser.cat.ordered
 
@@ -267,8 +269,10 @@ def test_set_categories_setitem(self):
         df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
 
         # change the dtype in-place
-        df["Survived"].cat.categories = ["No", "Yes"]
-        df["Sex"].cat.categories = ["female", "male"]
+        with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"):
+            df["Survived"].cat.categories = ["No", "Yes"]
+        with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"):
+            df["Sex"].cat.categories = ["female", "male"]
 
         # values should not be coerced to NaN
         assert list(df["Sex"]) == ["female", "male", "male"]