rapidsai · rapids-bot · Feb 1, 2021 · Jan 26, 2021 · Jan 26, 2021 · Jan 26, 2021
@@ -1106,6 +1106,15 @@ def find_and_replace(
         """
         Return col with *to_replace* replaced with *replacement*.
         """
+        to_replace_col = column.as_column(to_replace)
+        replacement_col = column.as_column(replacement)
+
+        if type(to_replace_col) != type(replacement_col):
+            raise TypeError(
+                f"to_replace and value should be of same types,"
+                f"got to_replace dtype: {to_replace_col.dtype} and "
+                f"value dtype: {replacement_col.dtype}"
+            )
 
         # create a dataframe containing the pre-replacement categories
         # and a copy of them to work with. The index of this dataframe
@@ -1116,16 +1125,18 @@ def find_and_replace(
 
         # Create a column with the appropriate labels replaced
         old_cats["cats_replace"] = old_cats["cats"].replace(
-            to_replace, replacement
+            to_replace_col, replacement_col
         )
 
         # Construct the new categorical labels
         # If a category is being replaced by an existing one, we
         # want to map it to None. If it's totally new, we want to
         # map it to the new label it is to be replaced by
-        dtype_replace = cudf.Series(replacement)
+        dtype_replace = cudf.Series(replacement_col)
         dtype_replace[dtype_replace.isin(old_cats["cats"])] = None
-        new_cats["cats"] = new_cats["cats"].replace(to_replace, dtype_replace)
+        new_cats["cats"] = new_cats["cats"].replace(
+            to_replace_col, dtype_replace
+        )
 
         # anything we mapped to None, we want to now filter out since
         # those categories don't exist anymore

@@ -514,21 +514,24 @@ def nullmask(self) -> Buffer:
         else:
             raise ValueError("Column has no null mask")
 
-    def copy(self, deep: bool = True) -> ColumnBase:
+    def copy(self: T, deep: bool = True) -> T:
         """Columns are immutable, so a deep copy produces a copy of the
         underlying data and mask and a shallow copy creates a new column and
         copies the references of the data and mask.
         """
         if deep:
             return libcudf.copying.copy_column(self)
         else:
-            return build_column(
-                self.base_data,
-                self.dtype,
-                mask=self.base_mask,
-                size=self.size,
-                offset=self.offset,
-                children=self.base_children,
+            return cast(
+                T,
+                build_column(
+                    self.base_data,
+                    self.dtype,
+                    mask=self.base_mask,
+                    size=self.size,
+                    offset=self.offset,
+                    children=self.base_children,
+                ),
             )
 
     def view(self, dtype: Dtype) -> ColumnBase:

@@ -1,4 +1,5 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
+
 from __future__ import annotations
 
 from numbers import Number
@@ -412,6 +413,21 @@ def find_and_replace(
         """
         Return col with *to_replace* replaced with *value*.
         """
+        to_replace_col = as_column(to_replace)
+        replacement_col = as_column(replacement)
+
+        if type(to_replace_col) != type(replacement_col):
+            raise TypeError(
+                f"to_replace and value should be of same types,"
+                f"got to_replace dtype: {to_replace_col.dtype} and "
+                f"value dtype: {replacement_col.dtype}"
+            )
+
+        if not isinstance(to_replace_col, NumericalColumn) and not isinstance(
+            replacement_col, NumericalColumn
+        ):
+            return self.copy()
+
         to_replace_col = _normalize_find_and_replace_input(
             self.dtype, to_replace
         )
@@ -421,13 +437,15 @@ def find_and_replace(
             replacement_col = _normalize_find_and_replace_input(
                 self.dtype, replacement
             )
+        replaced = self.copy()
         if len(replacement_col) == 1 and len(to_replace_col) > 1:
             replacement_col = column.as_column(
                 utils.scalar_broadcast_to(
                     replacement[0], (len(to_replace_col),), self.dtype
                 )
             )
-        replaced = self.copy()
+        elif len(replacement_col) == 1 and len(to_replace_col) == 0:
+            return replaced
         to_replace_col, replacement_col, replaced = numeric_normalize_types(
             to_replace_col, replacement_col, replaced
         )

@@ -1,4 +1,5 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+
 from __future__ import annotations
 
 import builtins
@@ -5009,9 +5010,37 @@ def find_and_replace(
         """
         Return col with *to_replace* replaced with *value*
         """
-        to_replace = column.as_column(to_replace, dtype=self.dtype)
-        replacement = column.as_column(replacement, dtype=self.dtype)
-        return libcudf.replace.replace(self, to_replace, replacement)
+
+        to_replace_col = column.as_column(to_replace)
+        if to_replace_col.null_count == len(to_replace_col):
+            # If every value in `to_replace` is `None`, the dtype of
+            # `to_replace_col` is inferred as `float64`. An all-null
+            # column is still a valid string column, so type-cast it
+            # to self.dtype.
+            to_replace_col = to_replace_col.astype(self.dtype)
+
+        replacement_col = column.as_column(replacement)
+        if replacement_col.null_count == len(replacement_col):
+            # If every value in `replacement` is `None`, the dtype of
+            # `replacement_col` is inferred as `float64`. An all-null
+            # column is still a valid string column, so type-cast it
+            # to self.dtype.
+            replacement_col = replacement_col.astype(self.dtype)
+
+        if type(to_replace_col) != type(replacement_col):
+            raise TypeError(
+                f"to_replace and value should be of same types,"
+                f"got to_replace dtype: {to_replace_col.dtype} and "
+                f"value dtype: {replacement_col.dtype}"
+            )
+
+        if (
+            to_replace_col.dtype != self.dtype
+            and replacement_col.dtype != self.dtype
+        ):
+            return self.copy()
+
+        return libcudf.replace.replace(self, to_replace_col, replacement_col)
 
     def fillna(
         self,

@@ -1,4 +1,5 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+
 from __future__ import division
 
 import inspect
@@ -4600,24 +4601,24 @@ def replace(
         Parameters
         ----------
         to_replace : numeric, str, list-like or dict
-            Value(s) to replace.
+            Value(s) that will be replaced.
 
             * numeric or str:
-
                 - values equal to *to_replace* will be replaced
                   with *replacement*
-
             * list of numeric or str:
-
                 - If *replacement* is also list-like,
                   *to_replace* and *replacement* must be of same length.
-
             * dict:
-
                 - Dicts can be used to replace different values in different
                   columns. For example, `{'a': 1, 'z': 2}` specifies that the
                   value 1 in column `a` and the value 2 in column `z` should be
                   replaced with *replacement*.
+                - Dicts can be used to specify different replacement values for
+                  different existing values. For example, {'a': 'b', 'y': 'z'}
+                  replaces the value 'a' with 'b' and 'y' with 'z'.
+                  To use a dict in this way, the *value* parameter should be None.
+
         value : numeric, str, list-like, or dict
             Value(s) to replace `to_replace` with. If a dict is provided, then
             its keys must match the keys in *to_replace*, and corresponding
@@ -4626,26 +4627,78 @@ def replace(
         inplace : bool, default False
             If True, in place.
 
+        Raises
+        ------
+        TypeError
+            - If ``to_replace`` is not a scalar, array-like, dict, or None
+            - If ``to_replace`` is a dict and value is not a list, dict,
+              or Series
+        ValueError
+            - If a list is passed to ``to_replace`` and ``value`` but they
+              are not the same length.
+
         Returns
         -------
         result : DataFrame
             DataFrame after replacement.
 
         Examples
         --------
+
+        Scalar ``to_replace`` and ``value``
+
         >>> import cudf
-        >>> df = cudf.DataFrame()
-        >>> df['id']= [0, 1, 2, -1, 4, -1, 6]
-        >>> df['id']= df['id'].replace(-1, None)
+        >>> df = cudf.DataFrame({'A': [0, 1, 2, 3, 4],
+        ...                    'B': [5, 6, 7, 8, 9],
+        ...                    'C': ['a', 'b', 'c', 'd', 'e']})
         >>> df
-             id
-        0     0
-        1     1
-        2     2
-        3  <NA>
-        4     4
-        5  <NA>
-        6     6
+           A  B  C
+        0  0  5  a
+        1  1  6  b
+        2  2  7  c
+        3  3  8  d
+        4  4  9  e
+        >>> df.replace(0, 5)
+           A  B  C
+        0  5  5  a
+        1  1  6  b
+        2  2  7  c
+        3  3  8  d
+        4  4  9  e
+
+        List-like ``to_replace``
+
+        >>> df.replace([0, 1, 2, 3], 4)
+           A  B  C
+        0  4  5  a
+        1  4  6  b
+        2  4  7  c
+        3  4  8  d
+        4  4  9  e
+        >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
+           A  B  C
+        0  4  5  a
+        1  3  6  b
+        2  2  7  c
+        3  1  8  d
+        4  4  9  e
+
+        dict-like ``to_replace``
+
+        >>> df.replace({0: 10, 1: 100})
+             A  B  C
+        0   10  5  a
+        1  100  6  b
+        2    2  7  c
+        3    3  8  d
+        4    4  9  e
+        >>> df.replace({'A': 0, 'B': 5}, 100)
+             A    B  C
+        0  100  100  a
+        1    1    6  b
+        2    2    7  c
+        3    3    8  d
+        4    4    9  e
 
         Notes
         -----