Skip to content

Commit

Permalink
Replace is_datetime/timedelta_dtype checks with .kind checks (#16262)
Browse files Browse the repository at this point in the history
It appears this was called when we already had a dtype object so can instead just simply check the .kind attribute

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16262
  • Loading branch information
mroeschke authored Jul 16, 2024
1 parent 04330f2 commit dba46e7
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 86 deletions.
3 changes: 1 addition & 2 deletions python/cudf/cudf/_fuzz_testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ def convert_nulls_to_none(records, df):
col
for col in df.columns
if df[col].dtype in pandas_dtypes_to_np_dtypes
or pd.api.types.is_datetime64_dtype(df[col].dtype)
or pd.api.types.is_timedelta64_dtype(df[col].dtype)
or df[col].dtype.kind in "mM"
]

for record in records:
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from cudf import _lib as libcudf
from cudf._lib.labeling import label_bins
from cudf._lib.search import search_sorted
from cudf.api.types import is_datetime64_dtype, is_timedelta64_dtype
from cudf.core._compat import PANDAS_GE_220
from cudf.core._internals.timezones import (
check_ambiguous_and_nonexistent,
Expand Down Expand Up @@ -565,10 +564,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:

# We check this on `other` before reflection since we already know the
# dtype of `self`.
other_is_timedelta = is_timedelta64_dtype(other.dtype)
other_is_datetime64 = not other_is_timedelta and is_datetime64_dtype(
other.dtype
)
other_is_timedelta = other.dtype.kind == "m"
other_is_datetime64 = other.dtype.kind == "M"
lhs, rhs = (other, self) if reflect else (self, other)
out_dtype = None

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import cudf
from cudf import _lib as libcudf
from cudf.api.types import is_scalar, is_timedelta64_dtype
from cudf.api.types import is_scalar
from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column import ColumnBase, column, string
from cudf.utils.dtypes import np_to_pa_dtype
Expand Down Expand Up @@ -153,7 +153,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
this: ColumnBinaryOperand = self
out_dtype = None

if is_timedelta64_dtype(other.dtype):
if other.dtype.kind == "m":
# TODO: pandas will allow these operators to work but return false
# when comparing to non-timedelta dtypes. We should do the same.
if op in {
Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_datetime_dtype,
is_dict_like,
is_dtype_equal,
is_list_like,
Expand Down Expand Up @@ -6113,7 +6112,7 @@ def _prepare_for_rowwise_op(self, method, skipna, numeric_only):
else:
filtered = self.copy(deep=False)

is_pure_dt = all(is_datetime_dtype(dt) for dt in filtered.dtypes)
is_pure_dt = all(dt.kind == "M" for dt in filtered.dtypes)

common_dtype = find_common_type(filtered.dtypes)
if (
Expand Down Expand Up @@ -6510,7 +6509,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
cudf.utils.dtypes.get_min_float_dtype(
prepared._data[col]
)
if not is_datetime_dtype(common_dtype)
if common_dtype.kind != "M"
else cudf.dtype("float64")
)
.fillna(np.nan)
Expand All @@ -6537,7 +6536,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
result_dtype = (
common_dtype
if method in type_coerced_methods
or is_datetime_dtype(common_dtype)
or (common_dtype is not None and common_dtype.kind == "M")
else None
)
result = column.as_column(result, dtype=result_dtype)
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pyarrow as pa

import cudf
from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
from cudf.api.types import is_scalar
from cudf.core.dtypes import ListDtype, StructDtype
from cudf.core.missing import NA, NaT
from cudf.core.mixins import BinaryOperand
Expand Down Expand Up @@ -245,11 +245,7 @@ def _preprocess_host_value(self, value, dtype):
dtype = cudf.dtype(dtype)

if not valid:
value = (
NaT
if is_datetime64_dtype(dtype) or is_timedelta64_dtype(dtype)
else NA
)
value = NaT if dtype.kind in "mM" else NA

return value, dtype

Expand Down
9 changes: 2 additions & 7 deletions python/cudf/cudf/core/tools/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib import strings as libstrings
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_datetime_dtype,
is_string_dtype,
is_timedelta_dtype,
)
from cudf.api.types import _is_non_decimal_numeric_dtype, is_string_dtype
from cudf.core.column import as_column
from cudf.core.dtypes import CategoricalDtype
from cudf.utils.dtypes import can_convert_to_column
Expand Down Expand Up @@ -114,7 +109,7 @@ def to_numeric(arg, errors="raise", downcast=None):
col = as_column(arg)
dtype = col.dtype

if is_datetime_dtype(dtype) or is_timedelta_dtype(dtype):
if dtype.kind in "mM":
col = col.astype(cudf.dtype("int64"))
elif isinstance(dtype, CategoricalDtype):
cat_dtype = col.dtype.type
Expand Down
7 changes: 1 addition & 6 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1694,12 +1694,7 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
rhs = cudf.Scalar(cudf.NA, dtype=dtype_r)

result = op(lhs, rhs)
assert result.value is (
cudf.NaT
if cudf.api.types.is_datetime64_dtype(result.dtype)
or cudf.api.types.is_timedelta64_dtype(result.dtype)
else cudf.NA
)
assert result.value is (cudf.NaT if result.dtype.kind in "mM" else cudf.NA)

# make sure dtype is the same as had there been a valid scalar
valid_lhs = cudf.Scalar(1, dtype=dtype_l)
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5457,9 +5457,7 @@ def test_rowwise_ops_datetime_dtypes(data, op, skipna, numeric_only):
gdf = cudf.DataFrame(data)
pdf = gdf.to_pandas()

if not numeric_only and not all(
cudf.api.types.is_datetime64_dtype(dt) for dt in gdf.dtypes
):
if not numeric_only and not all(dt.kind == "M" for dt in gdf.dtypes):
with pytest.raises(TypeError):
got = getattr(gdf, op)(
axis=1, skipna=skipna, numeric_only=numeric_only
Expand Down
7 changes: 1 addition & 6 deletions python/cudf/cudf/tests/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,12 +694,7 @@ def test_list_scalar_host_construction_null(elem_type, nesting_level):
dtype = cudf.ListDtype(dtype)

slr = cudf.Scalar(None, dtype=dtype)
assert slr.value is (
cudf.NaT
if cudf.api.types.is_datetime64_dtype(slr.dtype)
or cudf.api.types.is_timedelta64_dtype(slr.dtype)
else cudf.NA
)
assert slr.value is (cudf.NaT if slr.dtype.kind in "mM" else cudf.NA)


@pytest.mark.parametrize(
Expand Down
11 changes: 2 additions & 9 deletions python/cudf/cudf/tests/test_scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,7 @@ def test_scalar_roundtrip(value):
)
def test_null_scalar(dtype):
s = cudf.Scalar(None, dtype=dtype)
if cudf.api.types.is_datetime64_dtype(
dtype
) or cudf.api.types.is_timedelta64_dtype(dtype):
if s.dtype.kind in "mM":
assert s.value is cudf.NaT
else:
assert s.value is cudf.NA
Expand Down Expand Up @@ -369,12 +367,7 @@ def test_scalar_implicit_int_conversion(value):
@pytest.mark.parametrize("dtype", sorted(set(ALL_TYPES) - {"category"}))
def test_scalar_invalid_implicit_conversion(cls, dtype):
try:
cls(
pd.NaT
if cudf.api.types.is_datetime64_dtype(dtype)
or cudf.api.types.is_timedelta64_dtype(dtype)
else pd.NA
)
cls(pd.NaT if cudf.dtype(dtype).kind in "mM" else pd.NA)
except TypeError as e:
with pytest.raises(TypeError, match=re.escape(str(e))):
slr = cudf.Scalar(None, dtype=dtype)
Expand Down
56 changes: 20 additions & 36 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,7 @@ def get_time_unit(obj):

def _get_nan_for_dtype(dtype):
dtype = cudf.dtype(dtype)
if pd.api.types.is_datetime64_dtype(
dtype
) or pd.api.types.is_timedelta64_dtype(dtype):
if dtype.kind in "mM":
time_unit, _ = np.datetime_data(dtype)
return dtype.type("nat", time_unit)
elif dtype.kind == "f":
Expand Down Expand Up @@ -527,16 +525,14 @@ def find_common_type(dtypes):
return cudf.dtype("O")

# Aggregate same types
dtypes = set(dtypes)
dtypes = {cudf.dtype(dtype) for dtype in dtypes}
if len(dtypes) == 1:
return dtypes.pop()

if any(
isinstance(dtype, cudf.core.dtypes.DecimalDtype) for dtype in dtypes
):
if all(
cudf.api.types.is_decimal_dtype(dtype)
or cudf.api.types.is_numeric_dtype(dtype)
for dtype in dtypes
):
if all(cudf.api.types.is_numeric_dtype(dtype) for dtype in dtypes):
return _find_common_type_decimal(
[
dtype
Expand All @@ -546,40 +542,28 @@ def find_common_type(dtypes):
)
else:
return cudf.dtype("O")
if any(isinstance(dtype, cudf.ListDtype) for dtype in dtypes):
if len(dtypes) == 1:
return dtypes.get(0)
else:
# TODO: As list dtypes allow casting
# to identical types, improve this logic of returning a
# common dtype, for example:
# ListDtype(int64) & ListDtype(int32) common
# dtype could be ListDtype(int64).
raise NotImplementedError(
"Finding a common type for `ListDtype` is currently "
"not supported"
)
if any(isinstance(dtype, cudf.StructDtype) for dtype in dtypes):
if len(dtypes) == 1:
return dtypes.get(0)
else:
raise NotImplementedError(
"Finding a common type for `StructDtype` is currently "
"not supported"
)
elif any(
isinstance(dtype, (cudf.ListDtype, cudf.StructDtype))
for dtype in dtypes
):
# TODO: As list dtypes allow casting
# to identical types, improve this logic of returning a
# common dtype, for example:
# ListDtype(int64) & ListDtype(int32) common
# dtype could be ListDtype(int64).
raise NotImplementedError(
"Finding a common type for `ListDtype` or `StructDtype` is currently "
"not supported"
)

# Corner case 1:
# Resort to np.result_type to handle "M" and "m" types separately
dt_dtypes = set(
filter(lambda t: cudf.api.types.is_datetime_dtype(t), dtypes)
)
dt_dtypes = set(filter(lambda t: t.kind == "M", dtypes))
if len(dt_dtypes) > 0:
dtypes = dtypes - dt_dtypes
dtypes.add(np.result_type(*dt_dtypes))

td_dtypes = set(
filter(lambda t: pd.api.types.is_timedelta64_dtype(t), dtypes)
)
td_dtypes = set(filter(lambda t: t.kind == "m", dtypes))
if len(td_dtypes) > 0:
dtypes = dtypes - td_dtypes
dtypes.add(np.result_type(*td_dtypes))
Expand Down

0 comments on commit dba46e7

Please sign in to comment.