Skip to content

Commit

Permalink
ENH: retain reso in Timestamp(dt64_obj) (#49008)
Browse files Browse the repository at this point in the history
* BUG: Timestamp.__add__(np_dt64_obj) result resolution

* ENH: retain reso in Timestamp(dt64_obj)

* GH ref

* update GH ref

* troubleshoot npdev build

* implement _TSObject.ensure_reso

* troubleshoot npdev build

* troubleshoot npdev
  • Loading branch information
jbrockmendel authored Oct 12, 2022
1 parent 20bbd12 commit 98323ee
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 73 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Other API changes
- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`)
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ cpdef array_to_datetime(
found_tz = True
if utc_convert:
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value
elif found_naive:
raise ValueError('Tz-aware datetime.datetime '
Expand All @@ -527,6 +528,7 @@ cpdef array_to_datetime(
found_tz = True
tz_out = val.tzinfo
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value

else:
Expand All @@ -535,7 +537,7 @@ cpdef array_to_datetime(
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
if isinstance(val, _Timestamp):
iresult[i] = val.value
iresult[i] = val._as_unit("ns").value
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ cdef class _TSObject:
int64_t value # numpy dt64
tzinfo tzinfo
bint fold
NPY_DATETIMEUNIT reso

cdef void ensure_reso(self, NPY_DATETIMEUNIT reso)


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down
17 changes: 14 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ import_datetime()
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.dtypes cimport (
abbrev_to_npy_unit,
get_supported_reso,
periods_per_second,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
check_dts_bounds,
convert_reso,
get_datetime64_unit,
get_datetime64_value,
get_implementation_bounds,
Expand Down Expand Up @@ -204,10 +206,16 @@ cdef class _TSObject:
# int64_t value # numpy dt64
# tzinfo tzinfo
# bint fold
# NPY_DATETIMEUNIT reso

def __cinit__(self):
# GH 25057. As per PEP 495, set fold to 0 by default
self.fold = 0
self.reso = NPY_FR_ns # default value

cdef void ensure_reso(self, NPY_DATETIMEUNIT reso):
if self.reso != reso:
self.value = convert_reso(self.value, self.reso, reso, False)


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand All @@ -228,6 +236,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
"""
cdef:
_TSObject obj
NPY_DATETIMEUNIT reso

obj = _TSObject()

Expand All @@ -237,9 +246,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
if ts is None or ts is NaT:
obj.value = NPY_NAT
elif is_datetime64_object(ts):
obj.value = get_datetime64_nanos(ts, NPY_FR_ns)
reso = get_supported_reso(get_datetime64_unit(ts))
obj.reso = reso
obj.value = get_datetime64_nanos(ts, reso)
if obj.value != NPY_NAT:
pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts)
pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
elif is_integer_object(ts):
try:
ts = <int64_t>ts
Expand Down Expand Up @@ -295,7 +306,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
f'Timestamp')

maybe_localize_tso(obj, tz, NPY_FR_ns)
maybe_localize_tso(obj, tz, obj.reso)
return obj


Expand Down
63 changes: 13 additions & 50 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport (
is_array,
is_datetime64_object,
is_integer_object,
is_timedelta64_object,
)

from pandas._libs.tslibs.fields import (
Expand Down Expand Up @@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport (
from pandas._libs.tslibs.timedeltas cimport (
_Timedelta,
delta_to_nanoseconds,
ensure_td64ns,
is_any_td_scalar,
)

Expand Down Expand Up @@ -282,6 +280,7 @@ cdef class _Timestamp(ABCTimestamp):
)

obj.value = value
obj.reso = reso
pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
maybe_localize_tso(obj, tz, reso)

Expand Down Expand Up @@ -432,62 +431,26 @@ cdef class _Timestamp(ABCTimestamp):
int64_t nanos = 0

if is_any_td_scalar(other):
if is_timedelta64_object(other):
other_reso = get_datetime64_unit(other)
if (
other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
elif (
other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
):
# TODO: deprecate allowing these? or handle more like the
# corresponding DateOffsets?
# TODO: no tests get here
other = ensure_td64ns(other)
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns

if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: no tests
other = ensure_td64ns(other)
if other_reso > self._reso:
# Following numpy, we cast to the higher resolution
# test_sub_timedelta64_mismatched_reso
self = (<_Timestamp>self)._as_reso(other_reso)


if isinstance(other, _Timedelta):
# TODO: share this with __sub__, Timedelta.__add__
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._reso < other._reso:
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
elif self._reso > other._reso:
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
other = Timedelta(other)

try:
nanos = delta_to_nanoseconds(
other, reso=self._reso, round_ok=False
)
except OutOfBoundsTimedelta:
raise
# TODO: share this with __sub__, Timedelta.__add__
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._reso < other._reso:
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
elif self._reso > other._reso:
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)

try:
new_value = self.value + nanos
except OverflowError:
# Use Python ints
# Hit in test_tdi_add_overflow
new_value = int(self.value) + int(nanos)
nanos = other.value

try:
new_value = self.value + nanos
result = type(self)._from_value_and_reso(
new_value, reso=self._reso, tz=self.tzinfo
)
except OverflowError as err:
# TODO: don't hard-code nanosecond here
new_value = int(self.value) + int(nanos)
raise OutOfBoundsDatetime(
f"Out of bounds nanosecond timestamp: {new_value}"
) from err
Expand Down Expand Up @@ -1713,7 +1676,7 @@ class Timestamp(_Timestamp):
if not is_offset_object(freq):
freq = to_offset(freq)

return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold)
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.reso)

def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'):
cdef:
Expand Down
24 changes: 22 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3113,14 +3113,34 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
with pytest.raises(TypeError, match=msg):
constructor(scalar, dtype=dtype)

@pytest.mark.xfail(
reason="Timestamp constructor has been updated to cast dt64 to non-nano, "
"but DatetimeArray._from_sequence has not"
)
@pytest.mark.parametrize("cls", [datetime, np.datetime64])
def test_from_out_of_bounds_datetime(self, constructor, cls):
def test_from_out_of_ns_bounds_datetime(self, constructor, cls, request):
# scalar that won't fit in nanosecond dt64, but will fit in microsecond
scalar = datetime(9999, 1, 1)
exp_dtype = "M8[us]" # smallest reso that fits
if cls is np.datetime64:
scalar = np.datetime64(scalar, "D")
exp_dtype = "M8[s]" # closest reso to input
result = constructor(scalar)

assert type(get1(result)) is cls
item = get1(result)
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]

assert type(item) is Timestamp
assert item.asm8.dtype == exp_dtype
assert dtype == exp_dtype

def test_out_of_s_bounds_datetime64(self, constructor):
scalar = np.datetime64(np.iinfo(np.int64).max, "D")
result = constructor(scalar)
item = get1(result)
assert type(item) is np.datetime64
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
assert dtype == object

@pytest.mark.xfail(
reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, "
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def test_td_add_datetimelike_scalar(self, op):
assert result is NaT

def test_td_add_timestamp_overflow(self):
msg = "Cannot cast 259987 from D to 'ns' without overflow"
msg = "Cannot cast 259987 from D to 'ns' without overflow."
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + timedelta(days=13 * 19999)

Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_overflow_offset_raises(self):
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
"will overflow"
)
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"
lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow"

with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp + offset_overflow
Expand All @@ -62,7 +62,9 @@ def test_overflow_offset_raises(self):
stamp = Timestamp("2000/1/1")
offset_overflow = to_offset("D") * 100**5

lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
lmsg3 = (
r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow"
)
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
stamp + offset_overflow

Expand Down
32 changes: 29 additions & 3 deletions pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pytest
import pytz

from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.compat import PY310
from pandas.errors import OutOfBoundsDatetime

Expand Down Expand Up @@ -455,14 +456,26 @@ def test_out_of_bounds_value(self):
Timestamp(min_ts_us)
Timestamp(max_ts_us)

# We used to raise on these before supporting non-nano
us_val = NpyDatetimeUnit.NPY_FR_us.value
assert Timestamp(min_ts_us - one_us)._reso == us_val
assert Timestamp(max_ts_us + one_us)._reso == us_val

# https://github.com/numpy/numpy/issues/22346 for why
# we can't use the same construction as above with minute resolution

# too_low, too_high are _just_ outside the range of M8[s]
too_low = np.datetime64("-292277022657-01-27T08:29", "m")
too_high = np.datetime64("292277026596-12-04T15:31", "m")

msg = "Out of bounds"
# One us less than the minimum is an error
with pytest.raises(ValueError, match=msg):
Timestamp(min_ts_us - one_us)
Timestamp(too_low)

# One us more than the maximum is an error
with pytest.raises(ValueError, match=msg):
Timestamp(max_ts_us + one_us)
Timestamp(too_high)

def test_out_of_bounds_string(self):
msg = "Out of bounds"
Expand All @@ -487,7 +500,20 @@ def test_bounds_with_different_units(self):
for date_string in out_of_bounds_dates:
for unit in time_units:
dt64 = np.datetime64(date_string, unit)
msg = "Out of bounds"
ts = Timestamp(dt64)
if unit in ["s", "ms", "us"]:
# We can preserve the input unit
assert ts.value == dt64.view("i8")
else:
# we chose the closest unit that we _do_ support
assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value

# With more extreme cases, we can't even fit inside second resolution
info = np.iinfo(np.int64)
msg = "Out of bounds nanosecond timestamp:"
for value in [info.min + 1, info.max]:
for unit in ["D", "h", "m"]:
dt64 = np.datetime64(value, unit)
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp(dt64)

Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ def test_cmp_cross_reso(self):

# subtracting 3600*24 gives a datetime64 that _can_ fit inside the
# nanosecond implementation bounds.
other = Timestamp(dt64 - 3600 * 24)
other = Timestamp(dt64 - 3600 * 24)._as_unit("ns")
assert other < ts
assert other.asm8 > ts.asm8 # <- numpy gets this wrong
assert ts > other
Expand Down Expand Up @@ -884,12 +884,7 @@ def test_to_period(self, dt64, ts):
)
def test_addsub_timedeltalike_non_nano(self, dt64, ts, td):

if isinstance(td, Timedelta):
# td._reso is ns
exp_reso = td._reso
else:
# effective td._reso is s
exp_reso = ts._reso
exp_reso = max(ts._reso, Timedelta(td)._reso)

result = ts - td
expected = Timestamp(dt64) - td
Expand Down
Loading

0 comments on commit 98323ee

Please sign in to comment.