diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index acac6ac0a727c..a2c77715aa46d 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -121,6 +121,7 @@ Other API changes - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) +- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3526ea3438aff..c47fca79afb45 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -514,6 +514,7 @@ cpdef array_to_datetime( found_tz = True if utc_convert: _ts = convert_datetime_to_tsobject(val, None) + _ts.ensure_reso(NPY_FR_ns) iresult[i] = _ts.value elif found_naive: raise ValueError('Tz-aware datetime.datetime ' @@ -527,6 +528,7 @@ cpdef array_to_datetime( found_tz = True tz_out = val.tzinfo _ts = convert_datetime_to_tsobject(val, None) + _ts.ensure_reso(NPY_FR_ns) iresult[i] = _ts.value else: @@ -535,7 +537,7 @@ cpdef array_to_datetime( raise ValueError('Cannot mix tz-aware with ' 'tz-naive values') if isinstance(val, _Timestamp): - iresult[i] = val.value + iresult[i] = val._as_unit("ns").value else: iresult[i] = pydatetime_to_dt64(val, &dts) check_dts_bounds(&dts) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index a90347415ec76..5c73b908d7eff 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -20,6 +20,9 @@ cdef class _TSObject: int64_t value # numpy dt64 tzinfo tzinfo bint fold + NPY_DATETIMEUNIT reso + + cdef void ensure_reso(self, NPY_DATETIMEUNIT reso) cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 026bf44300407..733385dc8d6ba 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -31,12 +31,14 @@ import_datetime() from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.dtypes cimport ( abbrev_to_npy_unit, + get_supported_reso, periods_per_second, ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, check_dts_bounds, + convert_reso, get_datetime64_unit, get_datetime64_value, get_implementation_bounds, @@ -204,10 +206,16 @@ cdef 
class _TSObject: # int64_t value # numpy dt64 # tzinfo tzinfo # bint fold + # NPY_DATETIMEUNIT reso def __cinit__(self): # GH 25057. As per PEP 495, set fold to 0 by default self.fold = 0 + self.reso = NPY_FR_ns # default value + + cdef void ensure_reso(self, NPY_DATETIMEUNIT reso): + if self.reso != reso: + self.value = convert_reso(self.value, self.reso, reso, False) cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, @@ -228,6 +236,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, """ cdef: _TSObject obj + NPY_DATETIMEUNIT reso obj = _TSObject() @@ -237,9 +246,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if ts is None or ts is NaT: obj.value = NPY_NAT elif is_datetime64_object(ts): - obj.value = get_datetime64_nanos(ts, NPY_FR_ns) + reso = get_supported_reso(get_datetime64_unit(ts)) + obj.reso = reso + obj.value = get_datetime64_nanos(ts, reso) if obj.value != NPY_NAT: - pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts) + pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts) elif is_integer_object(ts): try: ts = ts @@ -295,7 +306,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to ' f'Timestamp') - maybe_localize_tso(obj, tz, NPY_FR_ns) + maybe_localize_tso(obj, tz, obj.reso) return obj diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ec7379e080d9..ccba037c57d7e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport ( is_array, is_datetime64_object, is_integer_object, - is_timedelta64_object, ) from pandas._libs.tslibs.fields import ( @@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport ( from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, delta_to_nanoseconds, - ensure_td64ns, is_any_td_scalar, ) @@ -282,6 +280,7 @@ cdef class 
_Timestamp(ABCTimestamp): ) obj.value = value + obj.reso = reso pandas_datetime_to_datetimestruct(value, reso, &obj.dts) maybe_localize_tso(obj, tz, reso) @@ -432,62 +431,26 @@ cdef class _Timestamp(ABCTimestamp): int64_t nanos = 0 if is_any_td_scalar(other): - if is_timedelta64_object(other): - other_reso = get_datetime64_unit(other) - if ( - other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC - ): - # TODO: deprecate allowing this? We only get here - # with test_timedelta_add_timestamp_interval - other = np.timedelta64(other.view("i8"), "ns") - other_reso = NPY_DATETIMEUNIT.NPY_FR_ns - elif ( - other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M - ): - # TODO: deprecate allowing these? or handle more like the - # corresponding DateOffsets? - # TODO: no tests get here - other = ensure_td64ns(other) - other_reso = NPY_DATETIMEUNIT.NPY_FR_ns - - if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns: - # TODO: no tests - other = ensure_td64ns(other) - if other_reso > self._reso: - # Following numpy, we cast to the higher resolution - # test_sub_timedelta64_mismatched_reso - self = (<_Timestamp>self)._as_reso(other_reso) - - - if isinstance(other, _Timedelta): - # TODO: share this with __sub__, Timedelta.__add__ - # Matching numpy, we cast to the higher resolution. Unlike numpy, - # we raise instead of silently overflowing during this casting. - if self._reso < other._reso: - self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True) - elif self._reso > other._reso: - other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True) + other = Timedelta(other) - try: - nanos = delta_to_nanoseconds( - other, reso=self._reso, round_ok=False - ) - except OutOfBoundsTimedelta: - raise + # TODO: share this with __sub__, Timedelta.__add__ + # Matching numpy, we cast to the higher resolution. Unlike numpy, + # we raise instead of silently overflowing during this casting. 
+ if self._reso < other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True) + elif self._reso > other._reso: + other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True) - try: - new_value = self.value + nanos - except OverflowError: - # Use Python ints - # Hit in test_tdi_add_overflow - new_value = int(self.value) + int(nanos) + nanos = other.value try: + new_value = self.value + nanos result = type(self)._from_value_and_reso( new_value, reso=self._reso, tz=self.tzinfo ) except OverflowError as err: # TODO: don't hard-code nanosecond here + new_value = int(self.value) + int(nanos) raise OutOfBoundsDatetime( f"Out of bounds nanosecond timestamp: {new_value}" ) from err @@ -1713,7 +1676,7 @@ class Timestamp(_Timestamp): if not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.reso) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 02313e429f3b6..4963ba6114a0e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3113,14 +3113,34 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls): with pytest.raises(TypeError, match=msg): constructor(scalar, dtype=dtype) + @pytest.mark.xfail( + reason="Timestamp constructor has been updated to cast dt64 to non-nano, " + "but DatetimeArray._from_sequence has not" + ) @pytest.mark.parametrize("cls", [datetime, np.datetime64]) - def test_from_out_of_bounds_datetime(self, constructor, cls): + def test_from_out_of_ns_bounds_datetime(self, constructor, cls, request): + # scalar that won't fit in nanosecond dt64, but will fit in microsecond scalar = datetime(9999, 1, 1) + exp_dtype = "M8[us]" # smallest reso that fits if cls is np.datetime64: scalar = 
np.datetime64(scalar, "D") + exp_dtype = "M8[s]" # closest reso to input result = constructor(scalar) - assert type(get1(result)) is cls + item = get1(result) + dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + + assert type(item) is Timestamp + assert item.asm8.dtype == exp_dtype + assert dtype == exp_dtype + + def test_out_of_s_bounds_datetime64(self, constructor): + scalar = np.datetime64(np.iinfo(np.int64).max, "D") + result = constructor(scalar) + item = get1(result) + assert type(item) is np.datetime64 + dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + assert dtype == object @pytest.mark.xfail( reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, " diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index f3b84388b0f70..f5cfc6fecb5d0 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -99,11 +99,11 @@ def test_td_add_datetimelike_scalar(self, op): assert result is NaT def test_td_add_timestamp_overflow(self): - msg = "Cannot cast 259987 from D to 'ns' without overflow" + msg = "Cannot cast 259987 from D to 'ns' without overflow." 
with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow" + msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + timedelta(days=13 * 19999) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 65610bbe14e41..4283575a67f4c 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -45,7 +45,7 @@ def test_overflow_offset_raises(self): r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " "will overflow" ) - lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow" + lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): stamp + offset_overflow @@ -62,7 +62,9 @@ def test_overflow_offset_raises(self): stamp = Timestamp("2000/1/1") offset_overflow = to_offset("D") * 100**5 - lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow" + lmsg3 = ( + r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow" + ) with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): stamp + offset_overflow diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 58150fdce8503..9b7d8d82a9b98 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -11,6 +11,7 @@ import pytest import pytz +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.compat import PY310 from pandas.errors import OutOfBoundsDatetime @@ -455,14 +456,26 @@ def test_out_of_bounds_value(self): Timestamp(min_ts_us) Timestamp(max_ts_us) + # We used to raise on these before supporting non-nano + us_val = 
NpyDatetimeUnit.NPY_FR_us.value + assert Timestamp(min_ts_us - one_us)._reso == us_val + assert Timestamp(max_ts_us + one_us)._reso == us_val + + # https://github.com/numpy/numpy/issues/22346 for why + # we can't use the same construction as above with minute resolution + + # too_low, too_high are the _just_ outside the range of M8[s] + too_low = np.datetime64("-292277022657-01-27T08:29", "m") + too_high = np.datetime64("292277026596-12-04T15:31", "m") + msg = "Out of bounds" # One us less than the minimum is an error with pytest.raises(ValueError, match=msg): - Timestamp(min_ts_us - one_us) + Timestamp(too_low) # One us more than the maximum is an error with pytest.raises(ValueError, match=msg): - Timestamp(max_ts_us + one_us) + Timestamp(too_high) def test_out_of_bounds_string(self): msg = "Out of bounds" @@ -487,7 +500,20 @@ def test_bounds_with_different_units(self): for date_string in out_of_bounds_dates: for unit in time_units: dt64 = np.datetime64(date_string, unit) - msg = "Out of bounds" + ts = Timestamp(dt64) + if unit in ["s", "ms", "us"]: + # We can preserve the input unit + assert ts.value == dt64.view("i8") + else: + # we chose the closest unit that we _do_ support + assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value + + # With more extreme cases, we can't even fit inside second resolution + info = np.iinfo(np.int64) + msg = "Out of bounds nanosecond timestamp:" + for value in [info.min + 1, info.max]: + for unit in ["D", "h", "m"]: + dt64 = np.datetime64(value, unit) with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp(dt64) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index b6bc3a866fc8e..c195b96a1500d 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -826,7 +826,7 @@ def test_cmp_cross_reso(self): # subtracting 3600*24 gives a datetime64 that _can_ fit inside the # nanosecond implementation bounds. 
- other = Timestamp(dt64 - 3600 * 24) + other = Timestamp(dt64 - 3600 * 24)._as_unit("ns") assert other < ts assert other.asm8 > ts.asm8 # <- numpy gets this wrong assert ts > other @@ -884,12 +884,7 @@ def test_to_period(self, dt64, ts): ) def test_addsub_timedeltalike_non_nano(self, dt64, ts, td): - if isinstance(td, Timedelta): - # td._reso is ns - exp_reso = td._reso - else: - # effective td._reso is s - exp_reso = ts._reso + exp_reso = max(ts._reso, Timedelta(td)._reso) result = ts - td expected = Timestamp(dt64) - td diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3d59e115d4cf9..4dd1b32ba65e4 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -21,6 +21,7 @@ iNaT, parsing, ) +from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -692,9 +693,18 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): msg = "Out of bounds .* present at position 0" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(dt, errors="raise") - msg = f"Out of bounds nanosecond timestamp: {dt}" + + # TODO(2.0): The Timestamp and to_datetime behaviors should match; + # as of 2022-09-28, the Timestamp constructor has been updated + # to cast to M8[s] but to_datetime has not + ts = Timestamp(dt) + assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value + assert ts.asm8 == dt + + msg = "Out of bounds nanosecond timestamp" with pytest.raises(OutOfBoundsDatetime, match=msg): - Timestamp(dt) + Timestamp(np.datetime64(np.iinfo(np.int64).max, "D")) + assert to_datetime(dt, errors="coerce", cache=cache) is NaT @pytest.mark.parametrize("unit", ["s", "D"]) @@ -1878,7 +1888,7 @@ def test_to_datetime_list_of_integers(self): def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here - msg = "Cannot cast 139999 days, 0:00:00 to unit=ns without overflow" + msg = "Cannot cast 139999 days 
00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000)