From 9beb129d0bdaf17ac9932aeb097e583fbebc585d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 20 Apr 2022 09:34:14 -0700 Subject: [PATCH 1/6] BUG: parsing nanoseconds incorrect resolution --- pandas/_libs/tslibs/test_resolution.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 pandas/_libs/tslibs/test_resolution.py diff --git a/pandas/_libs/tslibs/test_resolution.py b/pandas/_libs/tslibs/test_resolution.py new file mode 100644 index 0000000000000..9aa6de3695d7f --- /dev/null +++ b/pandas/_libs/tslibs/test_resolution.py @@ -0,0 +1,9 @@ +from pandas._libs.tslibs import Resolution, get_resolution +import numpy as np + + +def test_get_resolution_nano(): + # don't return the fallback RESO_DAY + arr = np.array([1]) + res = get_resolution(arr) + assert res == Resolution.RESO_NS From a55cee0e57f1f0cfd0ec8b24defab705232fb392 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Apr 2022 19:14:00 -0700 Subject: [PATCH 2/6] BUG: DatetimeIndex.resolution --- doc/source/whatsnew/v1.5.0.rst | 3 +++ pandas/_libs/tslib.pyx | 23 +++++++++++------------ pandas/_libs/tslibs/vectorized.pyx | 4 +++- pandas/core/indexes/datetimes.py | 9 +++++++-- pandas/tests/series/methods/test_asof.py | 13 ++++++++++++- 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 931d18dc349f3..b787da0f468bf 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -448,6 +448,8 @@ Other Deprecations - Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`) - Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) - Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`) +- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`??`, :issue:`36148`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: @@ -489,6 +491,7 @@ Datetimelike - Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) - Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) - Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) +- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`??`) - Timedelta diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 700f66840f128..9492888e7db77 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -60,6 +60,10 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs import ( + Resolution, + get_resolution, +) from pandas._libs.tslibs.timestamps import Timestamp # Note: this is the only non-tslibs intra-pandas dependency here @@ -122,11 +126,11 @@ def format_array_from_datetime( """ cdef: int64_t val, ns, N = len(values) - ndarray[int64_t] consider_values bint show_ms = False, show_us = False, show_ns = False bint basic_format = False ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) - object ts, res + _Timestamp ts + str res npy_datetimestruct dts if na_rep is None: @@ -136,16 +140,10 @@ def format_array_from_datetime( # a format based on precision basic_format = format is None and tz is None if basic_format: - consider_values = values[values != NPY_NAT] - show_ns = (consider_values % 1000).any() - - if not show_ns: - consider_values //= 1000 - show_us = (consider_values % 1000).any() - - if not show_ms: - consider_values //= 1000 - show_ms = (consider_values % 1000).any() + reso_obj = get_resolution(values) + show_ns = reso_obj == Resolution.RESO_NS + show_us = reso_obj == Resolution.RESO_US + show_ms = reso_obj == Resolution.RESO_MS for i in range(N): val = values[i] @@ -178,6 +176,7 @@ def format_array_from_datetime( # invalid format string # requires dates > 1900 try: + # Note: dispatches to pydatetime result[i] = ts.strftime(format) except ValueError: result[i] = str(ts) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 6b78100705a93..907c1071b04fd 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -253,7 +253,9 @@ def ints_to_pydatetime( cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): - if dts.us != 0: + if dts.ps != 0: + return c_Resolution.RESO_NS + elif dts.us != 0: if dts.us % 1000 == 0: return c_Resolution.RESO_MS return c_Resolution.RESO_US diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3954cb28c2aca..0754958d6f202 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -597,7 +597,7 @@ def _can_partial_date_slice(self, reso: Resolution) -> bool: # History of conversation GH#3452, GH#3931, GH#2369, GH#14826 return reso > self._resolution_obj - def _deprecate_mismatched_indexing(self, key) -> None: + def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None: # GH#36148 # we get here with isinstance(key, self._data._recognized_scalars) try: @@ -610,6 +610,10 @@ def _deprecate_mismatched_indexing(self, key) -> None: "raise KeyError in a future version. " "Use a timezone-naive object instead." ) + elif one_way: + # we special-case timezone-naive strings and timezone-aware + # DatetimeIndex + return else: msg = ( "Indexing a timezone-aware DatetimeIndex with a " @@ -644,6 +648,7 @@ def get_loc(self, key, method=None, tolerance=None): parsed, reso = self._parse_with_reso(key) except ValueError as err: raise KeyError(key) from err + self._deprecate_mismatched_indexing(parsed, one_way=True) if self._can_partial_date_slice(reso): try: @@ -652,7 +657,7 @@ def get_loc(self, key, method=None, tolerance=None): if method is None: raise KeyError(key) from err try: - key = self._maybe_cast_for_get_loc(key) + key = self._maybe_cast_for_get_loc(parsed) except ValueError as err: # FIXME(dateutil#1180): we get here because parse_with_reso # doesn't raise on "t2m" diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 5557322eae42d..49603e7b8db11 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -23,9 +23,20 @@ def test_asof_nanosecond_index_access(self): first_value = ser.asof(ser.index[0]) + assert dti.resolution == "nanosecond" # previously was incorrect "day" + + key = "2013-01-01 00:00:00.000000050+0000" + msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser[key] + assert res == first_value + + with tm.assert_produces_warning(FutureWarning, match=msg): + res = dti.get_loc(key) + # this used to not work bc parsing was done by dateutil that didn't # handle nanoseconds - assert first_value == ser["2013-01-01 00:00:00.000000050+0000"] + assert first_value == ser["2013-01-01 00:00:00.000000050"] expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns") assert first_value == ser[Timestamp(expected_ts)] From 3d00ab7e6a1cadd044cf8e5629dbaef2fab1d92d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Apr 2022 19:15:34 -0700 Subject: [PATCH 3/6] GH refs --- doc/source/whatsnew/v1.5.0.rst | 4 ++-- pandas/tests/series/methods/test_asof.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b787da0f468bf..bcd212d8d9a76 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -448,7 +448,7 @@ Other Deprecations - Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`) - Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) - Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`) -- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`??`, :issue:`36148`) +- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`) - .. --------------------------------------------------------------------------- @@ -491,7 +491,7 @@ Datetimelike - Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) - Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) - Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) -- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`??`) +- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Timedelta diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 49603e7b8db11..280198a9af713 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -23,7 +23,8 @@ def test_asof_nanosecond_index_access(self): first_value = ser.asof(ser.index[0]) - assert dti.resolution == "nanosecond" # previously was incorrect "day" + # GH#46903 previously incorrectly was "day" + assert dti.resolution == "nanosecond" key = "2013-01-01 00:00:00.000000050+0000" msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" From a2956f1a05344e9e1d9345c1e130364b0181e793 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 2 May 2022 16:49:27 -0700 Subject: [PATCH 4/6] woops, file in the wrong place --- pandas/{_libs => tests}/tslibs/test_resolution.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) rename pandas/{_libs => tests}/tslibs/test_resolution.py (71%) diff --git a/pandas/_libs/tslibs/test_resolution.py b/pandas/tests/tslibs/test_resolution.py similarity index 71% rename from pandas/_libs/tslibs/test_resolution.py rename to pandas/tests/tslibs/test_resolution.py index 9aa6de3695d7f..a61b17d57c2cd 100644 --- a/pandas/_libs/tslibs/test_resolution.py +++ b/pandas/tests/tslibs/test_resolution.py @@ -1,6 +1,10 @@ -from pandas._libs.tslibs import Resolution, get_resolution import numpy as np +from pandas._libs.tslibs import ( + Resolution, + get_resolution, +) + def test_get_resolution_nano(): # don't return the fallback RESO_DAY From fdf4c3cb5eb87b8d4a2d0ead679ac066be649bee Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 May 2022 12:13:30 -0700 Subject: [PATCH 5/6] fix windows builds --- pandas/tests/tslibs/test_resolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tslibs/test_resolution.py b/pandas/tests/tslibs/test_resolution.py index a61b17d57c2cd..15f4a9d032e5c 100644 --- a/pandas/tests/tslibs/test_resolution.py +++ b/pandas/tests/tslibs/test_resolution.py @@ -8,6 +8,6 @@ def test_get_resolution_nano(): # don't return the fallback RESO_DAY - arr = np.array([1]) + arr = np.array([1], dtype=np.int64) res = get_resolution(arr) assert res == Resolution.RESO_NS From 58e73850cdd9d04ad8ae40d8a747782ae08cf507 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 5 May 2022 10:05:35 -0700 Subject: [PATCH 6/6] separate out test for deprecation --- pandas/tests/indexing/test_datetime.py | 16 ++++++++++++++++ pandas/tests/series/methods/test_asof.py | 9 --------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 332ab02255911..8d498b59c55d1 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -10,6 +10,22 @@ class TestDatetimeIndex: + def test_get_loc_naive_dti_aware_str_deprecated(self): + # GH#46903 + ts = Timestamp("20130101").value + dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)]) + ser = Series(range(100), index=dti) + + key = "2013-01-01 00:00:00.000000050+0000" + msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser[key] + assert res == 0 + + with tm.assert_produces_warning(FutureWarning, match=msg): + loc = dti.get_loc(key) + assert loc == 0 + def test_indexing_with_datetime_tz(self): # GH#8260 diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 280198a9af713..4381aa3f34f8d 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -26,15 +26,6 @@ def test_asof_nanosecond_index_access(self): # GH#46903 previously incorrectly was "day" assert dti.resolution == "nanosecond" - key = "2013-01-01 00:00:00.000000050+0000" - msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = ser[key] - assert res == first_value - - with tm.assert_produces_warning(FutureWarning, match=msg): - res = dti.get_loc(key) - # this used to not work bc parsing was done by dateutil that didn't # handle nanoseconds assert first_value == ser["2013-01-01 00:00:00.000000050"]