diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 128fd68674f96..4e2f547d7d2dc 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -553,6 +553,8 @@ Other Deprecations - Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) - Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`) - Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`) +- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: @@ -594,6 +596,7 @@ Datetimelike - Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) - Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) - Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) +- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Timedelta diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 700f66840f128..9492888e7db77 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -60,6 +60,10 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs import ( + Resolution, + get_resolution, +) from pandas._libs.tslibs.timestamps import Timestamp # Note: this is the only non-tslibs intra-pandas dependency here @@ -122,11 +126,11 @@ def format_array_from_datetime( """ cdef: int64_t val, ns, N = len(values) - ndarray[int64_t] consider_values bint show_ms = False, show_us = False, show_ns = False bint basic_format = False ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) - object ts, res + _Timestamp ts + str res npy_datetimestruct dts if na_rep is None: @@ -136,16 +140,10 @@ def format_array_from_datetime( # a format based on precision basic_format = format is None and tz is None if basic_format: - consider_values = values[values != NPY_NAT] - show_ns = (consider_values % 1000).any() - - if not show_ns: - consider_values //= 1000 - show_us = (consider_values % 1000).any() - - if not show_ms: - consider_values //= 1000 - show_ms = (consider_values % 1000).any() + reso_obj = get_resolution(values) + show_ns = reso_obj == Resolution.RESO_NS + show_us = reso_obj == Resolution.RESO_US + show_ms = reso_obj == Resolution.RESO_MS for i in range(N): val = values[i] @@ -178,6 +176,7 @@ def format_array_from_datetime( # invalid format string # requires dates > 1900 try: + # Note: dispatches to pydatetime result[i] = ts.strftime(format) except ValueError: result[i] = str(ts) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 31d0579900abd..511ce26feeefa 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -204,7 +204,9 @@ def ints_to_pydatetime( cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): - if dts.us != 0: + if dts.ps != 0: + return c_Resolution.RESO_NS + elif dts.us != 0: if dts.us % 1000 == 0: return c_Resolution.RESO_MS return c_Resolution.RESO_US diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5274f68eb3171..806d081c0176b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -593,7 +593,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): end = self._maybe_cast_for_get_loc(end) return start, end - def _deprecate_mismatched_indexing(self, key) -> None: + def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None: # GH#36148 # we get here with isinstance(key, self._data._recognized_scalars) try: @@ -606,6 +606,10 @@ def _deprecate_mismatched_indexing(self, key) -> None: "raise KeyError in a future version. " "Use a timezone-naive object instead." ) + elif one_way: + # we special-case timezone-naive strings and timezone-aware + # DatetimeIndex + return else: msg = ( "Indexing a timezone-aware DatetimeIndex with a " @@ -640,6 +644,7 @@ def get_loc(self, key, method=None, tolerance=None): parsed, reso = self._parse_with_reso(key) except ValueError as err: raise KeyError(key) from err + self._deprecate_mismatched_indexing(parsed, one_way=True) if self._can_partial_date_slice(reso): try: diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 332ab02255911..8d498b59c55d1 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -10,6 +10,22 @@ class TestDatetimeIndex: + def test_get_loc_naive_dti_aware_str_deprecated(self): + # GH#46903 + ts = Timestamp("20130101").value + dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)]) + ser = Series(range(100), index=dti) + + key = "2013-01-01 00:00:00.000000050+0000" + msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser[key] + assert res == 0 + + with tm.assert_produces_warning(FutureWarning, match=msg): + loc = dti.get_loc(key) + assert loc == 0 + def test_indexing_with_datetime_tz(self): # GH#8260 diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 5557322eae42d..4381aa3f34f8d 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -23,9 +23,12 @@ def test_asof_nanosecond_index_access(self): first_value = ser.asof(ser.index[0]) + # GH#46903 previously incorrectly was "day" + assert dti.resolution == "nanosecond" + # this used to not work bc parsing was done by dateutil that didn't # handle nanoseconds - assert first_value == ser["2013-01-01 00:00:00.000000050+0000"] + assert first_value == ser["2013-01-01 00:00:00.000000050"] expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns") assert first_value == ser[Timestamp(expected_ts)] diff --git a/pandas/tests/tslibs/test_resolution.py b/pandas/tests/tslibs/test_resolution.py new file mode 100644 index 0000000000000..15f4a9d032e5c --- /dev/null +++ b/pandas/tests/tslibs/test_resolution.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas._libs.tslibs import ( + Resolution, + get_resolution, +) + + +def test_get_resolution_nano(): + # don't return the fallback RESO_DAY + arr = np.array([1], dtype=np.int64) + res = get_resolution(arr) + assert res == Resolution.RESO_NS