Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: DatetimeIndex.resolution with nanosecond reso #46903

Merged
merged 12 commits into from
May 19, 2022
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ Other Deprecations
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`)
- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_150.performance:
Expand Down Expand Up @@ -594,6 +596,7 @@ Datetimelike
- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`)
- Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`)
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
-

Timedelta
Expand Down
23 changes: 11 additions & 12 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ from pandas._libs.tslibs.nattype cimport (
)
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs.tslibs import (
Resolution,
get_resolution,
)
from pandas._libs.tslibs.timestamps import Timestamp

# Note: this is the only non-tslibs intra-pandas dependency here
Expand Down Expand Up @@ -122,11 +126,11 @@ def format_array_from_datetime(
"""
cdef:
int64_t val, ns, N = len(values)
ndarray[int64_t] consider_values
bint show_ms = False, show_us = False, show_ns = False
bint basic_format = False
ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
object ts, res
_Timestamp ts
str res
npy_datetimestruct dts

if na_rep is None:
Expand All @@ -136,16 +140,10 @@ def format_array_from_datetime(
# a format based on precision
basic_format = format is None and tz is None
if basic_format:
consider_values = values[values != NPY_NAT]
show_ns = (consider_values % 1000).any()

if not show_ns:
consider_values //= 1000
show_us = (consider_values % 1000).any()

if not show_ms:
consider_values //= 1000
show_ms = (consider_values % 1000).any()
reso_obj = get_resolution(values)
show_ns = reso_obj == Resolution.RESO_NS
show_us = reso_obj == Resolution.RESO_US
show_ms = reso_obj == Resolution.RESO_MS

for i in range(N):
val = values[i]
Expand Down Expand Up @@ -178,6 +176,7 @@ def format_array_from_datetime(
# invalid format string
# requires dates > 1900
try:
# Note: dispatches to pydatetime
result[i] = ts.strftime(format)
except ValueError:
result[i] = str(ts)
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def ints_to_pydatetime(


cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts):
if dts.us != 0:
if dts.ps != 0:
return c_Resolution.RESO_NS
elif dts.us != 0:
if dts.us % 1000 == 0:
return c_Resolution.RESO_MS
return c_Resolution.RESO_US
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
end = self._maybe_cast_for_get_loc(end)
return start, end

def _deprecate_mismatched_indexing(self, key) -> None:
def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None:
# GH#36148
# we get here with isinstance(key, self._data._recognized_scalars)
try:
Expand All @@ -606,6 +606,10 @@ def _deprecate_mismatched_indexing(self, key) -> None:
"raise KeyError in a future version. "
"Use a timezone-naive object instead."
)
elif one_way:
# we special-case timezone-naive strings and timezone-aware
# DatetimeIndex
return
else:
msg = (
"Indexing a timezone-aware DatetimeIndex with a "
Expand Down Expand Up @@ -640,6 +644,7 @@ def get_loc(self, key, method=None, tolerance=None):
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
raise KeyError(key) from err
self._deprecate_mismatched_indexing(parsed, one_way=True)

if self._can_partial_date_slice(reso):
try:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@


class TestDatetimeIndex:
def test_get_loc_naive_dti_aware_str_deprecated(self):
# GH#46903
# Looking up a string key that carries a UTC offset on an index built
# without a timezone must emit a FutureWarning, both through
# Series.__getitem__ and through DatetimeIndex.get_loc.
ts = Timestamp("20130101").value
# 100 consecutive integer stamps, the first one 50 units after ``ts``
# (presumably nanoseconds, matching the ".000000050" in ``key`` below)
dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)])
ser = Series(range(100), index=dti)

# the "+0000" suffix makes the parsed key timezone-aware
key = "2013-01-01 00:00:00.000000050+0000"
msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime"
# the lookup still succeeds (first element) but warns
with tm.assert_produces_warning(FutureWarning, match=msg):
res = ser[key]
assert res == 0

# same expectation when going through the index directly
with tm.assert_produces_warning(FutureWarning, match=msg):
loc = dti.get_loc(key)
assert loc == 0

def test_indexing_with_datetime_tz(self):

# GH#8260
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/series/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ def test_asof_nanosecond_index_access(self):

first_value = ser.asof(ser.index[0])

# GH#46903 previously this was incorrectly "day"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put this (the deprecation check) in a dedicated method?

assert dti.resolution == "nanosecond"

# this used to not work because parsing was done by dateutil, which
# didn't handle nanoseconds
assert first_value == ser["2013-01-01 00:00:00.000000050+0000"]
assert first_value == ser["2013-01-01 00:00:00.000000050"]

expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
assert first_value == ser[Timestamp(expected_ts)]
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/tslibs/test_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import numpy as np

from pandas._libs.tslibs import (
Resolution,
get_resolution,
)


def test_get_resolution_nano():
    """Check that ``get_resolution`` reports ``RESO_NS`` for a sub-microsecond stamp.

    GH#46903: the result for such input must not be the fallback
    ``RESO_DAY``.
    """
    # Smallest non-zero int64 stamp: its only non-zero component lies below
    # the microsecond level.
    arr = np.array([1], dtype=np.int64)
    res = get_resolution(arr)
    # don't return the fallback RESO_DAY
    assert res == Resolution.RESO_NS