Skip to content

Commit

Permalink
BUG: DatetimeIndex.resolution with nanosecond reso (#46903)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored May 19, 2022
1 parent 281d650 commit 4f92db3
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 15 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ Other Deprecations
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of the ``inclusive`` argument; in a future version, passing ``closed`` will raise (:issue:`40245`)
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`)
- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_150.performance:
Expand Down Expand Up @@ -641,6 +643,7 @@ Datetimelike
- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`)
- Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`)
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
- Bug in :attr:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
-

Timedelta
Expand Down
23 changes: 11 additions & 12 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ from pandas._libs.tslibs.nattype cimport (
)
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs.tslibs import (
Resolution,
get_resolution,
)
from pandas._libs.tslibs.timestamps import Timestamp

# Note: this is the only non-tslibs intra-pandas dependency here
Expand Down Expand Up @@ -122,11 +126,11 @@ def format_array_from_datetime(
"""
cdef:
int64_t val, ns, N = len(values)
ndarray[int64_t] consider_values
bint show_ms = False, show_us = False, show_ns = False
bint basic_format = False
ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
object ts, res
_Timestamp ts
str res
npy_datetimestruct dts

if na_rep is None:
Expand All @@ -136,16 +140,10 @@ def format_array_from_datetime(
# a format based on precision
basic_format = format is None and tz is None
if basic_format:
consider_values = values[values != NPY_NAT]
show_ns = (consider_values % 1000).any()

if not show_ns:
consider_values //= 1000
show_us = (consider_values % 1000).any()

if not show_ms:
consider_values //= 1000
show_ms = (consider_values % 1000).any()
reso_obj = get_resolution(values)
show_ns = reso_obj == Resolution.RESO_NS
show_us = reso_obj == Resolution.RESO_US
show_ms = reso_obj == Resolution.RESO_MS

for i in range(N):
val = values[i]
Expand Down Expand Up @@ -178,6 +176,7 @@ def format_array_from_datetime(
# invalid format string
# requires dates > 1900
try:
# Note: dispatches to pydatetime
result[i] = ts.strftime(format)
except ValueError:
result[i] = str(ts)
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def ints_to_pydatetime(


cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts):
if dts.us != 0:
if dts.ps != 0:
return c_Resolution.RESO_NS
elif dts.us != 0:
if dts.us % 1000 == 0:
return c_Resolution.RESO_MS
return c_Resolution.RESO_US
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
end = self._maybe_cast_for_get_loc(end)
return start, end

def _deprecate_mismatched_indexing(self, key) -> None:
def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None:
# GH#36148
# we get here with isinstance(key, self._data._recognized_scalars)
try:
Expand All @@ -606,6 +606,10 @@ def _deprecate_mismatched_indexing(self, key) -> None:
"raise KeyError in a future version. "
"Use a timezone-naive object instead."
)
elif one_way:
# we special-case timezone-naive strings and timezone-aware
# DatetimeIndex
return
else:
msg = (
"Indexing a timezone-aware DatetimeIndex with a "
Expand Down Expand Up @@ -640,6 +644,7 @@ def get_loc(self, key, method=None, tolerance=None):
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
raise KeyError(key) from err
self._deprecate_mismatched_indexing(parsed, one_way=True)

if self._can_partial_date_slice(reso):
try:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@


class TestDatetimeIndex:
def test_get_loc_naive_dti_aware_str_deprecated(self):
# GH#46903
ts = Timestamp("20130101").value
dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)])
ser = Series(range(100), index=dti)

key = "2013-01-01 00:00:00.000000050+0000"
msg = "Indexing a timezone-naive DatetimeIndex with a timezone-aware datetime"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = ser[key]
assert res == 0

with tm.assert_produces_warning(FutureWarning, match=msg):
loc = dti.get_loc(key)
assert loc == 0

def test_indexing_with_datetime_tz(self):

# GH#8260
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/series/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ def test_asof_nanosecond_index_access(self):

first_value = ser.asof(ser.index[0])

# GH#46903 previously incorrectly was "day"
assert dti.resolution == "nanosecond"

# this used to not work bc parsing was done by dateutil that didn't
# handle nanoseconds
assert first_value == ser["2013-01-01 00:00:00.000000050+0000"]
assert first_value == ser["2013-01-01 00:00:00.000000050"]

expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
assert first_value == ser[Timestamp(expected_ts)]
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/tslibs/test_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import numpy as np

from pandas._libs.tslibs import (
Resolution,
get_resolution,
)


def test_get_resolution_nano():
    """Check that ``get_resolution`` reports nanosecond resolution.

    Regression test for GH#46903: the result for this input must be
    ``Resolution.RESO_NS`` rather than the fallback ``Resolution.RESO_DAY``.
    """
    # don't return the fallback RESO_DAY
    arr = np.array([1], dtype=np.int64)
    res = get_resolution(arr)
    assert res == Resolution.RESO_NS

0 comments on commit 4f92db3

Please sign in to comment.