diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 23f1aabd69ff3..4e2c428415926 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -633,6 +633,16 @@ We are stopping on the included end-point as it is part of the index: dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +.. versionadded:: 0.25.0 + +Slicing with string indexing also honors UTC offset. + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + .. _timeseries.slice_vs_exact_match: Slice vs. Exact Match diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 6e225185ecf84..5716bea7ce694 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -29,7 +29,37 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- :meth:`Timestamp.strptime` will now raise a NotImplementedError (:issue:`25016`) +.. _whatsnew_0250.api_breaking.utc_offset_indexing: + +Indexing with date strings with UTC offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing a :class:`DataFrame` or :class:`Series` with a :class:`DatetimeIndex` with a +date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset +is respected in indexing. (:issue:`24076`, :issue:`16785`) + +*Previous Behavior*: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + + In [2]: df + Out[2]: + 0 + 2019-01-01 00:00:00-08:00 0 + + In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00'] + Out[3]: + 0 + 2019-01-01 00:00:00-08:00 0 + +*New Behavior*: + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] .. 
_whatsnew_0250.api.other: @@ -38,7 +68,7 @@ Other API Changes - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) - ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) -- +- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`) - .. _whatsnew_0250.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b5f3c929a7f36..1cdacc908b663 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6,9 +6,10 @@ import numpy as np from pandas._libs import ( - Timedelta, algos as libalgos, index as libindex, join as libjoin, lib, - tslibs) + algos as libalgos, index as libindex, join as libjoin, lib) from pandas._libs.lib import is_datetime_array +from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp +from pandas._libs.tslibs.timezones import tz_compare import pandas.compat as compat from pandas.compat import range, set_function_name, u from pandas.compat.numpy import function as nv @@ -447,7 +448,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) - except tslibs.OutOfBoundsDatetime: + except OutOfBoundsDatetime: pass elif inferred.startswith('timedelta'): @@ -4867,6 +4868,20 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # If it's a reverse slice, temporarily swap bounds. 
start, end = end, start + # GH 16785: If start and end happen to be date strings with UTC offsets + # attempt to parse and check that the offsets are the same + if (isinstance(start, (compat.string_types, datetime)) + and isinstance(end, (compat.string_types, datetime))): + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the " + "same UTC offset") + start_slice = None if start is not None: start_slice = self.get_slice_bound(start, 'left', kind) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1037e2d9a3bd6..527991f15364b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -32,9 +32,8 @@ from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools -from pandas.tseries import offsets from pandas.tseries.frequencies import Resolution, to_offset -from pandas.tseries.offsets import CDay, prefix_mapping +from pandas.tseries.offsets import CDay, Nano, prefix_mapping def _new_DatetimeIndex(cls, d): @@ -826,54 +825,57 @@ def _parsed_string_to_bounds(self, reso, parsed): lower, upper: pd.Timestamp """ + valid_resos = {'year', 'month', 'quarter', 'day', 'hour', 'minute', + 'second', 'minute', 'second', 'microsecond'} + if reso not in valid_resos: + raise KeyError if reso == 'year': - return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz), - Timestamp(datetime(parsed.year, 12, 31, 23, - 59, 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, 1, 1) + end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999) elif reso == 'month': d = ccalendar.get_days_in_month(parsed.year, parsed.month) - return (Timestamp(datetime(parsed.year, parsed.month, 1), - tz=self.tz), - Timestamp(datetime(parsed.year, parsed.month, d, 23, - 59, 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, parsed.month, 1) + end = 
Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999) elif reso == 'quarter': qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month - return (Timestamp(datetime(parsed.year, parsed.month, 1), - tz=self.tz), - Timestamp(datetime(parsed.year, qe, d, 23, 59, - 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, parsed.month, 1) + end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999) elif reso == 'day': - st = datetime(parsed.year, parsed.month, parsed.day) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Day(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day) + end = start + timedelta(days=1) - Nano(1) elif reso == 'hour': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Hour(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour) + end = start + timedelta(hours=1) - Nano(1) elif reso == 'minute': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour, minute=parsed.minute) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Minute(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute) + end = start + timedelta(minutes=1) - Nano(1) elif reso == 'second': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour, minute=parsed.minute, - second=parsed.second) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Second(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute, parsed.second) + end = start + timedelta(seconds=1) - Nano(1) elif reso == 'microsecond': - st = datetime(parsed.year, parsed.month, parsed.day, - parsed.hour, parsed.minute, parsed.second, - parsed.microsecond) - 
return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz)) - else: - raise KeyError + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute, parsed.second, + parsed.microsecond) + end = start + timedelta(microseconds=1) - Nano(1) + # GH 24076 + # If an incoming date string contained a UTC offset, need to localize + # the parsed date to this offset first before aligning with the index's + # timezone + if parsed.tzinfo is not None: + if self.tz is None: + raise ValueError("The index must be timezone aware " + "when indexing with a date string with a " + "UTC offset") + start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) + end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) + elif self.tz is not None: + start = start.tz_localize(self.tz) + end = end.tz_localize(self.tz) + return start, end def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e1ba0e1708442..a3ee5fe39769f 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -102,7 +102,7 @@ def test_stringified_slice_with_tz(self): # GH#2658 import datetime start = datetime.datetime.now() - idx = date_range(start=start, freq="1d", periods=10) + idx = date_range(start=start, freq="1d", periods=10, tz='US/Eastern') df = DataFrame(lrange(10), index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index a0c9d9f02385c..64693324521b3 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -396,3 +396,30 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected): result = op(df.A, datetimelike) expected = Series(expected, 
name='A') tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('start', [ + '2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00'), + pd.Timestamp('2018-12-02 21:50:00+00:00').to_pydatetime() + ]) + @pytest.mark.parametrize('end', [ + '2018-12-02 21:52:00+00:00', pd.Timestamp('2018-12-02 21:52:00+00:00'), + pd.Timestamp('2018-12-02 21:52:00+00:00').to_pydatetime() + ]) + def test_getitem_with_datestring_with_UTC_offset(self, start, end): + # GH 24076 + idx = pd.date_range(start='2018-12-02 14:50:00-07:00', + end='2018-12-02 14:50:00-07:00', freq='1min') + df = pd.DataFrame(1, index=idx, columns=['A']) + result = df[start:end] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + start = str(start) + end = str(end) + with pytest.raises(ValueError, match="Both dates must"): + df[start:end[:-4] + '1:00'] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df[start:end]