From 953921f2782062a67aa3886837fac563892d7ec6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Sep 2022 06:40:39 -0700 Subject: [PATCH] BUG: to_datetime(format='...%f') parses nanoseconds (#48820) --- doc/source/whatsnew/v1.5.1.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 5 ++++- pandas/tests/tools/test_to_datetime.py | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst index ee66f2f649bc2..a8ec836ba93df 100644 --- a/doc/source/whatsnew/v1.5.1.rst +++ b/doc/source/whatsnew/v1.5.1.rst @@ -76,6 +76,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`) - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`) - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`) +- Fixed regression in :func:`to_datetime` when ``arg`` was a date string with nanosecond and ``format`` contained ``%f`` would raise a ``ValueError`` (:issue:`48767`) - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`) - Fixed Regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index ab877b08b1ec7..6287c2fbc5d34 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -347,7 +347,7 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai """ TimeRE, _calc_julian_from_U_or_W are vendored from the standard library, see -https://github.com/python/cpython/blob/master/Lib/_strptime.py +https://github.com/python/cpython/blob/main/Lib/_strptime.py The original module-level docstring follows. Strptime-related classes and functions. @@ -383,6 +383,9 @@ class TimeRE(_TimeRE): """ self._Z = None super().__init__(locale_time=locale_time) + # GH 48767: Overrides for cpython's TimeRE + # 1) Parse up to nanos instead of micros + self.update({"f": r"(?P[0-9]{1,9})"}), def __getitem__(self, key): if key == "Z": diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7e698b7a6b83d..9f7b1433b61bb 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2815,3 +2815,21 @@ def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length): with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"): to_datetime(s, errors="raise", utc=True) + + +def test_to_datetime_format_f_parse_nanos(): + # GH 48767 + timestamp = "15/02/2020 02:03:04.123456789" + timestamp_format = "%d/%m/%Y %H:%M:%S.%f" + result = to_datetime(timestamp, format=timestamp_format) + expected = Timestamp( + year=2020, + month=2, + day=15, + hour=2, + minute=3, + second=4, + microsecond=123456, + nanosecond=789, + ) + assert result == expected