diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index ff4aa9968f2947..88214d1567b0ec 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1550,6 +1550,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). - Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`) - Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`) +- Bug in :func:`to_datetime` where an invalid datetime format would not coerce the input to ``NaT`` even when ``errors='coerce'`` was passed (:issue:`24763`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5b540ee88a3f37..e6478da400d76f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -265,7 +265,12 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise - result = arg + elif errors == 'coerce': + result = np.empty(arg.shape, dtype='M8[ns]') + iresult = result.view('i8') + iresult.fill(tslibs.iNaT) + else: + result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here @@ -273,7 +278,12 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if not infer_datetime_format: if errors == 'raise': raise - result = arg + elif errors == 'coerce': + result = np.empty(arg.shape, dtype='M8[ns]') + iresult = result.view('i8') + iresult.fill(tslibs.iNaT) + else: + result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` 
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index 50c8f8d4c1f4c0..bec2fa66c43cdc 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -555,6 +555,63 @@ def test_datetime_invalid_datatype(self):
         with pytest.raises(TypeError):
             pd.to_datetime(pd.to_datetime)
 
+    @pytest.mark.parametrize('value', ["a", "00:01:99"])
+    @pytest.mark.parametrize('infer', [True, False])
+    @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
+    def test_datetime_invalid_scalar(self, value, format, infer):
+        # GH24763 NOTE(review): 'H%:M%:S%' looks deliberately malformed
+        res = pd.to_datetime(value, errors='ignore', format=format,
+                             infer_datetime_format=infer)
+        assert res == value
+
+        res = pd.to_datetime(value, errors='coerce', format=format,
+                             infer_datetime_format=infer)
+        assert res is pd.NaT
+
+        with pytest.raises(ValueError):
+            pd.to_datetime(value, errors='raise', format=format,
+                           infer_datetime_format=infer)
+
+    @pytest.mark.parametrize('value', ["3000/12/11 00:00:00"])
+    @pytest.mark.parametrize('infer', [True, False])
+    @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
+    def test_datetime_outofbounds_scalar(self, value, format, infer):
+        # GH24763: out-of-bounds scalar; raised type depends on ``format``
+        res = pd.to_datetime(value, errors='ignore', format=format,
+                             infer_datetime_format=infer)
+        assert res == value
+
+        res = pd.to_datetime(value, errors='coerce', format=format,
+                             infer_datetime_format=infer)
+        assert res is pd.NaT
+
+        if format is not None:
+            with pytest.raises(ValueError):
+                pd.to_datetime(value, errors='raise', format=format,
+                               infer_datetime_format=infer)
+        else:
+            with pytest.raises(OutOfBoundsDatetime):
+                pd.to_datetime(value, errors='raise', format=format,
+                               infer_datetime_format=infer)
+
+    @pytest.mark.parametrize('values', [["a"], ["00:01:99"],
+                                        ["a", "b", "99:00:00"]])
+    @pytest.mark.parametrize('infer', [True, False])
+    @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
+    def test_datetime_invalid_index(self, values, format, infer):
+        # GH24763: list input must honor errors=ignore/coerce/raise
+        res = pd.to_datetime(values, errors='ignore', format=format,
+                             infer_datetime_format=infer)
+        tm.assert_index_equal(res, pd.Index(values))
+
+        res = pd.to_datetime(values, errors='coerce', format=format,
+                             infer_datetime_format=infer)
+        tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values)))
+
+        with pytest.raises(ValueError):
+            pd.to_datetime(values, errors='raise', format=format,
+                           infer_datetime_format=infer)
+
     @pytest.mark.parametrize("utc", [True, None])
     @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
     @pytest.mark.parametrize("box", [True, False])