Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing corner case (closes #19382, #19529)

Merged
merged 3 commits into from
Feb 6, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 31 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -609,20 +609,26 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to
# dateutil parser will return incorrect result because
# it will ignore nanoseconds
if require_iso8601:
if _handle_error_require_iso8601(val, &iresult[i],
is_coerce, is_raise):
continue
return values
elif is_coerce:
iresult[i] = NPY_NAT
continue
raise
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
if _handle_error_require_iso8601(val, &iresult[i],
is_coerce, is_raise):
continue
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError(
"time data %r doesn't match format "
"specified" % (val,))
else:
return values
return values

try:
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
Expand Down Expand Up @@ -725,6 +731,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
return oresult


cdef bint _handle_error_require_iso8601(object val, int64_t* iresult,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather do this inline: you are mutating the result here, and it makes the logic much harder to follow. If you want to do this in a separate PR that only refactors, that might be OK, but not in this one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK.

bint is_coerce,
bint is_raise) except? -1:
# Return True to continue, False to return values, or raise
if _parse_today_now(val, iresult):
return True
elif is_coerce:
iresult[0] = NPY_NAT
return True
elif is_raise:
raise ValueError("time data {val} doesn't match format "
"specified".format(val=val))
return False


cdef inline bint _parse_today_now(str val, int64_t* iresult):
# We delay this check for as long as possible
# because it catches relatively rare cases
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
dt64_to_dtstruct, dtstruct_to_dt64,
get_datetime64_unit, get_datetime64_value,
pydatetime_to_dt64)
from np_datetime import OutOfBoundsDatetime

from util cimport (is_string_object,
is_datetime64_object,
Expand Down Expand Up @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise',
errors='raise')[0]

except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
# parser will return incorrect result because it will ignore
# nanoseconds
raise

except ValueError:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
Expand Down
14 changes: 13 additions & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,6 @@ def test_dataframe_dtypes(self, cache):


class TestToDatetimeMisc(object):

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_iso8601(self, cache):
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
Expand Down Expand Up @@ -1596,6 +1595,19 @@ def test_coerce_of_invalid_datetimes(self):
)
)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import `OutOfBoundsDatetime` from `pandas.errors` rather than accessing it through `tslib`.

def test_to_datetime_barely_out_of_bounds(self):
    """A string that is out of bounds only by its nanoseconds must raise.

    GH#19382: the timestamp below is close enough to the bounds that
    dropping the nanoseconds would result in an in-bounds datetime.
    """
    # Take the exception from its public location instead of reaching
    # into the private ``tslib`` extension module for it.
    from pandas.errors import OutOfBoundsDatetime

    arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

    with pytest.raises(OutOfBoundsDatetime):
        to_datetime(arr)

    with pytest.raises(OutOfBoundsDatetime):
        # Essentially the same as above, but more directly calling
        # the relevant function
        tslib.array_to_datetime(arr)


def test_normalize_date():
value = date(2012, 9, 7)
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pandas.tseries import offsets

from pandas._libs.tslib import OutOfBoundsDatetime
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `from pandas.errors import OutOfBoundsDatetime` here instead.

from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz

Expand Down Expand Up @@ -410,6 +411,12 @@ def test_out_of_bounds_string(self):
with pytest.raises(ValueError):
Timestamp('2263-01-01')

def test_barely_out_of_bounds(self):
    """Constructing a Timestamp just past the bounds must raise.

    GH#19382: the value is close enough to the bounds that dropping the
    nanoseconds would result in an in-bounds datetime.
    """
    barely_oob = '2262-04-11 23:47:16.854775808'
    with pytest.raises(OutOfBoundsDatetime):
        Timestamp(barely_oob)

def test_bounds_with_different_units(self):
out_of_bounds_dates = ('1677-09-21', '2262-04-12')

Expand Down