Skip to content

Commit

Permalink
BUG: DataFrame.min/max dt64 with skipna=False (pandas-dev#37425)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Kevin D Smith committed Nov 2, 2020
1 parent f1bf424 commit 362a183
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 24 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Numeric
- Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`)
- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`)
- Bug in :meth:`DataFrame.std`` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`)
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`)

Conversion
^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions pandas/compat/numpy/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,20 +387,20 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
raise TypeError("too many arguments passed in")


def validate_minmax_axis(axis: Optional[int]) -> None:
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
ndim : int, default 1
Raises
------
ValueError
"""
ndim = 1 # hard-coded for Index
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
Expand Down
54 changes: 33 additions & 21 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,13 +1264,24 @@ def min(self, axis=None, skipna=True, *args, **kwargs):
Series.min : Return the minimum value in a Series.
"""
nv.validate_min(args, kwargs)
nv.validate_minmax_axis(axis)
nv.validate_minmax_axis(axis, self.ndim)

result = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna())
if isna(result):
# Period._from_ordinal does not handle np.nan gracefully
return NaT
return self._box_func(result)
if is_period_dtype(self.dtype):
# pass datetime64 values to nanops to get correct NaT semantics
result = nanops.nanmin(
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
)
if result is NaT:
return NaT
result = result.view("i8")
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)

result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)

def max(self, axis=None, skipna=True, *args, **kwargs):
"""
Expand All @@ -1286,23 +1297,24 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
# TODO: skipna is broken with max.
# See https://github.com/pandas-dev/pandas/issues/24265
nv.validate_max(args, kwargs)
nv.validate_minmax_axis(axis)

mask = self.isna()
if skipna:
values = self[~mask].asi8
elif mask.any():
return NaT
else:
values = self.asi8
nv.validate_minmax_axis(axis, self.ndim)

if not len(values):
# short-circuit for empty max / min
return NaT
if is_period_dtype(self.dtype):
# pass datetime64 values to nanops to get correct NaT semantics
result = nanops.nanmax(
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
)
if result is NaT:
return result
result = result.view("i8")
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)

result = nanops.nanmax(values, skipna=skipna)
# Don't have to worry about NA `result`, since no NA went in.
return self._box_func(result)
result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)

def mean(self, skipna=True, axis: Optional[int] = 0):
"""
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
if result == fill_value:
result = np.nan
if tz is not None:
# we get here e.g. via nanmean when we call it on a DTA[tz]
result = Timestamp(result, tz=tz)
elif isna(result):
result = np.datetime64("NaT", "ns")
Expand Down Expand Up @@ -919,10 +920,13 @@ def reduction(
mask: Optional[np.ndarray] = None,
) -> Dtype:

orig_values = values
values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask
)

datetimelike = orig_values.dtype.kind in ["m", "M"]

if (axis is not None and values.shape[axis] == 0) or values.size == 0:
try:
result = getattr(values, meth)(axis, dtype=dtype_max)
Expand All @@ -933,7 +937,12 @@ def reduction(
result = getattr(values, meth)(axis)

result = _wrap_results(result, dtype, fill_value)
return _maybe_null_out(result, axis, mask, values.shape)
result = _maybe_null_out(result, axis, mask, values.shape)

if datetimelike and not skipna:
result = _mask_datetimelike_result(result, axis, mask, orig_values)

return result

return reduction

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,31 @@ def test_min_max_dt64_with_NaT(self):
exp = Series([pd.NaT], index=["foo"])
tm.assert_series_equal(res, exp)

def test_min_max_dt64_with_NaT_skipna_false(self, tz_naive_fixture):
# GH#36907
tz = tz_naive_fixture
df = DataFrame(
{
"a": [
Timestamp("2020-01-01 08:00:00", tz=tz),
Timestamp("1920-02-01 09:00:00", tz=tz),
],
"b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT],
}
)

res = df.min(axis=1, skipna=False)
expected = Series([df.loc[0, "a"], pd.NaT])
assert expected.dtype == df["a"].dtype

tm.assert_series_equal(res, expected)

res = df.max(axis=1, skipna=False)
expected = Series([df.loc[0, "b"], pd.NaT])
assert expected.dtype == df["a"].dtype

tm.assert_series_equal(res, expected)

def test_min_max_dt64_api_consistency_with_NaT(self):
# Calling the following sum functions returned an error for dataframes but
# returned NaT for series. These tests check that the API is consistent in
Expand Down

0 comments on commit 362a183

Please sign in to comment.