Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: DataFrame.min/max dt64 with skipna=False #37425

Merged
merged 18 commits into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
08ff20f
checkpoint tests passing
jbrockmendel Oct 24, 2020
6f86df6
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 24, 2020
0f6b64d
checkpoint tests passing
jbrockmendel Oct 25, 2020
ce08f4f
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 25, 2020
313d360
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 25, 2020
8ab2429
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 26, 2020
8c4f1db
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 26, 2020
30ca9d2
follow-up clenaup
jbrockmendel Oct 26, 2020
1eda3a8
cln: no need to pass mask
jbrockmendel Oct 26, 2020
89c0a23
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 26, 2020
2c46e67
unnecessary mask arg
jbrockmendel Oct 26, 2020
37895d2
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 26, 2020
471bf94
lint fixup
jbrockmendel Oct 26, 2020
9756559
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 26, 2020
0c39626
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 27, 2020
8cc32d3
whatsnew, non-copy for perioddtype
jbrockmendel Oct 27, 2020
5ade21b
catch nat
jbrockmendel Oct 27, 2020
bd8b2f3
Merge branch 'master' of https://github.com/pandas-dev/pandas into bu…
jbrockmendel Oct 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Numeric
- Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`)
- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`)
- Bug in :meth:`DataFrame.std`` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`)
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`)

Conversion
^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions pandas/compat/numpy/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,20 +387,20 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
raise TypeError("too many arguments passed in")


def validate_minmax_axis(axis: Optional[int]) -> None:
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.

Parameters
----------
axis : int or None
ndim : int, default 1

Raises
------
ValueError
"""
ndim = 1 # hard-coded for Index
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
Expand Down
54 changes: 33 additions & 21 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,13 +1264,24 @@ def min(self, axis=None, skipna=True, *args, **kwargs):
Series.min : Return the minimum value in a Series.
"""
nv.validate_min(args, kwargs)
nv.validate_minmax_axis(axis)
nv.validate_minmax_axis(axis, self.ndim)

result = nanops.nanmin(self.asi8, skipna=skipna, mask=self.isna())
if isna(result):
# Period._from_ordinal does not handle np.nan gracefully
return NaT
return self._box_func(result)
if is_period_dtype(self.dtype):
jreback marked this conversation as resolved.
Show resolved Hide resolved
# pass datetime64 values to nanops to get correct NaT semantics
result = nanops.nanmin(
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
)
if result is NaT:
return NaT
result = result.view("i8")
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)

result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)

def max(self, axis=None, skipna=True, *args, **kwargs):
"""
Expand All @@ -1286,23 +1297,24 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
# TODO: skipna is broken with max.
# See https://github.com/pandas-dev/pandas/issues/24265
nv.validate_max(args, kwargs)
nv.validate_minmax_axis(axis)

mask = self.isna()
if skipna:
values = self[~mask].asi8
elif mask.any():
return NaT
else:
values = self.asi8
nv.validate_minmax_axis(axis, self.ndim)

if not len(values):
# short-circuit for empty max / min
return NaT
if is_period_dtype(self.dtype):
# pass datetime64 values to nanops to get correct NaT semantics
result = nanops.nanmax(
self._ndarray.view("M8[ns]"), axis=axis, skipna=skipna
)
if result is NaT:
return result
result = result.view("i8")
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)

result = nanops.nanmax(values, skipna=skipna)
# Don't have to worry about NA `result`, since no NA went in.
return self._box_func(result)
result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
if lib.is_scalar(result):
return self._box_func(result)
return self._from_backing_data(result)

def mean(self, skipna=True, axis: Optional[int] = 0):
"""
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
if result == fill_value:
result = np.nan
if tz is not None:
# we get here e.g. via nanmean when we call it on a DTA[tz]
result = Timestamp(result, tz=tz)
elif isna(result):
result = np.datetime64("NaT", "ns")
Expand Down Expand Up @@ -919,10 +920,13 @@ def reduction(
mask: Optional[np.ndarray] = None,
) -> Dtype:

orig_values = values
values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask
)

datetimelike = orig_values.dtype.kind in ["m", "M"]

if (axis is not None and values.shape[axis] == 0) or values.size == 0:
try:
result = getattr(values, meth)(axis, dtype=dtype_max)
Expand All @@ -933,7 +937,12 @@ def reduction(
result = getattr(values, meth)(axis)

result = _wrap_results(result, dtype, fill_value)
return _maybe_null_out(result, axis, mask, values.shape)
result = _maybe_null_out(result, axis, mask, values.shape)

if datetimelike and not skipna:
result = _mask_datetimelike_result(result, axis, mask, orig_values)
jreback marked this conversation as resolved.
Show resolved Hide resolved

return result

return reduction

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,31 @@ def test_min_max_dt64_with_NaT(self):
exp = Series([pd.NaT], index=["foo"])
tm.assert_series_equal(res, exp)

def test_min_max_dt64_with_NaT_skipna_false(self, tz_naive_fixture):
# GH#36907
tz = tz_naive_fixture
df = DataFrame(
{
"a": [
Timestamp("2020-01-01 08:00:00", tz=tz),
Timestamp("1920-02-01 09:00:00", tz=tz),
],
"b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT],
}
)

res = df.min(axis=1, skipna=False)
expected = Series([df.loc[0, "a"], pd.NaT])
assert expected.dtype == df["a"].dtype

tm.assert_series_equal(res, expected)

res = df.max(axis=1, skipna=False)
expected = Series([df.loc[0, "b"], pd.NaT])
assert expected.dtype == df["a"].dtype

tm.assert_series_equal(res, expected)

def test_min_max_dt64_api_consistency_with_NaT(self):
# Calling the following sum functions returned an error for dataframes but
# returned NaT for series. These tests check that the API is consistent in
Expand Down