Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Fix interchange/plotting/groupby test warnings #48159

Merged
merged 10 commits into from
Aug 26, 2022
8 changes: 6 additions & 2 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmax"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only)
return result

Expand Down Expand Up @@ -1673,7 +1675,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmin"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only)
return result

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,10 @@ def curried(x):
return self._obj_with_exclusions

result = self._python_apply_general(
curried, self._obj_with_exclusions, is_transform=is_transform
curried,
self._obj_with_exclusions,
is_transform=is_transform,
not_indexed_same=not is_transform,
)

if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ def set_nulls(
null_pos = None

if null_kind == ColumnNullType.USE_SENTINEL:
null_pos = data == sentinel_val
null_pos = pd.Series(data) == sentinel_val
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert validity, "Expected to have a validity buffer for the mask"
valid_buff, valid_dtype = validity
Expand Down
3 changes: 2 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
from pandas.plotting._matplotlib.misc import unpack_single_str_list
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
decorate_axes,
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(
# For `hist` plot, need to get grouped original data before `self.data` is
# updated later
if self.by is not None and self._kind == "hist":
self._grouped = data.groupby(self.by)
self._grouped = data.groupby(unpack_single_str_list(self.by))

self.kind = kind

Expand Down
1 change: 0 additions & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(
MPLPlot.__init__(self, data, **kwargs)

def _args_adjust(self):

# calculate bin number separately in different subplots
# where subplots are created based on by argument
if is_integer(self.bins):
Expand Down
5 changes: 2 additions & 3 deletions pandas/plotting/_matplotlib/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,6 @@ def r(h):

def unpack_single_str_list(keys):
# GH 42795
if isinstance(keys, list):
if len(keys) == 1 and isinstance(keys[0], str):
keys = keys[0]
if isinstance(keys, list) and len(keys) == 1:
keys = keys[0]
return keys
25 changes: 12 additions & 13 deletions pandas/tests/groupby/test_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,21 +188,20 @@ def test_ngroup_cumcount_pair(self):
tm.assert_series_equal(g.ngroup(), Series(ngroupd))
tm.assert_series_equal(g.cumcount(), Series(cumcounted))

def test_ngroup_respects_groupby_order(self):
def test_ngroup_respects_groupby_order(self, sort):
np.random.seed(0)
df = DataFrame({"a": np.random.choice(list("abcdef"), 100)})
for sort_flag in (False, True):
g = df.groupby(["a"], sort=sort_flag)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())
g = df.groupby("a", sort=sort)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())

@pytest.mark.parametrize(
"datetimelike",
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,11 +1590,11 @@ def test_corrwith_with_1_axis():
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The 'mad' method.*:FutureWarning")
@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning")
def test_multiindex_group_all_columns_when_empty(groupby_func):
# GH 32464
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"], group_keys=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These warnings should only be generated when using groupby(...).apply; I think we should instead suppress them internally. In

result = self._python_apply_general(
curried, self._obj_with_exclusions, is_transform=is_transform
)

we could pass not_indexed_same=not is_transform and that would suppress all cases called from here. I believe it should also be correct, as something is indexed the same precisely when it's a transform.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 16ee154156..8d442c6ae3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1040,7 +1040,7 @@ class GroupBy(BaseGroupBy[NDFrameT]):
                 return self._obj_with_exclusions

             result = self._python_apply_general(
-                curried, self._obj_with_exclusions, is_transform=is_transform
+                curried, self._obj_with_exclusions, is_transform=is_transform, not_indexed_same=not is_transform
             )

             if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1:

and still get

pandas/tests/groupby/test_function.py::test_multiindex_group_all_columns_when_empty[idxmax]
pandas/tests/groupby/test_function.py::test_multiindex_group_all_columns_when_empty[idxmin]
  .../pandas/tests/groupby/test_function.py:1601: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
  To preserve the previous behavior, use

  	>>> .groupby(..., group_keys=False)

  To adopt the future behavior and silence this warning, use

  	>>> .groupby(..., group_keys=True)
    result = method(*args).index

Ignoring the warnings at this line still raises these warnings for idxmin/idxmax, so they must take a different code path?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, that's right; they are listed in common_apply_allowlist but then also defined on DataFrameGroupBy (I have a PR for #48028 that does away with common_apply_allowlist completely that I'll put up once 1.5 is released).

The same can be done with idxmin/idxmax on DataFrameGroupBy.

method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def data_test_ix(request, dirpath):
for k in range(df.shape[1]):
col = df.iloc[:, k]
if col.dtype == np.int64:
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
df.isetitem(k, df.iloc[:, k].astype(np.float64))
return df, test_ix


Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/plotting/frame/test_hist_box_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ class TestHistWithBy(TestPlotBase):
)
def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.hist, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.hist, column=column, by=by, default_axes=True
)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand Down Expand Up @@ -120,7 +122,7 @@ def test_hist_plot_by_0(self, by, column, titles, legends, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.hist, column=column, by=by)
axes = _check_plot_works(df.plot.hist, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand All @@ -142,7 +144,9 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.hist, column=column, by=by)
_check_plot_works(
hist_df.plot.hist, default_axes=True, column=column, by=by
)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand Down Expand Up @@ -274,7 +278,9 @@ class TestBoxWithBy(TestPlotBase):
)
def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by
)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand Down Expand Up @@ -313,7 +319,7 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.box, column=column, by=by)
axes = _check_plot_works(df.plot.box, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand All @@ -335,7 +341,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.box, column=column, by=by)
_check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand All @@ -351,7 +357,9 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
)
def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by, layout=layout)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by, layout=layout
)
self._check_axes_shape(axes, axes_num=axes_num, layout=layout)

@pytest.mark.parametrize(
Expand Down