Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Backport PR #48159 on branch 1.5.x (TST: Fix interchange/plotting/groupby test warnings) #48279

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmax"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only)
return result

Expand Down Expand Up @@ -1673,7 +1675,9 @@ def func(df):
return df._constructor_sliced(result, index=res.index)

func.__name__ = "idxmin"
result = self._python_apply_general(func, self._obj_with_exclusions)
result = self._python_apply_general(
func, self._obj_with_exclusions, not_indexed_same=True
)
self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only)
return result

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,10 @@ def curried(x):
return self._obj_with_exclusions

result = self._python_apply_general(
curried, self._obj_with_exclusions, is_transform=is_transform
curried,
self._obj_with_exclusions,
is_transform=is_transform,
not_indexed_same=not is_transform,
)

if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ def set_nulls(
null_pos = None

if null_kind == ColumnNullType.USE_SENTINEL:
null_pos = data == sentinel_val
null_pos = pd.Series(data) == sentinel_val
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
assert validity, "Expected to have a validity buffer for the mask"
valid_buff, valid_dtype = validity
Expand Down
3 changes: 2 additions & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters
from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by
from pandas.plotting._matplotlib.misc import unpack_single_str_list
from pandas.plotting._matplotlib.style import get_standard_colors
from pandas.plotting._matplotlib.timeseries import (
decorate_axes,
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(
# For `hist` plot, need to get grouped original data before `self.data` is
# updated later
if self.by is not None and self._kind == "hist":
self._grouped = data.groupby(self.by)
self._grouped = data.groupby(unpack_single_str_list(self.by))

self.kind = kind

Expand Down
1 change: 0 additions & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(
MPLPlot.__init__(self, data, **kwargs)

def _args_adjust(self):

# calculate bin number separately in different subplots
# where subplots are created based on by argument
if is_integer(self.bins):
Expand Down
5 changes: 2 additions & 3 deletions pandas/plotting/_matplotlib/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,6 @@ def r(h):

def unpack_single_str_list(keys):
    """
    Unwrap a one-element list into its sole element.

    Parameters
    ----------
    keys : object
        Value to inspect, e.g. the ``by`` argument forwarded to ``groupby``.

    Returns
    -------
    object
        ``keys[0]`` when ``keys`` is a ``list`` of length one; otherwise
        ``keys`` is returned unchanged (including tuples, longer lists,
        empty lists, scalars and ``None``).
    """
    # GH 42795
    if isinstance(keys, list) and len(keys) == 1:
        # The lone element is unpacked whatever its type, not only ``str``.
        keys = keys[0]
    return keys
25 changes: 12 additions & 13 deletions pandas/tests/groupby/test_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,21 +188,20 @@ def test_ngroup_cumcount_pair(self):
tm.assert_series_equal(g.ngroup(), Series(ngroupd))
tm.assert_series_equal(g.cumcount(), Series(cumcounted))

def test_ngroup_respects_groupby_order(self):
def test_ngroup_respects_groupby_order(self, sort):
np.random.seed(0)
df = DataFrame({"a": np.random.choice(list("abcdef"), 100)})
for sort_flag in (False, True):
g = df.groupby(["a"], sort=sort_flag)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())
g = df.groupby("a", sort=sort)
df["group_id"] = -1
df["group_index"] = -1

for i, (_, group) in enumerate(g):
df.loc[group.index, "group_id"] = i
for j, ind in enumerate(group.index):
df.loc[ind, "group_index"] = j

tm.assert_series_equal(Series(df["group_id"].values), g.ngroup())
tm.assert_series_equal(Series(df["group_index"].values), g.cumcount())

@pytest.mark.parametrize(
"datetimelike",
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,11 +1590,11 @@ def test_corrwith_with_1_axis():
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The 'mad' method.*:FutureWarning")
@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning")
def test_multiindex_group_all_columns_when_empty(groupby_func):
# GH 32464
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"])
gb = df.groupby(["a", "b", "c"], group_keys=False)
method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def data_test_ix(request, dirpath):
for k in range(df.shape[1]):
col = df.iloc[:, k]
if col.dtype == np.int64:
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
df.isetitem(k, df.iloc[:, k].astype(np.float64))
return df, test_ix


Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/plotting/frame/test_hist_box_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ class TestHistWithBy(TestPlotBase):
)
def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.hist, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.hist, column=column, by=by, default_axes=True
)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand Down Expand Up @@ -120,7 +122,7 @@ def test_hist_plot_by_0(self, by, column, titles, legends, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.hist, column=column, by=by)
axes = _check_plot_works(df.plot.hist, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_legends = [
[legend.get_text() for legend in ax.get_legend().texts] for ax in axes
Expand All @@ -142,7 +144,9 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.hist, column=column, by=by)
_check_plot_works(
hist_df.plot.hist, default_axes=True, column=column, by=by
)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand Down Expand Up @@ -274,7 +278,9 @@ class TestBoxWithBy(TestPlotBase):
)
def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by
)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand Down Expand Up @@ -313,7 +319,7 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df):
df = hist_df.copy()
df = df.rename(columns={"C": 0})

axes = _check_plot_works(df.plot.box, column=column, by=by)
axes = _check_plot_works(df.plot.box, default_axes=True, column=column, by=by)
result_titles = [ax.get_title() for ax in axes]
result_xticklabels = [
[label.get_text() for label in ax.get_xticklabels()] for ax in axes
Expand All @@ -335,7 +341,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
# GH 15079
msg = "No group keys passed"
with pytest.raises(ValueError, match=msg):
_check_plot_works(hist_df.plot.box, column=column, by=by)
_check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by)

@pytest.mark.slow
@pytest.mark.parametrize(
Expand All @@ -351,7 +357,9 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
)
def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df):
# GH 15079
axes = _check_plot_works(hist_df.plot.box, column=column, by=by, layout=layout)
axes = _check_plot_works(
hist_df.plot.box, default_axes=True, column=column, by=by, layout=layout
)
self._check_axes_shape(axes, axes_num=axes_num, layout=layout)

@pytest.mark.parametrize(
Expand Down