diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc771e612aef7..b1633cd6d8975 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -124,6 +124,7 @@ relabel_result, transform, ) +from pandas.core.arraylike import OpsMixin from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import extract_array @@ -336,7 +337,7 @@ # DataFrame class -class DataFrame(NDFrame): +class DataFrame(NDFrame, OpsMixin): """ Two-dimensional, size-mutable, potentially heterogeneous tabular data. @@ -5838,7 +5839,87 @@ def reorder_levels(self, order, axis=0) -> DataFrame: return result # ---------------------------------------------------------------------- - # Arithmetic / combination related + # Arithmetic Methods + + def _cmp_method(self, other, op): + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=False, level=None) + + # See GH#4537 for discussion of scalar op behavior + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + def _arith_method(self, other, op): + if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None): + return ops.frame_arith_method_with_reindex(self, other, op) + + axis = 1 # only relevant for Series other case + + self, other = ops.align_method_FRAME(self, other, axis, flex=True, level=None) + + new_data = self._dispatch_frame_op(other, op, axis=axis) + return self._construct_result(new_data) + + _logical_method = _arith_method + + def _dispatch_frame_op(self, right, func, axis: Optional[int] = None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. 
+ + Parameters + ---------- + right : scalar, Series, or DataFrame + func : arithmetic or comparison operator + axis : {None, 0, 1} + + Returns + ------- + DataFrame + """ + # Get the appropriate array-op to apply to each column/block's values. + array_op = ops.get_array_op(func) + + right = lib.item_from_zerodim(right) + if not is_list_like(right): + # i.e. scalar, faster than checking np.ndim(right) == 0 + bm = self._mgr.apply(array_op, right=right) + return type(self)(bm) + + elif isinstance(right, DataFrame): + assert self.index.equals(right.index) + assert self.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(self)` + # fails in cases with empty columns reached via + # frame_arith_method_with_reindex + + bm = self._mgr.operate_blockwise(right._mgr, array_op) + return type(self)(bm) + + elif isinstance(right, Series) and axis == 1: + # axis=1 means we want to operate row-by-row + assert right.index.equals(self.columns) + + right = right._values + # _maybe_align_series_as_frame ensures we do not have an ndarray here + assert not isinstance(right, np.ndarray) + + arrays = [array_op(l, r) for l, r in zip(self._iter_column_arrays(), right)] + + elif isinstance(right, Series): + assert right.index.equals(self.index) # Handle other cases later + right = right._values + + arrays = [array_op(l, right) for l in self._iter_column_arrays()] + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + return type(self)._from_arrays( + arrays, self.columns, self.index, verify_integrity=False + ) def _combine_frame(self, other: DataFrame, func, fill_value=None): # at this point we have `self._indexed_same(other)` @@ -5857,7 +5938,7 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - new_data = ops.dispatch_to_series(self, other, _arith_op) + new_data = self._dispatch_frame_op(other, _arith_op) return new_data def _construct_result(self, 
result) -> DataFrame: @@ -5879,6 +5960,9 @@ def _construct_result(self, result) -> DataFrame: out.index = self.index return out + # ---------------------------------------------------------------------- + # Combination-Related + @Appender( """ Returns @@ -7254,7 +7338,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: bm_axis = self._get_block_manager_axis(axis) if bm_axis == 0 and periods != 0: - return self - self.shift(periods, axis=axis) # type: ignore[operator] + return self - self.shift(periods, axis=axis) new_data = self._mgr.diff(n=periods, axis=bm_axis) return self._constructor(new_data) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index b656aef64cde9..87da8f8fa146c 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -4,12 +4,11 @@ This is not a public API. """ import operator -from typing import TYPE_CHECKING, Optional, Set, Type +from typing import TYPE_CHECKING, Optional, Set import warnings import numpy as np -from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 from pandas._typing import Level from pandas.util._decorators import Appender @@ -28,7 +27,6 @@ ) from pandas.core.ops.common import unpack_zerodim_and_defer # noqa:F401 from pandas.core.ops.docstrings import ( - _arith_doc_FRAME, _flex_comp_doc_FRAME, _make_flex_doc, _op_descriptions, @@ -143,29 +141,6 @@ def _maybe_match_name(a, b): return None -# ----------------------------------------------------------------------------- - - -def _get_op_name(op, special: bool) -> str: - """ - Find the name to attach to this method according to conventions - for special and non-special methods. 
- - Parameters - ---------- - op : binary operator - special : bool - - Returns - ------- - op_name : str - """ - opname = op.__name__.strip("_") - if special: - opname = f"__{opname}__" - return opname - - # ----------------------------------------------------------------------------- # Masking NA values and fallbacks for operations numpy does not support @@ -211,70 +186,6 @@ def fill_binop(left, right, fill_value): return left, right -# ----------------------------------------------------------------------------- -# Dispatch logic - - -def dispatch_to_series(left, right, func, axis: Optional[int] = None): - """ - Evaluate the frame operation func(left, right) by evaluating - column-by-column, dispatching to the Series implementation. - - Parameters - ---------- - left : DataFrame - right : scalar, Series, or DataFrame - func : arithmetic or comparison operator - axis : {None, 0, 1} - - Returns - ------- - DataFrame - """ - # Get the appropriate array-op to apply to each column/block's values. - array_op = get_array_op(func) - - right = lib.item_from_zerodim(right) - if not is_list_like(right): - # i.e. 
scalar, faster than checking np.ndim(right) == 0 - bm = left._mgr.apply(array_op, right=right) - return type(left)(bm) - - elif isinstance(right, ABCDataFrame): - assert left.index.equals(right.index) - assert left.columns.equals(right.columns) - # TODO: The previous assertion `assert right._indexed_same(left)` - # fails in cases with empty columns reached via - # _frame_arith_method_with_reindex - - bm = left._mgr.operate_blockwise(right._mgr, array_op) - return type(left)(bm) - - elif isinstance(right, ABCSeries) and axis == 1: - # axis=1 means we want to operate row-by-row - assert right.index.equals(left.columns) - - right = right._values - # maybe_align_as_frame ensures we do not have an ndarray here - assert not isinstance(right, np.ndarray) - - arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)] - - elif isinstance(right, ABCSeries): - assert right.index.equals(left.index) # Handle other cases later - right = right._values - - arrays = [array_op(l, right) for l in left._iter_column_arrays()] - - else: - # Remaining cases have less-obvious dispatch rules - raise NotImplementedError(right) - - return type(left)._from_arrays( - arrays, left.columns, left.index, verify_integrity=False - ) - - # ----------------------------------------------------------------------------- # Series @@ -299,9 +210,8 @@ def align_method_SERIES(left: "Series", right, align_asobject: bool = False): return left, right -def flex_method_SERIES(cls, op, special): - assert not special # "special" also means "not flex" - name = _get_op_name(op, special) +def flex_method_SERIES(op): + name = op.__name__.strip("_") doc = _make_flex_doc(name, "series") @Appender(doc) @@ -427,7 +337,7 @@ def to_series(right): "Do `left, right = left.align(right, axis=1, copy=False)` " "before e.g. 
`left == right`", FutureWarning, - stacklevel=3, + stacklevel=5, ) left, right = left.align( @@ -438,7 +348,7 @@ def to_series(right): return left, right -def _should_reindex_frame_op( +def should_reindex_frame_op( left: "DataFrame", right, op, axis, default_axis, fill_value, level ) -> bool: """ @@ -464,7 +374,7 @@ def _should_reindex_frame_op( return False -def _frame_arith_method_with_reindex( +def frame_arith_method_with_reindex( left: "DataFrame", right: "DataFrame", op ) -> "DataFrame": """ @@ -533,10 +443,9 @@ def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int return type(frame)(rvalues, index=frame.index, columns=frame.columns) -def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert not special - op_name = _get_op_name(op, special) - default_axis = None if special else "columns" +def flex_arith_method_FRAME(op): + op_name = op.__name__.strip("_") + default_axis = "columns" na_op = get_array_op(op) doc = _make_flex_doc(op_name, "dataframe") @@ -544,10 +453,10 @@ def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): - if _should_reindex_frame_op( + if should_reindex_frame_op( self, other, op, axis, default_axis, fill_value, level ): - return _frame_arith_method_with_reindex(self, other, op) + return frame_arith_method_with_reindex(self, other, op) if isinstance(other, ABCSeries) and fill_value is not None: # TODO: We could allow this in cases where we end up going @@ -563,37 +472,14 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): - new_data = dispatch_to_series(self, other, op, axis=axis) + new_data = self._dispatch_frame_op(other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) - new_data = 
dispatch_to_series(self, other, op) - - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - + new_data = self._dispatch_frame_op(other, op) -def arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert special - op_name = _get_op_name(op, special) - doc = _arith_doc_FRAME % op_name - - @Appender(doc) - def f(self, other): - - if _should_reindex_frame_op(self, other, op, 1, 1, None, None): - return _frame_arith_method_with_reindex(self, other, op) - - axis = 1 # only relevant for Series other case - - self, other = align_method_FRAME(self, other, axis, flex=True, level=None) - - new_data = dispatch_to_series(self, other, op, axis=axis) return self._construct_result(new_data) f.__name__ = op_name @@ -601,9 +487,8 @@ def f(self, other): return f -def flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert not special # "special" also means "not flex" - op_name = _get_op_name(op, special) +def flex_comp_method_FRAME(op): + op_name = op.__name__.strip("_") default_axis = "columns" # because we are "flex" doc = _flex_comp_doc_FRAME.format( @@ -616,26 +501,7 @@ def f(self, other, axis=default_axis, level=None): self, other = align_method_FRAME(self, other, axis, flex=True, level=level) - new_data = dispatch_to_series(self, other, op, axis=axis) - return self._construct_result(new_data) - - f.__name__ = op_name - - return f - - -def comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): - assert special # "special" also means "not flex" - op_name = _get_op_name(op, special) - - @Appender(f"Wrapper for comparison method {op_name}") - def f(self, other): - axis = 1 # only relevant for Series other case - - self, other = align_method_FRAME(self, other, axis, flex=False, level=None) - - # See GH#4537 for discussion of scalar op behavior - new_data = dispatch_to_series(self, other, op, axis=axis) + new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data) f.__name__ = 
op_name diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 86981f007a678..c05f457f1e4f5 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -7,16 +7,13 @@ from pandas.core.ops.roperator import ( radd, - rand_, rdivmod, rfloordiv, rmod, rmul, - ror_, rpow, rsub, rtruediv, - rxor, ) @@ -33,19 +30,10 @@ def _get_method_wrappers(cls): ------- arith_flex : function or None comp_flex : function or None - arith_special : function - comp_special : function - bool_special : function - - Notes - ----- - None is only returned for SparseArray """ # TODO: make these non-runtime imports once the relevant functions # are no longer in __init__ from pandas.core.ops import ( - arith_method_FRAME, - comp_method_FRAME, flex_arith_method_FRAME, flex_comp_method_FRAME, flex_method_SERIES, @@ -55,16 +43,10 @@ def _get_method_wrappers(cls): # Just Series arith_flex = flex_method_SERIES comp_flex = flex_method_SERIES - arith_special = None - comp_special = None - bool_special = None elif issubclass(cls, ABCDataFrame): arith_flex = flex_arith_method_FRAME comp_flex = flex_comp_method_FRAME - arith_special = arith_method_FRAME - comp_special = comp_method_FRAME - bool_special = arith_method_FRAME - return arith_flex, comp_flex, arith_special, comp_special, bool_special + return arith_flex, comp_flex def add_special_arithmetic_methods(cls): @@ -77,12 +59,7 @@ def add_special_arithmetic_methods(cls): cls : class special methods will be defined and pinned to this class """ - _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls) - new_methods = _create_methods( - cls, arith_method, comp_method, bool_method, special=True - ) - # inplace operators (I feel like these should get passed an `inplace=True` - # or just be removed + new_methods = {} def _wrap_inplace_method(method): """ @@ -105,45 +82,25 @@ def f(self, other): f.__name__ = f"__i{name}__" return f - if bool_method is None: - # Series gets bool_method, arith_method via 
OpsMixin - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(cls.__add__), - __isub__=_wrap_inplace_method(cls.__sub__), - __imul__=_wrap_inplace_method(cls.__mul__), - __itruediv__=_wrap_inplace_method(cls.__truediv__), - __ifloordiv__=_wrap_inplace_method(cls.__floordiv__), - __imod__=_wrap_inplace_method(cls.__mod__), - __ipow__=_wrap_inplace_method(cls.__pow__), - ) - ) - new_methods.update( - dict( - __iand__=_wrap_inplace_method(cls.__and__), - __ior__=_wrap_inplace_method(cls.__or__), - __ixor__=_wrap_inplace_method(cls.__xor__), - ) - ) - else: - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(new_methods["__add__"]), - __isub__=_wrap_inplace_method(new_methods["__sub__"]), - __imul__=_wrap_inplace_method(new_methods["__mul__"]), - __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), - __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), - __imod__=_wrap_inplace_method(new_methods["__mod__"]), - __ipow__=_wrap_inplace_method(new_methods["__pow__"]), - ) + # wrap methods that we get from OpsMixin + new_methods.update( + dict( + __iadd__=_wrap_inplace_method(cls.__add__), + __isub__=_wrap_inplace_method(cls.__sub__), + __imul__=_wrap_inplace_method(cls.__mul__), + __itruediv__=_wrap_inplace_method(cls.__truediv__), + __ifloordiv__=_wrap_inplace_method(cls.__floordiv__), + __imod__=_wrap_inplace_method(cls.__mod__), + __ipow__=_wrap_inplace_method(cls.__pow__), ) - new_methods.update( - dict( - __iand__=_wrap_inplace_method(new_methods["__and__"]), - __ior__=_wrap_inplace_method(new_methods["__or__"]), - __ixor__=_wrap_inplace_method(new_methods["__xor__"]), - ) + ) + new_methods.update( + dict( + __iand__=_wrap_inplace_method(cls.__and__), + __ior__=_wrap_inplace_method(cls.__or__), + __ixor__=_wrap_inplace_method(cls.__xor__), ) + ) _add_methods(cls, new_methods=new_methods) @@ -158,10 +115,8 @@ def add_flex_arithmetic_methods(cls): cls : class flex methods will be defined and pinned to this class """ - 
flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls) - new_methods = _create_methods( - cls, flex_arith_method, flex_comp_method, bool_method=None, special=False - ) + flex_arith_method, flex_comp_method = _get_method_wrappers(cls) + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method) new_methods.update( dict( multiply=new_methods["mul"], @@ -175,72 +130,52 @@ def add_flex_arithmetic_methods(cls): _add_methods(cls, new_methods=new_methods) -def _create_methods(cls, arith_method, comp_method, bool_method, special): - # creates actual methods based upon arithmetic, comp and bool method +def _create_methods(cls, arith_method, comp_method): + # creates actual flex methods based upon arithmetic, and comp method # constructors. have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series new_methods = {} - if arith_method is not None: - new_methods.update( - dict( - add=arith_method(cls, operator.add, special), - radd=arith_method(cls, radd, special), - sub=arith_method(cls, operator.sub, special), - mul=arith_method(cls, operator.mul, special), - truediv=arith_method(cls, operator.truediv, special), - floordiv=arith_method(cls, operator.floordiv, special), - mod=arith_method(cls, operator.mod, special), - pow=arith_method(cls, operator.pow, special), - # not entirely sure why this is necessary, but previously was included - # so it's here to maintain compatibility - rmul=arith_method(cls, rmul, special), - rsub=arith_method(cls, rsub, special), - rtruediv=arith_method(cls, rtruediv, special), - rfloordiv=arith_method(cls, rfloordiv, special), - rpow=arith_method(cls, rpow, special), - rmod=arith_method(cls, rmod, special), - ) - ) - new_methods["div"] = new_methods["truediv"] - new_methods["rdiv"] = new_methods["rtruediv"] - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods["divmod"] = arith_method(cls, divmod, special) - new_methods["rdivmod"] = arith_method(cls, rdivmod, 
special) - - if comp_method is not None: - # Series already has this pinned - new_methods.update( - dict( - eq=comp_method(cls, operator.eq, special), - ne=comp_method(cls, operator.ne, special), - lt=comp_method(cls, operator.lt, special), - gt=comp_method(cls, operator.gt, special), - le=comp_method(cls, operator.le, special), - ge=comp_method(cls, operator.ge, special), - ) + + new_methods.update( + dict( + add=arith_method(operator.add), + radd=arith_method(radd), + sub=arith_method(operator.sub), + mul=arith_method(operator.mul), + truediv=arith_method(operator.truediv), + floordiv=arith_method(operator.floordiv), + mod=arith_method(operator.mod), + pow=arith_method(operator.pow), + rmul=arith_method(rmul), + rsub=arith_method(rsub), + rtruediv=arith_method(rtruediv), + rfloordiv=arith_method(rfloordiv), + rpow=arith_method(rpow), + rmod=arith_method(rmod), ) + ) + new_methods["div"] = new_methods["truediv"] + new_methods["rdiv"] = new_methods["rtruediv"] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods["divmod"] = arith_method(divmod) + new_methods["rdivmod"] = arith_method(rdivmod) - if bool_method is not None: - new_methods.update( - dict( - and_=bool_method(cls, operator.and_, special), - or_=bool_method(cls, operator.or_, special), - xor=bool_method(cls, operator.xor, special), - rand_=bool_method(cls, rand_, special), - ror_=bool_method(cls, ror_, special), - rxor=bool_method(cls, rxor, special), - ) + new_methods.update( + dict( + eq=comp_method(operator.eq), + ne=comp_method(operator.ne), + lt=comp_method(operator.lt), + gt=comp_method(operator.gt), + le=comp_method(operator.le), + ge=comp_method(operator.ge), ) + ) - if special: - dunderize = lambda x: f"__{x.strip('_')}__" - else: - dunderize = lambda x: x - new_methods = {dunderize(k): v for k, v in new_methods.items()} + new_methods = {k.strip("_"): v for k, v in new_methods.items()} return new_methods