diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2a21593fab8f5..850c4dd7c45e7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -235,7 +235,7 @@ def _gen_eval_kwargs(name): {} >>> _gen_eval_kwargs("rtruediv") - {"reversed": True, "truediv": True} + {'reversed': True, 'truediv': True} """ kwargs = {} @@ -384,124 +384,21 @@ def _get_op_name(op, special): # ----------------------------------------------------------------------------- # Docstring Generation and Templates -_add_example_FRAME = """ ->>> a = pd.DataFrame([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], -... columns=['one']) ->>> a - one -a 1.0 -b 1.0 -c 1.0 -d NaN ->>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], -... two=[np.nan, 2, np.nan, 2]), -... index=['a', 'b', 'd', 'e']) ->>> b - one two -a 1.0 NaN -b NaN 2.0 -d 1.0 NaN -e NaN 2.0 ->>> a.add(b, fill_value=0) - one two -a 2.0 NaN -b 1.0 2.0 -c 1.0 NaN -d 1.0 NaN -e NaN 2.0 -""" - -_sub_example_FRAME = """ ->>> a = pd.DataFrame([2, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], -... columns=['one']) ->>> a - one -a 2.0 -b 1.0 -c 1.0 -d NaN ->>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], -... two=[3, 2, np.nan, 2]), -... index=['a', 'b', 'd', 'e']) ->>> b - one two -a 1.0 3.0 -b NaN 2.0 -d 1.0 NaN -e NaN 2.0 ->>> a.sub(b, fill_value=0) - one two -a 1.0 -3.0 -b 1.0 -2.0 -c 1.0 NaN -d -1.0 NaN -e NaN -2.0 -""" - -_mod_example_FRAME = """ -**Using a scalar argument** - ->>> df = pd.DataFrame([2, 4, np.nan, 6.2], index=["a", "b", "c", "d"], -... columns=['one']) ->>> df - one -a 2.0 -b 4.0 -c NaN -d 6.2 ->>> df.mod(3, fill_value=-1) - one -a 2.0 -b 1.0 -c 2.0 -d 0.2 - -**Using a DataFrame argument** - ->>> df = pd.DataFrame(dict(one=[np.nan, 2, 3, 14], two=[np.nan, 1, 1, 3]), -... index=['a', 'b', 'c', 'd']) ->>> df - one two -a NaN NaN -b 2.0 1.0 -c 3.0 1.0 -d 14.0 3.0 ->>> other = pd.DataFrame(dict(one=[np.nan, np.nan, 6, np.nan], -... three=[np.nan, 10, np.nan, -7]), -... 
index=['a', 'b', 'd', 'e']) ->>> other - one three -a NaN NaN -b NaN 10.0 -d 6.0 NaN -e NaN -7.0 ->>> df.mod(other, fill_value=3) - one three two -a NaN NaN NaN -b 2.0 3.0 1.0 -c 0.0 NaN 1.0 -d 2.0 NaN 0.0 -e NaN -4.0 NaN -""" - _op_descriptions = { # Arithmetic Operators 'add': {'op': '+', 'desc': 'Addition', - 'reverse': 'radd', - 'df_examples': _add_example_FRAME}, + 'reverse': 'radd'}, 'sub': {'op': '-', 'desc': 'Subtraction', - 'reverse': 'rsub', - 'df_examples': _sub_example_FRAME}, + 'reverse': 'rsub'}, 'mul': {'op': '*', 'desc': 'Multiplication', 'reverse': 'rmul', 'df_examples': None}, 'mod': {'op': '%', 'desc': 'Modulo', - 'reverse': 'rmod', - 'df_examples': _mod_example_FRAME}, + 'reverse': 'rmod'}, 'pow': {'op': '**', 'desc': 'Exponential power', 'reverse': 'rpow', @@ -522,28 +419,23 @@ def _get_op_name(op, special): # Comparison Operators 'eq': {'op': '==', 'desc': 'Equal to', - 'reverse': None, - 'df_examples': None}, + 'reverse': None}, 'ne': {'op': '!=', 'desc': 'Not equal to', - 'reverse': None, - 'df_examples': None}, + 'reverse': None}, 'lt': {'op': '<', 'desc': 'Less than', - 'reverse': None, - 'df_examples': None}, + 'reverse': None}, 'le': {'op': '<=', 'desc': 'Less than or equal to', - 'reverse': None, - 'df_examples': None}, + 'reverse': None}, 'gt': {'op': '>', 'desc': 'Greater than', - 'reverse': None, - 'df_examples': None}, + 'reverse': None}, 'ge': {'op': '>=', 'desc': 'Greater than or equal to', - 'reverse': None, - 'df_examples': None}} + 'reverse': None} +} _op_names = list(_op_descriptions.keys()) for key in _op_names: @@ -635,38 +527,295 @@ def _get_op_name(op, special): _flex_doc_FRAME = """ {desc} of dataframe and other, element-wise (binary operator `{op_name}`). -Equivalent to ``{equiv}``, but with support to substitute a fill_value for -missing data in one of the inputs. +Equivalent to ``{equiv}``, but with support to substitute a fill_value +for missing data in one of the inputs. With reverse version, `{reverse}`. 
+ +Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to +arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. Parameters ---------- -other : Series, DataFrame, or constant -axis : {{0, 1, 'index', 'columns'}} - For Series input, axis to match Series index on -level : int or name +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}} + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). For Series input, axis to match Series index on. +level : int or label Broadcast across a level, matching Index values on the - passed MultiIndex level -fill_value : None or float value, default None + passed MultiIndex level. +fill_value : float or None, default None Fill existing missing (NaN) values, and any new element needed for successful DataFrame alignment, with this value before computation. If data in both corresponding DataFrame locations is missing - the result will be missing + the result will be missing. Notes ----- -Mismatched indices will be unioned together +Mismatched indices will be unioned together. Returns ------- -result : DataFrame +DataFrame + Result of the arithmetic operation. + +See Also +-------- +DataFrame.add : Add DataFrames. +DataFrame.sub : Subtract DataFrames. +DataFrame.mul : Multiply DataFrames. +DataFrame.div : Divide DataFrames (float division). +DataFrame.truediv : Divide DataFrames (float division). +DataFrame.floordiv : Divide DataFrames (integer division). +DataFrame.mod : Calculate modulo (remainder after division). +DataFrame.pow : Calculate exponential power. Examples -------- -{df_examples} +>>> df = pd.DataFrame({{'angles': [0, 3, 4], +... 'degrees': [360, 180, 360]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> df + angles degrees +circle 0 360 +triangle 3 180 +rectangle 4 360 + +Add a scalar with operator version which returns the same +results. 
+ +>>> df + 1 + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +>>> df.add(1) + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +Divide by constant with reverse version. + +>>> df.div(10) + angles degrees +circle 0.0 36.0 +triangle 0.3 18.0 +rectangle 0.4 36.0 + +>>> df.rdiv(10) + angles degrees +circle inf 0.027778 +triangle 3.333333 0.055556 +rectangle 2.500000 0.027778 + +Subtract a list and Series by axis with operator version. + +>>> df - [1, 2] + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub([1, 2], axis='columns') + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']), +... axis='index') + angles degrees +circle -1 359 +triangle 2 179 +rectangle 3 359 + +Multiply a DataFrame of different shape with operator version. + +>>> other = pd.DataFrame({{'angles': [0, 3, 4]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> other + angles +circle 0 +triangle 3 +rectangle 4 + +>>> df * other + angles degrees +circle 0 NaN +triangle 9 NaN +rectangle 16 NaN + +>>> df.mul(other, fill_value=0) + angles degrees +circle 0 0.0 +triangle 9 0.0 +rectangle 16 0.0 + +Divide by a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6], +... 'degrees': [360, 180, 360, 360, 540, 720]}}, +... index=[['A', 'A', 'A', 'B', 'B', 'B'], +... ['circle', 'triangle', 'rectangle', +... 'square', 'pentagon', 'hexagon']]) +>>> df_multindex + angles degrees +A circle 0 360 + triangle 3 180 + rectangle 4 360 +B square 4 360 + pentagon 5 540 + hexagon 6 720 + +>>> df.div(df_multindex, level=1, fill_value=0) + angles degrees +A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 +B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 +""" + +_flex_comp_doc_FRAME = """ +{desc} of dataframe and other, element-wise (binary operator `{op_name}`). 
+ +Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison +operators. + +Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis +(rows or columns) and level for comparison. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). +level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + +Returns +------- +DataFrame of bool + Result of the comparison. See Also -------- -DataFrame.{reverse} +DataFrame.eq : Compare DataFrames for equality elementwise. +DataFrame.ne : Compare DataFrames for inequality elementwise. +DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. +DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. +DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. +DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + +Notes +----- +Mismatched indices will be unioned together. +`NaN` values are considered different (i.e. `NaN` != `NaN`). + +Examples +-------- +>>> df = pd.DataFrame({{'cost': [250, 150, 100], +... 'revenue': [100, 250, 300]}}, +... index=['A', 'B', 'C']) +>>> df + cost revenue +A 250 100 +B 150 250 +C 100 300 + +Compare to a scalar and operator version which return the same +results. + +>>> df == 100 + cost revenue +A False True +B False False +C True False + +>>> df.eq(100) + cost revenue +A False True +B False False +C True False + +Compare to a list and Series by axis and operator version. As shown, +for list axis is by default 'index', but for Series axis is by +default 'columns'. 
+ +>>> df != [100, 250, 300] + cost revenue +A True False +B True False +C True False + +>>> df.ne([100, 250, 300], axis='index') + cost revenue +A True False +B True False +C True False + +>>> df != pd.Series([100, 250, 300]) + cost revenue 0 1 2 +A True True True True True +B True True True True True +C True True True True True + +>>> df.ne(pd.Series([100, 250, 300]), axis='columns') + cost revenue 0 1 2 +A True True True True True +B True True True True True +C True True True True True + +Compare to a DataFrame of different shape. + +>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}}, +... index=['A', 'B', 'C', 'D']) +>>> other + revenue +A 300 +B 250 +C 100 +D 150 + +>>> df.gt(other) + cost revenue +A False False +B False False +C False True +D False False + +Compare to a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220], +... 'revenue': [100, 250, 300, 200, 175, 225]}}, +... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'], +... ['A', 'B', 'C', 'A', 'B' ,'C']]) +>>> df_multindex + cost revenue +Q1 A 250 100 + B 150 250 + C 100 300 +Q2 A 150 200 + B 300 175 + C 220 225 + +>>> df.le(df_multindex, level=1) + cost revenue +Q1 A True True + B True True + C True True +Q2 A False True + B True False + C True False """ _flex_doc_PANEL = """ @@ -734,8 +883,7 @@ def _make_flex_doc(op_name, typ): elif typ == 'dataframe': base_doc = _flex_doc_FRAME doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, - equiv=equiv, reverse=op_desc['reverse'], - df_examples=op_desc['df_examples']) + equiv=equiv, reverse=op_desc['reverse']) elif typ == 'panel': base_doc = _flex_doc_PANEL doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, @@ -1894,8 +2042,10 @@ def na_op(x, y): result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) return result - @Appender('Wrapper for flexible comparison methods {name}' - .format(name=op_name)) + doc = _flex_comp_doc_FRAME.format(op_name=op_name, + 
desc=_op_descriptions[op_name]['desc']) + + @Appender(doc) def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis)