diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c8bfc564e75736..f71d2ce68f8005 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -206,7 +206,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests frame.py' ; echo $MSG pytest -q --doctest-modules pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -pivot_table -query -reindex -reindex_axis -round" + -k" -itertuples -join -reindex -reindex_axis -round" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests series.py' ; echo $MSG diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ade05ab27093e3..11a5c2c065f93d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -483,7 +483,7 @@ def axes(self): -------- >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.axes - [RangeIndex(start=0, stop=2, step=1), Index(['coll', 'col2'], + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='object')] """ return [self.index, self.columns] @@ -3016,28 +3016,30 @@ def query(self, expr, inplace=False, **kwargs): Parameters ---------- - expr : string + expr : str The query string to evaluate. You can refer to variables in the environment by prefixing them with an '@' character like ``@a + b``. inplace : bool Whether the query should modify the data in place or return - a modified copy - - .. versionadded:: 0.18.0 - - kwargs : dict + a modified copy. + **kwargs See the documentation for :func:`pandas.eval` for complete details on the keyword arguments accepted by :meth:`DataFrame.query`. + .. versionadded:: 0.18.0 + Returns ------- - q : DataFrame + DataFrame + DataFrame resulting from the provided query expression. See Also -------- - pandas.eval - DataFrame.eval + eval : Evaluate a string describing operations on + DataFrame columns. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. Notes ----- @@ -3076,9 +3078,23 @@ def query(self, expr, inplace=False, **kwargs): Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 2), columns=list('ab')) - >>> df.query('a > b') - >>> df[df.a > df.b] # same result as the previous expression + >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.query('A > B') + A B + 4 5 2 + + The previous expression is equivalent to + + >>> df[df.A > df.B] + A B + 4 5 2 """ inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(expr, compat.string_types): @@ -5142,8 +5158,7 @@ def _combine_const(self, other, func): def combine(self, other, func, fill_value=None, overwrite=True): """ - Perform column-wise combine with another DataFrame based on a - passed function. + Perform column-wise combine with another DataFrame. Combines a DataFrame with `other` DataFrame using `func` to element-wise combine columns. The row and column indexes of the @@ -5159,13 +5174,14 @@ def combine(self, other, func, fill_value=None, overwrite=True): fill_value : scalar value, default None The value to fill NaNs with prior to passing any column to the merge func. - overwrite : boolean, default True + overwrite : bool, default True If True, columns in `self` that do not exist in `other` will be overwritten with NaNs. Returns ------- - result : DataFrame + DataFrame + Combination of the provided DataFrames. See Also -------- @@ -5209,15 +5225,15 @@ def combine(self, other, func, fill_value=None, overwrite=True): >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) - A B - 0 0 NaN + A B + 0 0 -5.0 1 0 3.0 Example that demonstrates the use of `overwrite` and behavior when the axis differ between the dataframes. >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1],}, index=[1, 2]) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2]) >>> df1.combine(df2, take_smaller) A B C 0 NaN NaN NaN @@ -5232,7 +5248,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Demonstrating the preference of the passed in dataframe. - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1],}, index=[1, 2]) + >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2]) >>> df2.combine(df1, take_smaller) A B C 0 0.0 NaN NaN @@ -5716,19 +5732,19 @@ def pivot(self, index=None, columns=None, values=None): This first example aggregates values by taking the sum. - >>> table = pivot_table(df, values='D', index=['A', 'B'], + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum) >>> table C large small A B - bar one 4 5 - two 7 6 - foo one 4 1 - two NaN 6 + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 We can also fill missing values using the `fill_value` parameter. - >>> table = pivot_table(df, values='D', index=['A', 'B'], + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum, fill_value=0) >>> table C large small @@ -5740,12 +5756,11 @@ def pivot(self, index=None, columns=None, values=None): The next example aggregates by taking the mean across multiple columns. - >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], ... aggfunc={'D': np.mean, ... 'E': np.mean}) >>> table - D E - mean mean + D E A C bar large 5.500000 7.500000 small 5.500000 8.500000 @@ -5755,17 +5770,17 @@ def pivot(self, index=None, columns=None, values=None): We can also calculate multiple types of aggregations for any given value column. - >>> table = pivot_table(df, values=['D', 'E'], index=['A', 'C'], + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], ... aggfunc={'D': np.mean, ... 'E': [min, max, np.mean]}) >>> table - D E - mean max mean min + D E + mean max mean min A C - bar large 5.500000 9 7.500000 6 - small 5.500000 9 8.500000 8 - foo large 2.000000 5 4.500000 4 - small 2.333333 6 4.333333 2 + bar large 5.500000 9.0 7.500000 6.0 + small 5.500000 9.0 8.500000 8.0 + foo large 2.000000 5.0 4.500000 4.0 + small 2.333333 6.0 4.333333 2.0 """ @Substitution('') @@ -6903,41 +6918,67 @@ def round(self, decimals=0, *args, **kwargs): columns not included in `decimals` will be left as is. Elements of `decimals` which are not columns of the input will be ignored. + *args + Additional keywords have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. Returns ------- - DataFrame + DataFrame : + A DataFrame with the affected columns rounded to the specified + number of decimal places. See Also -------- - numpy.around - Series.round + numpy.around : Round a numpy array to the given number of decimals. + Series.round : Round a Series to the given number of decimals. Examples -------- - >>> df = pd.DataFrame(np.random.random([3, 3]), - ... columns=['A', 'B', 'C'], index=['first', 'second', 'third']) + >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats']) >>> df - A B C - first 0.028208 0.992815 0.173891 - second 0.038683 0.645646 0.577595 - third 0.877076 0.149370 0.491027 - >>> df.round(2) - A B C - first 0.03 0.99 0.17 - second 0.04 0.65 0.58 - third 0.88 0.15 0.49 - >>> df.round({'A': 1, 'C': 2}) - A B C - first 0.0 0.992815 0.17 - second 0.0 0.645646 0.58 - third 0.9 0.149370 0.49 - >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C']) + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of decimal places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specfified with the column names as key and the number of decimal + places as value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specfified with the column names as index and the number of + decimal places as value + + >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) >>> df.round(decimals) - A B C - first 0.0 1 0.17 - second 0.0 1 0.58 - third 0.9 0 0.49 + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 """ from pandas.core.reshape.concat import concat diff --git a/pandas/core/window.py b/pandas/core/window.py index 5a9157b43ecd65..5556a01307a7ee 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2117,7 +2117,7 @@ def _constructor(self): class EWM(_Rolling): r""" - Provides exponential weighted functions. + Provide exponential weighted functions. .. versionadded:: 0.18.0 @@ -2125,16 +2125,17 @@ class EWM(_Rolling): ---------- com : float, optional Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. span : float, optional Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. halflife : float, optional Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } + halflife > 0`. alpha : float, optional Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1` + :math:`0 < \alpha \leq 1`. .. versionadded:: 0.18.0 @@ -2143,14 +2144,19 @@ class EWM(_Rolling): (otherwise result is NA). adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account - for imbalance in relative weightings (viewing EWMA as a moving average) + for imbalance in relative weightings + (viewing EWMA as a moving average). ignore_na : bool, default False Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior + specify True to reproduce pre-0.15.0 behavior. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. The value 0 identifies the rows, and 1 + identifies the columns. Returns ------- - a Window sub-classed for the particular operation + DataFrame + A Window sub-classed for the particular operation. See Also -------- @@ -2188,6 +2194,7 @@ class EWM(_Rolling): -------- >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df B 0 0.0 1 1.0