diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 85aa13526e77c..242f4e7b605c2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1065,7 +1065,7 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None): Returns ------- - pandas.DataFrame + DataFrame See Also -------- @@ -1145,7 +1145,7 @@ def to_numpy(self, dtype=None, copy=False): Returns ------- - array : numpy.ndarray + numpy.ndarray See Also -------- @@ -1439,7 +1439,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, Returns ------- - df : DataFrame + DataFrame """ # Make a copy of the input columns so we can modify it @@ -1755,7 +1755,7 @@ def from_items(cls, items, columns=None, orient='columns'): Returns ------- - frame : DataFrame + DataFrame """ warnings.warn("from_items is deprecated. Please use " @@ -1866,7 +1866,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, Returns ------- - y : DataFrame + DataFrame See Also -------- @@ -1956,7 +1956,7 @@ def to_panel(self): Returns ------- - panel : Panel + Panel """ raise NotImplementedError("Panel is being removed in pandas 0.25.0.") @@ -2478,7 +2478,7 @@ def memory_usage(self, index=True, deep=False): Returns ------- - sizes : Series + Series A Series whose index is the original column names and whose values is the memory usage of each column in bytes. @@ -2696,7 +2696,7 @@ def get_value(self, index, col, takeable=False): Returns ------- - value : scalar value + scalar value """ warnings.warn("get_value is deprecated and will be removed " @@ -2741,7 +2741,7 @@ def set_value(self, index, col, value, takeable=False): Returns ------- - frame : DataFrame + DataFrame If label pair is contained, will be reference to calling DataFrame, otherwise a new object """ @@ -3177,7 +3177,7 @@ def select_dtypes(self, include=None, exclude=None): Returns ------- - subset : DataFrame + DataFrame The subset of the frame including the dtypes in ``include`` and excluding the dtypes in ``exclude``. @@ -3542,7 +3542,7 @@ def _sanitize_column(self, key, value, broadcast=True): Returns ------- - sanitized_column : numpy-array + numpy.ndarray """ def reindexer(value): @@ -3811,7 +3811,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None, Returns ------- - dropped : pandas.DataFrame + DataFrame Raises ------ @@ -3936,7 +3936,7 @@ def rename(self, *args, **kwargs): Returns ------- - renamed : DataFrame + DataFrame See Also -------- @@ -4579,7 +4579,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): Returns ------- - deduplicated : DataFrame + DataFrame """ if self.empty: return self.copy() @@ -4613,7 +4613,7 @@ def duplicated(self, subset=None, keep='first'): Returns ------- - duplicated : Series + Series """ from pandas.core.sorting import get_group_index from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT @@ -4981,7 +4981,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Returns ------- - swapped : same type as caller (new object) + DataFrame .. versionchanged:: 0.18.1 @@ -5260,7 +5260,7 @@ def combine_first(self, other): Returns ------- - combined : DataFrame + DataFrame See Also -------- @@ -5621,7 +5621,7 @@ def pivot(self, index=None, columns=None, values=None): Returns ------- - table : DataFrame + DataFrame See Also -------- @@ -5907,7 +5907,7 @@ def unstack(self, level=-1, fill_value=None): Returns ------- - unstacked : DataFrame or Series + Series or DataFrame See Also -------- @@ -6073,7 +6073,7 @@ def diff(self, periods=1, axis=0): Returns ------- - diffed : DataFrame + DataFrame See Also -------- @@ -6345,7 +6345,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Returns ------- - applied : Series or DataFrame + Series or DataFrame See Also -------- @@ -6538,7 +6538,7 @@ def append(self, other, ignore_index=False, Returns ------- - appended : DataFrame + DataFrame See Also -------- @@ -6956,12 +6956,13 @@ def corr(self, method='pearson', min_periods=1): min_periods : int, optional Minimum number of observations required per pair of columns - to have a valid result. Currently only available for pearson - and spearman correlation + to have a valid result. Currently only available for Pearson + and Spearman correlation. Returns ------- - y : DataFrame + DataFrame + Correlation matrix. See Also -------- @@ -6970,14 +6971,15 @@ def corr(self, method='pearson', min_periods=1): Examples -------- - >>> histogram_intersection = lambda a, b: np.minimum(a, b - ... ).sum().round(decimals=1) + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], ... columns=['dogs', 'cats']) >>> df.corr(method=histogram_intersection) - dogs cats - dogs 1.0 0.3 - cats 0.3 1.0 + dogs cats + dogs 1.0 0.3 + cats 0.3 1.0 """ numeric_df = self._get_numeric_data() cols = numeric_df.columns @@ -7140,10 +7142,11 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): Parameters ---------- other : DataFrame, Series + Object with which to compute correlations. axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise - drop : boolean, default False - Drop missing indices from result + 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise. + drop : bool, default False + Drop missing indices from result. method : {'pearson', 'kendall', 'spearman'} or callable * pearson : standard correlation coefficient * kendall : Kendall Tau correlation coefficient @@ -7155,7 +7158,8 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): Returns ------- - correls : Series + Series + Pairwise correlations. See Also ------- @@ -7485,7 +7489,7 @@ def nunique(self, axis=0, dropna=True): Returns ------- - nunique : Series + Series See Also -------- @@ -7523,7 +7527,8 @@ def idxmin(self, axis=0, skipna=True): Returns ------- - idxmin : Series + Series + Indexes of minima along the specified axis. Raises ------ @@ -7559,7 +7564,8 @@ def idxmax(self, axis=0, skipna=True): Returns ------- - idxmax : Series + Series + Indexes of maxima along the specified axis. Raises ------ @@ -7706,7 +7712,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - quantiles : Series or DataFrame + Series or DataFrame If ``q`` is an array, a DataFrame will be returned where the index is ``q``, the columns are the columns of self, and the @@ -7776,19 +7782,19 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): Parameters ---------- - freq : string, default frequency of PeriodIndex - Desired frequency + freq : str, default frequency of PeriodIndex + Desired frequency. how : {'s', 'e', 'start', 'end'} Convention for converting period to timestamp; start of period - vs. end + vs. end. axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to convert (the index by default) - copy : boolean, default True - If false then underlying input data is not copied + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. Returns ------- - df : DataFrame with DatetimeIndex + DataFrame with DatetimeIndex """ new_data = self._data if copy: @@ -7812,15 +7818,16 @@ def to_period(self, freq=None, axis=0, copy=True): Parameters ---------- - freq : string, default + freq : str, default + Frequency of the PeriodIndex. axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to convert (the index by default) - copy : boolean, default True - If False then underlying input data is not copied + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. Returns ------- - ts : TimeSeries with PeriodIndex + TimeSeries with PeriodIndex """ new_data = self._data if copy: @@ -7893,7 +7900,7 @@ def isin(self, values): match. Note that 'falcon' does not match based on the number of legs in df2. - >>> other = pd.DataFrame({'num_legs': [8, 2],'num_wings': [0, 2]}, + >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, ... index=['spider', 'falcon']) >>> df.isin(other) num_legs num_wings diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c886493f90eaf..1a404630b660e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -774,18 +774,18 @@ def pop(self, item): Parameters ---------- item : str - Column label to be popped + Label of column to be popped. Returns ------- - popped : Series + Series Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan)], + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey','mammal', np.nan)], ... columns=('name', 'class', 'max_speed')) >>> df name class max_speed @@ -937,7 +937,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Parameters ---------- - i, j : int, string (can be mixed) + i, j : int, str (can be mixed) Level of index to be swapped. Can pass level name as string. Returns @@ -973,9 +973,9 @@ def rename(self, *args, **kwargs): and raise on DataFrame or Panel. dict-like or functions are transformations to apply to that axis' values - copy : boolean, default True - Also copy underlying data - inplace : boolean, default False + copy : bool, default True + Also copy underlying data. + inplace : bool, default False Whether to return a new %(klass)s. If True then value of copy is ignored. level : int or level name, default None @@ -2947,7 +2947,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, will treat them as non-numeric. quotechar : str, default '\"' String of length 1. Character used to quote fields. - line_terminator : string, optional + line_terminator : str, optional The newline character or character sequence to use in the output file. Defaults to `os.linesep`, which depends on the OS in which this method is called ('\n' for linux, '\r\n' for Windows, i.e.). @@ -10282,7 +10282,7 @@ def _doc_parms(cls): Parameters ---------- axis : %(axis_descr)s -skipna : boolean, default True +skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA level : int or level name, default None @@ -10291,7 +10291,7 @@ def _doc_parms(cls): ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. -numeric_only : boolean, default None +numeric_only : bool, default None Include only float, int, boolean columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series.