From b37e6f493037ee24415f731f66affd3ff0807734 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 29 Nov 2018 14:54:15 +0100 Subject: [PATCH] MAINT Use list and dict comprehension (#23894) --- pandas/core/computation/align.py | 5 ++--- pandas/core/config.py | 7 ++----- pandas/core/dtypes/concat.py | 6 ++---- pandas/core/frame.py | 18 ++++++------------ pandas/core/groupby/ops.py | 6 ++---- pandas/core/internals/blocks.py | 5 +---- pandas/core/panel.py | 10 ++++------ pandas/core/reshape/melt.py | 5 ++--- pandas/core/sparse/frame.py | 5 ++--- pandas/core/window.py | 5 ++--- pandas/io/excel.py | 7 +++---- pandas/io/formats/printing.py | 8 ++++---- pandas/io/formats/style.py | 6 ++---- pandas/io/sas/sas_xport.py | 5 ++--- pandas/io/stata.py | 8 ++++---- pandas/plotting/_core.py | 9 +++------ pandas/tests/frame/test_axis_select_reindex.py | 4 +--- pandas/tests/frame/test_indexing.py | 8 +++----- pandas/tests/frame/test_replace.py | 15 ++++++--------- pandas/tests/generic/test_generic.py | 5 ++--- pandas/tests/groupby/aggregate/test_cython.py | 4 +--- pandas/tests/groupby/test_groupby.py | 12 +++--------- pandas/tests/groupby/test_grouping.py | 4 +--- pandas/tests/groupby/test_rank.py | 10 ++++------ pandas/tests/indexes/multi/test_constructor.py | 10 ++++------ pandas/tests/sparse/frame/test_frame.py | 4 +--- pandas/tests/sparse/series/test_series.py | 8 ++++---- 27 files changed, 73 insertions(+), 126 deletions(-) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index f7f40a66af9c63..951174648091f7 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -30,9 +30,8 @@ def _align_core_single_unary_op(term): def _zip_axes_from_type(typ, new_axes): - axes = {} - for ax_ind, ax_name in compat.iteritems(typ._AXIS_NAMES): - axes[ax_name] = new_axes[ax_ind] + axes = {ax_name: new_axes[ax_ind] + for ax_ind, ax_name in compat.iteritems(typ._AXIS_NAMES)} return axes diff --git a/pandas/core/config.py b/pandas/core/config.py index 6b50ab9ffe7d44..0f43ca65d187ab 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -395,11 +395,8 @@ def __init__(self, *args): self.ops = list(zip(args[::2], args[1::2])) def __enter__(self): - undo = [] - for pat, val in self.ops: - undo.append((pat, _get_option(pat, silent=True))) - - self.undo = undo + self.undo = [(pat, _get_option(pat, silent=True)) + for pat, val in self.ops] for pat, val in self.ops: _set_option(pat, val, silent=True) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 098ac3e0c00a53..58f1bcbfa74c01 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -373,10 +373,8 @@ def _maybe_unwrap(x): if sort_categories: categories = categories.sort_values() - new_codes = [] - for c in to_union: - new_codes.append(_recode_for_categories(c.codes, c.categories, - categories)) + new_codes = [_recode_for_categories(c.codes, c.categories, categories) + for c in to_union] new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4b8d151151bd6..ab240218ecda14 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3569,11 +3569,8 @@ def reindexer(value): @property def _series(self): - result = {} - for idx, item in enumerate(self.columns): - result[item] = Series(self._data.iget(idx), index=self.index, - name=item) - return result + return {item: Series(self._data.iget(idx), index=self.index, name=item) + for idx, item in enumerate(self.columns)} def lookup(self, row_labels, col_labels): """ @@ -3593,9 +3590,8 @@ def lookup(self, row_labels, col_labels): ----- Akin to:: - result = [] - for row, col in zip(row_labels, col_labels): - result.append(df.get_value(row, col)) + result = [df.get_value(row, col) + for row, col in zip(row_labels, col_labels)] Examples -------- @@ -4600,10 +4596,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if len(by) > 1: from pandas.core.sorting import lexsort_indexer - keys = [] - for x in by: - k = self._get_label_or_level_values(x, axis=axis) - keys.append(k) + keys = [self._get_label_or_level_values(x, axis=axis) + for x in by] indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) indexer = ensure_platform_int(indexer) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7391190b5f1922..8455c03953ad15 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -678,10 +678,8 @@ def groups(self): # this is mainly for compat # GH 3881 - result = {} - for key, value in zip(self.binlabels, self.bins): - if key is not NaT: - result[key] = value + result = {key: value for key, value in zip(self.binlabels, self.bins) + if key is not NaT} return result @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4ae7a812e014d5..1b67c20530eb0e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2295,10 +2295,7 @@ def convert(self, *args, **kwargs): 'convert_timedeltas'] fn_inputs += ['copy'] - fn_kwargs = {} - for key in fn_inputs: - if key in kwargs: - fn_kwargs[key] = kwargs[key] + fn_kwargs = {key: kwargs[key] for key in fn_inputs if key in kwargs} # operate column-by-column def f(m, v, i): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b976dc27a69f70..65dfd45fcb9c22 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -336,9 +336,8 @@ def _compare_constructor(self, other, func): raise Exception('Can only compare identically-labeled ' 'same type objects') - new_data = {} - for col in self._info_axis: - new_data[col] = func(self[col], other[col]) + new_data = {col: func(self[col], other[col]) + for col in self._info_axis} d = self._construct_axes_dict(copy=False) return self._constructor(data=new_data, **d) @@ -949,9 +948,8 @@ def to_frame(self, filter_observations=True): # size = N * K selector = slice(None, None) - data = {} - for item in self.items: - data[item] = self[item].values.ravel()[selector] + data = {item: self[item].values.ravel()[selector] + for item in self.items} def construct_multi_parts(idx, n_repeat, n_shuffle=1): # Replicates and shuffles MultiIndex, returns individual attributes diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 2dd6dc71b9d98b..aafc0de64ee12c 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -448,9 +448,8 @@ def melt_stub(df, stub, i, j, value_vars, sep): value_vars_flattened = [e for sublist in value_vars for e in sublist] id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) - melted = [] - for s, v in zip(stubnames, value_vars): - melted.append(melt_stub(df, s, i, j, v, sep)) + melted = [melt_stub(df, s, i, j, v, sep) + for s, v in zip(stubnames, value_vars)] melted = melted[0].join(melted[1:], how='outer') if len(i) == 1: diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index a25ffa2744cb79..f1c46abfab0b21 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -339,9 +339,8 @@ def to_dense(self): def _apply_columns(self, func): """ get new SparseDataFrame applying func to each columns """ - new_data = {} - for col, series in compat.iteritems(self): - new_data[col] = func(series) + new_data = {col: func(series) + for col, series in compat.iteritems(self)} return self._constructor( data=new_data, index=self.index, columns=self.columns, diff --git a/pandas/core/window.py b/pandas/core/window.py index a079cea0fabd1f..faaef4211ca8e8 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2479,9 +2479,8 @@ def dataframe_from_int_dict(data, frame_template): else: raise ValueError("'pairwise' is not True/False") else: - results = {} - for i, col in enumerate(arg1.columns): - results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) + results = {i: f(*_prep_binary(arg1.iloc[:, i], arg2)) + for i, col in enumerate(arg1.columns)} return dataframe_from_int_dict(results, arg1) else: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 880ff5a56804fa..03d873467dc10a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -586,10 +586,9 @@ def _parse_cell(cell_contents, cell_typ): usecols = _maybe_convert_usecols(usecols) for i in range(sheet.nrows): - row = [] - for j, (value, typ) in enumerate(zip(sheet.row_values(i), - sheet.row_types(i))): - row.append(_parse_cell(value, typ)) + row = [_parse_cell(value, typ) + for value, typ in zip(sheet.row_values(i), + sheet.row_types(i))] data.append(row) if sheet.nrows == 0: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index f814bf965a1e99..e671571560b192 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -110,10 +110,10 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): nitems = max_seq_items or get_option("max_seq_items") or len(seq) s = iter(seq) - r = [] - for i in range(min(nitems, len(seq))): # handle sets, no slicing - r.append(pprint_thing( - next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)) + # handle sets, no slicing + r = [pprint_thing(next(s), + _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) + for i in range(min(nitems, len(seq)))] body = ", ".join(r) if nitems < len(seq): diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 8ee9ea5b3d9806..4fdcb978b4695a 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1349,10 +1349,8 @@ def _get_level_lengths(index, hidden_elements=None): elif(j not in hidden_elements): lengths[(i, last_label)] += 1 - non_zero_lengths = {} - for element, length in lengths.items(): - if(length >= 1): - non_zero_lengths[element] = length + non_zero_lengths = { + element: length for element, length in lengths.items() if length >= 1} return non_zero_lengths diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 6a38e3d2eb783d..3c607d62b42868 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -353,9 +353,8 @@ def _read_header(self): self.columns = [x['name'].decode() for x in self.fields] # Setup the dtype. - dtypel = [] - for i, field in enumerate(self.fields): - dtypel.append(('s' + str(i), "S" + str(field['field_length']))) + dtypel = [('s' + str(i), "S" + str(field['field_length'])) + for i, field in enumerate(self.fields)] dtype = np.dtype(dtypel) self._dtype = dtype diff --git a/pandas/io/stata.py b/pandas/io/stata.py index d7beeb02a13c4b..403137b695cb7b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2947,10 +2947,10 @@ def _update_strl_names(self): def _convert_strls(self, data): """Convert columns to StrLs if either very large or in the convert_strl variable""" - convert_cols = [] - for i, col in enumerate(data): - if self.typlist[i] == 32768 or col in self._convert_strl: - convert_cols.append(col) + convert_cols = [ + col for i, col in enumerate(data) + if self.typlist[i] == 32768 or col in self._convert_strl] + if convert_cols: ssw = StataStrLWriter(data, convert_cols) tab, new_data = ssw.generate_table() diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 8574275c8478b0..7ec97df69c05fe 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -157,9 +157,8 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, # parse errorbar input if given xerr = kwds.pop('xerr', None) yerr = kwds.pop('yerr', None) - self.errors = {} - for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): - self.errors[kw] = self._parse_errorbars(kw, err) + self.errors = {kw: self._parse_errorbars(kw, err) + for kw, err in zip(['xerr', 'yerr'], [xerr, yerr])} if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, ABCIndexClass)): @@ -1721,9 +1720,7 @@ def result(self): _klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, ScatterPlot, HexBinPlot, AreaPlot, PiePlot] -_plot_klass = {} -for klass in _klasses: - _plot_klass[klass._kind] = klass +_plot_klass = {klass._kind: klass for klass in _klasses} def _plot(data, x=None, y=None, subplots=False, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index de6ac251d117b9..fd2ccb2d36ec04 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -634,9 +634,7 @@ def test_align(self): left, right = self.frame.align(s, broadcast_axis=1) tm.assert_index_equal(left.index, self.frame.index) - expected = {} - for c in self.frame.columns: - expected[c] = s + expected = {c: s for c in self.frame.columns} expected = DataFrame(expected, index=self.frame.index, columns=self.frame.columns) tm.assert_frame_equal(right, expected) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index de810a656c3eaf..0a61c844f1af83 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1782,11 +1782,9 @@ def test_get_value(self): def test_lookup(self): def alt(df, rows, cols, dtype): - result = [] - for r, c in zip(rows, cols): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result.append(df.get_value(r, c)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = [df.get_value(r, c) for r, c in zip(rows, cols)] return np.array(result, dtype=dtype) def testit(df): diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index bfb358a3e8c458..d6536bbd3c97cf 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -806,9 +806,8 @@ def test_replace_input_formats_listlike(self): df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], 'C': ['', 'asdf', 'fd']}) filled = df.replace(to_rep, values) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(to_rep[k], values[k]) + expected = {k: v.replace(to_rep[k], values[k]) + for k, v in compat.iteritems(df)} assert_frame_equal(filled, DataFrame(expected)) result = df.replace([0, 2, 5], [5, 2, 0]) @@ -821,9 +820,8 @@ def test_replace_input_formats_listlike(self): df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], 'C': ['', 'asdf', 'fd']}) filled = df.replace(np.nan, values) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(np.nan, values[k]) + expected = {k: v.replace(np.nan, values[k]) + for k, v in compat.iteritems(df)} assert_frame_equal(filled, DataFrame(expected)) # list to list @@ -844,9 +842,8 @@ def test_replace_input_formats_scalar(self): # dict to scalar to_rep = {'A': np.nan, 'B': 0, 'C': ''} filled = df.replace(to_rep, 0) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(to_rep[k], 0) + expected = {k: v.replace(to_rep[k], 0) + for k, v in compat.iteritems(df)} assert_frame_equal(filled, DataFrame(expected)) pytest.raises(TypeError, df.replace, to_rep, [np.nan, 0, '']) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index e7d5aebeb97f2b..7183fea85a069e 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -92,9 +92,8 @@ def test_rename(self): def test_get_numeric_data(self): n = 4 - kwargs = {} - for i in range(self._ndim): - kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) + kwargs = {self._typ._AXIS_NAMES[i]: list(range(n)) + for i in range(self._ndim)} # get the numeric data o = self._construct(n, **kwargs) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index a0cc653a28b06f..ad5968bca5c033 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -45,9 +45,7 @@ def test_cythonized_aggers(op_name): # single column grouped = df.drop(['B'], axis=1).groupby('A') - exp = {} - for cat, group in grouped: - exp[cat] = op(group['C']) + exp = {cat: op(group['C']) for cat, group in grouped} exp = DataFrame({'C': exp}) exp.index.name = 'A' result = op(grouped) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d5147790a66a1c..162800b68de4fe 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -348,9 +348,7 @@ def test_attr_wrapper(ts): # this is pretty cool result = grouped.describe() - expected = {} - for name, gp in grouped: - expected[name] = gp.describe() + expected = {name: gp.describe() for name, gp in grouped} expected = DataFrame(expected).T assert_frame_equal(result, expected) @@ -1312,9 +1310,7 @@ def test_skip_group_keys(): grouped = tsf.groupby(lambda x: x.month, group_keys=False) result = grouped.apply(lambda x: x.sort_values(by='A')[:3]) - pieces = [] - for key, group in grouped: - pieces.append(group.sort_values(by='A')[:3]) + pieces = [group.sort_values(by='A')[:3] for key, group in grouped] expected = pd.concat(pieces) assert_frame_equal(result, expected) @@ -1322,9 +1318,7 @@ def test_skip_group_keys(): grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False) result = grouped.apply(lambda x: x.sort_values()[:3]) - pieces = [] - for key, group in grouped: - pieces.append(group.sort_values()[:3]) + pieces = [group.sort_values()[:3] for key, group in grouped] expected = pd.concat(pieces) assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 546a37bf3d56aa..b6c20d31cddf3d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -727,9 +727,7 @@ def test_multi_iter_frame(self, three_group): df['k1'] = np.array(['b', 'b', 'b', 'a', 'a', 'a']) df['k2'] = np.array(['1', '1', '1', '2', '2', '2']) grouped = df.groupby(['k1', 'k2']) - groups = {} - for key, gp in grouped: - groups[key] = gp + groups = {key: gp for key, gp in grouped} assert len(groups) == 2 # axis = 1 diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index e7e91572c56d17..59284f4a5d47e2 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -17,18 +17,16 @@ def test_rank_apply(): result = df.groupby(['key1', 'key2']).value.rank() - expected = [] - for key, piece in df.groupby(['key1', 'key2']): - expected.append(piece.value.rank()) + expected = [piece.value.rank() + for key, piece in df.groupby(['key1', 'key2'])] expected = concat(expected, axis=0) expected = expected.reindex(result.index) tm.assert_series_equal(result, expected) result = df.groupby(['key1', 'key2']).value.rank(pct=True) - expected = [] - for key, piece in df.groupby(['key1', 'key2']): - expected.append(piece.value.rank(pct=True)) + expected = [piece.value.rank(pct=True) + for key, piece in df.groupby(['key1', 'key2'])] expected = concat(expected, axis=0) expected = expected.reindex(result.index) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index fb15d674613d4d..4ad20e9d6ee81c 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -100,9 +100,8 @@ def test_copy_in_constructor(): def test_from_arrays(idx): - arrays = [] - for lev, lab in zip(idx.levels, idx.labels): - arrays.append(np.asarray(lev).take(lab)) + arrays = [np.asarray(lev).take(lab) + for lev, lab in zip(idx.levels, idx.labels)] # list of arrays as input result = MultiIndex.from_arrays(arrays, names=idx.names) @@ -117,9 +116,8 @@ def test_from_arrays(idx): def test_from_arrays_iterator(idx): # GH 18434 - arrays = [] - for lev, lab in zip(idx.levels, idx.labels): - arrays.append(np.asarray(lev).take(lab)) + arrays = [np.asarray(lev).take(lab) + for lev, lab in zip(idx.levels, idx.labels)] # iterator as input result = MultiIndex.from_arrays(iter(arrays), names=idx.names) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index f802598542cb90..f799eab2f64060 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -101,9 +101,7 @@ def test_constructor(self, float_frame, float_frame_int_kind, assert isinstance(series, SparseSeries) # construct from nested dict - data = {} - for c, s in compat.iteritems(float_frame): - data[c] = s.to_dict() + data = {c: s.to_dict() for c, s in compat.iteritems(float_frame)} sdf = SparseDataFrame(data) tm.assert_sp_frame_equal(sdf, float_frame) diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index fd5dbcd932993c..225ef96581e72c 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -843,10 +843,10 @@ def test_dropna(self): def test_homogenize(self): def _check_matches(indices, expected): - data = {} - for i, idx in enumerate(indices): - data[i] = SparseSeries(idx.to_int_index().indices, - sparse_index=idx, fill_value=np.nan) + data = {i: SparseSeries(idx.to_int_index().indices, + sparse_index=idx, fill_value=np.nan) + for i, idx in enumerate(indices)} + # homogenized is only valid with NaN fill values homogenized = spf.homogenize(data)