diff --git a/taxcalc/utils.py b/taxcalc/utils.py index 95b17dfaa..5e52f8ec1 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -310,14 +310,16 @@ def stat_dataframe(gpdf): Nested function that returns statistics DataFrame derived from the specified grouped Dataframe object, gpdf. """ - sdf = pd.DataFrame() - unweighted_columns = set(['s006', 'num_returns_StandardDed', - 'num_returns_ItemDed', 'num_returns_AMT']) - for col in unweighted_columns: - sdf[col] = gpdf.apply(unweighted_sum, col) - weighted_columns = set(DIST_TABLE_COLUMNS) - unweighted_columns - for col in weighted_columns: - sdf[col] = gpdf.apply(weighted_sum, col) + unweighted_columns = ['s006', 'num_returns_StandardDed', + 'num_returns_ItemDed', 'num_returns_AMT'] + stats = list() + for col in DIST_TABLE_COLUMNS: + if col in unweighted_columns: + stats.append(gpdf.apply(unweighted_sum, col)) + else: + stats.append(gpdf.apply(weighted_sum, col)) + sdf = pd.DataFrame(data=np.column_stack(stats), + columns=DIST_TABLE_COLUMNS) return sdf # main logic of create_distribution_table @@ -369,6 +371,8 @@ def stat_dataframe(gpdf): dist_table[col] /= dist_table['s006'] # set print display format for float table elements pd.options.display.float_format = '{:8,.0f}'.format + # ensure dist_table columns are in correct order + assert dist_table.columns.values.tolist() == DIST_TABLE_COLUMNS return dist_table @@ -536,6 +540,8 @@ def weighted_share_of_total(gpdf, colname, total): diffs[col] *= 100.0 # set print display format for float table elements pd.options.display.float_format = '{:10,.2f}'.format + # ensure diffs columns are in correct order + assert diffs.columns.values.tolist() == DIFF_TABLE_COLUMNS return diffs