From fd94bfded6b6411fc9975ea5fa995221bdd066ec Mon Sep 17 00:00:00 2001 From: martinholmer Date: Tue, 10 Oct 2017 11:58:47 -0400 Subject: [PATCH 1/3] Rename and reorder difference table columns --- taxcalc/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/taxcalc/utils.py b/taxcalc/utils.py index 2eefdf6d9..617121ed9 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -76,18 +76,17 @@ 'Payroll Tax Liablities', 'Combined Payroll and Individual Income Tax Liabilities'] -# Following list is used in our difference table to label its columns. -DIFF_TABLE_LABELS = ['Tax Units with Tax Cut', +# Following list is used in the difference table to label its columns. +DIFF_TABLE_LABELS = ['All Tax Units', + 'Tax Units with Tax Cut', + 'Percent with Tax Cut', 'Tax Units with Tax Increase', - 'Count', + 'Percent with Tax Increase', 'Average Tax Change', 'Total Tax Difference', - 'Percent with Tax Increase', - 'Percent with Tax Decrease', 'Share of Overall Change', 'Change as % of Aftertax Income'] - WEBAPP_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999, 199999, 499999, 1000000, 9e99] @@ -436,13 +435,13 @@ def weighted_share_of_total(gpdf, colname, total): # print gpdf.count() # show unweighted number of filing units per bin # create difference table statistics from gpdf in a new DataFrame diffs = pd.DataFrame() + diffs['count'] = gpdf.apply(weighted_count) diffs['tax_cut'] = gpdf.apply(weighted_count_lt_zero, 'tax_diff') + diffs['perc_cut'] = gpdf.apply(weighted_perc_cut, 'tax_diff') diffs['tax_inc'] = gpdf.apply(weighted_count_gt_zero, 'tax_diff') - diffs['count'] = gpdf.apply(weighted_count) + diffs['perc_inc'] = gpdf.apply(weighted_perc_inc, 'tax_diff') diffs['mean'] = gpdf.apply(weighted_mean, 'tax_diff') diffs['tot_change'] = gpdf.apply(weighted_sum, 'tax_diff') - diffs['perc_inc'] = gpdf.apply(weighted_perc_inc, 'tax_diff') - diffs['perc_cut'] = gpdf.apply(weighted_perc_cut, 'tax_diff') wtotal = (res2['tax_diff'] * res2['s006']).sum() diffs['share_of_change'] = gpdf.apply(weighted_share_of_total, 'tax_diff', wtotal) From eeb60b7538a92eaab8918b0fe12f8e51420de2cd Mon Sep 17 00:00:00 2001 From: martinholmer Date: Tue, 10 Oct 2017 12:12:18 -0400 Subject: [PATCH 2/3] Update RELEASES.md info --- RELEASES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASES.md b/RELEASES.md index ce20d76a5..1f4cc2fc7 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -19,6 +19,9 @@ Release 0.12.0 on 2017-??-?? - Rename dropq as tbi (taxbrain interface) and refactor run_nth_year_*_model functions [[#1577](https://github.com/open-source-economics/Tax-Calculator/pull/1577) by Martin Holmer] +- Rename and reorder difference table columns + [[#1584](https://github.com/open-source-economics/Tax-Calculator/pull/1584) + by Martin Holmer] **New Features** - Add Calculator.reform_documentation that generates plain text documentation of a reform From b98e4ff924d292814bc76dea2c753ffd241bf86d Mon Sep 17 00:00:00 2001 From: martinholmer Date: Wed, 11 Oct 2017 15:41:38 -0400 Subject: [PATCH 3/3] Reorder difference table column types --- taxcalc/tbi/__init__.py | 1 - taxcalc/tbi/tbi.py | 65 +++++++------------------------------ taxcalc/tbi/tbi_utils.py | 45 +++++++++++++++++++++++++ taxcalc/tests/test_utils.py | 7 ++++ taxcalc/utils.py | 19 ++++++++--- 5 files changed, 78 insertions(+), 59 deletions(-) diff --git a/taxcalc/tbi/__init__.py b/taxcalc/tbi/__init__.py index b46660300..2730759f7 100644 --- a/taxcalc/tbi/__init__.py +++ b/taxcalc/tbi/__init__.py @@ -1,4 +1,3 @@ from taxcalc.tbi.tbi import (run_nth_year_tax_calc_model, run_nth_year_gdp_elast_model, - create_dict_table, reform_warnings_errors) diff --git a/taxcalc/tbi/tbi.py b/taxcalc/tbi/tbi.py index 31422e491..d20fdcf4e 100644 --- a/taxcalc/tbi/tbi.py +++ b/taxcalc/tbi/tbi.py @@ -19,16 +19,13 @@ from taxcalc.tbi.tbi_utils import (calculate, random_seed, summary, + create_dict_table, AGGR_ROW_NAMES) -from taxcalc import (results, DIST_TABLE_LABELS, +from taxcalc import (results, DIST_TABLE_LABELS, DIFF_TABLE_LABELS, proportional_change_gdp, Growdiff, Growfactors, Policy) # specify constants -DIST_COLUMN_TYPES = [float] * len(DIST_TABLE_LABELS) - -DIFF_COLUMN_TYPES = [int, int, int, float, float, str, str, str, str] - DEC_ROW_NAMES = ['perc0-10', 'perc10-20', 'perc20-30', 'perc30-40', 'perc40-50', 'perc50-60', 'perc60-70', 'perc70-80', 'perc80-90', 'perc90-100', 'all'] @@ -123,7 +120,7 @@ def append_year(pdf): pdf.columns = [str(col) + '_{}'.format(year_n) for col in pdf.columns] return pdf - # optionally return non-JSON results + # optionally return non-JSON-like results if not return_dict: res = dict() for tbl in summ: @@ -132,10 +129,13 @@ def append_year(pdf): print('elapsed time for this run: {:.1f}'.format(elapsed_time)) return res - # optionally construct JSON results tables for year n + # optionally construct JSON-like results dictionaries for year n dec_row_names_n = [x + '_' + str(year_n) for x in DEC_ROW_NAMES] bin_row_names_n = [x + '_' + str(year_n) for x in BIN_ROW_NAMES] agg_row_names_n = [x + '_' + str(year_n) for x in AGG_ROW_NAMES] + dist_column_types = [float] * len(DIST_TABLE_LABELS) + diff_column_types = [int, int, str, int, str, float, float, str, str] + assert len(diff_column_types) == len(DIFF_TABLE_LABELS) info = dict() for tbl in summ: info[tbl] = {'row_names': [], 'col_types': []} @@ -146,9 +146,9 @@ def append_year(pdf): else: info[tbl]['row_names'] = agg_row_names_n if 'dist' in tbl: - info[tbl]['col_types'] = DIST_COLUMN_TYPES + info[tbl]['col_types'] = dist_column_types elif 'diff' in tbl: - info[tbl]['col_types'] = DIFF_COLUMN_TYPES + info[tbl]['col_types'] = diff_column_types res = dict() for tbl in summ: if 'aggr' in tbl: @@ -159,8 +159,10 @@ def append_year(pdf): res[tbl] = create_dict_table(summ[tbl], row_names=info[tbl]['row_names'], column_types=info[tbl]['col_types']) + elapsed_time = time.time() - start_time print('elapsed time for this run: {:.1f}'.format(elapsed_time)) + return res @@ -202,48 +204,3 @@ def run_nth_year_gdp_elast_model(year_n, start_year, return gdp_elast_total else: return gdp_effect - - -def create_dict_table(dframe, row_names=None, column_types=None, - num_decimals=2): - """ - Create and return dictionary with JSON-like content from specified dframe. - """ - # embedded formatted_string function - def formatted_string(val, _type, num_decimals): - """ - Return formatted conversion of number val into a string. - """ - float_types = [float, np.dtype('f8')] - int_types = [int, np.dtype('i8')] - frmat_str = "0:.{num}f".format(num=num_decimals) - frmat_str = "{" + frmat_str + "}" - try: - if _type in float_types or _type is None: - return frmat_str.format(val) - elif _type in int_types: - return str(int(val)) - elif _type == str: - return str(val) - else: - raise NotImplementedError() - except ValueError: - # try making it a string - good luck! - return str(val) - # high-level create_dict_table function logic - out = dict() - if row_names is None: - row_names = [str(x) for x in list(dframe.index)] - else: - assert len(row_names) == len(dframe.index) - if column_types is None: - column_types = [dframe[col].dtype for col in dframe.columns] - else: - assert len(column_types) == len(dframe.columns) - for idx, row_name in zip(dframe.index, row_names): - row_out = out.get(row_name, []) - for col, dtype in zip(dframe.columns, column_types): - row_out.append(formatted_string(dframe.loc[idx, col], - dtype, num_decimals)) - out[row_name] = row_out - return out diff --git a/taxcalc/tbi/tbi_utils.py b/taxcalc/tbi/tbi_utils.py index 306611e32..5b478deba 100644 --- a/taxcalc/tbi/tbi_utils.py +++ b/taxcalc/tbi/tbi_utils.py @@ -577,3 +577,48 @@ def summary(df1, df2, mask): # return dictionary of summary results return summ + + +def create_dict_table(dframe, row_names=None, column_types=None, + num_decimals=2): + """ + Create and return dictionary with JSON-like content from specified dframe. + """ + # embedded formatted_string function + def formatted_string(val, _type, num_decimals): + """ + Return formatted conversion of number val into a string. + """ + float_types = [float, np.dtype('f8')] + int_types = [int, np.dtype('i8')] + frmat_str = "0:.{num}f".format(num=num_decimals) + frmat_str = "{" + frmat_str + "}" + try: + if _type in float_types or _type is None: + return frmat_str.format(val) + elif _type in int_types: + return str(int(val)) + elif _type == str: + return str(val) + else: + raise NotImplementedError() + except ValueError: + # try making it a string - good luck! + return str(val) + # high-level create_dict_table function logic + out = dict() + if row_names is None: + row_names = [str(x) for x in list(dframe.index)] + else: + assert len(row_names) == len(dframe.index) + if column_types is None: + column_types = [dframe[col].dtype for col in dframe.columns] + else: + assert len(column_types) == len(dframe.columns) + for idx, row_name in zip(dframe.index, row_names): + row_out = out.get(row_name, []) + for col, dtype in zip(dframe.columns, column_types): + row_out.append(formatted_string(dframe.loc[idx, col], + dtype, num_decimals)) + out[row_name] = row_out + return out diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py index d930a6173..114956b60 100644 --- a/taxcalc/tests/test_utils.py +++ b/taxcalc/tests/test_utils.py @@ -17,6 +17,7 @@ from taxcalc import Policy, Records, Behavior, Calculator from taxcalc.utils import (STATS_COLUMNS, DIST_TABLE_COLUMNS, DIST_TABLE_LABELS, + DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS, create_distribution_table, create_difference_table, weighted_count_lt_zero, weighted_count_gt_zero, weighted_count, weighted_sum, weighted_mean, @@ -851,3 +852,9 @@ def test_bootstrap_se_ci(): assert abs(bsd['se'] / 23.02 - 1) < 0.02 assert abs(bsd['cilo'] / 45.9 - 1) < 0.02 assert abs(bsd['cihi'] / 135.4 - 1) < 0.03 + + +def test_table_columns_labels(): + # check that length of two lists are the same + assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS) + assert len(DIFF_TABLE_COLUMNS) == len(DIFF_TABLE_LABELS) diff --git a/taxcalc/utils.py b/taxcalc/utils.py index 617121ed9..7cc3e2cd0 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -33,9 +33,8 @@ 'payrolltax', 'combined', 's006'] # Items in the DIST_TABLE_COLUMNS list below correspond to the items in the -# DIST_TABLE_LABELS list below; this correspondence allows us to use -# DIST_TABLE_LABELS to map a label to the correct column in the distribution -# tables. +# DIST_TABLE_LABELS list below; this correspondence allows us to use this +# labels list to map a label to the correct column in a distribution table. DIST_TABLE_COLUMNS = ['s006', 'c00100', 'num_returns_StandardDed', @@ -76,7 +75,19 @@ 'Payroll Tax Liablities', 'Combined Payroll and Individual Income Tax Liabilities'] -# Following list is used in the difference table to label its columns. +# Items in the DIFF_TABLE_COLUMNS list below correspond to the items in the +# DIFF_TABLE_LABELS list below; this correspondence allows us to use this +# labels list to map a label to the correct column in a difference table. +DIFF_TABLE_COLUMNS = ['count', + 'tax_cut', + 'perc_cut', + 'tax_inc', + 'perc_inc', + 'mean', + 'tot_change', + 'share_of_change', + 'perc_aftertax'] + DIFF_TABLE_LABELS = ['All Tax Units', 'Tax Units with Tax Cut', 'Percent with Tax Cut',