Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename and reorder difference table columns #1584

Merged
merged 5 commits into from
Oct 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ Release 0.12.0 on 2017-??-??
- Change Calculator class constructor so that it makes a deep copy of each specified object for internal use
[[#1582](https://github.com/open-source-economics/Tax-Calculator/pull/1582)
by Martin Holmer]
- Rename and reorder difference table columns
[[#1584](https://github.com/open-source-economics/Tax-Calculator/pull/1584)
by Martin Holmer]

**New Features**
- Add Calculator.reform_documentation that generates plain text documentation of a reform
Expand Down
1 change: 0 additions & 1 deletion taxcalc/tbi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from taxcalc.tbi.tbi import (run_nth_year_tax_calc_model,
run_nth_year_gdp_elast_model,
create_dict_table,
reform_warnings_errors)
68 changes: 12 additions & 56 deletions taxcalc/tbi/tbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,13 @@
calculate,
random_seed,
summary,
create_dict_table,
AGGR_ROW_NAMES)
from taxcalc import (results, DIST_TABLE_LABELS,
proportional_change_in_gdp,
Growdiff, Growfactors, Policy)
from taxcalc import (results, DIST_TABLE_LABELS, DIFF_TABLE_LABELS,
proportional_change_in_gdp, Growdiff, Growfactors, Policy)


# specify constants
DIST_COLUMN_TYPES = [float] * len(DIST_TABLE_LABELS)

DIFF_COLUMN_TYPES = [int, int, int, float, float, str, str, str, str]

DEC_ROW_NAMES = ['perc0-10', 'perc10-20', 'perc20-30', 'perc30-40',
'perc40-50', 'perc50-60', 'perc60-70', 'perc70-80',
'perc80-90', 'perc90-100', 'all']
Expand Down Expand Up @@ -128,7 +124,7 @@ def append_year(pdf):
pdf.columns = [str(col) + '_{}'.format(year_n) for col in pdf.columns]
return pdf

# optionally return non-JSON results
# optionally return non-JSON-like results
if not return_dict:
res = dict()
for tbl in summ:
Expand All @@ -137,10 +133,13 @@ def append_year(pdf):
print('elapsed time for this run: {:.1f}'.format(elapsed_time))
return res

# optionally construct JSON results tables for year n
# optionally construct JSON-like results dictionaries for year n
dec_row_names_n = [x + '_' + str(year_n) for x in DEC_ROW_NAMES]
bin_row_names_n = [x + '_' + str(year_n) for x in BIN_ROW_NAMES]
agg_row_names_n = [x + '_' + str(year_n) for x in AGG_ROW_NAMES]
dist_column_types = [float] * len(DIST_TABLE_LABELS)
diff_column_types = [int, int, str, int, str, float, float, str, str]
assert len(diff_column_types) == len(DIFF_TABLE_LABELS)
info = dict()
for tbl in summ:
info[tbl] = {'row_names': [], 'col_types': []}
Expand All @@ -151,9 +150,9 @@ def append_year(pdf):
else:
info[tbl]['row_names'] = agg_row_names_n
if 'dist' in tbl:
info[tbl]['col_types'] = DIST_COLUMN_TYPES
info[tbl]['col_types'] = dist_column_types
elif 'diff' in tbl:
info[tbl]['col_types'] = DIFF_COLUMN_TYPES
info[tbl]['col_types'] = diff_column_types
res = dict()
for tbl in summ:
if 'aggr' in tbl:
Expand All @@ -164,8 +163,10 @@ def append_year(pdf):
res[tbl] = create_dict_table(summ[tbl],
row_names=info[tbl]['row_names'],
column_types=info[tbl]['col_types'])

elapsed_time = time.time() - start_time
print('elapsed time for this run: {:.1f}'.format(elapsed_time))

return res


Expand Down Expand Up @@ -212,48 +213,3 @@ def run_nth_year_gdp_elast_model(year_n, start_year,
return gdp_elast_total
else:
return gdp_effect


def create_dict_table(dframe, row_names=None, column_types=None,
                      num_decimals=2):
    """
    Create and return dictionary with JSON-like content from specified dframe.

    Each row of dframe becomes one dictionary entry whose key is the row
    name and whose value is a list holding one formatted string per column.

    Parameters
    ----------
    dframe: DataFrame whose cells are converted to strings
    row_names: optional list of dictionary keys, one per dframe row;
        when None, the string form of each dframe index value is used
    column_types: optional list of types, one per dframe column, each
        being float, int, str, None (handled like float), or the numpy
        'f8'/'i8' dtype; when None, each dframe column's dtype is used
    num_decimals: number of digits after the decimal point in float cells

    Returns
    -------
    dictionary mapping each row name to its list of formatted strings

    Raises
    ------
    AssertionError: if row_names or column_types has the wrong length
    NotImplementedError: if a column type is not one of those listed above
    """
    # these do not vary across cells, so build them once per table
    float_types = [float, np.dtype('f8')]
    int_types = [int, np.dtype('i8')]
    float_format = '{{0:.{num}f}}'.format(num=num_decimals)

    # embedded formatted_string function
    def formatted_string(val, _type):
        """
        Return formatted conversion of number val into a string.
        """
        # choose the conversion outside the try block so that an
        # unsupported type always raises NotImplementedError
        if _type is None or _type in float_types:
            converter = float_format.format
        elif _type in int_types:
            converter = int
        elif _type == str:
            converter = str
        else:
            raise NotImplementedError()
        try:
            return str(converter(val))
        except (ValueError, TypeError, OverflowError):
            # val cannot be converted as requested (for example, a NaN or
            # infinite value in an int column raises ValueError or
            # OverflowError), so fall back to its plain string form
            return str(val)

    # high-level create_dict_table function logic
    if row_names is None:
        row_names = [str(x) for x in dframe.index]
    else:
        assert len(row_names) == len(dframe.index)
    if column_types is None:
        column_types = [dframe[col].dtype for col in dframe.columns]
    else:
        assert len(column_types) == len(dframe.columns)
    out = dict()
    for idx, row_name in zip(dframe.index, row_names):
        # a repeated row name keeps appending to the same list
        row_out = out.setdefault(row_name, [])
        row_out.extend(formatted_string(dframe.loc[idx, col], dtype)
                       for col, dtype in zip(dframe.columns, column_types))
    return out
45 changes: 45 additions & 0 deletions taxcalc/tbi/tbi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,3 +584,48 @@ def summary(df1, df2, mask):

# return dictionary of summary results
return summ


def create_dict_table(dframe, row_names=None, column_types=None,
                      num_decimals=2):
    """
    Create and return dictionary with JSON-like content from specified dframe.

    Each row of dframe becomes one dictionary entry whose key is the row
    name and whose value is a list holding one formatted string per column.

    Parameters
    ----------
    dframe: DataFrame whose cells are converted to strings
    row_names: optional list of dictionary keys, one per dframe row;
        when None, the string form of each dframe index value is used
    column_types: optional list of types, one per dframe column, each
        being float, int, str, None (handled like float), or the numpy
        'f8'/'i8' dtype; when None, each dframe column's dtype is used
    num_decimals: number of digits after the decimal point in float cells

    Returns
    -------
    dictionary mapping each row name to its list of formatted strings

    Raises
    ------
    AssertionError: if row_names or column_types has the wrong length
    NotImplementedError: if a column type is not one of those listed above
    """
    # these do not vary across cells, so build them once per table
    float_types = [float, np.dtype('f8')]
    int_types = [int, np.dtype('i8')]
    float_format = '{{0:.{num}f}}'.format(num=num_decimals)

    # embedded formatted_string function
    def formatted_string(val, _type):
        """
        Return formatted conversion of number val into a string.
        """
        # choose the conversion outside the try block so that an
        # unsupported type always raises NotImplementedError
        if _type is None or _type in float_types:
            converter = float_format.format
        elif _type in int_types:
            converter = int
        elif _type == str:
            converter = str
        else:
            raise NotImplementedError()
        try:
            return str(converter(val))
        except (ValueError, TypeError, OverflowError):
            # val cannot be converted as requested (for example, a NaN or
            # infinite value in an int column raises ValueError or
            # OverflowError), so fall back to its plain string form
            return str(val)

    # high-level create_dict_table function logic
    if row_names is None:
        row_names = [str(x) for x in dframe.index]
    else:
        assert len(row_names) == len(dframe.index)
    if column_types is None:
        column_types = [dframe[col].dtype for col in dframe.columns]
    else:
        assert len(column_types) == len(dframe.columns)
    out = dict()
    for idx, row_name in zip(dframe.index, row_names):
        # a repeated row name keeps appending to the same list
        row_out = out.setdefault(row_name, [])
        row_out.extend(formatted_string(dframe.loc[idx, col], dtype)
                       for col, dtype in zip(dframe.columns, column_types))
    return out
7 changes: 7 additions & 0 deletions taxcalc/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from taxcalc import Policy, Records, Behavior, Calculator
from taxcalc.utils import (STATS_COLUMNS,
DIST_TABLE_COLUMNS, DIST_TABLE_LABELS,
DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS,
create_distribution_table, create_difference_table,
weighted_count_lt_zero, weighted_count_gt_zero,
weighted_count, weighted_sum, weighted_mean,
Expand Down Expand Up @@ -846,3 +847,9 @@ def test_bootstrap_se_ci():
assert abs(bsd['se'] / 23.02 - 1) < 0.02
assert abs(bsd['cilo'] / 45.9 - 1) < 0.02
assert abs(bsd['cihi'] / 135.4 - 1) < 0.03


def test_table_columns_labels():
    """
    Check that each table's columns list is as long as its labels list.
    """
    paired_lists = [(DIST_TABLE_COLUMNS, DIST_TABLE_LABELS),
                    (DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS)]
    for columns, labels in paired_lists:
        assert len(columns) == len(labels)
34 changes: 22 additions & 12 deletions taxcalc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@
'payrolltax', 'combined', 's006']

# Items in the DIST_TABLE_COLUMNS list below correspond to the items in the
# DIST_TABLE_LABELS list below; this correspondence allows us to use
# DIST_TABLE_LABELS to map a label to the correct column in the distribution
# tables.
# DIST_TABLE_LABELS list below; this correspondence allows us to use this
# labels list to map a label to the correct column in a distribution table.
DIST_TABLE_COLUMNS = ['s006',
'c00100',
'num_returns_StandardDed',
Expand Down Expand Up @@ -76,18 +75,29 @@
'Payroll Tax Liablities',
'Combined Payroll and Individual Income Tax Liabilities']

# Following list is used in our difference table to label its columns.
DIFF_TABLE_LABELS = ['Tax Units with Tax Cut',
# Items in the DIFF_TABLE_COLUMNS list below correspond to the items in the
# DIFF_TABLE_LABELS list below; this correspondence allows us to use this
# labels list to map a label to the correct column in a difference table.
DIFF_TABLE_COLUMNS = ['count',
'tax_cut',
'perc_cut',
'tax_inc',
'perc_inc',
'mean',
'tot_change',
'share_of_change',
'perc_aftertax']

DIFF_TABLE_LABELS = ['All Tax Units',
'Tax Units with Tax Cut',
'Percent with Tax Cut',
'Tax Units with Tax Increase',
'Count',
'Percent with Tax Increase',
'Average Tax Change',
'Total Tax Difference',
'Percent with Tax Increase',
'Percent with Tax Decrease',
'Share of Overall Change',
'Change as % of Aftertax Income']


WEBAPP_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999,
199999, 499999, 1000000, 9e99]

Expand Down Expand Up @@ -436,13 +446,13 @@ def weighted_share_of_total(gpdf, colname, total):
# print gpdf.count() # show unweighted number of filing units per bin
# create difference table statistics from gpdf in a new DataFrame
diffs = pd.DataFrame()
diffs['count'] = gpdf.apply(weighted_count)
diffs['tax_cut'] = gpdf.apply(weighted_count_lt_zero, 'tax_diff')
diffs['perc_cut'] = gpdf.apply(weighted_perc_cut, 'tax_diff')
diffs['tax_inc'] = gpdf.apply(weighted_count_gt_zero, 'tax_diff')
diffs['count'] = gpdf.apply(weighted_count)
diffs['perc_inc'] = gpdf.apply(weighted_perc_inc, 'tax_diff')
diffs['mean'] = gpdf.apply(weighted_mean, 'tax_diff')
diffs['tot_change'] = gpdf.apply(weighted_sum, 'tax_diff')
diffs['perc_inc'] = gpdf.apply(weighted_perc_inc, 'tax_diff')
diffs['perc_cut'] = gpdf.apply(weighted_perc_cut, 'tax_diff')
wtotal = (res2['tax_diff'] * res2['s006']).sum()
diffs['share_of_change'] = gpdf.apply(weighted_share_of_total,
'tax_diff', wtotal)
Expand Down