Skip to content

Commit

Permalink
Merge pull request #2322 from martinholmer/popquant
Browse files Browse the repository at this point in the history
Allow population (rather than filing-unit) quantiles in tables and graphs
  • Loading branch information
martinholmer authored May 18, 2019
2 parents 18d8867 + b0c3002 commit 540b7c5
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 513 deletions.
3 changes: 1 addition & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
[run]
omit =
taxcalc/calcfunctions.py
taxcalc/*.json
taxcalc/cli/*
taxcalc/tbi/*
taxcalc/tests/*
taxcalc/validation/*
docs/cookbook/*
7 changes: 3 additions & 4 deletions docs/make_uguide.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@
containing information from several JSON files.
"""
# CODING-STYLE CHECKS:
# pycodestyle --ignore=E402 make_uguide.py
# pycodestyle make_uguide.py
# pylint --disable=locally-disabled make_uguide.py

import os
import sys
from collections import OrderedDict
CURDIR_PATH = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(CURDIR_PATH, '..'))
# pylint: disable=import-error,wrong-import-position
from taxcalc import Policy, json_to_dict


INPUT_FILENAME = 'uguide.htmx'
OUTPUT_FILENAME = 'uguide.html'

CURDIR_PATH = os.path.abspath(os.path.dirname(__file__))

TAXCALC_PATH = os.path.join(CURDIR_PATH, '..', 'taxcalc')

INPUT_PATH = os.path.join(CURDIR_PATH, INPUT_FILENAME)
Expand Down
9 changes: 9 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[pytest]
markers =
requires_pufcsv
pre_release
local
compatible_data
benefits
itmded_vars
pep8
189 changes: 82 additions & 107 deletions taxcalc/calculator.py

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ def write_decile_table(dfx, tfile, tkind='Totals'):
"""
dfx = add_quantile_table_row_variable(dfx, 'expanded_income', 10,
decile_details=False,
pop_quantiles=False,
weight_by_income_measure=False)
gdfx = dfx.groupby('table_row', as_index=False)
rtns_series = gdfx.apply(unweighted_sum, 's006')
Expand Down Expand Up @@ -613,14 +614,15 @@ def write_decile_table(dfx, tfile, tkind='Totals'):
def write_graph_files(self):
"""
Write graphs to HTML files.
All graphs contain same number of filing units in each quantile.
"""
pos_wght_sum = self.calc.total_weight() > 0.0
fig = None
# average-tax-rate graph
atr_fname = self._output_filename.replace('.csv', '-atr.html')
atr_title = 'ATR by Income Percentile'
if pos_wght_sum:
fig = self.calc_base.atr_graph(self.calc)
fig = self.calc_base.atr_graph(self.calc, pop_quantiles=False)
write_graph_file(fig, atr_fname, atr_title)
else:
reason = 'No graph because sum of weights is not positive'
Expand All @@ -630,7 +632,10 @@ def write_graph_files(self):
mtr_title = 'MTR by Income Percentile'
if pos_wght_sum:
fig = self.calc_base.mtr_graph(
self.calc, alt_e00200p_text='Taxpayer Earnings')
self.calc,
alt_e00200p_text='Taxpayer Earnings',
pop_quantiles=False
)
write_graph_file(fig, mtr_fname, mtr_title)
else:
reason = 'No graph because sum of weights is not positive'
Expand All @@ -639,7 +644,7 @@ def write_graph_files(self):
pch_fname = self._output_filename.replace('.csv', '-pch.html')
pch_title = 'PCH by Income Percentile'
if pos_wght_sum:
fig = self.calc_base.pch_graph(self.calc)
fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
write_graph_file(fig, pch_fname, pch_title)
else:
reason = 'No graph because sum of weights is not positive'
Expand Down
6 changes: 4 additions & 2 deletions taxcalc/tests/test_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,11 +672,13 @@ def test_mtr_graph(cps_subsample):
fig = calc.mtr_graph(calc,
mars=2,
income_measure='wages',
mtr_measure='ptax')
mtr_measure='ptax',
pop_quantiles=False)
assert fig
fig = calc.mtr_graph(calc,
income_measure='agi',
mtr_measure='itax')
mtr_measure='itax',
pop_quantiles=True)
assert fig


Expand Down
1 change: 1 addition & 0 deletions taxcalc/tests/test_taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ def test_graphs(reformfile1):
idict = dict()
idict['RECID'] = [i for i in range(1, nobs + 1)]
idict['MARS'] = [2 for i in range(1, nobs + 1)]
idict['XTOT'] = [3 for i in range(1, nobs + 1)]
idict['s006'] = [10.0 for i in range(1, nobs + 1)]
idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
idict['expanded_income'] = idict['e00300']
Expand Down
144 changes: 44 additions & 100 deletions taxcalc/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@
DIFF_VARIABLES,
DIFF_TABLE_COLUMNS, DIFF_TABLE_LABELS,
SOI_AGI_BINS,
create_distribution_table, create_difference_table,
weighted_count_lt_zero, weighted_count_gt_zero,
weighted_count, weighted_sum, weighted_mean,
create_difference_table,
weighted_sum, weighted_mean,
wage_weighted, agi_weighted,
expanded_income_weighted,
add_income_table_row_variable,
add_quantile_table_row_variable,
mtr_graph_data, atr_graph_data, dec_graph_data,
mtr_graph_data, atr_graph_data,
xtr_graph_plot, write_graph_file,
read_egg_csv, read_egg_json, delete_file,
bootstrap_se_ci,
Expand Down Expand Up @@ -59,10 +58,12 @@
def test_validity_of_name_lists():
assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
Records.read_var_info()
assert set(DIST_VARIABLES).issubset(Records.CALCULATED_VARS | {'s006'})
extra_vars_set = set(['num_returns_StandardDed',
'num_returns_ItemDed',
'num_returns_AMT'])
assert set(DIST_VARIABLES).issubset(Records.CALCULATED_VARS |
{'s006', 'XTOT'})
extra_vars_set = set(['count',
'count_StandardDed',
'count_ItemDed',
'count_AMT'])
assert (set(DIST_TABLE_COLUMNS) - set(DIST_VARIABLES)) == extra_vars_set


Expand Down Expand Up @@ -213,7 +214,7 @@ def test_create_tables(cps_subsample):
for val in dist[tabcol].values:
print('{:.1f},'.format(val))

tabcol = 'num_returns_ItemDed'
tabcol = 'count_ItemDed'
expected = [0.0,
0.0,
0.4,
Expand Down Expand Up @@ -309,7 +310,7 @@ def test_create_tables(cps_subsample):
for val in dist[tabcol].values:
print('{:.1f},'.format(val))

tabcol = 'num_returns_ItemDed'
tabcol = 'count_ItemDed'
expected = [0.0,
0.0,
0.1,
Expand Down Expand Up @@ -466,45 +467,6 @@ def test_diff_count_precision():
assert not dump


def test_weighted_count_lt_zero():
    """
    Check weighted_count_lt_zero against the DATA and DATA_FLOAT fixtures.

    Each fixture is loaded into a DataFrame with columns 'tax_diff',
    's006' and 'label', grouped by 'label', and weighted_count_lt_zero is
    applied to the 'tax_diff' column of every group.  Both fixtures are
    expected to produce 4 for label 'a' and 0 for label 'b'.
    """
    expected = pd.Series(data=[4, 0], index=['a', 'b'])
    expected.index.name = 'label'
    for dataset in (DATA, DATA_FLOAT):
        dframe = pd.DataFrame(data=dataset,
                              columns=['tax_diff', 's006', 'label'])
        actual = dframe.groupby('label').apply(weighted_count_lt_zero,
                                               'tax_diff')
        # pd.testing is the public location of the assertion helpers;
        # the pd.util.testing path is deprecated
        pd.testing.assert_series_equal(expected, actual)


def test_weighted_count_gt_zero():
    """
    Check weighted_count_gt_zero against the DATA and DATA_FLOAT fixtures.

    Each fixture is loaded into a DataFrame with columns 'tax_diff',
    's006' and 'label', grouped by 'label', and weighted_count_gt_zero is
    applied to the 'tax_diff' column of every group.  The expected result
    is 8 for label 'a' and 10 for label 'b'.
    """
    expected = pd.Series(data=[8, 10], index=['a', 'b'])
    expected.index.name = 'label'
    # The second dataset used to be DATA again, which only repeated the
    # first check; DATA_FLOAT mirrors test_weighted_count_lt_zero.
    # NOTE(review): assumes DATA_FLOAT yields the same [8, 10] counts as
    # DATA (as it does for the lt-zero test) -- confirm against fixture.
    for dataset in (DATA, DATA_FLOAT):
        dframe = pd.DataFrame(data=dataset,
                              columns=['tax_diff', 's006', 'label'])
        actual = dframe.groupby('label').apply(weighted_count_gt_zero,
                                               'tax_diff')
        # pd.testing is the public location of the assertion helpers;
        # the pd.util.testing path is deprecated
        pd.testing.assert_series_equal(expected, actual)


def test_weighted_count():
    """
    Check weighted_count against the DATA fixture.

    DATA is loaded into a DataFrame with columns 'tax_diff', 's006' and
    'label', grouped by 'label', and weighted_count is applied to every
    group.  The expected result is 12 for label 'a' and 10 for label 'b'.
    """
    dframe = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
    actual = dframe.groupby('label').apply(weighted_count)
    expected = pd.Series(data=[12, 10], index=['a', 'b'])
    expected.index.name = 'label'
    # pd.testing is the public location of the assertion helpers;
    # the pd.util.testing path is deprecated
    pd.testing.assert_series_equal(expected, actual)


def test_weighted_mean():
dfx = pd.DataFrame(data=DATA, columns=['tax_diff', 's006', 'label'])
grouped = dfx.groupby('label')
Expand Down Expand Up @@ -578,11 +540,26 @@ def test_dist_table_sum_row(cps_subsample):
rec = Records.cps_constructor(data=cps_subsample)
calc = Calculator(policy=Policy(), records=rec)
calc.calc_all()
tb1 = create_distribution_table(calc.distribution_table_dataframe(),
'standard_income_bins', 'expanded_income')
tb2 = create_distribution_table(calc.distribution_table_dataframe(),
'soi_agi_bins', 'expanded_income')
assert np.allclose(tb1[-1:], tb2[-1:])
# create three distribution tables and compare the ALL row contents
tb1, _ = calc.distribution_tables(None, 'standard_income_bins')
tb2, _ = calc.distribution_tables(None, 'soi_agi_bins')
tb3, _ = calc.distribution_tables(None, 'weighted_deciles')
tb4, _ = calc.distribution_tables(None, 'weighted_deciles',
pop_quantiles=True)
assert np.allclose(tb1.loc['ALL'], tb2.loc['ALL'])
assert np.allclose(tb1.loc['ALL'], tb3.loc['ALL'])
# make sure population count is larger than filing-unit count
assert tb4.at['ALL', 'count'] > tb1.at['ALL', 'count']
# make sure population table has same ALL row values as filing-unit table
for col in ['count', 'count_StandardDed', 'count_ItemDed', 'count_AMT']:
tb4.at['ALL', col] = tb1.at['ALL', col]
assert np.allclose(tb1.loc['ALL'], tb4.loc['ALL'])
# make sure population table has same ALL tax liabilities as diagnostic tbl
dgt = calc.diagnostic_table(1)
assert np.allclose([tb4.at['ALL', 'iitax'],
tb4.at['ALL', 'payrolltax']],
[dgt.at['Ind Income Tax ($b)', calc.current_year],
dgt.at['Payroll Taxes ($b)', calc.current_year]])


def test_diff_table_sum_row(cps_subsample):
Expand All @@ -596,19 +573,19 @@ def test_diff_table_sum_row(cps_subsample):
pol.implement_reform(reform)
calc2 = Calculator(policy=pol, records=rec)
calc2.calc_all()
# create two difference tables and compare their content
tdiff1 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
calc2.dataframe(DIFF_VARIABLES),
'standard_income_bins', 'iitax')
tdiff2 = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
calc2.dataframe(DIFF_VARIABLES),
'soi_agi_bins', 'iitax')
non_digit_cols = ['perc_inc', 'perc_cut']
digit_cols = [c for c in list(tdiff1) if c not in non_digit_cols]
assert np.allclose(tdiff1[digit_cols][-1:],
tdiff2[digit_cols][-1:])
np.allclose(tdiff1[non_digit_cols][-1:],
tdiff2[non_digit_cols][-1:])
# create three difference tables and compare their content
dv1 = calc1.dataframe(DIFF_VARIABLES)
dv2 = calc2.dataframe(DIFF_VARIABLES)
dt1 = create_difference_table(dv1, dv2, 'standard_income_bins', 'iitax')
dt2 = create_difference_table(dv1, dv2, 'soi_agi_bins', 'iitax')
dt3 = create_difference_table(dv1, dv2, 'weighted_deciles', 'iitax',
pop_quantiles=False)
dt4 = create_difference_table(dv1, dv2, 'weighted_deciles', 'iitax',
pop_quantiles=True)
assert np.allclose(dt1.loc['ALL'], dt2.loc['ALL'])
assert np.allclose(dt1.loc['ALL'], dt3.loc['ALL'])
# make sure population count is larger than filing-unit count
assert dt4.at['ALL', 'count'] > dt1.at['ALL', 'count']


def test_mtr_graph_data(cps_subsample):
Expand Down Expand Up @@ -807,36 +784,3 @@ def test_table_columns_labels():
# check that length of two lists are the same
assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
assert len(DIFF_TABLE_COLUMNS) == len(DIFF_TABLE_LABELS)


def test_dec_graph_plots(cps_subsample):
    """
    Exercise decile_graph and dec_graph_data for a payroll-tax reform.

    Builds a pre-reform and a post-reform Calculator for the same year,
    checks that decile_graph returns a truthy figure, and then checks that
    dec_graph_data returns a dict for three combinations of the
    include_zero_incomes and include_negative_incomes flags.
    """
    year = 2020
    policy = Policy()
    records = Records.cps_constructor(data=cps_subsample)
    # pre-reform calculator is built before the reform is implemented
    calc_before = Calculator(policy=policy, records=records)
    calc_before.advance_to_year(year)
    reform = {
        'SS_Earnings_c': {year: 9e99},  # OASDI FICA tax on all earnings
        'FICA_ss_trt': {year: 0.107484}  # lower rate to keep revenue unchanged
    }
    policy.implement_reform(reform)
    calc_after = Calculator(policy=policy, records=records)
    calc_after.advance_to_year(year)
    assert calc_before.current_year == calc_after.current_year
    calc_before.calc_all()
    calc_after.calc_all()
    fig = calc_before.decile_graph(calc_after)
    assert fig
    dt1, dt2 = calc_before.distribution_tables(calc_after, 'weighted_deciles')
    # (include_zero_incomes, include_negative_incomes) pairs to exercise
    flag_combinations = (
        (True, False),
        (False, True),
        (False, False),
    )
    for with_zero, with_negative in flag_combinations:
        dta = dec_graph_data(dt1, dt2, year,
                             include_zero_incomes=with_zero,
                             include_negative_incomes=with_negative)
        assert isinstance(dta, dict)
Loading

0 comments on commit 540b7c5

Please sign in to comment.