Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict fuzzing to reform-affected filing units #1976

Merged
merged 1 commit into from
Apr 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions taxcalc/tbi/tbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# pylint --disable=locally-disabled tbi.py

from __future__ import print_function
import gc
import time
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -101,12 +100,12 @@ def run_nth_year_tax_calc_model(year_n, start_year,

start_time = time.time()

# create calc1 and calc2 calculated for year_n and mask
# create calc1 and calc2 calculated for year_n
check_years_return_first_year(year_n, start_year, use_puf_not_cps)
(calc1, calc2, mask) = calculate(year_n, start_year,
use_puf_not_cps, use_full_sample,
user_mods,
behavior_allowed=True)
(calc1, calc2) = calculate(year_n, start_year,
use_puf_not_cps, use_full_sample,
user_mods,
behavior_allowed=True)

# extract raw results from calc1 and calc2
rawres1 = calc1.distribution_table_dataframe()
Expand All @@ -115,18 +114,16 @@ def run_nth_year_tax_calc_model(year_n, start_year,
# delete calc1 and calc2 now that raw results have been extracted
del calc1
del calc2
gc.collect()

# seed random number generator with a seed value based on user_mods
seed = random_seed(user_mods)
print('seed={}'.format(seed))
np.random.seed(seed) # pylint: disable=no-member

# construct TaxBrain summary results from raw results
summ = summary(rawres1, rawres2, mask)
summ = summary(rawres1, rawres2, use_puf_not_cps)
del rawres1
del rawres2
gc.collect()

def append_year(pdf):
"""
Expand Down Expand Up @@ -202,11 +199,11 @@ def run_nth_year_gdp_elast_model(year_n, start_year,
fyear = check_years_return_first_year(year_n, start_year, use_puf_not_cps)
if year_n > 0 and (start_year + year_n) > fyear:
# create calc1 and calc2 calculated for year_n - 1
(calc1, calc2, _) = calculate((year_n - 1), start_year,
use_puf_not_cps,
use_full_sample,
user_mods,
behavior_allowed=False)
(calc1, calc2) = calculate((year_n - 1), start_year,
use_puf_not_cps,
use_full_sample,
user_mods,
behavior_allowed=False)
# compute GDP effect given specified gdp_elasticity
gdp_effect = proportional_change_in_gdp((start_year + year_n),
calc1, calc2, gdp_elasticity)
Expand Down
115 changes: 37 additions & 78 deletions taxcalc/tbi/tbi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,9 @@ def calculate(year_n, start_year,
"""
The calculate function assumes the specified user_mods is a dictionary
returned by the Calculator.read_json_param_objects() function.
The function returns (calc1, calc2, mask) where
calc1 is pre-reform Calculator object calculated for year_n,
calc2 is post-reform Calculator object calculated for year_n, and
mask is boolean array marking records with reform-induced iitax diffs
The function returns (calc1, calc2) where
calc1 is pre-reform Calculator object calculated for year_n, and
calc2 is post-reform Calculator object calculated for year_n.
Set behavior_allowed to False when generating static results or
set behavior_allowed to True when generating dynamic results.
"""
Expand Down Expand Up @@ -149,41 +148,6 @@ def calculate(year_n, start_year,
calc1.calc_all()
assert calc1.current_year == start_year

# compute mask array
res1 = calc1.dataframe(DIST_VARIABLES)
if use_puf_not_cps:
# create pre-reform Calculator instance with extra income
recs1p = Records(data=sample, gfactors=growfactors_pre)
# add one dollar to the income of each filing unit to determine
# which filing units undergo a resulting change in tax liability
recs1p.e00200 += 1.0 # pylint: disable=no-member
recs1p.e00200p += 1.0 # pylint: disable=no-member
policy1p = Policy(gfactors=growfactors_pre)
# create Calculator with recs1p and calculate for start_year
calc1p = Calculator(policy=policy1p, records=recs1p,
consumption=consump)
while calc1p.current_year < start_year:
calc1p.increment_year()
calc1p.calc_all()
assert calc1p.current_year == start_year
# compute mask showing which of the calc1 and calc1p results differ;
# mask is true if a filing unit's income tax liability changed after
# a dollar was added to the filing unit's wage and salary income
res1p = calc1p.dataframe(DIST_VARIABLES)
mask = np.logical_not( # pylint: disable=no-member
np.isclose(res1.iitax, res1p.iitax, atol=0.001, rtol=0.0)
)
assert np.any(mask)
# delete intermediate objects
del recs1p
del policy1p
del calc1p
del res1p
else: # if use_puf_not_cps is False
# indicate that fuzzing of reform results is not required
mask = np.full(res1.shape, False)
del res1

# specify Behavior instance
behv = Behavior()
behavior_assumps = user_mods['behavior']
Expand Down Expand Up @@ -239,8 +203,8 @@ def calculate(year_n, start_year,
else:
calc2.calc_all()

# return calculated Calculator objects and mask
return (calc1, calc2, mask)
# return calculated Calculator objects
return (calc1, calc2)


def random_seed(user_mods):
Expand Down Expand Up @@ -372,13 +336,13 @@ def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
if do_fuzzing:
df2['mask'] = mask
df2['expanded_income_baseline'] = df1['expanded_income']
create(df1, df2, 'dec', 'expanded_income_baseline', '_xdec',
create(df1, df2, 'dec', 'expanded_income_baseline', '', # '_xdec',
columns_to_create, do_fuzzing)
df2_xdec = copy.deepcopy(df2)
create(df1, df2, 'bin', 'expanded_income_baseline', '_xbin',
create(df1, df2, 'bin', 'expanded_income_baseline', '', # '_xbin',
columns_to_create, do_fuzzing)
df2_xbin = copy.deepcopy(df2)
create(df1, df2, 'agg', 'expanded_income_baseline', '_agg',
create(df1, df2, 'agg', 'expanded_income_baseline', '', # '_agg',
columns_to_create, do_fuzzing)
df2_aggr = copy.deepcopy(df2)
return (df2_xdec, df2_xbin, df2_aggr)
Expand All @@ -387,31 +351,42 @@ def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
AGGR_ROW_NAMES = ['ind_tax', 'payroll_tax', 'combined_tax']


def summary(df1, df2, mask):
def summary(df1, df2, fuzzing):
"""
df1 contains raw results for baseline plan
df2 contains raw results for reform plan
mask is the boolean array specifying records with reform-induced tax diffs
returns dictionary of summary results DataFrames
df1 contains distribution-table variables for baseline.
df2 contains distribution-table variables for reform.
fuzzing indicates whether or not there is a need to fuzz df2 variables.
returns dictionary of summary-results DataFrames.
"""
# pylint: disable=too-many-statements,too-many-locals

df2_xdec, df2_xbin, df2_aggr = create_results_columns(df1, df2, mask)
df1_xdec = add_quantile_table_row_variable(df1, 'expanded_income',
10, decile_details=True)
del df1_xdec['table_row']
df1_xbin = add_income_table_row_variable(df1, 'expanded_income',
bins=STANDARD_INCOME_BINS)
del df1_xbin['table_row']
if fuzzing:
reform_affected = np.logical_not( # pylint: disable=no-member
np.isclose(df1['combined'], df2['combined'],
atol=0.001, rtol=0.0))
df2_xdec, df2_xbin, df2_aggr = create_results_columns(df1, df2,
reform_affected)
df1_xdec = add_quantile_table_row_variable(df1, 'expanded_income',
10, decile_details=True)
del df1_xdec['table_row']
df1_xbin = add_income_table_row_variable(df1, 'expanded_income',
bins=STANDARD_INCOME_BINS)
del df1_xbin['table_row']
else:
df2_aggr = copy.deepcopy(df2)
df2_xdec = copy.deepcopy(df2)
df2_xbin = copy.deepcopy(df2)
df1_xdec = copy.deepcopy(df1)
df1_xbin = copy.deepcopy(df1)

summ = dict()

# tax difference totals between reform and baseline
tdiff = df2_aggr['iitax_agg'] - df1['iitax']
tdiff = df2_aggr['iitax'] - df1['iitax']
aggr_itax_d = (tdiff * df2['s006']).sum()
tdiff = df2_aggr['payrolltax_agg'] - df1['payrolltax']
tdiff = df2_aggr['payrolltax'] - df1['payrolltax']
aggr_ptax_d = (tdiff * df2['s006']).sum()
tdiff = df2_aggr['combined_agg'] - df1['combined']
tdiff = df2_aggr['combined'] - df1['combined']
aggr_comb_d = (tdiff * df2['s006']).sum()
aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
summ['aggr_d'] = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
Expand All @@ -424,55 +399,49 @@ def summary(df1, df2, mask):
summ['aggr_1'] = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)

# totals for reform
aggr_itax_2 = (df2_aggr['iitax_agg'] * df2['s006']).sum()
aggr_ptax_2 = (df2_aggr['payrolltax_agg'] * df2['s006']).sum()
aggr_comb_2 = (df2_aggr['combined_agg'] * df2['s006']).sum()
aggr_itax_2 = (df2_aggr['iitax'] * df2['s006']).sum()
aggr_ptax_2 = (df2_aggr['payrolltax'] * df2['s006']).sum()
aggr_comb_2 = (df2_aggr['combined'] * df2['s006']).sum()
aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
summ['aggr_2'] = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)

del df1
del df2

# create difference tables grouped by xdec
df2_xdec['iitax'] = df2_xdec['iitax_xdec']
summ['diff_itax_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='iitax')

df2_xdec['payrolltax'] = df2_xdec['payrolltax_xdec']
summ['diff_ptax_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='payrolltax')

df2_xdec['combined'] = df2_xdec['combined_xdec']
summ['diff_comb_xdec'] = \
create_difference_table(df1_xdec, df2_xdec,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='combined')

# create difference tables grouped by xbin
df2_xbin['iitax'] = df2_xbin['iitax_xbin']
diff_itax_xbin = \
create_difference_table(df1_xdec, df2_xbin,
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='iitax')
summ['diff_itax_xbin'] = diff_itax_xbin

df2_xbin['payrolltax'] = df2_xbin['payrolltax_xbin']
diff_ptax_xbin = \
create_difference_table(df1_xbin, df2_xbin,
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='payrolltax')
summ['diff_ptax_xbin'] = diff_ptax_xbin

df2_xbin['combined'] = df2_xbin['combined_xbin']
diff_comb_xbin = \
create_difference_table(df1_xbin, df2_xbin,
groupby='standard_income_bins',
Expand All @@ -486,11 +455,6 @@ def summary(df1, df2, mask):
income_measure='expanded_income',
result_type='weighted_sum')

suffix = '_xdec'
df2_cols_with_suffix = [c for c in list(df2_xdec) if c.endswith(suffix)]
for col in df2_cols_with_suffix:
root_col_name = col.replace(suffix, '')
df2_xdec[root_col_name] = df2_xdec[col]
df2_xdec['expanded_income_baseline'] = df1_xdec['expanded_income']
summ['dist2_xdec'] = \
create_distribution_table(df2_xdec, groupby='weighted_deciles',
Expand All @@ -504,11 +468,6 @@ def summary(df1, df2, mask):
result_type='weighted_sum')
summ['dist1_xbin'] = dist1_xbin

suffix = '_xbin'
df2_cols_with_suffix = [c for c in list(df2_xbin) if c.endswith(suffix)]
for col in df2_cols_with_suffix:
root_col_name = col.replace(suffix, '')
df2_xbin[root_col_name] = df2_xbin[col]
df2_xbin['expanded_income_baseline'] = df1_xbin['expanded_income']
dist2_xbin = \
create_distribution_table(df2_xbin, groupby='standard_income_bins',
Expand Down
4 changes: 2 additions & 2 deletions taxcalc/tests/tbi_cps_expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2324,7 +2324,7 @@ TABLE dist2_xbin RESULTS:
"3869917.61",
"86741286308.30",
"0.00",
"826699773746.32",
"826699773746.31",
"117350446739.81",
"1136777879078.92",
"99669.26",
Expand Down Expand Up @@ -2444,7 +2444,7 @@ TABLE dist2_xbin RESULTS:
"3028591969682.75",
"3028591969682.75",
"15110730620949.12",
"12341701408829.37"
"12341701408829.38"
]
}
TABLE dist2_xdec RESULTS:
Expand Down
Loading