Skip to content

Commit

Permalink
Merge pull request #1258 from martinholmer/tc-tables
Browse files Browse the repository at this point in the history
Add --tables option to Tax-Calculator CLI
  • Loading branch information
martinholmer authored Mar 23, 2017
2 parents 626387a + 9d03299 commit d2fefbd
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 32 deletions.
12 changes: 9 additions & 3 deletions taxcalc/cli/tc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def main():
usage_str = 'tc INPUT TAXYEAR {}{}{}'.format(
'[--reform REFORM] [--assump ASSUMP]\n',
' ',
'[--exact] [--graph] [--ceeu] [--dump]')
'[--exact] [--tables] [--graphs] [--ceeu] [--dump]')
parser = argparse.ArgumentParser(
prog='',
usage=usage_str,
Expand Down Expand Up @@ -56,7 +56,12 @@ def main():
'complicate marginal-tax-rate calculations.'),
default=False,
action="store_true")
parser.add_argument('--graph',
parser.add_argument('--tables',
help=('optional flag that causes distributional '
'tables to be written to a text file.'),
default=False,
action="store_true")
parser.add_argument('--graphs',
help=('optional flag that causes graphs to be written '
'to HTML files for viewing in browser.'),
default=False,
Expand Down Expand Up @@ -132,7 +137,8 @@ def main():
aging_input_data=aging,
exact_calculations=args.exact)
tcio.analyze(writing_output_file=True,
output_graph=args.graph,
output_tables=args.tables,
output_graphs=args.graphs,
output_ceeu=args.ceeu,
output_dump=args.dump)
# return no-error exit code
Expand Down
98 changes: 81 additions & 17 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import copy
import six
import numpy as np
import pandas as pd
from taxcalc.policy import Policy
from taxcalc.records import Records
Expand All @@ -20,6 +21,8 @@
from taxcalc.utils import ce_aftertax_income
from taxcalc.utils import atr_graph_data, mtr_graph_data
from taxcalc.utils import xtr_graph_plot, write_graph_file
from taxcalc.utils import add_weighted_income_bins
from taxcalc.utils import unweighted_sum, weighted_sum


class TaxCalcIO(object):
Expand Down Expand Up @@ -141,6 +144,7 @@ def __init__(self, input_data, tax_year, reform, assump,
raise ValueError(msg)
self._output_filename = '{}{}{}.csv'.format(inp, ref, asm)
delete_file(self._output_filename)
delete_file(self._output_filename.replace('.csv', '-tab.text'))
delete_file(self._output_filename.replace('.csv', '-atr.html'))
delete_file(self._output_filename.replace('.csv', '-mtr.html'))
# get parameter dictionaries from --reform and --assump files
Expand All @@ -164,10 +168,8 @@ def __init__(self, input_data, tax_year, reform, assump,
# specify gdiff_response object
if growdiff_response is None:
gdiff_response = Growdiff()
using_growmodel = False
elif isinstance(growdiff_response, Growdiff):
gdiff_response = growdiff_response
using_growmodel = True
if self._behavior_has_any_response:
msg = 'cannot assume any "behavior" when using GrowModel'
raise ValueError(msg)
Expand All @@ -184,9 +186,6 @@ def __init__(self, input_data, tax_year, reform, assump,
pol.implement_reform(param_dict['policy'])
else:
pol = Policy(gfactors=gfactors_clp)
if using_growmodel:
msg = 'TaxCalcIO.ctor: no --reform when using GrowModel'
raise ValueError(msg)
clp = Policy(gfactors=gfactors_clp)
# check for valid tax_year value
if tax_year < pol.start_year:
Expand Down Expand Up @@ -241,7 +240,8 @@ def output_filepath(self):
return os.path.join(dirpath, self._output_filename)

def analyze(self, writing_output_file=False,
output_graph=False,
output_tables=False,
output_graphs=False,
output_ceeu=False,
output_dump=False):
"""
Expand All @@ -251,8 +251,12 @@ def analyze(self, writing_output_file=False,
----------
writing_output_file: boolean
output_graph: boolean
whether or not to generate and show HTML graphs of average
output_tables: boolean
whether or not to generate and write distributional tables
to a text file
output_graphs: boolean
whether or not to generate and write HTML graphs of average
and marginal tax rates by income percentile
output_ceeu: boolean
Expand Down Expand Up @@ -296,8 +300,11 @@ def analyze(self, writing_output_file=False,
# extract output if writing_output_file
if writing_output_file:
self.write_output_file(output_dump, mtr_paytax, mtr_inctax)
# optionally write --graph output to HTML files
if output_graph:
# optionally write --tables output to text file
if output_tables:
self.write_tables_file()
# optionally write --graphs output to HTML files
if output_graphs:
self.write_graph_files()
# optionally write --ceeu output to stdout
if ceeu_results:
Expand All @@ -314,6 +321,58 @@ def write_output_file(self, output_dump, mtr_paytax, mtr_inctax):
assert len(outdf.index) == self._calc.records.dim
outdf.to_csv(self._output_filename, index=False, float_format='%.2f')

def write_tables_file(self):
    """
    Write distributional tables to a text file.

    The text file name is the output file name with its '.csv' suffix
    replaced by '-tab.text'.  The table shows, for each expanded-income
    decile and for all deciles combined, the number of returns (in
    millions) and the income tax, payroll tax, lump-sum tax, and
    combined tax (each in billions of dollars).

    If the sum of the s006 weights is not positive, no table can be
    tabulated and the file contains only a one-line explanation.

    Returns
    -------
    Nothing
    """
    # pylint: disable=too-many-locals
    # single source of truth for the bin count and the table row count
    num_deciles = 10
    tab_fname = self._output_filename.replace('.csv', '-tab.text')
    # gather the record variables used in the table into one DataFrame
    record_cols = ['s006', '_payrolltax', '_iitax', 'lumpsum_tax',
                   '_combined', '_expanded_income']
    out = [getattr(self._calc.records, col) for col in record_cols]
    dfx = pd.DataFrame(data=np.column_stack(out), columns=record_cols)
    # skip tables if there are not some positive weights
    if dfx['s006'].sum() <= 0:
        with open(tab_fname, 'w') as tfile:
            msg = 'No tables because sum of weights is not positive\n'
            tfile.write(msg)
        return
    # construct distributional table elements
    dfx = add_weighted_income_bins(dfx, num_bins=num_deciles,
                                   income_measure='_expanded_income',
                                   weight_by_income_measure=False)
    gdfx = dfx.groupby('bins', as_index=False)
    # table columns in output order, each paired with the factor that
    # converts it to table units (returns to millions, dollars to billions)
    columns = [(gdfx.apply(unweighted_sum, 's006'), 1e-6),
               (gdfx.apply(weighted_sum, '_iitax'), 1e-9),
               (gdfx.apply(weighted_sum, '_payrolltax'), 1e-9),
               (gdfx.apply(weighted_sum, 'lumpsum_tax'), 1e-9),
               (gdfx.apply(weighted_sum, '_combined'), 1e-9)]
    # header cells are right-justified to the same widths as the data
    # cells (two-character row label plus 9, then 10 for each other column)
    hdrfmt = '{:>11s}{:>10s}{:>10s}{:>10s}{:>10s}\n'
    rowfmt = '{:9.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
    lines = ['Weighted Totals by Expanded-Income Decile\n',
             hdrfmt.format('Returns', 'IncTax', 'PayTax', 'LSTax', 'AllTax'),
             hdrfmt.format('(#m)', '($b)', '($b)', '($b)', '($b)')]
    for decile in range(num_deciles):
        values = [series[decile] * scale for series, scale in columns]
        lines.append('{:2d}'.format(decile) + rowfmt.format(*values))
    # final row, labelled 'A', holds the totals over all deciles
    totals = [series.sum() * scale for series, scale in columns]
    lines.append(' A' + rowfmt.format(*totals))
    # write total levels decile table to text file in one call
    with open(tab_fname, 'w') as tfile:
        tfile.writelines(lines)

def write_graph_files(self):
"""
Write graphs to HTML files.
Expand Down Expand Up @@ -397,6 +456,7 @@ def dump_output(self, mtr_inctax, mtr_paytax):
for varname in varset:
vardata = getattr(self._calc.records, varname)
odf[varname] = vardata
odf['FLPDYR'] = self.tax_year() # tax calculation year
odf['mtr_inctax'] = mtr_inctax
odf['mtr_paytax'] = mtr_paytax
return odf
Expand All @@ -405,7 +465,8 @@ def dump_output(self, mtr_inctax, mtr_paytax):
def growmodel_analysis(input_data, tax_year, reform, assump,
aging_input_data, exact_calculations,
writing_output_file=False,
output_graph=False,
output_tables=False,
output_graphs=False,
output_ceeu=False,
output_dump=False):
"""
Expand All @@ -416,14 +477,14 @@ def growmodel_analysis(input_data, tax_year, reform, assump,
First six parameters are same as the first six parameters of
the TaxCalcIO constructor.
Last four parameters are same as the first four parameters of
Last five parameters are same as the first five parameters of
the TaxCalcIO analyze method.
Returns
-------
Nothing
"""
# pylint: disable=too-many-arguments
# pylint: disable=too-many-arguments,too-many-locals
# pylint: disable=superfluous-parens
progress = 'STARTING ANALYSIS FOR YEAR {}'
gdiff_dict = {Policy.JSON_START_YEAR: {}}
Expand All @@ -438,7 +499,8 @@ def growmodel_analysis(input_data, tax_year, reform, assump,
exact_calculations,
growdiff_response, year,
writing_output_file,
output_graph,
output_tables,
output_graphs,
output_ceeu,
output_dump)
gdiff_dict[year + 1] = gd_dict
Expand All @@ -448,7 +510,8 @@ def annual_analysis(input_data, tax_year, reform, assump,
aging_input_data, exact_calculations,
growdiff_response, year,
writing_output_file,
output_graph,
output_tables,
output_graphs,
output_ceeu,
output_dump):
"""
Expand All @@ -459,7 +522,7 @@ def annual_analysis(input_data, tax_year, reform, assump,
First six parameters are same as the first six parameters of
the TaxCalcIO constructor.
Last four parameters are same as the first four parameters of
Last five parameters are same as the first five parameters of
the TaxCalcIO analyze method.
Returns
Expand All @@ -478,7 +541,8 @@ def annual_analysis(input_data, tax_year, reform, assump,
if year == tax_year:
# conduct final tax analysis for year equal to tax_year
tcio.analyze(writing_output_file=writing_output_file,
output_graph=output_graph,
output_tables=output_tables,
output_graphs=output_graphs,
output_ceeu=output_ceeu,
output_dump=output_dump)
gd_dict = {}
Expand Down
77 changes: 65 additions & 12 deletions taxcalc/tests/test_taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,69 @@ def test_output_otions(rawinputfile, reformfile1, assumpfile1):
pass # sometimes we can't remove a generated temporary file


def test_graph(reformfile1):
def test_no_tables(reformfile1):
    """
    Test TaxCalcIO with output_tables=True but with zero weights.
    """
    # create input sample that cannot have distributional tables tabulated
    # because every s006 weight is zero
    nobs = 10
    idict = dict()
    idict['RECID'] = list(range(1, nobs + 1))
    idict['MARS'] = [2] * nobs
    idict['s006'] = [0.0] * nobs
    idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
    idict['_expanded_income'] = idict['e00300']
    idf = pd.DataFrame(idict, columns=list(idict))
    # create TaxCalcIO object
    tcio = TaxCalcIO(input_data=idf,
                     tax_year=2020,
                     reform=reformfile1.name,
                     assump=None,
                     growdiff_response=None,
                     aging_input_data=False,
                     exact_calculations=False)
    # create TaxCalcIO tables file
    tcio.analyze(writing_output_file=False, output_tables=True)
    # delete tables file
    output_filename = tcio.output_filepath()
    fname = output_filename.replace('.csv', '-tab.text')
    if os.path.isfile(fname):
        os.remove(fname)


def test_tables(reformfile1):
    """
    Test TaxCalcIO with output_tables=True and with positive weights.
    """
    # create input that can be tabulated (every s006 weight is positive)
    nobs = 100
    idict = dict()
    idict['RECID'] = list(range(1, nobs + 1))
    idict['MARS'] = [2] * nobs
    idict['s006'] = [10.0] * nobs
    idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
    idict['_expanded_income'] = idict['e00300']
    idf = pd.DataFrame(idict, columns=list(idict))
    # create TaxCalcIO object
    tcio = TaxCalcIO(input_data=idf,
                     tax_year=2020,
                     reform=reformfile1.name,
                     assump=None,
                     growdiff_response=None,
                     aging_input_data=False,
                     exact_calculations=False)
    # create TaxCalcIO tables file
    tcio.analyze(writing_output_file=False, output_tables=True)
    # delete tables file
    output_filename = tcio.output_filepath()
    fname = output_filename.replace('.csv', '-tab.text')
    if os.path.isfile(fname):
        os.remove(fname)


def test_graphs(reformfile1):
"""
Test TaxCalcIO with output_graphs=True.
"""
# create graphable input
nobs = 100
Expand All @@ -300,7 +360,7 @@ def test_graph(reformfile1):
growdiff_response=None,
aging_input_data=False,
exact_calculations=False)
tcio.analyze(writing_output_file=False, output_graph=True)
tcio.analyze(writing_output_file=False, output_graphs=True)
# delete graph files
output_filename = tcio.output_filepath()
fname = output_filename.replace('.csv', '-atr.html')
Expand Down Expand Up @@ -401,14 +461,6 @@ def test_bad_ctor_when_using_growmodel(lumpsumreformfile, assumpfile2):
taxyear = 2020
recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8}
recdf = pd.DataFrame(data=recdict, index=[0])
with pytest.raises(ValueError):
TaxCalcIO(input_data=recdf,
tax_year=taxyear,
reform=None,
assump=None,
growdiff_response=Growdiff(),
aging_input_data=False,
exact_calculations=False)
with pytest.raises(ValueError):
TaxCalcIO(input_data=recdf,
tax_year=taxyear,
Expand Down Expand Up @@ -461,13 +513,14 @@ def test_bad_assumption_file(reformfile1, assumpfile_bad1):
exact_calculations=False)


def test_growmodel_analysis(reformfile1, assumpfile1):
def test_growmodel_analysis(reformfile1, assumpfile1, assumpfile2):
"""
Test TaxCalcIO.growmodel_analysis method with no output.
"""
taxyear = 2015
recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8}
recdf = pd.DataFrame(data=recdict, index=[0])
# test growmodel_analysis with legal assumptions
try:
TaxCalcIO.growmodel_analysis(input_data=recdf,
tax_year=taxyear,
Expand Down
7 changes: 7 additions & 0 deletions taxcalc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,13 @@ def expanded_income_weighted(pdf, col_name):
float((pdf[swght] * pdf[expinc]).sum() + EPSILON))


def unweighted_sum(pdf, col_name):
    """
    Return unweighted sum of Pandas DataFrame col_name items.

    Parameters
    ----------
    pdf: Pandas DataFrame
        object containing a column named col_name
    col_name: string
        name of the pdf column whose items are summed

    Returns
    -------
    sum of the col_name items, with no weights applied
    """
    return pdf[col_name].sum()


def weighted_sum(pdf, col_name):
"""
Return weighted sum of Pandas DataFrame col_name items.
Expand Down

0 comments on commit d2fefbd

Please sign in to comment.