diff --git a/taxcalc/cli/tc.py b/taxcalc/cli/tc.py index 9e37076ee..5293c3da4 100644 --- a/taxcalc/cli/tc.py +++ b/taxcalc/cli/tc.py @@ -20,7 +20,7 @@ def main(): usage_str = 'tc INPUT TAXYEAR {}{}{}'.format( '[--reform REFORM] [--assump ASSUMP]\n', ' ', - '[--exact] [--graph] [--ceeu] [--dump]') + '[--exact] [--tables] [--graphs] [--ceeu] [--dump]') parser = argparse.ArgumentParser( prog='', usage=usage_str, @@ -56,7 +56,12 @@ def main(): 'complicate marginal-tax-rate calculations.'), default=False, action="store_true") - parser.add_argument('--graph', + parser.add_argument('--tables', + help=('optional flag that causes distributional ' + 'tables to be written to a text file.'), + default=False, + action="store_true") + parser.add_argument('--graphs', help=('optional flag that causes graphs to be written ' 'to HTML files for viewing in browser.'), default=False, @@ -132,7 +137,8 @@ def main(): aging_input_data=aging, exact_calculations=args.exact) tcio.analyze(writing_output_file=True, - output_graph=args.graph, + output_tables=args.tables, + output_graphs=args.graphs, output_ceeu=args.ceeu, output_dump=args.dump) # return no-error exit code diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py index e172027c7..53f7ce5fa 100644 --- a/taxcalc/taxcalcio.py +++ b/taxcalc/taxcalcio.py @@ -8,6 +8,7 @@ import os import copy import six +import numpy as np import pandas as pd from taxcalc.policy import Policy from taxcalc.records import Records @@ -20,6 +21,8 @@ from taxcalc.utils import ce_aftertax_income from taxcalc.utils import atr_graph_data, mtr_graph_data from taxcalc.utils import xtr_graph_plot, write_graph_file +from taxcalc.utils import add_weighted_income_bins +from taxcalc.utils import unweighted_sum, weighted_sum class TaxCalcIO(object): @@ -141,6 +144,7 @@ def __init__(self, input_data, tax_year, reform, assump, raise ValueError(msg) self._output_filename = '{}{}{}.csv'.format(inp, ref, asm) delete_file(self._output_filename) + 
delete_file(self._output_filename.replace('.csv', '-tab.text')) delete_file(self._output_filename.replace('.csv', '-atr.html')) delete_file(self._output_filename.replace('.csv', '-mtr.html')) # get parameter dictionaries from --reform and --assump files @@ -164,10 +168,8 @@ def __init__(self, input_data, tax_year, reform, assump, # specify gdiff_response object if growdiff_response is None: gdiff_response = Growdiff() - using_growmodel = False elif isinstance(growdiff_response, Growdiff): gdiff_response = growdiff_response - using_growmodel = True if self._behavior_has_any_response: msg = 'cannot assume any "behavior" when using GrowModel' raise ValueError(msg) @@ -184,9 +186,6 @@ def __init__(self, input_data, tax_year, reform, assump, pol.implement_reform(param_dict['policy']) else: pol = Policy(gfactors=gfactors_clp) - if using_growmodel: - msg = 'TaxCalcIO.ctor: no --reform when using GrowModel' - raise ValueError(msg) clp = Policy(gfactors=gfactors_clp) # check for valid tax_year value if tax_year < pol.start_year: @@ -241,7 +240,8 @@ def output_filepath(self): return os.path.join(dirpath, self._output_filename) def analyze(self, writing_output_file=False, - output_graph=False, + output_tables=False, + output_graphs=False, output_ceeu=False, output_dump=False): """ @@ -251,8 +251,12 @@ def analyze(self, writing_output_file=False, ---------- writing_output_file: boolean - output_graph: boolean - whether or not to generate and show HTML graphs of average + output_tables: boolean + whether or not to generate and write distributional tables + to a text file + + output_graphs: boolean + whether or not to generate and write HTML graphs of average and marginal tax rates by income percentile output_ceeu: boolean @@ -296,8 +300,11 @@ def analyze(self, writing_output_file=False, # extract output if writing_output_file if writing_output_file: self.write_output_file(output_dump, mtr_paytax, mtr_inctax) - # optionally write --graph output to HTML files - if output_graph: + 
def write_tables_file(self):
    """
    Write distributional tables to a text file.

    The text file name is the output file name with '.csv' replaced
    by '-tab.text'.  The file holds one table of weighted totals by
    expanded-income decile: one row per decile followed by an 'A' row
    containing the sums over all deciles.  The returns column is
    expressed in millions (#m) and the tax columns in billions ($b).

    If the sum of the s006 weights is not positive, no table can be
    tabulated and the file contains only a one-line explanation.

    Returns
    -------
    Nothing
    """
    # pylint: disable=too-many-locals
    # single source of truth for both the binning and the row loop
    num_deciles = 10
    tab_fname = self._output_filename.replace('.csv', '-tab.text')
    # create expanded-income decile table containing weighted total levels
    record_cols = ['s006', '_payrolltax', '_iitax', 'lumpsum_tax',
                   '_combined', '_expanded_income']
    out = [getattr(self._calc.records, col) for col in record_cols]
    dfx = pd.DataFrame(data=np.column_stack(out), columns=record_cols)
    # skip tables if there are not some positive weights
    if dfx['s006'].sum() <= 0:
        with open(tab_fname, 'w') as tfile:
            msg = 'No tables because sum of weights is not positive\n'
            tfile.write(msg)
        return
    # construct distributional table elements
    dfx = add_weighted_income_bins(dfx, num_bins=num_deciles,
                                   income_measure='_expanded_income',
                                   weight_by_income_measure=False)
    gdfx = dfx.groupby('bins', as_index=False)
    # pair each per-decile total with the factor that converts it to the
    # units shown in the table header (1e-6 for #m and 1e-9 for $b)
    scaled_totals = [
        (gdfx.apply(unweighted_sum, 's006'), 1e-6),
        (gdfx.apply(weighted_sum, '_iitax'), 1e-9),
        (gdfx.apply(weighted_sum, '_payrolltax'), 1e-9),
        (gdfx.apply(weighted_sum, 'lumpsum_tax'), 1e-9),
        (gdfx.apply(weighted_sum, '_combined'), 1e-9),
    ]
    # header widths mirror the data rows: a two-character row label plus
    # a nine-character first column, then ten characters per column
    hdrfmt = '{:>11}{:>10}{:>10}{:>10}{:>10}\n'
    rowfmt = '{:9.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
    lines = [
        'Weighted Totals by Expanded-Income Decile\n',
        hdrfmt.format('Returns', 'IncTax', 'PayTax', 'LSTax', 'AllTax'),
        hdrfmt.format('(#m)', '($b)', '($b)', '($b)', '($b)'),
    ]
    for decile in range(num_deciles):
        values = [totals[decile] * scale for totals, scale in scaled_totals]
        lines.append('{:2d}'.format(decile) + rowfmt.format(*values))
    values = [totals.sum() * scale for totals, scale in scaled_totals]
    lines.append(' A' + rowfmt.format(*values))
    # write the file only after every row has been built so that a
    # failure above never leaves a partially written table behind
    with open(tab_fname, 'w') as tfile:
        tfile.writelines(lines)
def _run_tables_analysis(reform_filename, nobs, weight):
    """
    Run TaxCalcIO.analyze with output_tables=True on synthetic input.

    Builds an input DataFrame with nobs records that all have the same
    s006 weight, constructs a TaxCalcIO object for tax year 2020 using
    the reform file named reform_filename, calls analyze without
    writing the output file, and finally removes the tables file if
    one exists at the expected path.
    """
    idict = dict()
    idict['RECID'] = list(range(1, nobs + 1))
    idict['MARS'] = [2] * nobs
    idict['s006'] = [weight] * nobs
    idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
    idict['_expanded_income'] = idict['e00300']
    idf = pd.DataFrame(idict, columns=list(idict))
    # create TaxCalcIO object
    tcio = TaxCalcIO(input_data=idf,
                     tax_year=2020,
                     reform=reform_filename,
                     assump=None,
                     growdiff_response=None,
                     aging_input_data=False,
                     exact_calculations=False)
    # create TaxCalcIO tables file
    tcio.analyze(writing_output_file=False, output_tables=True)
    # delete tables file
    output_filename = tcio.output_filepath()
    fname = output_filename.replace('.csv', '-tab.text')
    if os.path.isfile(fname):
        os.remove(fname)


def test_no_tables(reformfile1):
    """
    Test TaxCalcIO with output_tables=True but with zero weights.
    """
    # input sample that cannot have distributional tables tabulated
    _run_tables_analysis(reformfile1.name, nobs=10, weight=0.0)


def test_tables(reformfile1):
    """
    Test TaxCalcIO with output_tables=True and with positive weights.
    """
    # input sample that can have distributional tables tabulated
    _run_tables_analysis(reformfile1.name, nobs=100, weight=10.0)
""" # create graphable input nobs = 100 @@ -300,7 +360,7 @@ def test_graph(reformfile1): growdiff_response=None, aging_input_data=False, exact_calculations=False) - tcio.analyze(writing_output_file=False, output_graph=True) + tcio.analyze(writing_output_file=False, output_graphs=True) # delete graph files output_filename = tcio.output_filepath() fname = output_filename.replace('.csv', '-atr.html') @@ -401,14 +461,6 @@ def test_bad_ctor_when_using_growmodel(lumpsumreformfile, assumpfile2): taxyear = 2020 recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8} recdf = pd.DataFrame(data=recdict, index=[0]) - with pytest.raises(ValueError): - TaxCalcIO(input_data=recdf, - tax_year=taxyear, - reform=None, - assump=None, - growdiff_response=Growdiff(), - aging_input_data=False, - exact_calculations=False) with pytest.raises(ValueError): TaxCalcIO(input_data=recdf, tax_year=taxyear, @@ -461,13 +513,14 @@ def test_bad_assumption_file(reformfile1, assumpfile_bad1): exact_calculations=False) -def test_growmodel_analysis(reformfile1, assumpfile1): +def test_growmodel_analysis(reformfile1, assumpfile1, assumpfile2): """ Test TaxCalcIO.growmodel_analysis method with no output. """ taxyear = 2015 recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8} recdf = pd.DataFrame(data=recdict, index=[0]) + # test growmodel_analysis with legal assumptions try: TaxCalcIO.growmodel_analysis(input_data=recdf, tax_year=taxyear, diff --git a/taxcalc/utils.py b/taxcalc/utils.py index 4f6bf281f..58779727e 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -139,6 +139,13 @@ def expanded_income_weighted(pdf, col_name): float((pdf[swght] * pdf[expinc]).sum() + EPSILON)) +def unweighted_sum(pdf, col_name): + """ + Return unweighted sum of Pandas DataFrame col_name items. + """ + return pdf[col_name].sum() + + def weighted_sum(pdf, col_name): """ Return weighted sum of Pandas DataFrame col_name items.