Merge pull request #1258 from martinholmer/tc-tables

Add --tables option to Tax-Calculator CLI
PSLmodels · Mar 23, 2017 · d2fefbd · d2fefbd
2 parents 626387a + 9d03299
commit d2fefbd
Show file tree

Hide file tree

Showing 4 changed files with 162 additions and 32 deletions.
diff --git a/taxcalc/cli/tc.py b/taxcalc/cli/tc.py
@@ -20,7 +20,7 @@ def main():
     usage_str = 'tc INPUT TAXYEAR {}{}{}'.format(
         '[--reform REFORM] [--assump  ASSUMP]\n',
         '                        ',
-        '[--exact] [--graph] [--ceeu] [--dump]')
+        '[--exact] [--tables] [--graphs] [--ceeu] [--dump]')
     parser = argparse.ArgumentParser(
         prog='',
         usage=usage_str,
@@ -56,7 +56,12 @@ def main():
                               'complicate marginal-tax-rate calculations.'),
                         default=False,
                         action="store_true")
-    parser.add_argument('--graph',
+    parser.add_argument('--tables',
+                        help=('optional flag that causes distributional '
+                              'tables to be written to a text file.'),
+                        default=False,
+                        action="store_true")
+    parser.add_argument('--graphs',
                         help=('optional flag that causes graphs to be written '
                               'to HTML files for viewing in browser.'),
                         default=False,
@@ -132,7 +137,8 @@ def main():
                      aging_input_data=aging,
                      exact_calculations=args.exact)
     tcio.analyze(writing_output_file=True,
-                 output_graph=args.graph,
+                 output_tables=args.tables,
+                 output_graphs=args.graphs,
                  output_ceeu=args.ceeu,
                  output_dump=args.dump)
     # return no-error exit code

diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
@@ -8,6 +8,7 @@
 import os
 import copy
 import six
+import numpy as np
 import pandas as pd
 from taxcalc.policy import Policy
 from taxcalc.records import Records
@@ -20,6 +21,8 @@
 from taxcalc.utils import ce_aftertax_income
 from taxcalc.utils import atr_graph_data, mtr_graph_data
 from taxcalc.utils import xtr_graph_plot, write_graph_file
+from taxcalc.utils import add_weighted_income_bins
+from taxcalc.utils import unweighted_sum, weighted_sum
 
 
 class TaxCalcIO(object):
@@ -141,6 +144,7 @@ def __init__(self, input_data, tax_year, reform, assump,
             raise ValueError(msg)
         self._output_filename = '{}{}{}.csv'.format(inp, ref, asm)
         delete_file(self._output_filename)
+        delete_file(self._output_filename.replace('.csv', '-tab.text'))
         delete_file(self._output_filename.replace('.csv', '-atr.html'))
         delete_file(self._output_filename.replace('.csv', '-mtr.html'))
         # get parameter dictionaries from --reform and --assump files
@@ -164,10 +168,8 @@ def __init__(self, input_data, tax_year, reform, assump,
         # specify gdiff_response object
         if growdiff_response is None:
             gdiff_response = Growdiff()
-            using_growmodel = False
         elif isinstance(growdiff_response, Growdiff):
             gdiff_response = growdiff_response
-            using_growmodel = True
             if self._behavior_has_any_response:
                 msg = 'cannot assume any "behavior" when using GrowModel'
                 raise ValueError(msg)
@@ -184,9 +186,6 @@ def __init__(self, input_data, tax_year, reform, assump,
             pol.implement_reform(param_dict['policy'])
         else:
             pol = Policy(gfactors=gfactors_clp)
-            if using_growmodel:
-                msg = 'TaxCalcIO.ctor: no --reform when using GrowModel'
-                raise ValueError(msg)
         clp = Policy(gfactors=gfactors_clp)
         # check for valid tax_year value
         if tax_year < pol.start_year:
@@ -241,7 +240,8 @@ def output_filepath(self):
         return os.path.join(dirpath, self._output_filename)
 
     def analyze(self, writing_output_file=False,
-                output_graph=False,
+                output_tables=False,
+                output_graphs=False,
                 output_ceeu=False,
                 output_dump=False):
         """
@@ -251,8 +251,12 @@ def analyze(self, writing_output_file=False,
         ----------
         writing_output_file: boolean
 
-        output_graph: boolean
-           whether or not to generate and show HTML graphs of average
+        output_tables: boolean
+           whether or not to generate and write distributional tables
+           to a text file
+
+        output_graphs: boolean
+           whether or not to generate and write HTML graphs of average
            and marginal tax rates by income percentile
 
         output_ceeu: boolean
@@ -296,8 +300,11 @@ def analyze(self, writing_output_file=False,
         # extract output if writing_output_file
         if writing_output_file:
             self.write_output_file(output_dump, mtr_paytax, mtr_inctax)
-        # optionally write --graph output to HTML files
-        if output_graph:
+        # optionally write --tables output to text file
+        if output_tables:
+            self.write_tables_file()
+        # optionally write --graphs output to HTML files
+        if output_graphs:
             self.write_graph_files()
         # optionally write --ceeu output to stdout
         if ceeu_results:
@@ -314,6 +321,58 @@ def write_output_file(self, output_dump, mtr_paytax, mtr_inctax):
         assert len(outdf.index) == self._calc.records.dim
         outdf.to_csv(self._output_filename, index=False, float_format='%.2f')
 
+    def write_tables_file(self):
+        """
+        Write weighted-total tax table by expanded-income decile to text file.
+        """
+        # pylint: disable=too-many-locals
+        tab_fname = self._output_filename.replace('.csv', '-tab.text')
+        # put record variables needed for the decile table into a DataFrame
+        record_cols = ['s006', '_payrolltax', '_iitax', 'lumpsum_tax',
+                       '_combined', '_expanded_income']
+        out = [getattr(self._calc.records, col) for col in record_cols]
+        dfx = pd.DataFrame(data=np.column_stack(out), columns=record_cols)
+        # write message instead of tables if sum of weights is not positive
+        if dfx['s006'].sum() <= 0:
+            with open(tab_fname, 'w') as tfile:
+                msg = 'No tables because sum of weights is not positive\n'
+                tfile.write(msg)
+            return
+        # group records into ten income bins and sum each column within bins
+        dfx = add_weighted_income_bins(dfx, num_bins=10,
+                                       income_measure='_expanded_income',
+                                       weight_by_income_measure=False)
+        gdfx = dfx.groupby('bins', as_index=False)
+        rtns_series = gdfx.apply(unweighted_sum, 's006')
+        itax_series = gdfx.apply(weighted_sum, '_iitax')
+        ptax_series = gdfx.apply(weighted_sum, '_payrolltax')
+        htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax')
+        ctax_series = gdfx.apply(weighted_sum, '_combined')
+        # write decile rows (labeled 0-9) and an all-deciles row (labeled A)
+        with open(tab_fname, 'w') as tfile:
+            row = 'Weighted Totals by Expanded-Income Decile\n'
+            tfile.write(row)
+            row = '    Returns    IncTax    PayTax     LSTax    AllTax\n'
+            tfile.write(row)
+            row = '       (#m)      ($b)      ($b)      ($b)      ($b)\n'
+            tfile.write(row)
+            rowfmt = '{:9.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
+            for decile in range(0, 10):
+                row = '{:2d}'.format(decile)
+                row += rowfmt.format(rtns_series[decile] * 1e-6,
+                                     itax_series[decile] * 1e-9,
+                                     ptax_series[decile] * 1e-9,
+                                     htax_series[decile] * 1e-9,
+                                     ctax_series[decile] * 1e-9)
+                tfile.write(row)
+            row = ' A'
+            row += rowfmt.format(rtns_series.sum() * 1e-6,
+                                 itax_series.sum() * 1e-9,
+                                 ptax_series.sum() * 1e-9,
+                                 htax_series.sum() * 1e-9,
+                                 ctax_series.sum() * 1e-9)
+            tfile.write(row)
+
     def write_graph_files(self):
         """
         Write graphs to HTML files.
@@ -397,6 +456,7 @@ def dump_output(self, mtr_inctax, mtr_paytax):
         for varname in varset:
             vardata = getattr(self._calc.records, varname)
             odf[varname] = vardata
+        odf['FLPDYR'] = self.tax_year()  # tax calculation year
         odf['mtr_inctax'] = mtr_inctax
         odf['mtr_paytax'] = mtr_paytax
         return odf
@@ -405,7 +465,8 @@ def dump_output(self, mtr_inctax, mtr_paytax):
     def growmodel_analysis(input_data, tax_year, reform, assump,
                            aging_input_data, exact_calculations,
                            writing_output_file=False,
-                           output_graph=False,
+                           output_tables=False,
+                           output_graphs=False,
                            output_ceeu=False,
                            output_dump=False):
         """
@@ -416,14 +477,14 @@ def growmodel_analysis(input_data, tax_year, reform, assump,
         First six parameters are same as the first six parameters of
         the TaxCalcIO constructor.
 
-        Last four parameters are same as the first four parameters of
+        Last five parameters are same as the first five parameters of
         the TaxCalcIO analyze method.
 
         Returns
         -------
         Nothing
         """
-        # pylint: disable=too-many-arguments
+        # pylint: disable=too-many-arguments,too-many-locals
         # pylint: disable=superfluous-parens
         progress = 'STARTING ANALYSIS FOR YEAR {}'
         gdiff_dict = {Policy.JSON_START_YEAR: {}}
@@ -438,7 +499,8 @@ def growmodel_analysis(input_data, tax_year, reform, assump,
                                                 exact_calculations,
                                                 growdiff_response, year,
                                                 writing_output_file,
-                                                output_graph,
+                                                output_tables,
+                                                output_graphs,
                                                 output_ceeu,
                                                 output_dump)
             gdiff_dict[year + 1] = gd_dict
@@ -448,7 +510,8 @@ def annual_analysis(input_data, tax_year, reform, assump,
                         aging_input_data, exact_calculations,
                         growdiff_response, year,
                         writing_output_file,
-                        output_graph,
+                        output_tables,
+                        output_graphs,
                         output_ceeu,
                         output_dump):
         """
@@ -459,7 +522,7 @@ def annual_analysis(input_data, tax_year, reform, assump,
         First six parameters are same as the first six parameters of
         the TaxCalcIO constructor.
 
-        Last four parameters are same as the first four parameters of
+        Last five parameters are same as the first five parameters of
         the TaxCalcIO analyze method.
 
         Returns
@@ -478,7 +541,8 @@ def annual_analysis(input_data, tax_year, reform, assump,
         if year == tax_year:
             # conduct final tax analysis for year equal to tax_year
             tcio.analyze(writing_output_file=writing_output_file,
-                         output_graph=output_graph,
+                         output_tables=output_tables,
+                         output_graphs=output_graphs,
                          output_ceeu=output_ceeu,
                          output_dump=output_dump)
             gd_dict = {}

diff --git a/taxcalc/tests/test_taxcalcio.py b/taxcalc/tests/test_taxcalcio.py
@@ -279,9 +279,69 @@ def test_output_otions(rawinputfile, reformfile1, assumpfile1):
             pass  # sometimes we can't remove a generated temporary file
 
 
-def test_graph(reformfile1):
+def test_no_tables(reformfile1):
     """
-    Test TaxCalcIO with output_graph=True.
+    Test TaxCalcIO with output_tables=True but with zero weights.
+    """
+    # create input sample that cannot have distributional tables tabulated
+    nobs = 10
+    idict = dict()
+    idict['RECID'] = [i for i in range(1, nobs + 1)]
+    idict['MARS'] = [2 for i in range(1, nobs + 1)]
+    idict['s006'] = [0.0 for i in range(1, nobs + 1)]
+    idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
+    idict['_expanded_income'] = idict['e00300']
+    idf = pd.DataFrame(idict, columns=list(idict))
+    # create TaxCalcIO object
+    tcio = TaxCalcIO(input_data=idf,
+                     tax_year=2020,
+                     reform=reformfile1.name,
+                     assump=None,
+                     growdiff_response=None,
+                     aging_input_data=False,
+                     exact_calculations=False)
+    # write TaxCalcIO tables file (zero weights, so no tables expected)
+    tcio.analyze(writing_output_file=False, output_tables=True)
+    # delete tables file
+    output_filename = tcio.output_filepath()
+    fname = output_filename.replace('.csv', '-tab.text')
+    if os.path.isfile(fname):
+        os.remove(fname)
+
+
+def test_tables(reformfile1):
+    """
+    Test TaxCalcIO with output_tables=True and with positive weights.
+    """
+    # create input sample that can have distributional tables tabulated
+    nobs = 100
+    idict = dict()
+    idict['RECID'] = [i for i in range(1, nobs + 1)]
+    idict['MARS'] = [2 for i in range(1, nobs + 1)]
+    idict['s006'] = [10.0 for i in range(1, nobs + 1)]
+    idict['e00300'] = [10000 * i for i in range(1, nobs + 1)]
+    idict['_expanded_income'] = idict['e00300']
+    idf = pd.DataFrame(idict, columns=list(idict))
+    # create TaxCalcIO object
+    tcio = TaxCalcIO(input_data=idf,
+                     tax_year=2020,
+                     reform=reformfile1.name,
+                     assump=None,
+                     growdiff_response=None,
+                     aging_input_data=False,
+                     exact_calculations=False)
+    # write TaxCalcIO tables file
+    tcio.analyze(writing_output_file=False, output_tables=True)
+    # delete tables file
+    output_filename = tcio.output_filepath()
+    fname = output_filename.replace('.csv', '-tab.text')
+    if os.path.isfile(fname):
+        os.remove(fname)
+
+
+def test_graphs(reformfile1):
+    """
+    Test TaxCalcIO with output_graphs=True.
     """
     # create graphable input
     nobs = 100
@@ -300,7 +360,7 @@ def test_graph(reformfile1):
                      growdiff_response=None,
                      aging_input_data=False,
                      exact_calculations=False)
-    tcio.analyze(writing_output_file=False, output_graph=True)
+    tcio.analyze(writing_output_file=False, output_graphs=True)
     # delete graph files
     output_filename = tcio.output_filepath()
     fname = output_filename.replace('.csv', '-atr.html')
@@ -401,14 +461,6 @@ def test_bad_ctor_when_using_growmodel(lumpsumreformfile, assumpfile2):
     taxyear = 2020
     recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8}
     recdf = pd.DataFrame(data=recdict, index=[0])
-    with pytest.raises(ValueError):
-        TaxCalcIO(input_data=recdf,
-                  tax_year=taxyear,
-                  reform=None,
-                  assump=None,
-                  growdiff_response=Growdiff(),
-                  aging_input_data=False,
-                  exact_calculations=False)
     with pytest.raises(ValueError):
         TaxCalcIO(input_data=recdf,
                   tax_year=taxyear,
@@ -461,13 +513,14 @@ def test_bad_assumption_file(reformfile1, assumpfile_bad1):
                   exact_calculations=False)
 
 
-def test_growmodel_analysis(reformfile1, assumpfile1):
+def test_growmodel_analysis(reformfile1, assumpfile1, assumpfile2):
     """
     Test TaxCalcIO.growmodel_analysis method with no output.
     """
     taxyear = 2015
     recdict = {'RECID': 1, 'MARS': 1, 'e00300': 100000, 's006': 1e8}
     recdf = pd.DataFrame(data=recdict, index=[0])
+    # test growmodel_analysis with legal assumptions
     try:
         TaxCalcIO.growmodel_analysis(input_data=recdf,
                                      tax_year=taxyear,

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
@@ -139,6 +139,13 @@ def expanded_income_weighted(pdf, col_name):
             float((pdf[swght] * pdf[expinc]).sum() + EPSILON))
 
 
+def unweighted_sum(pdf, col_name):
+    """
+    Return plain (unweighted) sum of the col_name column of DataFrame pdf.
+    """
+    return pdf[col_name].sum()
+
+
 def weighted_sum(pdf, col_name):
     """
     Return weighted sum of Pandas DataFrame col_name items.