From 63869a7759c0a63be2bae86fbe82ecce36486814 Mon Sep 17 00:00:00 2001
From: martinholmer
Date: Thu, 7 Sep 2017 06:09:14 -0400
Subject: [PATCH 1/5] Standardize names for table column and row lists

---
 taxcalc/dropq/dropq.py       | 35 ++++++++++---------
 taxcalc/dropq/dropq_utils.py | 17 ++++++---
 taxcalc/tests/test_utils.py  |  5 +--
 taxcalc/utils.py             | 68 +++++++++++++++++++++++++-----------
 4 files changed, 81 insertions(+), 44 deletions(-)

diff --git a/taxcalc/dropq/dropq.py b/taxcalc/dropq/dropq.py
index b649b21a9..a65a3ec33 100644
--- a/taxcalc/dropq/dropq.py
+++ b/taxcalc/dropq/dropq.py
@@ -17,26 +17,27 @@
 import pandas as pd
 from taxcalc.dropq.dropq_utils import (dropq_calculate, random_seed,
-                                       dropq_summary)
-from taxcalc import (results, TABLE_LABELS, proportional_change_gdp,
-                     Growdiff, Growfactors, Policy)
+                                       dropq_summary,
+                                       AGGR_ROW_NAMES)
+from taxcalc import (results, DIST_TABLE_LABELS,
+                     proportional_change_gdp, Growdiff, Growfactors, Policy)
 
 # specify constants
-PLAN_COLUMN_TYPES = [float] * len(TABLE_LABELS)
+DIST_COLUMN_TYPES = [float] * len(DIST_TABLE_LABELS)
 
 DIFF_COLUMN_TYPES = [int, int, int, float, float, str, str, str, str]
 
-DECILE_ROW_NAMES = ['perc0-10', 'perc10-20', 'perc20-30', 'perc30-40',
-                    'perc40-50', 'perc50-60', 'perc60-70', 'perc70-80',
-                    'perc80-90', 'perc90-100', 'all']
+DEC_ROW_NAMES = ['perc0-10', 'perc10-20', 'perc20-30', 'perc30-40',
+                 'perc40-50', 'perc50-60', 'perc60-70', 'perc70-80',
+                 'perc80-90', 'perc90-100', 'all']
 
 BIN_ROW_NAMES = ['less_than_10', 'ten_twenty', 'twenty_thirty',
                  'thirty_forty', 'forty_fifty', 'fifty_seventyfive',
                  'seventyfive_hundred', 'hundred_twohundred',
                  'twohundred_fivehundred', 'fivehundred_thousand',
                  'thousand_up', 'all']
 
-TOTAL_ROW_NAMES = ['ind_tax', 'payroll_tax', 'combined_tax']
+AGG_ROW_NAMES = AGGR_ROW_NAMES
 
 GDP_ELAST_ROW_NAMES = ['gdp_elasticity']
@@ -120,13 +121,13 @@ def run_nth_year_tax_calc_model(year_n, start_year,
     # construct DataFrames containing aggregate tax totals
     # ... for reform-minus-baseline difference
    aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
-    aggr_d = pd.DataFrame(data=aggrd, index=TOTAL_ROW_NAMES)
+    aggr_d = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
     # ... for baseline
     aggr1 = [aggr_itax_1, aggr_ptax_1, aggr_comb_1]
-    aggr_1 = pd.DataFrame(data=aggr1, index=TOTAL_ROW_NAMES)
+    aggr_1 = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)
     # ... for reform
     aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
-    aggr_2 = pd.DataFrame(data=aggr2, index=TOTAL_ROW_NAMES)
+    aggr_2 = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)
 
     elapsed_time = time.time() - start_time
     print('elapsed time for this run: ', elapsed_time)
@@ -155,13 +156,13 @@ def append_year(pdf):
             append_year(aggr_2))
 
     # optionally construct JSON results tables for year n
-    dec_row_names_n = [x + '_' + str(year_n) for x in DECILE_ROW_NAMES]
+    dec_row_names_n = [x + '_' + str(year_n) for x in DEC_ROW_NAMES]
     dist2_dec_table_n = create_json_table(dist2_dec,
                                           row_names=dec_row_names_n,
-                                          column_types=PLAN_COLUMN_TYPES)
+                                          column_types=DIST_COLUMN_TYPES)
     dist1_dec_table_n = create_json_table(dist1_dec,
                                           row_names=dec_row_names_n,
-                                          column_types=PLAN_COLUMN_TYPES)
+                                          column_types=DIST_COLUMN_TYPES)
     diff_itax_dec_table_n = create_json_table(diff_itax_dec,
                                               row_names=dec_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
@@ -174,10 +175,10 @@ def append_year(pdf):
     bin_row_names_n = [x + '_' + str(year_n) for x in BIN_ROW_NAMES]
     dist2_bin_table_n = create_json_table(dist2_bin,
                                           row_names=bin_row_names_n,
-                                          column_types=PLAN_COLUMN_TYPES)
+                                          column_types=DIST_COLUMN_TYPES)
     dist1_bin_table_n = create_json_table(dist1_bin,
                                           row_names=bin_row_names_n,
-                                          column_types=PLAN_COLUMN_TYPES)
+                                          column_types=DIST_COLUMN_TYPES)
     diff_itax_bin_table_n = create_json_table(diff_itax_bin,
                                               row_names=bin_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
@@ -187,7 +188,7 @@ def append_year(pdf):
     diff_comb_bin_table_n = create_json_table(diff_comb_bin,
                                               row_names=bin_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
-    total_row_names_n = [x + '_' + str(year_n) for x in TOTAL_ROW_NAMES]
+    total_row_names_n = [x + '_' + str(year_n) for x in AGGR_ROW_NAMES]
     aggr_d_table_n = create_json_table(aggr_d,
                                        row_names=total_row_names_n)
     aggr_d_table_n = dict((k, v[0]) for k, v in aggr_d_table_n.items())
diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py
index 8646b2290..64d519d84 100644
--- a/taxcalc/dropq/dropq_utils.py
+++ b/taxcalc/dropq/dropq_utils.py
@@ -12,7 +12,8 @@
                      Consumption, Behavior, Growfactors, Growdiff)
 from taxcalc.utils import (add_income_bins, add_quantile_bins, results,
                            create_difference_table, create_distribution_table,
-                           STATS_COLUMNS, TABLE_COLUMNS, WEBAPP_INCOME_BINS)
+                           STATS_COLUMNS, DIST_TABLE_COLUMNS,
+                           WEBAPP_INCOME_BINS)
 
 
 def check_years(start_year, year_n):
@@ -279,9 +280,11 @@ def fuzz(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz):
         df2[col + suffix] = (df2[col] * df2['nofuzz'] -
                              df1[col] * df2['nofuzz'] + df1[col])
     # main logic of fuzz_df2_records
-    cols_to_skip = set(['num_returns_ItemDed', 'num_returns_StandardDed',
-                        'num_returns_AMT', 's006'])
-    columns_to_fuzz = (set(TABLE_COLUMNS) | set(STATS_COLUMNS)) - cols_to_skip
+    skips = set(['num_returns_ItemDed',
+                 'num_returns_StandardDed',
+                 'num_returns_AMT',
+                 's006'])
+    columns_to_fuzz = (set(DIST_TABLE_COLUMNS) | set(STATS_COLUMNS)) - skips
     df2['mask'] = mask
     # always use expanded income in df1 baseline to groupby into bins
     df2['expanded_income_baseline'] = df1['expanded_income']
@@ -291,11 +294,15 @@ def fuzz(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz):
     return df2
 
 
+AGGR_ROW_NAMES = ['ind_tax', 'payroll_tax', 'combined_tax']
+
+
 def dropq_summary(df1, df2, mask):
     """
     df1 contains raw results for baseline plan
     df2 contains raw results for reform plan
-    mask is the boolean array specifying which rows might be fuzzed
+    mask is the boolean array specifying which records might be fuzzed
+    returns dictionary of summary results DataFrames
     """
     # pylint: disable=too-many-locals
diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index be5f46b74..ca4ea3eec 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -15,7 +15,8 @@
 import pytest
 # pylint: disable=import-error
 from taxcalc import Policy, Records, Behavior, Calculator
-from taxcalc.utils import (TABLE_COLUMNS, TABLE_LABELS, STATS_COLUMNS,
+from taxcalc.utils import (STATS_COLUMNS,
+                           DIST_TABLE_COLUMNS, DIST_TABLE_LABELS,
                            create_distribution_table, create_difference_table,
                            weighted_count_lt_zero, weighted_count_gt_zero,
                            weighted_count, weighted_sum, weighted_mean,
@@ -52,7 +53,7 @@ def test_validity_of_name_lists():
-    assert len(TABLE_COLUMNS) == len(TABLE_LABELS)
+    assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
     assert set(STATS_COLUMNS).issubset(Records.CALCULATED_VARS | {'s006'})
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index f57e5bf74..039c403ed 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -32,22 +32,49 @@
                  'c05800', 'othertaxes', 'refund', 'c07100', 'iitax',
                  'payrolltax', 'combined', 's006']
 
-# Items in the TABLE_COLUMNS list below correspond to the items in the
-# TABLE_LABELS list below; this correspondence allows us to use TABLE_LABELS
-# to map a label to the correct column in our distribution tables.
-TABLE_COLUMNS = ['s006', 'c00100', 'num_returns_StandardDed', 'standard',
-                 'num_returns_ItemDed', 'c04470', 'c04600', 'c04800', 'taxbc',
-                 'c62100', 'num_returns_AMT', 'c09600', 'c05800', 'c07100',
-                 'othertaxes', 'refund', 'iitax', 'payrolltax', 'combined']
-
-TABLE_LABELS = ['Returns', 'AGI', 'Standard Deduction Filers',
-                'Standard Deduction', 'Itemizers',
-                'Itemized Deduction', 'Personal Exemption',
-                'Taxable Income', 'Regular Tax', 'AMTI', 'AMT Filers', 'AMT',
-                'Tax before Credits', 'Non-refundable Credits',
-                'Other Taxes', 'Refundable Credits',
-                'Individual Income Tax Liabilities', 'Payroll Tax Liablities',
-                'Combined Payroll and Individual Income Tax Liabilities']
+# Items in the DIST_TABLE_COLUMNS list below correspond to the items in the
+# DIST_TABLE_LABELS list below; this correspondence allows us to use
+# DIST_TABLE_LABELS to map a label to the correct column in the distribution
+# tables.
+DIST_TABLE_COLUMNS = ['s006',
+                      'c00100',
+                      'num_returns_StandardDed',
+                      'standard',
+                      'num_returns_ItemDed',
+                      'c04470',
+                      'c04600',
+                      'c04800',
+                      'taxbc',
+                      'c62100',
+                      'num_returns_AMT',
+                      'c09600',
+                      'c05800',
+                      'c07100',
+                      'othertaxes',
+                      'refund',
+                      'iitax',
+                      'payrolltax',
+                      'combined']
+
+DIST_TABLE_LABELS = ['Returns',
+                     'AGI',
+                     'Standard Deduction Filers',
+                     'Standard Deduction',
+                     'Itemizers',
+                     'Itemized Deduction',
+                     'Personal Exemption',
+                     'Taxable Income',
+                     'Regular Tax',
+                     'AMTI',
+                     'AMT Filers',
+                     'AMT',
+                     'Tax before Credits',
+                     'Non-refundable Credits',
+                     'Other Taxes',
+                     'Refundable Credits',
+                     'Individual Income Tax Liabilities',
+                     'Payroll Tax Liablities',
+                     'Combined Payroll and Individual Income Tax Liabilities']
 
 # Following list is used in our difference table to label its columns.
 DIFF_TABLE_LABELS = ['Tax Units with Tax Cut',
@@ -325,13 +352,14 @@ def add_columns(pdf):
     # manipulate the data given specified result_type
     if result_type == 'weighted_sum':
         pdf = weighted(pdf, STATS_COLUMNS)
-        gpdf_mean = pdf.groupby('bins', as_index=False)[TABLE_COLUMNS].sum()
+        gpdf = pdf.groupby('bins', as_index=False)
+        gpdf_mean = gpdf[DIST_TABLE_COLUMNS].sum()
         gpdf_mean.drop('bins', axis=1, inplace=True)
-        sum_row = get_sums(pdf)[TABLE_COLUMNS]
+        sum_row = get_sums(pdf)[DIST_TABLE_COLUMNS]
     elif result_type == 'weighted_avg':
-        gpdf_mean = weighted_avg_allcols(pdf, TABLE_COLUMNS,
+        gpdf_mean = weighted_avg_allcols(pdf, DIST_TABLE_COLUMNS,
                                          income_measure=income_measure)
-        sum_row = get_sums(pdf, not_available=True)[TABLE_COLUMNS]
+        sum_row = get_sums(pdf, not_available=True)[DIST_TABLE_COLUMNS]
     else:
         msg = "result_type must be either 'weighted_sum' or 'weighted_avg'"
         raise ValueError(msg)

From 82f835c040631a9c68662929bc30529b361c8977 Mon Sep 17 00:00:00 2001
From: martinholmer
Date: Thu, 7 Sep 2017 06:48:35 -0400
Subject: [PATCH 2/5] Make dropq_summary return dictionary-of-DataFrame results

---
 taxcalc/dropq/dropq.py       |  73 ++++++++-------------
 taxcalc/dropq/dropq_utils.py | 122 +++++++++++++++++++++--------------
 2 files changed, 100 insertions(+), 95 deletions(-)

diff --git a/taxcalc/dropq/dropq.py b/taxcalc/dropq/dropq.py
index a65a3ec33..1d0c0b19f 100644
--- a/taxcalc/dropq/dropq.py
+++ b/taxcalc/dropq/dropq.py
@@ -108,26 +108,7 @@ def run_nth_year_tax_calc_model(year_n, start_year,
     np.random.seed(seed)  # pylint: disable=no-member
 
     # construct dropq summary results from raw results
-    (dist1_dec, dist2_dec,
-     diff_itax_dec, diff_ptax_dec, diff_comb_dec,
-     dist1_bin, dist2_bin,
-     diff_itax_bin, diff_ptax_bin, diff_comb_bin,
-     aggr_itax_d, aggr_ptax_d, aggr_comb_d,
-     aggr_itax_1, aggr_ptax_1, aggr_comb_1,
-     aggr_itax_2, aggr_ptax_2, aggr_comb_2) = dropq_summary(rawres1,
-                                                            rawres2,
-                                                            mask)
-
-    # construct DataFrames containing aggregate tax totals
-    # ... for reform-minus-baseline difference
-    aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
-    aggr_d = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
-    # ... for baseline
-    aggr1 = [aggr_itax_1, aggr_ptax_1, aggr_comb_1]
-    aggr_1 = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)
-    # ... for reform
-    aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
-    aggr_2 = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)
+    summ = dropq_summary(rawres1, rawres2, mask)
 
     elapsed_time = time.time() - start_time
     print('elapsed time for this run: ', elapsed_time)
@@ -141,61 +122,61 @@ def append_year(pdf):
 
     # optionally return non-JSON results
     if not return_json:
-        return (append_year(dist2_dec),
-                append_year(dist1_dec),
-                append_year(diff_itax_dec),
-                append_year(diff_ptax_dec),
-                append_year(diff_comb_dec),
-                append_year(dist2_bin),
-                append_year(dist1_bin),
-                append_year(diff_itax_bin),
-                append_year(diff_ptax_bin),
-                append_year(diff_comb_bin),
-                append_year(aggr_d),
-                append_year(aggr_1),
-                append_year(aggr_2))
+        return (append_year(summ['dist2_dec']),
+                append_year(summ['dist1_dec']),
+                append_year(summ['diff_itax_dec']),
+                append_year(summ['diff_ptax_dec']),
+                append_year(summ['diff_comb_dec']),
+                append_year(summ['dist2_bin']),
+                append_year(summ['dist1_bin']),
+                append_year(summ['diff_itax_bin']),
+                append_year(summ['diff_ptax_bin']),
+                append_year(summ['diff_comb_bin']),
+                append_year(summ['aggr_d']),
+                append_year(summ['aggr_1']),
+                append_year(summ['aggr_2']))
 
     # optionally construct JSON results tables for year n
     dec_row_names_n = [x + '_' + str(year_n) for x in DEC_ROW_NAMES]
-    dist2_dec_table_n = create_json_table(dist2_dec,
+    dist2_dec_table_n = create_json_table(summ['dist2_dec'],
                                           row_names=dec_row_names_n,
                                           column_types=DIST_COLUMN_TYPES)
-    dist1_dec_table_n = create_json_table(dist1_dec,
+    dist1_dec_table_n = create_json_table(summ['dist1_dec'],
                                           row_names=dec_row_names_n,
                                           column_types=DIST_COLUMN_TYPES)
-    diff_itax_dec_table_n = create_json_table(diff_itax_dec,
+    diff_itax_dec_table_n = create_json_table(summ['diff_itax_dec'],
                                               row_names=dec_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
-    diff_ptax_dec_table_n = create_json_table(diff_ptax_dec,
+    diff_ptax_dec_table_n = create_json_table(summ['diff_ptax_dec'],
                                               row_names=dec_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
-    diff_comb_dec_table_n = create_json_table(diff_comb_dec,
+    diff_comb_dec_table_n = create_json_table(summ['diff_comb_dec'],
                                               row_names=dec_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
     bin_row_names_n = [x + '_' + str(year_n) for x in BIN_ROW_NAMES]
-    dist2_bin_table_n = create_json_table(dist2_bin,
+    dist2_bin_table_n = create_json_table(summ['dist2_bin'],
                                           row_names=bin_row_names_n,
                                           column_types=DIST_COLUMN_TYPES)
-    dist1_bin_table_n = create_json_table(dist1_bin,
+    dist1_bin_table_n = create_json_table(summ['dist1_bin'],
                                           row_names=bin_row_names_n,
                                           column_types=DIST_COLUMN_TYPES)
-    diff_itax_bin_table_n = create_json_table(diff_itax_bin,
+    diff_itax_bin_table_n = create_json_table(summ['diff_itax_bin'],
                                               row_names=bin_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
-    diff_ptax_bin_table_n = create_json_table(diff_ptax_bin,
+    diff_ptax_bin_table_n = create_json_table(summ['diff_ptax_bin'],
                                               row_names=bin_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
-    diff_comb_bin_table_n = create_json_table(diff_comb_bin,
+    diff_comb_bin_table_n = create_json_table(summ['diff_comb_bin'],
                                               row_names=bin_row_names_n,
                                               column_types=DIFF_COLUMN_TYPES)
     total_row_names_n = [x + '_' + str(year_n) for x in AGGR_ROW_NAMES]
-    aggr_d_table_n = create_json_table(aggr_d,
+    aggr_d_table_n = create_json_table(summ['aggr_d'],
                                        row_names=total_row_names_n)
     aggr_d_table_n = dict((k, v[0]) for k, v in aggr_d_table_n.items())
-    aggr_1_table_n = create_json_table(aggr_1,
+    aggr_1_table_n = create_json_table(summ['aggr_1'],
                                        row_names=total_row_names_n)
     aggr_1_table_n = dict((k, v[0]) for k, v in aggr_1_table_n.items())
-    aggr_2_table_n = create_json_table(aggr_2,
+    aggr_2_table_n = create_json_table(summ['aggr_2'],
                                        row_names=total_row_names_n)
     aggr_2_table_n = dict((k, v[0]) for k, v in aggr_2_table_n.items())
diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py
index 64d519d84..aed7b8ff8 100644
--- a/taxcalc/dropq/dropq_utils.py
+++ b/taxcalc/dropq/dropq_utils.py
@@ -8,6 +8,7 @@
 import copy
 import hashlib
 import numpy as np
+import pandas as pd
 from taxcalc import (Policy, Records, Calculator,
                      Consumption, Behavior, Growfactors, Growdiff)
 from taxcalc.utils import (add_income_bins, add_quantile_bins, results,
@@ -304,10 +305,12 @@ def dropq_summary(df1, df2, mask):
     mask is the boolean array specifying which records might be fuzzed
     returns dictionary of summary results DataFrames
     """
-    # pylint: disable=too-many-locals
+    # pylint: disable=too-many-statements,too-many-locals
 
     df2 = fuzz_df2_records(df1, df2, mask)
 
+    summ = dict()
+
     # tax difference totals between reform and baseline
     tdiff = df2['iitax_agg'] - df1['iitax']
     aggr_itax_d = (tdiff * df2['s006']).sum()
@@ -315,66 +318,98 @@ def dropq_summary(df1, df2, mask):
     aggr_ptax_d = (tdiff * df2['s006']).sum()
     tdiff = df2['combined_agg'] - df1['combined']
     aggr_comb_d = (tdiff * df2['s006']).sum()
+    aggrd = [aggr_itax_d, aggr_ptax_d, aggr_comb_d]
+    summ['aggr_d'] = pd.DataFrame(data=aggrd, index=AGGR_ROW_NAMES)
 
     # totals for baseline
     aggr_itax_1 = (df1['iitax'] * df1['s006']).sum()
     aggr_ptax_1 = (df1['payrolltax'] * df1['s006']).sum()
     aggr_comb_1 = (df1['combined'] * df1['s006']).sum()
+    aggr1 = [aggr_itax_1, aggr_ptax_1, aggr_comb_1]
+    summ['aggr_1'] = pd.DataFrame(data=aggr1, index=AGGR_ROW_NAMES)
 
     # totals for reform
     aggr_itax_2 = (df2['iitax_agg'] * df2['s006']).sum()
     aggr_ptax_2 = (df2['payrolltax_agg'] * df2['s006']).sum()
     aggr_comb_2 = (df2['combined_agg'] * df2['s006']).sum()
+    aggr2 = [aggr_itax_2, aggr_ptax_2, aggr_comb_2]
+    summ['aggr_2'] = pd.DataFrame(data=aggr2, index=AGGR_ROW_NAMES)
 
-    # create difference tables grouped by deciles and bins
+    # create difference tables grouped by deciles
     df2['iitax'] = df2['iitax_xdec']
-    diff_itax_dec = create_difference_table(df1, df2,
-                                            groupby='weighted_deciles',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='iitax')
+    summ['diff_itax_dec'] = \
+        create_difference_table(df1, df2,
+                                groupby='weighted_deciles',
+                                income_measure='expanded_income',
+                                tax_to_diff='iitax')
+
     df2['payrolltax'] = df2['payrolltax_xdec']
-    diff_ptax_dec = create_difference_table(df1, df2,
-                                            groupby='weighted_deciles',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='payrolltax')
+    summ['diff_ptax_dec'] = \
+        create_difference_table(df1, df2,
+                                groupby='weighted_deciles',
+                                income_measure='expanded_income',
+                                tax_to_diff='payrolltax')
+
     df2['combined'] = df2['combined_xdec']
-    diff_comb_dec = create_difference_table(df1, df2,
-                                            groupby='weighted_deciles',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='combined')
+    summ['diff_comb_dec'] = \
+        create_difference_table(df1, df2,
+                                groupby='weighted_deciles',
+                                income_measure='expanded_income',
+                                tax_to_diff='combined')
+
+    # create difference tables grouped by bins (removing negative-income bin)
     df2['iitax'] = df2['iitax_xbin']
-    diff_itax_bin = create_difference_table(df1, df2,
-                                            groupby='webapp_income_bins',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='iitax')
+    diff_itax_bin = \
+        create_difference_table(df1, df2,
+                                groupby='webapp_income_bins',
+                                income_measure='expanded_income',
+                                tax_to_diff='iitax')
+    diff_itax_bin.drop(diff_itax_bin.index[0], inplace=True)
+    summ['diff_itax_bin'] = diff_itax_bin
+
     df2['payrolltax'] = df2['payrolltax_xbin']
-    diff_ptax_bin = create_difference_table(df1, df2,
-                                            groupby='webapp_income_bins',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='iitax')
+    diff_ptax_bin = \
+        create_difference_table(df1, df2,
+                                groupby='webapp_income_bins',
+                                income_measure='expanded_income',
+                                tax_to_diff='iitax')
+    diff_ptax_bin.drop(diff_ptax_bin.index[0], inplace=True)
+    summ['diff_ptax_bin'] = diff_ptax_bin
+
     df2['combined'] = df2['combined_xbin']
-    diff_comb_bin = create_difference_table(df1, df2,
-                                            groupby='webapp_income_bins',
-                                            income_measure='expanded_income',
-                                            tax_to_diff='combined')
-
-    # create distribution tables grouped by deciles and bins
-    dist1_dec = create_distribution_table(df1, groupby='weighted_deciles',
-                                          income_measure='expanded_income',
-                                          result_type='weighted_sum')
-    dist1_bin = create_distribution_table(df1, groupby='webapp_income_bins',
-                                          income_measure='expanded_income',
-                                          result_type='weighted_sum')
+    diff_comb_bin = \
+        create_difference_table(df1, df2,
+                                groupby='webapp_income_bins',
+                                income_measure='expanded_income',
+                                tax_to_diff='combined')
+    diff_comb_bin.drop(diff_comb_bin.index[0], inplace=True)
+    summ['diff_comb_bin'] = diff_comb_bin
+
+    # create distribution tables grouped by deciles
+    summ['dist1_dec'] = \
+        create_distribution_table(df1, groupby='weighted_deciles',
+                                  income_measure='expanded_income',
+                                  result_type='weighted_sum')
+
     suffix = '_xdec'
     df2_cols_with_suffix = [c for c in list(df2) if c.endswith(suffix)]
     for col in df2_cols_with_suffix:
         root_col_name = col.replace(suffix, '')
         df2[root_col_name] = df2[col]
     df2['expanded_income_baseline'] = df1['expanded_income']
-    dist2_dec = \
+    summ['dist2_dec'] = \
         create_distribution_table(df2, groupby='weighted_deciles',
                                   income_measure='expanded_income_baseline',
                                   result_type='weighted_sum')
+
+    # create distribution tables grouped by bins (removing negative-income bin)
+    dist1_bin = \
+        create_distribution_table(df1, groupby='webapp_income_bins',
+                                  income_measure='expanded_income',
+                                  result_type='weighted_sum')
+    dist1_bin.drop(dist1_bin.index[0], inplace=True)
+    summ['dist1_bin'] = dist1_bin
+
     suffix = '_xbin'
     df2_cols_with_suffix = [c for c in list(df2) if c.endswith(suffix)]
     for col in df2_cols_with_suffix:
@@ -385,19 +420,8 @@ def dropq_summary(df1, df2, mask):
         create_distribution_table(df2, groupby='webapp_income_bins',
                                   income_measure='expanded_income_baseline',
                                   result_type='weighted_sum')
-
-    # remove negative-income bin from each bin result
-    dist1_bin.drop(dist1_bin.index[0], inplace=True)
     dist2_bin.drop(dist2_bin.index[0], inplace=True)
-    diff_itax_bin.drop(diff_itax_bin.index[0], inplace=True)
-    diff_ptax_bin.drop(diff_ptax_bin.index[0], inplace=True)
-    diff_comb_bin.drop(diff_comb_bin.index[0], inplace=True)
+    summ['dist2_bin'] = dist2_bin
 
-    # return tupl of summary results
-    return (dist1_dec, dist2_dec,
-            diff_itax_dec, diff_ptax_dec, diff_comb_dec,
-            dist1_bin, dist2_bin,
-            diff_itax_bin, diff_ptax_bin, diff_comb_bin,
-            aggr_itax_d, aggr_ptax_d, aggr_comb_d,
-            aggr_itax_1, aggr_ptax_1, aggr_comb_1,
-            aggr_itax_2, aggr_ptax_2, aggr_comb_2)
+    # return dictionary of summary results
+    return summ

From d9fff0fb3851091c3aceba7188b622019e609ff6 Mon Sep 17 00:00:00 2001
From: martinholmer
Date: Thu, 7 Sep 2017 09:09:21 -0400
Subject: [PATCH 3/5] Make run_nth_year_tax_calc_model return dict (not tuple)

---
 taxcalc/dropq/dropq.py      | 97 +++++++++++--------------------------
 taxcalc/tests/test_dropq.py | 14 +++---
 2 files changed, 36 insertions(+), 75 deletions(-)

diff --git a/taxcalc/dropq/dropq.py b/taxcalc/dropq/dropq.py
index 1d0c0b19f..a3816a6b4 100644
--- a/taxcalc/dropq/dropq.py
+++ b/taxcalc/dropq/dropq.py
@@ -122,78 +122,39 @@ def append_year(pdf):
 
     # optionally return non-JSON results
     if not return_json:
-        return (append_year(summ['dist2_dec']),
-                append_year(summ['dist1_dec']),
-                append_year(summ['diff_itax_dec']),
-                append_year(summ['diff_ptax_dec']),
-                append_year(summ['diff_comb_dec']),
-                append_year(summ['dist2_bin']),
-                append_year(summ['dist1_bin']),
-                append_year(summ['diff_itax_bin']),
-                append_year(summ['diff_ptax_bin']),
-                append_year(summ['diff_comb_bin']),
-                append_year(summ['aggr_d']),
-                append_year(summ['aggr_1']),
-                append_year(summ['aggr_2']))
+        res = dict()
+        for tbl in summ:
+            res[tbl] = append_year(summ[tbl])
+        return res
 
     # optionally construct JSON results tables for year n
     dec_row_names_n = [x + '_' + str(year_n) for x in DEC_ROW_NAMES]
-    dist2_dec_table_n = create_json_table(summ['dist2_dec'],
-                                          row_names=dec_row_names_n,
-                                          column_types=DIST_COLUMN_TYPES)
-    dist1_dec_table_n = create_json_table(summ['dist1_dec'],
-                                          row_names=dec_row_names_n,
-                                          column_types=DIST_COLUMN_TYPES)
-    diff_itax_dec_table_n = create_json_table(summ['diff_itax_dec'],
-                                              row_names=dec_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
-    diff_ptax_dec_table_n = create_json_table(summ['diff_ptax_dec'],
-                                              row_names=dec_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
-    diff_comb_dec_table_n = create_json_table(summ['diff_comb_dec'],
-                                              row_names=dec_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
     bin_row_names_n = [x + '_' + str(year_n) for x in BIN_ROW_NAMES]
-    dist2_bin_table_n = create_json_table(summ['dist2_bin'],
-                                          row_names=bin_row_names_n,
-                                          column_types=DIST_COLUMN_TYPES)
-    dist1_bin_table_n = create_json_table(summ['dist1_bin'],
-                                          row_names=bin_row_names_n,
-                                          column_types=DIST_COLUMN_TYPES)
-    diff_itax_bin_table_n = create_json_table(summ['diff_itax_bin'],
-                                              row_names=bin_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
-    diff_ptax_bin_table_n = create_json_table(summ['diff_ptax_bin'],
-                                              row_names=bin_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
-    diff_comb_bin_table_n = create_json_table(summ['diff_comb_bin'],
-                                              row_names=bin_row_names_n,
-                                              column_types=DIFF_COLUMN_TYPES)
-    total_row_names_n = [x + '_' + str(year_n) for x in AGGR_ROW_NAMES]
-    aggr_d_table_n = create_json_table(summ['aggr_d'],
-                                       row_names=total_row_names_n)
-    aggr_d_table_n = dict((k, v[0]) for k, v in aggr_d_table_n.items())
-    aggr_1_table_n = create_json_table(summ['aggr_1'],
-                                       row_names=total_row_names_n)
-    aggr_1_table_n = dict((k, v[0]) for k, v in aggr_1_table_n.items())
-    aggr_2_table_n = create_json_table(summ['aggr_2'],
-                                       row_names=total_row_names_n)
-    aggr_2_table_n = dict((k, v[0]) for k, v in aggr_2_table_n.items())
-
-    # return JSON results
-    return (dist2_dec_table_n,
-            dist1_dec_table_n,
-            diff_itax_dec_table_n,
-            diff_ptax_dec_table_n,
-            diff_comb_dec_table_n,
-            dist2_bin_table_n,
-            dist1_bin_table_n,
-            diff_itax_bin_table_n,
-            diff_ptax_bin_table_n,
-            diff_comb_bin_table_n,
-            aggr_d_table_n,
-            aggr_1_table_n,
-            aggr_2_table_n)
+    agg_row_names_n = [x + '_' + str(year_n) for x in AGG_ROW_NAMES]
+    info = dict()
+    for tbl in summ:
+        info[tbl] = {'row_names': [], 'col_types': []}
+        if 'dec' in tbl:
+            info[tbl]['row_names'] = dec_row_names_n
+        elif 'bin' in tbl:
+            info[tbl]['row_names'] = bin_row_names_n
+        else:
+            info[tbl]['row_names'] = agg_row_names_n
+        if 'dist' in tbl:
+            info[tbl]['col_types'] = DIST_COLUMN_TYPES
+        elif 'diff' in tbl:
+            info[tbl]['col_types'] = DIFF_COLUMN_TYPES
+    res = dict()
+    for tbl in summ:
+        if 'aggr' in tbl:
+            res_table = create_json_table(summ[tbl],
+                                          row_names=info[tbl]['row_names'])
+            res[tbl] = dict((k, v[0]) for k, v in res_table.items())
+        else:
+            res[tbl] = create_json_table(summ[tbl],
+                                         row_names=info[tbl]['row_names'],
+                                         column_types=info[tbl]['col_types'])
+    return res
 
 
 def run_nth_year_gdp_elast_model(year_n, start_year,
diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py
index c8792e9ca..0f81c70a9 100644
--- a/taxcalc/tests/test_dropq.py
+++ b/taxcalc/tests/test_dropq.py
@@ -78,19 +78,19 @@ def test_run_tax_calc_model(puf_subsample, resjson):
                                         return_json=resjson)
     assert len(res) == 13
     dump = False  # set to True in order to dump returned results and fail test
-    for idx in range(0, len(res)):
+    for tbl in sorted(res.keys()):
         if resjson:
-            assert isinstance(res[idx], dict)
+            assert isinstance(res[tbl], dict)
         else:
-            assert isinstance(res[idx], pd.DataFrame)
+            assert isinstance(res[tbl], pd.DataFrame)
         if dump:
             if resjson:
-                cols = sorted(res[idx].keys())
+                cols = sorted(res[tbl].keys())
             else:
-                cols = sorted(list(res[idx]))
+                cols = sorted(list(res[tbl]))
             for col in cols:
-                print('<>'.format(idx, col))
-                print(res[idx][col])
+                print('<>'.format(tbl, col))
+                print(res[tbl][col])
     assert not dump

From c6b09fa19346b493079fc783e8a9cf6e49d7410e Mon Sep 17 00:00:00 2001
From: martinholmer
Date: Thu, 7 Sep 2017 09:37:51 -0400
Subject: [PATCH 4/5] Fix test to handle returned dict (not tuple)

---
 taxcalc/tests/test_dropq.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py
index 0f81c70a9..8a3f29d59 100644
--- a/taxcalc/tests/test_dropq.py
+++ b/taxcalc/tests/test_dropq.py
@@ -186,12 +186,11 @@ def test_with_pufcsv(puf_fullsample):
     # create a Public Use File object
     tax_data = puf_fullsample
     # call run_nth_year_tax_calc_model function
-    restuple = run_nth_year_tax_calc_model(year_n, start_year,
-                                           tax_data, usermods,
-                                           return_json=True)
-    total = restuple[len(restuple) - 1]  # the last of element of the tuple
-    dropq_reform_revenue = float(total['combined_tax_9'])
-    dropq_reform_revenue *= 1e-9  # convert to billions of dollars
+    resdict = run_nth_year_tax_calc_model(year_n, start_year,
+                                          tax_data, usermods,
+                                          return_json=True)
+    total = resdict['aggr_2']
+    dropq_reform_revenue = float(total['combined_tax_9']) * 1e-9
     # assert that dropq revenue is similar to the fullsample calculation
     diff = abs(fulls_reform_revenue - dropq_reform_revenue)
     proportional_diff = diff / fulls_reform_revenue

From 22a0ce36c3f83bae7e36745f5ea7bba751097fc1 Mon Sep 17 00:00:00 2001
From: martinholmer
Date: Thu, 7 Sep 2017 10:05:16 -0400
Subject: [PATCH 5/5] Update RELEASES.md info

---
 RELEASES.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/RELEASES.md b/RELEASES.md
index 2371b4e10..40db6a665 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -13,6 +13,9 @@ Release 0.11.0 on 2017-??-??
 - Revise dropq distribution and difference tables used by TaxBrain
   [[#1537](https://github.com/open-source-economics/Tax-Calculator/pull/1537)
   by Anderson Frailey and Martin Holmer]
+- Make dropq run_nth_year_tax_calc_model return a dictionary of results
+  [[#1543](https://github.com/open-source-economics/Tax-Calculator/pull/1543)
+  by Martin Holmer]
 
 **New Features**
 
 - None
@@ -43,7 +46,6 @@ Release 0.10.0 on 2017-08-28
   [[#1524](https://github.com/open-source-economics/Tax-Calculator/pull/1524)
   by Martin Holmer]
 
-
 **Bug Fixes**
 
 - None
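
Usage note (illustrative sketch, not part of the five patches above): the revised
test_with_pufcsv already shows the intended calling pattern -- index the returned
dictionary by table name instead of by tuple position. The standalone Python below
restates that pattern; the `from taxcalc.dropq import ...` path and the
reform_revenue_billions wrapper are assumptions made for illustration, while the
return_json=True call, the 'aggr_2' key, and the 'combined_tax_<year_n>' row name
come directly from the code changed in these patches.

    # Hypothetical caller sketch; only run_nth_year_tax_calc_model, 'aggr_2',
    # and the 'combined_tax_<year_n>' row name are taken from the patches.
    from taxcalc.dropq import run_nth_year_tax_calc_model

    def reform_revenue_billions(year_n, start_year, tax_data, usermods):
        """Return reform combined-tax revenue in billions of dollars."""
        res = run_nth_year_tax_calc_model(year_n, start_year,
                                          tax_data, usermods,
                                          return_json=True)
        # before these patches: total = res[len(res) - 1]  (last tuple element)
        total = res['aggr_2']  # aggregate reform-plan tax totals
        return float(total['combined_tax_' + str(year_n)]) * 1e-9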