Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct WEBBIN_ROW_NAMES for bottom bin shown by TaxBrain #1889

Merged
merged 1 commit into from
Feb 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions taxcalc/tbi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from taxcalc.tbi.tbi import (run_nth_year_tax_calc_model,
WEBBIN_ROW_NAMES,
run_nth_year_gdp_elast_model,
reform_warnings_errors)
10 changes: 8 additions & 2 deletions taxcalc/tbi/tbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@
create_dict_table,
AGGR_ROW_NAMES)
from taxcalc import (DIST_VARIABLES, DIST_TABLE_LABELS, DIFF_TABLE_LABELS,
DECILE_ROW_NAMES, WEBBIN_ROW_NAMES,
proportional_change_in_gdp, Growdiff, Growfactors, Policy)
proportional_change_in_gdp, Growdiff, Growfactors, Policy,
DECILE_ROW_NAMES)

WEBBIN_ROW_NAMES = ['$0-10K', '$10-20K', '$20-30K', '$30-40K',
'$40-50K', '$50-75K', '$75-100K',
'$100-200K', '$200-500K',
'$500-1000K', '>$1000K', 'all']
# the negative-income bin is removed in the summary() function

AGG_ROW_NAMES = AGGR_ROW_NAMES

Expand Down
14 changes: 7 additions & 7 deletions taxcalc/tbi/tbi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from taxcalc.utils import (add_income_bins, add_quantile_bins,
create_difference_table, create_distribution_table,
DIST_VARIABLES, DIST_TABLE_COLUMNS,
WEBAPP_INCOME_BINS, read_egg_csv)
STANDARD_INCOME_BINS, read_egg_csv)


def check_years_return_first_year(year_n, start_year, use_puf_not_cps):
Expand Down Expand Up @@ -330,7 +330,7 @@ def create(df1, df2, bin_type, imeasure, suffix, cols_to_fuzz, do_fuzzing):
if bin_type == 'dec':
df2 = add_quantile_bins(df2, imeasure, 10)
elif bin_type == 'bin':
df2 = add_income_bins(df2, imeasure, bins=WEBAPP_INCOME_BINS)
df2 = add_income_bins(df2, imeasure, bins=STANDARD_INCOME_BINS)
else:
df2 = add_quantile_bins(df2, imeasure, 1)
gdf2 = df2.groupby('bins')
Expand Down Expand Up @@ -427,7 +427,7 @@ def summary(df1, df2, mask):
df2['iitax'] = df2['iitax_xbin']
diff_itax_xbin = \
create_difference_table(df1, df2,
groupby='webapp_income_bins',
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='iitax')
diff_itax_xbin.drop(diff_itax_xbin.index[0], inplace=True)
Expand All @@ -436,7 +436,7 @@ def summary(df1, df2, mask):
df2['payrolltax'] = df2['payrolltax_xbin']
diff_ptax_xbin = \
create_difference_table(df1, df2,
groupby='webapp_income_bins',
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='payrolltax')
diff_ptax_xbin.drop(diff_ptax_xbin.index[0], inplace=True)
Expand All @@ -445,7 +445,7 @@ def summary(df1, df2, mask):
df2['combined'] = df2['combined_xbin']
diff_comb_xbin = \
create_difference_table(df1, df2,
groupby='webapp_income_bins',
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='combined')
diff_comb_xbin.drop(diff_comb_xbin.index[0], inplace=True)
Expand All @@ -470,7 +470,7 @@ def summary(df1, df2, mask):

# create distribution tables grouped by xbin (removing negative-income bin)
dist1_xbin = \
create_distribution_table(df1, groupby='webapp_income_bins',
create_distribution_table(df1, groupby='standard_income_bins',
income_measure='expanded_income',
result_type='weighted_sum')
dist1_xbin.drop(dist1_xbin.index[0], inplace=True)
Expand All @@ -483,7 +483,7 @@ def summary(df1, df2, mask):
df2[root_col_name] = df2[col]
df2['expanded_income_baseline'] = df1['expanded_income']
dist2_xbin = \
create_distribution_table(df2, groupby='webapp_income_bins',
create_distribution_table(df2, groupby='standard_income_bins',
income_measure='expanded_income_baseline',
result_type='weighted_sum')
dist2_xbin.drop(dist2_xbin.index[0], inplace=True)
Expand Down
1 change: 0 additions & 1 deletion taxcalc/tests/test_cpscsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import os
import sys
import json
import pytest
import numpy as np
import pandas as pd
# pylint: disable=import-error
Expand Down
3 changes: 3 additions & 0 deletions taxcalc/tests/test_taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,9 @@ def test_no_tables_or_graphs(reformfile1):
fname = output_filename.replace('.csv', '-mtr.html')
if os.path.isfile(fname):
os.remove(fname)
fname = output_filename.replace('.csv', '-qin.html')
if os.path.isfile(fname):
os.remove(fname)


def test_tables(reformfile1):
Expand Down
48 changes: 24 additions & 24 deletions taxcalc/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_create_tables(cps_subsample):

diff = create_difference_table(calc1.dataframe(DIFF_VARIABLES),
calc2.dataframe(DIFF_VARIABLES),
groupby='webapp_income_bins',
groupby='standard_income_bins',
income_measure='expanded_income',
tax_to_diff='iitax')
assert isinstance(diff, pd.DataFrame)
Expand Down Expand Up @@ -351,7 +351,7 @@ def test_create_tables(cps_subsample):
print('{:.0f},'.format(val))

dist = create_distribution_table(calc2.dataframe(DIST_VARIABLES),
groupby='webapp_income_bins',
groupby='standard_income_bins',
income_measure='expanded_income',
result_type='weighted_sum')
assert isinstance(dist, pd.DataFrame)
Expand Down Expand Up @@ -424,7 +424,7 @@ def test_diff_count_precision():
9 61733 <--- largest unweighted bin count
A 215525

WEBAPP BINS:
STANDARD BINS:
0 7081 <--- negative income bin is dropped in TaxBrain display
1 19355
2 22722
Expand All @@ -441,27 +441,27 @@ def test_diff_count_precision():

Background information on Trump2017.json reform used in TaxBrain run 16649:

WEBAPP bin 10 ($500-1000 thousand) has weighted count of 1179 thousand;
weighted count of units with tax increase is 32 thousand.
STANDARD bin 10 ($500-1000 thousand) has weighted count of 1179 thousand;
weighted count of units with tax increase is 32 thousand.

So, the mean weight for all units in WEBAPP bin 10 is 111.5421 and the
So, the mean weight for all units in STANDARD bin 10 is 111.5421 and the
unweighted number with a tax increase is 287 assuming all units in that
bin have the same weight. (Note that 287 * 111.5421 is about 32,012.58,
which rounds to the 32 thousand shown in the TaxBrain difference table.)

WEBAPP bin 11 ($1000+ thousand) has weighted count of 636 thousand;
weighted count of units with tax increase is 27 thousand.
STANDARD bin 11 ($1000+ thousand) has weighted count of 636 thousand;
weighted count of units with tax increase is 27 thousand.

So, the mean weight for all units in WEBAPP bin 11 is about 27.517 and the
unweighted number with a tax increase is 981 assuming all units in that
bin have the same weight. (Note that 981 * 27.517 is about 26,994.18,
So, the mean weight for all units in STANDARD bin 11 is about 27.517 and
the unweighted number with a tax increase is 981 assuming all units in
that bin have the same weight. (Note that 981 * 27.517 is about 26,994.18,
which rounds to the 27 thousand shown in the TaxBrain difference table.)
"""
dump = False # setting to True implies results printed and test fails
seed = 123456789
bs_samples = 1000
alpha = 0.025 # implies 95% confidence interval
# compute stderr and confidence interval for WEBAPP bin 10 increase count
# compute stderr and confidence interval for STANDARD bin 10 increase count
data_list = [111.5421] * 287 + [0.0] * (10570 - 287)
assert len(data_list) == 10570
data = np.array(data_list)
Expand All @@ -475,26 +475,26 @@ def test_diff_count_precision():
if dump:
res = '{}EST={:.1f} B={} alpha={:.3f} se={:.2f} ci=[ {:.2f} , {:.2f} ]'
print(
res.format('WEBAPP-BIN10: ',
res.format('STANDARD-BIN10: ',
data_estimate, bs_samples, alpha, stderr, cilo, cihi)
)
assert abs((stderr / 1.90) - 1) < 0.0008
# NOTE: a se of 1.90 thousand implies that when comparing the difference
# in the weighted number of filing units in WEBAPP bin 10 with a
# in the weighted number of filing units in STANDARD bin 10 with a
# tax increase, the difference statistic has a bigger se (because
# the variance of the difference is the sum of the variances of the
# two point estimates). So, in WEBAPP bin 10 if the point estimates
# both had se = 1.90, then the difference in the point estimates has
# has a se = 2.687. This means that the difference would have to be
# over 5 thousand in order for there to be high confidence that the
# difference was different from zero in a statistically significant
# manner.
# two point estimates). So, in STANDARD bin 10 if the point
# estimates both had se = 1.90, then the difference in the point
# estimates has a se = 2.687. This means that the difference
# would have to be over 5 thousand in order for there to be high
# confidence that the difference was different from zero in a
# statistically significant manner.
# Or put a different way, a difference of 1 thousand cannot be
# accurately detected while a difference of 10 thousand can be
# accurately detected.
assert abs((cilo / 28.33) - 1) < 0.0012
assert abs((cihi / 35.81) - 1) < 0.0012
# compute stderr and confidence interval for WEBAPP bin 11 increase count
# compute stderr and confidence interval for STANDARD bin 11 increase count
data_list = [27.517] * 981 + [0.0] * (23113 - 981)
assert len(data_list) == 23113
data = np.array(data_list)
Expand All @@ -508,15 +508,15 @@ def test_diff_count_precision():
if dump:
res = '{}EST={:.1f} B={} alpha={:.3f} se={:.2f} ci=[ {:.2f} , {:.2f} ]'
print(
res.format('WEBAPP-BIN11: ',
res.format('STANDARD-BIN11: ',
data_estimate, bs_samples, alpha, stderr, cilo, cihi)
)
assert abs((stderr / 0.85) - 1) < 0.0040
# NOTE: a se of 0.85 thousand implies that when comparing the difference
# in the weighted number of filing units in WEBAPP bin 11 with a
# in the weighted number of filing units in STANDARD bin 11 with a
# tax increase, the difference statistic has a bigger se (because
# the variance of the difference is the sum of the variances of the
# two point estimates). So, in WEBAPP bin 11 if the point estimates
# two point estimates). So, in STANDARD bin 11 if point estimates
# both had se = 0.85, then the difference in the point estimates
# has a se = 1.20. This means that the difference would have to be
# over 2.5 thousand in order for there to be high confidence that the
Expand Down
38 changes: 19 additions & 19 deletions taxcalc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,16 @@
'all',
'80-90', '90-95', '95-99', 'Top 1%']

WEBAPP_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999,
199999, 499999, 1000000, 9e99]
STANDARD_ROW_NAMES = ['<$0K', '$0-10K', '$10-20K', '$20-30K', '$30-40K',
'$40-50K', '$50-75K', '$75-100K',
'$100-200K', '$200-500K',
'$500-1000K', '>$1000K', 'all']

WEBBIN_ROW_NAMES = ['<$10K', '$10-20K', '$20-30K', '$30-40K',
'$40-50K', '$50-75K', '$75-100K',
'$100-200K', '$200-500K',
'$500-1000K', '>$1000K', 'all']
STANDARD_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999,
74999, 99999, 199999, 499999, 1000000, 9e99]

LARGE_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999,
200000, 9e99]
LARGE_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999,
74999, 99999, 200000, 9e99]

SMALL_INCOME_BINS = [-9e99, 0, 4999, 9999, 14999, 19999, 24999, 29999, 39999,
49999, 74999, 99999, 199999, 499999, 999999, 1499999,
Expand Down Expand Up @@ -194,7 +194,7 @@ def add_income_bins(pdf, income_measure,
specifies income variable used to construct bins

bin_type: String, optional
options for input: 'webapp', 'tpc', 'soi'
options for input: 'standard', 'tpc', 'soi'
default: 'soi'

bins: iterable of scalars, optional income breakpoints
Expand All @@ -212,8 +212,8 @@ def add_income_bins(pdf, income_measure,
the original input plus the added 'bin' column
"""
if not bins:
if bin_type == 'webapp':
bins = WEBAPP_INCOME_BINS
if bin_type == 'standard':
bins = STANDARD_INCOME_BINS
elif bin_type == 'tpc':
bins = LARGE_INCOME_BINS
elif bin_type == 'soi':
Expand Down Expand Up @@ -252,7 +252,7 @@ def create_distribution_table(vdf, groupby, income_measure, result_type):
call like this: vdf = calc.dataframe(STATS_VARIABLES)

groupby : String object
options for input: 'weighted_deciles', 'webapp_income_bins',
options for input: 'weighted_deciles', 'standard_income_bins',
'large_income_bins', 'small_income_bins';
determines how the columns in the resulting Pandas DataFrame are sorted
NOTE: when groupby is 'weighted_deciles', the returned table has three
Expand Down Expand Up @@ -329,7 +329,7 @@ def stat_dataframe(gpdf):
# main logic of create_distribution_table
assert isinstance(vdf, pd.DataFrame)
assert (groupby == 'weighted_deciles' or
groupby == 'webapp_income_bins' or
groupby == 'standard_income_bins' or
groupby == 'large_income_bins' or
groupby == 'small_income_bins')
assert result_type == 'weighted_sum' or result_type == 'weighted_avg'
Expand All @@ -344,8 +344,8 @@ def stat_dataframe(gpdf):
# sort the data given specified groupby and income_measure
if groupby == 'weighted_deciles':
pdf = add_quantile_bins(res, income_measure, 10)
elif groupby == 'webapp_income_bins':
pdf = add_income_bins(res, income_measure, bin_type='webapp')
elif groupby == 'standard_income_bins':
pdf = add_income_bins(res, income_measure, bin_type='standard')
elif groupby == 'large_income_bins':
pdf = add_income_bins(res, income_measure, bin_type='tpc')
elif groupby == 'small_income_bins':
Expand Down Expand Up @@ -396,7 +396,7 @@ def create_difference_table(vdf1, vdf2, groupby, income_measure, tax_to_diff):
Calculator.dataframe method

groupby : String object
options for input: 'weighted_deciles', 'webapp_income_bins',
options for input: 'weighted_deciles', 'standard_income_bins',
'large_income_bins', 'small_income_bins'
specifies kind of bins used to group filing units
NOTE: when groupby is 'weighted_deciles', the returned table has three
Expand Down Expand Up @@ -468,8 +468,8 @@ def weighted_share_of_total(gpdf, colname, total):
# add bin column to res2 given specified groupby and income_measure
if groupby == 'weighted_deciles':
pdf = add_quantile_bins(res2, income_measure, 10)
elif groupby == 'webapp_income_bins':
pdf = add_income_bins(res2, income_measure, bin_type='webapp')
elif groupby == 'standard_income_bins':
pdf = add_income_bins(res2, income_measure, bin_type='standard')
elif groupby == 'large_income_bins':
pdf = add_income_bins(res2, income_measure, bin_type='tpc')
elif groupby == 'small_income_bins':
Expand Down Expand Up @@ -515,7 +515,7 @@ def weighted_share_of_total(gpdf, colname, total):
assert isinstance(vdf1, pd.DataFrame)
assert isinstance(vdf2, pd.DataFrame)
assert (groupby == 'weighted_deciles' or
groupby == 'webapp_income_bins' or
groupby == 'standard_income_bins' or
groupby == 'large_income_bins' or
groupby == 'small_income_bins')
assert (income_measure == 'expanded_income' or
Expand Down