
Make dropq test fail like in issue 1367; then fix bug #1387

Merged
merged 9 commits into from
Jun 6, 2017
8 changes: 4 additions & 4 deletions taxcalc/dropq/dropq_utils.py
@@ -102,6 +102,7 @@ def dropq_calculate(year_n, start_year,
gfactors=growfactors_pre)
# add one dollar to total wages and salaries of each filing unit
recs1p.e00200 += 1.0 # pylint: disable=no-member
+recs1p.e00200p += 1.0 # pylint: disable=no-member
policy1p = Policy(gfactors=growfactors_pre)
# create Calculator with recs1p and calculate for start_year
calc1p = Calculator(policy=policy1p, records=recs1p,
@@ -229,10 +230,9 @@ def drop_records(df1, df2, mask):
pseudo-randomly picks three records to 'drop' within each bin.
We keep track of the three dropped records in both group-by
strategies and then use these 'flag' columns to modify all
-columns of interest, creating new '*_dec' columns for later
-statistics based on weighted deciles and '*_bin' columns
-for statitistics based on grouping by income bins.
-in each bin in two group-by actions. Lastly we calculate
+columns of interest, creating new '*_dec' columns for
+statistics based on weighted deciles and '*_bin' columns for
+statistics based on income bins. Lastly we calculate
individual income tax differences, payroll tax differences, and
combined tax differences between the baseline and reform
for the two groupings.
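For readers unfamiliar with the dropq approach described in the docstring above, a minimal sketch of the per-bin record dropping. The helper name, column names, and seed below are hypothetical illustrations, not the actual taxcalc implementation:

```python
import numpy as np
import pandas as pd

def drop_three_per_bin(df, bin_col, seed=123):
    # Hypothetical helper (not the actual taxcalc code): pseudo-randomly
    # flag three records in each bin; flagged records are the ones
    # 'dropped' when weighted statistics are computed later.
    rng = np.random.RandomState(seed)
    out = df.copy()
    out['keep_flag'] = True
    for _, idx in out.groupby(bin_col).groups.items():
        dropped = rng.choice(idx, size=min(3, len(idx)), replace=False)
        out.loc[dropped, 'keep_flag'] = False
    return out

demo = pd.DataFrame({'dec': [1] * 5 + [2] * 5, 'tax': range(10)})
flagged = drop_three_per_bin(demo, 'dec')
# each 5-record bin keeps 5 - 3 = 2 records
```

Running the same flagging under two different group-by strategies (deciles and income bins) yields the '*_dec' and '*_bin' flag columns the docstring refers to.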
29 changes: 14 additions & 15 deletions taxcalc/tests/test_dropq.py
@@ -215,45 +215,44 @@ def test_dropq_diff_table(groupby, res_column, puf_1991_path):
@pytest.mark.requires_pufcsv
def test_with_pufcsv(puf_path):
# specify usermods dictionary in code
-start_year = 2016
-reform_year = start_year + 1
-reforms = dict()
-reforms['_II_rt3'] = [0.33]
-reforms['_PT_rt3'] = [0.33]
-reforms['_II_rt4'] = [0.33]
-reforms['_PT_rt4'] = [0.33]
+start_year = 2017
+reform_year = start_year
+analysis_year = 2026
+year_n = analysis_year - start_year
+reform = {
+    '_FICA_ss_trt': [0.2]
+}
Member commented:
Just to confirm: the reason for switching to this reform is that it will change not only iitax but also the payroll tax, right?

Contributor commented:
Martin got started on this PR because that reform failed: #1387 (comment)

Collaborator (Author) commented:

@Amy-Xu, @MattHJensen is exactly correct. The new reform (with just a payroll tax rate increase) was the reform reported by @PeterDSteinberg as causing a fatal error. So, I revised the test to be like in his bug report and worked until I found what was causing that test failure. Does this make sense?

Member commented:

Yep that makes sense.

usermods = dict()
-usermods['policy'] = {reform_year: reforms}
+usermods['policy'] = {reform_year: reform}
usermods['consumption'] = {}
usermods['behavior'] = {}
usermods['growdiff_baseline'] = {}
usermods['growdiff_response'] = {}
usermods['gdp_elasticity'] = {}
seed = random_seed(usermods)
-assert seed == 3047708076
+assert seed == 1574318062
# create a Policy object (pol) containing reform policy parameters
pol = Policy()
pol.implement_reform(usermods['policy'])
# create a Records object (rec) containing all puf.csv input records
rec = Records(data=puf_path)
# create a Calculator object using clp policy and puf records
calc = Calculator(policy=pol, records=rec)
-while calc.current_year < reform_year:
+while calc.current_year < analysis_year:
calc.increment_year()
# create aggregate diagnostic table (adt) as a Pandas DataFrame object
-years = reform_year - Policy.JSON_START_YEAR + 1
-adt = multiyear_diagnostic_table(calc, years)
+adt = multiyear_diagnostic_table(calc, 1)
taxes_fullsample = adt.loc["Combined Liability ($b)"]
assert taxes_fullsample is not None
-fulls_reform_revenue = taxes_fullsample.loc[reform_year]
+fulls_reform_revenue = taxes_fullsample.loc[analysis_year]
# create a Public Use File object
tax_data = pd.read_csv(puf_path)
# call run_nth_year_tax_calc_model function
-restuple = run_nth_year_tax_calc_model(1, start_year,
+restuple = run_nth_year_tax_calc_model(year_n, start_year,
tax_data, usermods,
return_json=True)
total = restuple[len(restuple) - 1] # the last element of the tuple
-dropq_reform_revenue = float(total['combined_tax_1'])
+dropq_reform_revenue = float(total['combined_tax_9'])
dropq_reform_revenue *= 1e-9 # convert to billions of dollars
diff = abs(fulls_reform_revenue - dropq_reform_revenue)
# assert that dropq revenue is similar to the fullsample calculation
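The renamed result key follows directly from the new test constants; a small sketch of the arithmetic, assuming the 'combined_tax_<year_n>' key-name pattern inferred from the assertions above:

```python
# Mirrors the values used in the revised test; the key-name pattern
# 'combined_tax_<year_n>' is inferred from the changed assertion above.
start_year = 2017
analysis_year = 2026
year_n = analysis_year - start_year  # 9
key = 'combined_tax_{}'.format(year_n)
assert key == 'combined_tax_9'
```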
7 changes: 5 additions & 2 deletions taxcalc/utils.py
@@ -192,9 +192,12 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None,
pdf['s006'].values))
else:
pdf['cumsum_temp'] = np.cumsum(pdf['s006'].values)
+min_cumsum = pdf['cumsum_temp'].values[0]
max_cumsum = pdf['cumsum_temp'].values[-1]
-bin_edges = [0] + list(np.arange(1, (num_bins + 1)) *
-                       (max_cumsum / float(num_bins)))
+cumsum_range = max_cumsum - min_cumsum
+bin_width = cumsum_range / float(num_bins)
+bin_edges = [-9e99] + list(np.arange(1, (num_bins + 1)) * bin_width)
+bin_edges[-1] = 9e99  # raise top of last bin to include all observations
Member commented:

Hmmm, there's still a minor issue when the weighted_by_income_measure option is set to True. Since the bin edges are stacked on top of zero by the list(np.arange(1, (num_bins + 1)) * bin_width) snippet, the first bin would be larger than expected when the cumulative sum is weighted by income_measure, pushing all the other bin edges to the right by an amount equal to the absolute value of min_cumsum, wouldn't it?

Collaborator (Author) commented:

@Amy-Xu said:

Ignore the previous comment. This section looks good to me.

Member commented:

That comment was a previous one. Sorry about that. Do you think this current comment is valid?

if not labels:
labels = range(1, (num_bins + 1))
pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)
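The revised edge logic can be exercised in isolation; a minimal sketch of the approach, assuming pdf is already sorted by the income measure and omitting the weighted_by_income_measure branch discussed above:

```python
import numpy as np
import pandas as pd

def add_weight_bins(pdf, num_bins=10):
    # Sketch of the revised bin-edge logic: cumulate the sample weights,
    # split the cumulated range into equal-width bins, and widen the
    # bottom and top edges so every record falls inside some bin.
    pdf = pdf.copy()
    pdf['cumsum_temp'] = np.cumsum(pdf['s006'].values)
    min_cumsum = pdf['cumsum_temp'].values[0]
    max_cumsum = pdf['cumsum_temp'].values[-1]
    bin_width = (max_cumsum - min_cumsum) / float(num_bins)
    bin_edges = [-9e99] + list(np.arange(1, num_bins + 1) * bin_width)
    bin_edges[-1] = 9e99  # top edge high enough to include all observations
    pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges,
                         labels=range(1, num_bins + 1))
    return pdf

demo = pd.DataFrame({'s006': np.ones(100)})
binned = add_weight_bins(demo)
# every record lands in exactly one of the 10 bins
```

The -9e99/9e99 sentinels are what distinguish the fix from the old `[0] + ...` edges, which could leave the first or last observation outside the binned range.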