From 6610a62686cfbe2b33b62edddce775fa5bee1aba Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Fri, 2 Jun 2017 21:59:33 -0400
Subject: [PATCH 1/8] Make dropq test fail like in issue #1367

---
 taxcalc/tests/test_dropq.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py
index 446c9d804..fdd9b2432 100644
--- a/taxcalc/tests/test_dropq.py
+++ b/taxcalc/tests/test_dropq.py
@@ -215,13 +215,19 @@ def test_dropq_diff_table(groupby, res_column, puf_1991_path):
 @pytest.mark.requires_pufcsv
 def test_with_pufcsv(puf_path):
     # specify usermods dictionary in code
-    start_year = 2016
-    reform_year = start_year + 1
-    reforms = dict()
-    reforms['_II_rt3'] = [0.33]
-    reforms['_PT_rt3'] = [0.33]
-    reforms['_II_rt4'] = [0.33]
-    reforms['_PT_rt4'] = [0.33]
+    start_year = 2017
+    reform_year = start_year
+    analysis_year = 2026
+    year_n = analysis_year - start_year
+    reforms = {
+        '_FICA_ss_trt': [0.2]
+    }
+    # '_FICA_ss_trt': [0.22000]  # no error
+    # '_FICA_ss_trt': [0.21000]  # generates error
+    # '_FICA_ss_trt': [0.20000]  # generates error
+    # '_FICA_ss_trt': [0.19999]  # no error
+    # '_FICA_ss_trt': [0.19000]  # no error
+
     usermods = dict()
     usermods['policy'] = {reform_year: reforms}
     usermods['consumption'] = {}
@@ -229,8 +235,7 @@ def test_with_pufcsv(puf_path):
     usermods['growdiff_baseline'] = {}
     usermods['growdiff_response'] = {}
     usermods['gdp_elasticity'] = {}
-    seed = random_seed(usermods)
-    assert seed == 3047708076
+    random_seed(usermods)
     # create a Policy object (pol) containing reform policy parameters
     pol = Policy()
     pol.implement_reform(usermods['policy'])
@@ -238,22 +243,21 @@ def test_with_pufcsv(puf_path):
     rec = Records(data=puf_path)
     # create a Calculator object using clp policy and puf records
     calc = Calculator(policy=pol, records=rec)
-    while calc.current_year < reform_year:
+    while calc.current_year < analysis_year:
         calc.increment_year()
     # create aggregate diagnostic table (adt) as a Pandas DataFrame object
-    years = reform_year - Policy.JSON_START_YEAR + 1
-    adt = multiyear_diagnostic_table(calc, years)
+    adt = multiyear_diagnostic_table(calc, 1)
     taxes_fullsample = adt.loc["Combined Liability ($b)"]
     assert taxes_fullsample is not None
-    fulls_reform_revenue = taxes_fullsample.loc[reform_year]
+    fulls_reform_revenue = taxes_fullsample.loc[analysis_year]
     # create a Public Use File object
     tax_data = pd.read_csv(puf_path)
     # call run_nth_year_tax_calc_model function
-    restuple = run_nth_year_tax_calc_model(1, start_year,
+    restuple = run_nth_year_tax_calc_model(year_n, start_year,
                                            tax_data, usermods,
                                            return_json=True)
     total = restuple[len(restuple) - 1]  # the last of element of the tuple
-    dropq_reform_revenue = float(total['combined_tax_1'])
+    dropq_reform_revenue = float(total['combined_tax_9'])
     dropq_reform_revenue *= 1e-9  # convert to billions of dollars
     diff = abs(fulls_reform_revenue - dropq_reform_revenue)
     # assert that dropq revenue is similar to the fullsample calculation

From 70168997c47c5a833381791a6c2356d27fa12324 Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Fri, 2 Jun 2017 22:20:35 -0400
Subject: [PATCH 2/8] Make dropq_calculate logic more sensible for FICA reforms

---
 taxcalc/dropq/dropq_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py
index 992d407f1..c5dda51e4 100644
--- a/taxcalc/dropq/dropq_utils.py
+++ b/taxcalc/dropq/dropq_utils.py
@@ -102,6 +102,7 @@ def dropq_calculate(year_n, start_year,
                          gfactors=growfactors_pre)
         # add one dollar to total wages and salaries of each filing unit
         recs1p.e00200 += 1.0  # pylint: disable=no-member
+        recs1p.e00200p += 1.0  # pylint: disable=no-member
         policy1p = Policy(gfactors=growfactors_pre)
         # create Calculator with recs1p and calculate for start_year
         calc1p = Calculator(policy=policy1p, records=recs1p,
@@ -113,7 +114,7 @@ def dropq_calculate(year_n, start_year,
         # compute mask that shows which of the calc1 and calc1p results differ
         res1 = results(calc1)
         res1p = results(calc1p)
-        mask = (res1.iitax != res1p.iitax)
+        mask = (res1.combined != res1p.combined)
     else:
         mask = None
 

From 5a79fa4cde11aa0d7d02f1e3e00b4d6aa4d8dfd9 Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Fri, 2 Jun 2017 22:32:14 -0400
Subject: [PATCH 3/8] Revise add_weighted_income_bins utility function logic

---
 taxcalc/utils.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 164d546ee..9ee5e2cf9 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -192,9 +192,12 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None,
                                                    pdf['s006'].values))
     else:
         pdf['cumsum_temp'] = np.cumsum(pdf['s006'].values)
+    min_cumsum = pdf['cumsum_temp'].values[0]
     max_cumsum = pdf['cumsum_temp'].values[-1]
-    bin_edges = [0] + list(np.arange(1, (num_bins + 1)) *
-                           (max_cumsum / float(num_bins)))
+    cumsum_range = max_cumsum - min_cumsum
+    bin_width = cumsum_range / float(num_bins)
+    bin_edges = [-9e99] + list(np.arange(1, (num_bins + 1)) * bin_width)
+    bin_edges[-1] += 9e9  # add to top of last bin_edge to get all observations
     if not labels:
         labels = range(1, (num_bins + 1))
     pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)

From 47777e320f9632cc2bab978fc8f9ae8d11e0fb22 Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Sat, 3 Jun 2017 16:12:51 -0400
Subject: [PATCH 4/8] Clean-up comments in new test and add seed test

---
 taxcalc/tests/test_dropq.py | 13 ++++---------
 taxcalc/utils.py            |  2 +-
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py
index fdd9b2432..a1a6f254a 100644
--- a/taxcalc/tests/test_dropq.py
+++ b/taxcalc/tests/test_dropq.py
@@ -219,23 +219,18 @@ def test_with_pufcsv(puf_path):
     reform_year = start_year
     analysis_year = 2026
     year_n = analysis_year - start_year
-    reforms = {
+    reform = {
         '_FICA_ss_trt': [0.2]
     }
-    # '_FICA_ss_trt': [0.22000]  # no error
-    # '_FICA_ss_trt': [0.21000]  # generates error
-    # '_FICA_ss_trt': [0.20000]  # generates error
-    # '_FICA_ss_trt': [0.19999]  # no error
-    # '_FICA_ss_trt': [0.19000]  # no error
-
     usermods = dict()
-    usermods['policy'] = {reform_year: reforms}
+    usermods['policy'] = {reform_year: reform}
     usermods['consumption'] = {}
     usermods['behavior'] = {}
     usermods['growdiff_baseline'] = {}
     usermods['growdiff_response'] = {}
     usermods['gdp_elasticity'] = {}
-    random_seed(usermods)
+    seed = random_seed(usermods)
+    assert seed == 1574318062
     # create a Policy object (pol) containing reform policy parameters
     pol = Policy()
     pol.implement_reform(usermods['policy'])
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 9ee5e2cf9..b66d8f522 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -197,7 +197,7 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None,
     cumsum_range = max_cumsum - min_cumsum
     bin_width = cumsum_range / float(num_bins)
     bin_edges = [-9e99] + list(np.arange(1, (num_bins + 1)) * bin_width)
-    bin_edges[-1] += 9e9  # add to top of last bin_edge to get all observations
+    bin_edges[-1] += 9e9  # add to top of last bin to include all observations
     if not labels:
         labels = range(1, (num_bins + 1))
     pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)

From 258a5311e024eae7843ca21e64a7f72a527d184f Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Sat, 3 Jun 2017 21:58:40 -0400
Subject: [PATCH 5/8] Switch back to iitax dropq mask definition

---
 taxcalc/dropq/dropq_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py
index c5dda51e4..8606b0915 100644
--- a/taxcalc/dropq/dropq_utils.py
+++ b/taxcalc/dropq/dropq_utils.py
@@ -114,7 +114,7 @@ def dropq_calculate(year_n, start_year,
         # compute mask that shows which of the calc1 and calc1p results differ
         res1 = results(calc1)
         res1p = results(calc1p)
-        mask = (res1.combined != res1p.combined)
+        mask = (res1.iitax != res1p.iitax)
     else:
         mask = None
 

From 131e01a9424449c001d16f1a5f86705f59dc8974 Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Sun, 4 Jun 2017 14:13:33 -0400
Subject: [PATCH 6/8] Fix docstring for drop_records function in dropq_utils.py

---
 taxcalc/dropq/dropq_utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py
index 8606b0915..79b39611e 100644
--- a/taxcalc/dropq/dropq_utils.py
+++ b/taxcalc/dropq/dropq_utils.py
@@ -230,10 +230,9 @@ def drop_records(df1, df2, mask):
     pseudo-randomly picks three records to 'drop' within each bin.
     We keep track of the three dropped records in both group-by
     strategies and then use these 'flag' columns to modify all
-    columns of interest, creating new '*_dec' columns for later
-    statistics based on weighted deciles and '*_bin' columns
-    for statitistics based on grouping by income bins.
-    in each bin in two group-by actions. Lastly we calculate
+    columns of interest, creating new '*_dec' columns for
+    statistics based on weighted deciles and '*_bin' columns for
+    statistics based on income bins.  Lastly we calculate
     individual income tax differences, payroll tax differences, and
     combined tax differences between the baseline and reform
     for the two groupings.

From 22950f2eaae62b38db9ad6200b6afdb4ca3a0ccc Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Mon, 5 Jun 2017 08:43:52 -0400
Subject: [PATCH 7/8] Minor change to add_weighted_income_bins utility function

---
 taxcalc/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index b66d8f522..4dadd3a97 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -197,7 +197,7 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None,
     cumsum_range = max_cumsum - min_cumsum
     bin_width = cumsum_range / float(num_bins)
     bin_edges = [-9e99] + list(np.arange(1, (num_bins + 1)) * bin_width)
-    bin_edges[-1] += 9e9  # add to top of last bin to include all observations
+    bin_edges[-1] = 9e99  # raise top of last bin to include all observations
     if not labels:
         labels = range(1, (num_bins + 1))
     pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)

From 05236497614fab067df1373d54db5a87270552c2 Mon Sep 17 00:00:00 2001
From: martinholmer <martin.holmer@gmail.com>
Date: Mon, 5 Jun 2017 18:21:22 -0400
Subject: [PATCH 8/8] Fix add_weighted_income_bins logic per Amy Xu

---
 taxcalc/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 4dadd3a97..73293675a 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -190,14 +190,16 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None,
     if weight_by_income_measure:
         pdf['cumsum_temp'] = np.cumsum(np.multiply(pdf[income_measure].values,
                                                    pdf['s006'].values))
+        min_cumsum = pdf['cumsum_temp'].values[0]
     else:
         pdf['cumsum_temp'] = np.cumsum(pdf['s006'].values)
-    min_cumsum = pdf['cumsum_temp'].values[0]
+        min_cumsum = 0.  # because s006 values are non-negative
     max_cumsum = pdf['cumsum_temp'].values[-1]
     cumsum_range = max_cumsum - min_cumsum
     bin_width = cumsum_range / float(num_bins)
-    bin_edges = [-9e99] + list(np.arange(1, (num_bins + 1)) * bin_width)
+    bin_edges = list(min_cumsum + np.arange(0, (num_bins + 1)) * bin_width)
     bin_edges[-1] = 9e99  # raise top of last bin to include all observations
+    bin_edges[0] = -9e99  # lower bottom of 1st bin to include all observations
     if not labels:
         labels = range(1, (num_bins + 1))
     pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)