diff --git a/taxcalc/dropq/dropq_utils.py b/taxcalc/dropq/dropq_utils.py index 80f54cb82..80317a5a9 100644 --- a/taxcalc/dropq/dropq_utils.py +++ b/taxcalc/dropq/dropq_utils.py @@ -92,6 +92,7 @@ def dropq_calculate(year_n, start_year, gfactors=growfactors_pre) # add one dollar to total wages and salaries of each filing unit recs1p.e00200 += 1.0 # pylint: disable=no-member + recs1p.e00200p += 1.0 # pylint: disable=no-member policy1p = Policy(gfactors=growfactors_pre) # create Calculator with recs1p and calculate for start_year calc1p = Calculator(policy=policy1p, records=recs1p, @@ -219,10 +220,9 @@ def drop_records(df1, df2, mask): pseudo-randomly picks three records to 'drop' within each bin. We keep track of the three dropped records in both group-by strategies and then use these 'flag' columns to modify all - columns of interest, creating new '*_dec' columns for later - statistics based on weighted deciles and '*_bin' columns - for statitistics based on grouping by income bins. - in each bin in two group-by actions. Lastly we calculate + columns of interest, creating new '*_dec' columns for + statistics based on weighted deciles and '*_bin' columns for + statistics based on income bins. Lastly we calculate individual income tax differences, payroll tax differences, and combined tax differences between the baseline and reform for the two groupings. 
diff --git a/taxcalc/tests/test_dropq.py b/taxcalc/tests/test_dropq.py index 107dee896..c0b12348c 100644 --- a/taxcalc/tests/test_dropq.py +++ b/taxcalc/tests/test_dropq.py @@ -216,22 +216,22 @@ def test_dropq_diff_table(groupby, res_column, puf_1991_path): @pytest.mark.requires_pufcsv def test_with_pufcsv(puf_path): # specify usermods dictionary in code - start_year = 2016 - reform_year = start_year + 1 - reforms = dict() - reforms['_II_rt3'] = [0.33] - reforms['_PT_rt3'] = [0.33] - reforms['_II_rt4'] = [0.33] - reforms['_PT_rt4'] = [0.33] + start_year = 2017 + reform_year = start_year + analysis_year = 2026 + year_n = analysis_year - start_year + reform = { + '_FICA_ss_trt': [0.2] + } usermods = dict() - usermods['policy'] = {reform_year: reforms} + usermods['policy'] = {reform_year: reform} usermods['consumption'] = {} usermods['behavior'] = {} usermods['growdiff_baseline'] = {} usermods['growdiff_response'] = {} usermods['gdp_elasticity'] = {} seed = random_seed(usermods) - assert seed == 3047708076 + assert seed == 1574318062 # create a Policy object (pol) containing reform policy parameters pol = Policy() pol.implement_reform(usermods['policy']) @@ -239,22 +239,21 @@ def test_with_pufcsv(puf_path): rec = Records(data=puf_path) # create a Calculator object using clp policy and puf records calc = Calculator(policy=pol, records=rec) - while calc.current_year < reform_year: + while calc.current_year < analysis_year: calc.increment_year() # create aggregate diagnostic table (adt) as a Pandas DataFrame object - years = reform_year - Policy.JSON_START_YEAR + 1 - adt = multiyear_diagnostic_table(calc, years) + adt = multiyear_diagnostic_table(calc, 1) taxes_fullsample = adt.loc["Combined Liability ($b)"] assert taxes_fullsample is not None - fulls_reform_revenue = taxes_fullsample.loc[reform_year] + fulls_reform_revenue = taxes_fullsample.loc[analysis_year] # create a Public Use File object tax_data = pd.read_csv(puf_path) # call run_nth_year_tax_calc_model function 
- restuple = run_nth_year_tax_calc_model(1, start_year, + restuple = run_nth_year_tax_calc_model(year_n, start_year, tax_data, usermods, return_json=True) total = restuple[len(restuple) - 1] # the last of element of the tuple - dropq_reform_revenue = float(total['combined_tax_1']) + dropq_reform_revenue = float(total['combined_tax_9']) dropq_reform_revenue *= 1e-9 # convert to billions of dollars diff = abs(fulls_reform_revenue - dropq_reform_revenue) # assert that dropq revenue is similar to the fullsample calculation diff --git a/taxcalc/utils.py b/taxcalc/utils.py index 164d546ee..73293675a 100644 --- a/taxcalc/utils.py +++ b/taxcalc/utils.py @@ -190,11 +190,16 @@ def add_weighted_income_bins(pdf, num_bins=10, labels=None, if weight_by_income_measure: pdf['cumsum_temp'] = np.cumsum(np.multiply(pdf[income_measure].values, pdf['s006'].values)) + min_cumsum = pdf['cumsum_temp'].values[0] else: pdf['cumsum_temp'] = np.cumsum(pdf['s006'].values) + min_cumsum = 0. # because s006 values are non-negative max_cumsum = pdf['cumsum_temp'].values[-1] - bin_edges = [0] + list(np.arange(1, (num_bins + 1)) * - (max_cumsum / float(num_bins))) + cumsum_range = max_cumsum - min_cumsum + bin_width = cumsum_range / float(num_bins) + bin_edges = list(min_cumsum + np.arange(0, (num_bins + 1)) * bin_width) + bin_edges[-1] = 9e99 # raise top of last bin to include all observations + bin_edges[0] = -9e99 # lower bottom of 1st bin to include all observations if not labels: labels = range(1, (num_bins + 1)) pdf['bins'] = pd.cut(pdf['cumsum_temp'], bins=bin_edges, labels=labels)