diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 67d65eb80..44013e5e2 100755 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -16,6 +16,7 @@ requirements: - numba - "paramtools>=0.18.0" - aiohttp + - curl run: - python @@ -26,6 +27,7 @@ requirements: - numba - "paramtools>=0.18.0" - aiohttp + - curl test: commands: diff --git a/environment.yml b/environment.yml index cff303aa4..38b18b377 100644 --- a/environment.yml +++ b/environment.yml @@ -3,6 +3,7 @@ channels: - conda-forge dependencies: - python +- curl - "numpy>=1.14" - "pandas>=1.2.0" - "bokeh>=1.4.0" @@ -20,4 +21,4 @@ dependencies: - pip - pip: - jupyter-book - - pytest_harvest \ No newline at end of file + - pytest_harvest diff --git a/taxcalc/validation/taxsim27/README.md b/taxcalc/validation/taxsim27/README.md index e9adbf75d..2812d2737 100644 --- a/taxcalc/validation/taxsim27/README.md +++ b/taxcalc/validation/taxsim27/README.md @@ -136,4 +136,4 @@ different.) Validation results using the then current-version of TAXSIM-27 on these dates: 1. 2019-03-30 : same results except for 327 itax diffs with largest being $13.00 -2. 2019-06-05 : same results (other dependent credit now included in ovar 22) +2. 
2019-06-05 : same results (other dependent credit now included in ovar 22) \ No newline at end of file diff --git a/taxcalc/validation/taxsim27/output-taxsim.zip b/taxcalc/validation/taxsim27/output-taxsim.zip deleted file mode 100644 index 53d227ca9..000000000 Binary files a/taxcalc/validation/taxsim27/output-taxsim.zip and /dev/null differ diff --git a/taxcalc/validation/taxsim32/README.md b/taxcalc/validation/taxsim32/README.md new file mode 100644 index 000000000..11af5234d --- /dev/null +++ b/taxcalc/validation/taxsim32/README.md @@ -0,0 +1,133 @@ +Validation of Tax-Calculator against Internet TAXSIM-32 +======================================================= + +The general cross-model validation process described +[here](https://github.com/PSLmodels/Tax-Calculator/blob/master/taxcalc/validation/README.md#validation-of-tax-calculator-logic) +is being executed in this directory using +[TAXSIM-32](https://users.nber.org/~taxsim/taxsim32/). + +We are in the process of comparing Tax-Calculator and TAXSIM-32 +results generated from several assumption sets in the `taxsim_input.py` +script for years beginning with 2018. Each INPUT file is +used to generate a TAXSIM-32 OUTPUT file by uploading it to the +TAXSIM-32 website and requesting detailed intermediate calculations. +And each INPUT file is translated into a CSV-formatted input file that +is read by the Tax-Calculator `tc` CLI tool to generate output that is +then transformed into an OUTPUT file having the TAXSIM-32 format. +Finally, these two OUTPUT files are compared using the `main_comparison.py` +script. See the `tests_32.py` script in this directory for more details. + +The following results are for INPUT files containing 100,000 +randomly-generated filing units for a given year. The random sampling +is such that a different sample is drawn for each year. 
In each INPUT +file three state-related variables are set to zero for every filing +unit, one variable specifies the year, and another specifies a filing +unit id, which leaves twenty-two input variables that are set to +randomly-generated values. + +In order to handle known differences in assumptions between the two +models, we use the `taxsim_emulation.json` "reform" file to make +Tax-Calculator operate like TAXSIM-32. See the +[`taxsim_emulation.json` +file](https://github.com/PSLmodels/Tax-Calculator/blob/master/taxcalc/validation/taxsim32/taxsim_emulation.json) +for details. + +In the following results, when we say "same results" we mean that the +federal individual income tax liabilities and payroll tax liabilities +being compared have differences of no larger than one cent. + +For information on the variable names illustrated in `taxsim_input.py`, +the script that generates data for input into TAXSIM-32, see the TAXSIM-32 website listed above. + + +Instructions +------------------ +1. Navigate to `taxcalc/validation/taxsim32` and run the Python script `tests_32.py`. +2. If you would like to generate new input files and get new files from TAXSIM-32, +just delete all of the `.in.out-taxsim` files. On Mac/Linux, this can be done with +`rm -f *.in.out-taxsim`. + + +Troubleshooting +------------------ +If the TAXSIM-32 validation code throws errors such as `.in files not found`, +`.out files not found` or that any parameter within `policy_current_law.json` +does not exist, please try these 2 steps: + +1. Make sure that the `taxcalc` conda package is installed +2. If you have Tax-Calculator downloaded locally, navigate to the root directory +and run `pip install -e .` This will install the current source code into the `taxcalc` +CLI. 
+ + +Validation Results +------------------ + +**a18 ASSUMPTION SET**: + +2018 INPUT file that specifies the first twelve of the TAXSIM-32 +input variables, which include demographic variables and labor income, +but sets to zero all the TAXSIM-32 input variables numbered from 13 +through 27. + +Validation results using the then current-version of TAXSIM-32 on these dates: + +**b18 ASSUMPTION SET**: + +2018 INPUT file that specifies the first twenty-one of the TAXSIM-32 +input variables, which include demographic variables, labor income, +capital income, and federally-taxable benefits, but sets to zero all +the other six TAXSIM-32 input variables except variables 28-32, +which are the variables representing the new QBI-related variables. +Two of those six are always set to zero because they specify transfer income +that is not taxed under the federal income tax or because they specify rent paid that +does not affect federal income tax liability. Three of the remaining +four input variables are itemized expense amounts and the fourth is +child-care expenses. + +Validation results using the then current-version of TAXSIM-32 on these dates: + +**c18 ASSUMPTION SET**: + +2018 INPUT file that specifies all the non-state TAXSIM-32 input +variables to be randomly generated values. + +Validation results using the then current-version of TAXSIM-32 on these dates: + +**a19 ASSUMPTION SET**: + +2019 INPUT file that specifies the first twelve of the TAXSIM-32 +input variables, which include demographic variables and labor income, +but sets to zero all the TAXSIM-32 input variables numbered from 13 +through 27. (This is the same logic as used to generate the **a18** +sample except that a different stream of random numbers is used so that +the 100,000 filing units are completely different.) 
+ +Validation results using the then current-version of TAXSIM-32 on these dates: + +**b19 ASSUMPTION SET**: + +2019 INPUT file that specifies the first twenty-one of the TAXSIM-32 +input variables, which include demographic variables, labor income, +capital income, and federally-taxable benefits, but sets to zero all +the other six TAXSIM-32 input variables except variables 28-32, +which are the variables representing the new QBI-related variables. +Two of those six are always set to zero because they specify transfer income +that is not taxed under the federal income tax or because they specify rent paid that +does not affect federal income tax liability. Three of the remaining +four input variables are itemized expense amounts and the fourth is +child-care expenses. (This is the same logic as used to generate the +**b18** sample except that a different stream of random numbers is +used so that the 100,000 filing units are completely different.) + +Validation results using the then current-version of TAXSIM-32 on these dates: + +**c19 ASSUMPTION SET**: + +2019 INPUT file that specifies all the non-state TAXSIM-32 input +variables to be randomly generated values. (This is the same logic as +used to generate the **c18** sample except that a different stream of +random numbers is used so that the 100,000 filing units are completely +different.) 
+ +Validation results using the then current-version of TAXSIM-32 on these dates: diff --git a/taxcalc/validation/taxsim32/a18-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/a18-taxdiffs-expect.csv new file mode 100644 index 000000000..a5ef8298f --- /dev/null +++ b/taxcalc/validation/taxsim32/a18-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,49,-0.00999999999999801,12135,-20.98,-20.99 +siitax,0,0.0,no diff,no diff,no diff +fica,0,0.0,no diff,no diff,no diff +frate,0,0.0,no diff,no diff,no diff +srate,0,0.0,no diff,no diff,no diff +ficar,124,0.8999999999999999,172,2.9,3.8 +v10,0,0.0,no diff,no diff,no diff +v11,0,0.0,no diff,no diff,no diff +v12,0,0.0,no diff,no diff,no diff +v13,100000,-26600.0,12,26600.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,0,0.0,no diff,no diff,no diff +v18,0,0.0,no diff,no diff,no diff +v19,0,0.0,no diff,no diff,no diff +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,0,0.0,no diff,no diff,no diff +v23,0,0.0,no diff,no diff,no diff +v24,0,0.0,no diff,no diff,no diff +v25,46,0.00999999999999801,12135,20.98,20.99 +v26,0,0.0,no diff,no diff,no diff +v27,0,0.0,no diff,no diff,no diff +v28,0,0.0,no diff,no diff,no diff diff --git a/taxcalc/validation/taxsim32/a19-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/a19-taxdiffs-expect.csv new file mode 100644 index 000000000..93c790e45 --- /dev/null +++ b/taxcalc/validation/taxsim32/a19-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,71,-0.010000000000005116,12332,-119.72,-119.73 +siitax,0,0.0,no diff,no diff,no diff +fica,0,0.0,no diff,no diff,no diff +frate,0,0.0,no diff,no diff,no diff +srate,0,0.0,no diff,no diff,no diff +ficar,119,0.8999999999999999,2226,2.9,3.8 +v10,0,0.0,no diff,no diff,no diff +v11,0,0.0,no diff,no 
diff,no diff +v12,0,0.0,no diff,no diff,no diff +v13,100000,-27000.0,9,27000.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,0,0.0,no diff,no diff,no diff +v18,0,0.0,no diff,no diff,no diff +v19,0,0.0,no diff,no diff,no diff +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,0,0.0,no diff,no diff,no diff +v23,0,0.0,no diff,no diff,no diff +v24,0,0.0,no diff,no diff,no diff +v25,71,0.010000000000005116,4164,119.72,119.73 +v26,0,0.0,no diff,no diff,no diff +v27,0,0.0,no diff,no diff,no diff +v28,0,0.0,no diff,no diff,no diff diff --git a/taxcalc/validation/taxsim32/b18-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/b18-taxdiffs-expect.csv new file mode 100644 index 000000000..b45326d0c --- /dev/null +++ b/taxcalc/validation/taxsim32/b18-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,100000,-130262.98999999999,75368,306837.54,176574.55 +siitax,0,0.0,no diff,no diff,no diff +fica,67638,-0.3000000000029104,14298,38376.98,38376.68 +frate,46049,-55.0,91965,40.0,-15.0 +srate,0,0.0,no diff,no diff,no diff +ficar,0,0.0,no diff,no diff,no diff +v10,99931,-350001.9800000002,99037,1402920.62,1052918.64 +v11,0,0.0,no diff,no diff,no diff +v12,5,-10980.150000000001,86601,39100.0,28119.85 +v13,100000,-26600.0,12,26600.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,0,0.0,no diff,no diff,no diff +v18,100000,-402620.99,27455,653482.24,250861.25 +v19,100000,-134001.72999999998,27455,181167.43,47165.7 +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,9067,8500.0,888,0.0,8500.0 +v23,8,2157.88,42055,0.0,2157.88 +v24,0,0.0,no diff,no diff,no diff +v25,0,0.0,no diff,no diff,no diff +v26,100000,-350000.0,44030,685450.0,335450.0 +v27,5513,13628.83,99719,0.0,13628.83 +v28,100000,-130941.73,27455,178107.43,47165.7 diff 
--git a/taxcalc/validation/taxsim32/b19-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/b19-taxdiffs-expect.csv new file mode 100644 index 000000000..2875cee0b --- /dev/null +++ b/taxcalc/validation/taxsim32/b19-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,100000,-138933.06,58416,284963.75,146030.69 +siitax,0,0.0,no diff,no diff,no diff +fica,67719,-0.3000000000029104,21816,39016.15,39015.85 +frate,48029,-64.44,82573,49.44,-15.0 +srate,0,0.0,no diff,no diff,no diff +ficar,0,0.0,no diff,no diff,no diff +v10,99999,-380001.98,99411,1619677.51,1239675.53 +v11,0,0.0,no diff,no diff,no diff +v12,13,-24155.15,52779,46750.0,22594.85 +v13,100000,-27000.0,9,27000.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,0,0.0,no diff,no diff,no diff +v18,100000,-411751.91000000003,34865,645007.02,233255.11 +v19,100000,-140755.06,58416,286785.75,146030.69 +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,10340,8500.0,2256,0.0,8500.0 +v23,28,3397.83,82573,0.0,3397.83 +v24,0,0.0,no diff,no diff,no diff +v25,0,0.0,no diff,no diff,no diff +v26,100000,-378605.36,56063,753678.58,375073.22 +v27,6590,16327.24,37042,0.0,16327.24 +v28,100000,-137185.06,58416,283215.75,146030.69 diff --git a/taxcalc/validation/taxsim32/c18-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/c18-taxdiffs-expect.csv new file mode 100644 index 000000000..2a3cf0f93 --- /dev/null +++ b/taxcalc/validation/taxsim32/c18-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,100000,-132150.84999999998,90016,150067.43,17916.58 +siitax,0,0.0,no diff,no diff,no diff +fica,67746,-0.2900000000008731,573,18578.54,18578.25 +frate,46631,56.85,12070,36.74,93.59 +srate,0,0.0,no diff,no diff,no diff +ficar,0,0.0,no diff,no diff,no diff 
+v10,99919,-350001.9800000002,69402,1546224.37,1196222.39 +v11,0,0.0,no diff,no diff,no diff +v12,1,-11650.0,9032,27200.0,15550.0 +v13,32064,-26600.0,12,26600.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,1414,26000.0,661,0.0,26000.0 +v18,100000,-393130.3,81574,671568.48,278438.18 +v19,100000,-133445.18,81574,187859.34,54414.16 +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,9041,8500.0,4303,0.0,8500.0 +v23,34,3799.6,73248,0.0,3799.6 +v24,16,-600.19,9032,600.19,0.0 +v25,0,0.0,no diff,no diff,no diff +v26,100000,-355450.88999999996,17060,570001.09,214550.2 +v27,3942,10623.65,14603,0.0,10623.65 +v28,100000,-131575.16999999998,81574,185989.34,54414.17 diff --git a/taxcalc/validation/taxsim32/c19-taxdiffs-expect.csv b/taxcalc/validation/taxsim32/c19-taxdiffs-expect.csv new file mode 100644 index 000000000..9a943e614 --- /dev/null +++ b/taxcalc/validation/taxsim32/c19-taxdiffs-expect.csv @@ -0,0 +1,26 @@ +,# of differing records,max_diff,max_diff_index,max_diff_taxsim_val,max_diff_taxcalc_val +fiitax,100000,-141609.81999999998,71749,164446.08,22836.26 +siitax,0,0.0,no diff,no diff,no diff +fica,67822,-0.3000000000029104,48837,38263.66,38263.36 +frate,48751,-75.15,69110,60.15,-15.0 +srate,0,0.0,no diff,no diff,no diff +ficar,0,0.0,no diff,no diff,no diff +v10,99997,-380001.98,43179,1390514.98,1010513.0 +v11,0,0.0,no diff,no diff,no diff +v12,11,-15775.25,19813,31450.0,15674.75 +v13,32257,-27000.0,9,27000.0,0.0 +v14,0,0.0,no diff,no diff,no diff +v15,0,0.0,no diff,no diff,no diff +v16,0,0.0,no diff,no diff,no diff +v17,1834,27000.0,595,0.0,27000.0 +v18,100000,-411471.92000000004,24420,683044.92,271573.0 +v19,100000,-142118.92,57949,308485.83,166366.91 +v20,0,0.0,no diff,no diff,no diff +v21,0,0.0,no diff,no diff,no diff +v22,10166,8500.0,3265,0.0,8500.0 +v23,78,4640.22,36893,0.0,4640.22 +v24,12,-820.96,30022,1200.0,379.04 +v25,0,0.0,no diff,no diff,no diff 
+v26,100000,-376000.93,71828,506700.93,130700.0 +v27,4416,10468.79,27286,0.0,10468.79 +v28,100000,-138202.08,75423,290379.91,152177.83 diff --git a/taxcalc/validation/taxsim32/input_setup.py b/taxcalc/validation/taxsim32/input_setup.py new file mode 100644 index 000000000..181788bee --- /dev/null +++ b/taxcalc/validation/taxsim32/input_setup.py @@ -0,0 +1,62 @@ +""" +Generates TAXSIM-32 `.in` input files, downloads `.in.out-taxsim` output files, +prepares files for Tax Calculator and zips them +""" +import pandas as pd +import os +import glob +from zipfile import ZipFile + +# requires curl +def get_inputs(): + """ + Runs taxsim_input.py for all combinations of year and assumption sets + """ + letters = ["a", "b", "c"] + years = ["2018", "2019"] + + name_list = [str(y + " " + x) for x in letters for y in years] + + for name in name_list: + command = str("python taxsim_input.py " + name) + os.system(command) + + +def get_ftp_output(): + """ + Uses `curl` to upload assumption set input files + and save taxsim-32 output files + """ + letters = ["a", "b", "c"] + years = ["18", "19"] + file_list = [str(x + y + ".in") for x in letters for y in years] + + for f in file_list: + file_out = f + ".out-taxsim" + os.system(f"curl -u taxsim:02138 -T {f} ftp://taxsimftp.nber.org/tmp/userid") + c_out = str( + "curl -u taxsim:02138 " + + "ftp://taxsimftp.nber.org/tmp/userid.txm32 -o " + + file_out + ) + os.system(c_out) + + +def change_delim(): + for file in glob.glob("*.in.out-taxsim"): + # Read in the file + with open(file, "r") as fin: + filedata = fin.read() + + # Replace the target string + filedata = filedata.replace(",", " ") + + # Write the file out again + with open(file, "w") as fout: + fout.write(filedata) + + +def main(): + get_inputs() + get_ftp_output() + change_delim() diff --git a/taxcalc/validation/taxsim32/main_comparison.py b/taxcalc/validation/taxsim32/main_comparison.py new file mode 100644 index 000000000..db0584a66 --- /dev/null +++ 
b/taxcalc/validation/taxsim32/main_comparison.py @@ -0,0 +1,87 @@ +# DESCRIPTIONS of variable outputs can be found on the TAXSIM-32 website near +# the bottom of the page +# URL: https://users.nber.org/~taxsim/taxsim32/ + +import sys +import os +import pandas as pd + + +def main(assump_set, year): + + # (1) generate TAXSIM-32-formatted output using Tax-Calculator tc CLI + os.system(f"python taxcalc.py {assump_set}{year}.in") + + # (2) generate tax differences + taxsim_df = pd.read_csv( + f"{assump_set}{year}.in.out-taxsim", + sep=" ", + skipinitialspace=True, + index_col=False, + ) + taxsim_df = taxsim_df.iloc[:, 0:28] + taxcalc_df = pd.read_csv( + f"{assump_set}{year}.in.out-taxcalc", + sep=" ", + skipinitialspace=True, + index_col=False, + header=None, + ) + + taxcalc_df.columns = taxsim_df.columns # rename taxcalc output columns + + diff_dict = { + "# of differing records": [], + "max_diff": [], + "max_diff_index": [], + "max_diff_taxsim_val": [], + "max_diff_taxcalc_val": [], + } + + for col in taxsim_df.columns[3:]: + + df_diff = pd.DataFrame({"a": taxsim_df[col], "b": taxcalc_df[col]}) + df_diff_recs = df_diff[df_diff["a"] != df_diff["b"]] + diff_dict["# of differing records"].append(df_diff_recs.shape[0]) + + ind, max_val = max( + enumerate( + abs(x - y) + for x, y in zip(taxcalc_df.loc[:, col], taxsim_df.loc[:, col]) + ), + key=lambda x: x[1], + ) + + diff_dict["max_diff"].append(taxcalc_df.loc[ind, col] - taxsim_df.loc[ind, col]) + if max_val != 0: + diff_dict["max_diff_index"].append(ind) + diff_dict["max_diff_taxsim_val"].append(taxsim_df.loc[ind, col]) + diff_dict["max_diff_taxcalc_val"].append(taxcalc_df.loc[ind, col]) + else: + diff_dict["max_diff_index"].append("no diff") + diff_dict["max_diff_taxsim_val"].append("no diff") + diff_dict["max_diff_taxcalc_val"].append("no diff") + + actual_df = pd.DataFrame(diff_dict, index=taxsim_df.columns[3:]) + print(f"Difference in dataframes for assumption set {assump_set} in year {year}") + print(actual_df) + + 
# (3) check for difference between LYY.taxdiffs-actual and LYY.taxdiffs-expect + if os.path.isfile(f"{assump_set}{year}-taxdiffs-expect.csv"): + expect_df = pd.read_csv(f"{assump_set}{year}-taxdiffs-expect.csv", index_col=0) + + print(actual_df.eq(expect_df)) + + print( + "Above, True values mean the element is the same between the ACTUAL and EXPECT dataframes. " + + "(EXPECT files are used for debugging purposes.)" + ) + else: + print("This EXPECT file doesn't exist.") + + # (4) Write the created df to *.taxdiffs-actual + actual_df.to_csv(f"{assump_set}{year}-taxdiffs-actual.csv") + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/taxcalc/validation/taxsim32/old expect files/a18.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/a18.taxdiffs-expect new file mode 100644 index 000000000..9795aff28 --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/a18.taxdiffs-expect @@ -0,0 +1,19 @@ +TAXDIFF:ovar= 6 no-diffs +TAXDIFF:ovar= 7 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 9 124 0 0.90 [173] +TAXDIFF:ovar= 10 no-diffs +TAXDIFF:ovar= 11 no-diffs +TAXDIFF:ovar= 12 no-diffs +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar= 17 no-diffs +TAXDIFF:ovar= 18 no-diffs +TAXDIFF:ovar= 22 no-diffs +TAXDIFF:ovar= 23 no-diffs +TAXDIFF:ovar= 24 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 25 46 46 0.01 [12136] +TAXDIFF:ovar= 26 no-diffs +TAXDIFF:ovar= 27 no-diffs +TAXDIFF:ovar= 28 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 4 49 49 -0.01 [12136] diff --git a/taxcalc/validation/taxsim32/old expect files/a19.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/a19.taxdiffs-expect new file mode 100644 index 000000000..4e23db0a8 --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/a19.taxdiffs-expect @@ -0,0 +1,19 @@ +TAXDIFF:ovar= 6 no-diffs +TAXDIFF:ovar= 7 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 9 119 0 0.90 [2227] +TAXDIFF:ovar= 10 no-diffs 
+TAXDIFF:ovar= 11 no-diffs +TAXDIFF:ovar= 12 no-diffs +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar= 17 no-diffs +TAXDIFF:ovar= 18 no-diffs +TAXDIFF:ovar= 22 no-diffs +TAXDIFF:ovar= 23 no-diffs +TAXDIFF:ovar= 24 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 25 71 71 0.01 [4165] +TAXDIFF:ovar= 26 no-diffs +TAXDIFF:ovar= 27 no-diffs +TAXDIFF:ovar= 28 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 4 71 71 -0.01 [12333] diff --git a/taxcalc/validation/taxsim32/old expect files/b18.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/b18.taxdiffs-expect new file mode 100644 index 000000000..4ef8b5f32 --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/b18.taxdiffs-expect @@ -0,0 +1,19 @@ +TAXDIFF:ovar= 6 no-diffs +TAXDIFF:ovar= 7 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 9 124 0 0.90 [173] +TAXDIFF:ovar= 10 no-diffs +TAXDIFF:ovar= 11 no-diffs +TAXDIFF:ovar= 12 no-diffs +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar= 17 no-diffs +TAXDIFF:ovar= 18 no-diffs +TAXDIFF:ovar= 22 no-diffs +TAXDIFF:ovar= 23 no-diffs +TAXDIFF:ovar= 24 no-diffs +TAXDIFF:ovar= 25 no-diffs +TAXDIFF:ovar= 26 no-diffs +TAXDIFF:ovar= 27 no-diffs +TAXDIFF:ovar= 28 no-diffs +TAXDIFF:ovar= 4 no-diffs diff --git a/taxcalc/validation/taxsim32/old expect files/b19.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/b19.taxdiffs-expect new file mode 100644 index 000000000..e3b795b24 --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/b19.taxdiffs-expect @@ -0,0 +1,29 @@ +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 6 67719 67719 -0.30 [21817] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 7 48029 3 -64.44 [82574] + #big_vardiffs_with_big_inctax_diff= 48025 +TAXDIFF:ovar= 9 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 10 99999 4 -380001.98 [99412] + #big_vardiffs_with_big_inctax_diff= 99992 +TAXDIFF:ovar= 11 no-diffs 
+TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 12 13 0 -24155.15 [52780] + #big_vardiffs_with_big_inctax_diff= 13 +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar= 17 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 18 100000 1 -411751.91 [34866] + #big_vardiffs_with_big_inctax_diff= 99996 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 22 10340 5 8500.00 [2257] + #big_vardiffs_with_big_inctax_diff= 10335 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 23 28 0 3397.83 [82574] + #big_vardiffs_with_big_inctax_diff= 28 +TAXDIFF:ovar= 24 no-diffs +TAXDIFF:ovar= 25 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 26 100000 1 -378605.36 [56064] + #big_vardiffs_with_big_inctax_diff= 99996 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 27 6590 1 16327.24 [37043] + #big_vardiffs_with_big_inctax_diff= 6589 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 28 100000 1 -137185.06 [58417] + #big_vardiffs_with_big_inctax_diff= 99996 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 4 100000 3 -138933.06 [58417] + #big_inctax_diffs= 99997 diff --git a/taxcalc/validation/taxsim32/old expect files/c18.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/c18.taxdiffs-expect new file mode 100644 index 000000000..bc6af4e6c --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/c18.taxdiffs-expect @@ -0,0 +1,19 @@ +TAXDIFF:ovar= 6 no-diffs +TAXDIFF:ovar= 7 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 9 124 0 0.90 [173] +TAXDIFF:ovar= 10 no-diffs +TAXDIFF:ovar= 11 no-diffs +TAXDIFF:ovar= 12 no-diffs +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 17 98 0 50000.00 [9033] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 18 98 0 -30400.00 [11544] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 22 92 0 -4672.50 [21393] +TAXDIFF:ovar= 23 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 24 16 1 -768.37 [92383] +TAXDIFF:ovar= 25 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 
26 93 0 -40000.00 [4343] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 27 5 0 -3120.00 [50817] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 28 98 0 -4672.50 [21393] +TAXDIFF:ovar= 4 no-diffs diff --git a/taxcalc/validation/taxsim32/old expect files/c19.taxdiffs-expect b/taxcalc/validation/taxsim32/old expect files/c19.taxdiffs-expect new file mode 100644 index 000000000..ba1ed638f --- /dev/null +++ b/taxcalc/validation/taxsim32/old expect files/c19.taxdiffs-expect @@ -0,0 +1,31 @@ +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 6 67822 67822 -0.30 [48838] +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 7 48751 3 -75.15 [69111] + #big_vardiffs_with_big_inctax_diff= 48748 +TAXDIFF:ovar= 9 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 10 99997 6 -380001.98 [43180] + #big_vardiffs_with_big_inctax_diff= 99990 +TAXDIFF:ovar= 11 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 12 11 0 -15775.25 [19814] + #big_vardiffs_with_big_inctax_diff= 11 +TAXDIFF:ovar= 14 no-diffs +TAXDIFF:ovar= 15 no-diffs +TAXDIFF:ovar= 16 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 17 1834 0 27000.00 [596] + #big_vardiffs_with_big_inctax_diff= 1834 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 18 100000 1 -411471.92 [24421] + #big_vardiffs_with_big_inctax_diff= 99998 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 22 10166 5 8500.00 [3266] + #big_vardiffs_with_big_inctax_diff= 10161 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 23 78 0 4640.22 [36894] + #big_vardiffs_with_big_inctax_diff= 78 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 24 12 4 -820.96 [30023] + #big_vardiffs_with_big_inctax_diff= 8 +TAXDIFF:ovar= 25 no-diffs +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 26 100000 1 -376000.93 [71829] + #big_vardiffs_with_big_inctax_diff= 99998 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 27 4416 2 10468.79 [27287] + #big_vardiffs_with_big_inctax_diff= 4414 +TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 28 100000 1 -138202.08 [75424] + #big_vardiffs_with_big_inctax_diff= 99998 
+TAXDIFF:ovar,#diffs,#smdiffs,maxdiff[id]= 4 100000 1 -141609.82 [71750] + #big_inctax_diffs= 99999 diff --git a/taxcalc/validation/taxsim32/prepare_taxcalc_input.py b/taxcalc/validation/taxsim32/prepare_taxcalc_input.py new file mode 100644 index 000000000..ace26cf0c --- /dev/null +++ b/taxcalc/validation/taxsim32/prepare_taxcalc_input.py @@ -0,0 +1,127 @@ +""" +Translates TAXSIM-32 input file to Tax-Calculator tc input file. +""" +# CODING-STYLE CHECKS: +# pycodestyle prepare_tc_input.py +# pylint --disable=locally-disabled prepare_tc_input.py + +import argparse +import os +import sys +import numpy as np +import pandas as pd + + +def main(): + """ + High-level logic. + """ + # parse command-line arguments: + usage_str = 'python prepare_tc_input.py INPUT OUTPUT [--help]' + parser = argparse.ArgumentParser( + prog='', + usage=usage_str, + description=('Translates TAXSIM-32 input file into a Tax-Calculator ' + 'CSV-formatted tc input file. ' + 'Any pre-existing OUTPUT file contents are overwritten. 
' + 'For details on Internet TAXSIM version 32 INPUT ' + 'format, go to ' + 'https://users.nber.org/~taxsim/taxsim32/')) + parser.add_argument('INPUT', nargs='?', default='', + help=('INPUT is name of file that contains ' + 'TAXSIM-32 input.')) + parser.add_argument('OUTPUT', nargs='?', default='', + help=('OUTPUT is name of file that will contain ' + 'CSV-formatted Tax-Calculator tc input.')) + args = parser.parse_args() + # check INPUT filename + if args.INPUT == '': + sys.stderr.write('ERROR: must specify INPUT file name\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + if not os.path.isfile(args.INPUT): + emsg = 'INPUT file named {} does not exist'.format(args.INPUT) + sys.stderr.write('ERROR: {}\n'.format(emsg)) + return 1 + # check OUTPUT filename + if args.OUTPUT == '': + sys.stderr.write('ERROR: must specify OUTPUT file name\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + if os.path.isfile(args.OUTPUT): + os.remove(args.OUTPUT) + # read TAXSIM-32 INPUT file into a pandas DataFrame + ivar = pd.read_csv(args.INPUT, delim_whitespace=True, + header=0, index_col=False, names=range(1, 33)) + # Drop 'idtl' – used to generate detailed output + ivar.drop(columns=32) + # translate INPUT variables into OUTPUT variables + invar = translate(ivar) + # write OUTPUT file containing Tax-Calculator input variables + invar.to_csv(args.OUTPUT, index=False) + # return no-error exit code + return 0 +# end of main function code + + +def translate(ivar): + """ + Translate TAXSIM-32 input variables into Tax-Calculator input variables. + Both ivar and returned invar are pandas DataFrame objects. 
+ """ + assert isinstance(ivar, pd.DataFrame) + invar = pd.DataFrame() + invar['RECID'] = ivar.loc[:, 1] + invar['FLPDYR'] = ivar.loc[:, 2] + # no Tax-Calculator use of TAXSIM variable 3, state code + mstat = ivar.loc[:, 4] + assert np.all(np.logical_or(mstat == 1, mstat == 2)) + invar['age_head'] = ivar.loc[:, 5] + invar['age_spouse'] = ivar.loc[:, 6] + num_deps = ivar.loc[:, 7] + mars = np.where(mstat == 1, np.where(num_deps > 0, 4, 1), 2) + assert np.all(np.logical_or(mars == 1, + np.logical_or(mars == 2, mars == 4))) + invar['MARS'] = mars + invar['f2441'] = ivar.loc[:, 8] + invar['n24'] = ivar.loc[:, 9] + num_eitc_qualified_kids = ivar.loc[:, 10] + invar['EIC'] = np.minimum(num_eitc_qualified_kids, 3) + num_taxpayers = np.where(mars == 2, 2, 1) + invar['XTOT'] = num_taxpayers + num_deps + invar['e00200p'] = ivar.loc[:, 11] + invar['e00200s'] = ivar.loc[:, 12] + invar['e00200'] = invar['e00200p'] + invar['e00200s'] + invar['e00650'] = ivar.loc[:, 13] + invar['e00600'] = invar['e00650'] + invar['e00300'] = ivar.loc[:, 14] + invar['p22250'] = ivar.loc[:, 15] + invar['p23250'] = ivar.loc[:, 16] + invar['e02000'] = ivar.loc[:, 17] + invar['e00800'] = ivar.loc[:, 18] + invar['e01700'] = ivar.loc[:, 19] + invar['e01500'] = invar['e01700'] + invar['e02400'] = ivar.loc[:, 20] + invar['e02300'] = ivar.loc[:, 21] + # no Tax-Calculator use of TAXSIM variable 22, non-taxable transfers + # no Tax-Calculator use of TAXSIM variable 23, rent paid + invar['e18500'] = ivar.loc[:, 24] + invar['e18400'] = ivar.loc[:, 25] + invar['e32800'] = ivar.loc[:, 26] + invar['e19200'] = ivar.loc[:, 27] + invar['e26270'] = ivar.loc[:, 28] + invar['e00900p'] = ivar.loc[:, 29] + invar['e00900s'] = ivar.loc[:, 31] + invar['e00900'] = invar['e00900p'] + invar['e00900s'] + + pprofinc = ivar.loc[:, 30] + sprofinc = ivar.loc[:, 32] + + invar['PT_SSTB_income'] = np.where(pprofinc + sprofinc > 0, 1, 0) + invar['PT_SSTB_income'] = np.where(invar['e26270'] > 0, 1, invar['PT_SSTB_income']) + + return 
invar + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/taxcalc/validation/taxsim32/process_taxcalc_output.py b/taxcalc/validation/taxsim32/process_taxcalc_output.py new file mode 100644 index 000000000..ac8aa3b0c --- /dev/null +++ b/taxcalc/validation/taxsim32/process_taxcalc_output.py @@ -0,0 +1,190 @@ +""" +Translates tc --dump output file into file formatted like TAXSIM-32 output. +""" +# CODING-STYLE CHECKS: +# pycodestyle process_tc_output.py +# pylint --disable=locally-disabled process_tc_output.py + +import argparse +import os +import sys +import pandas as pd + + +def main(): + """ + High-level logic. + """ + # parse command-line arguments: + usage_str = 'python process_tc_output.py INPUT OUTPUT [--help]' + parser = argparse.ArgumentParser( + prog='', + usage=usage_str, + description=('Translates tc --dump output file into an output file ' + 'that is formatted like the first 28 variables in ' + 'TAXSIM-32 output. ' + 'The INPUT file contains the output generated by ' + 'running tc with the --dump option. ' + 'Any pre-existing OUTPUT file contents will be ' + 'overwritten. 
For details on Internet TAXSIM ' + 'version 32 OUTPUT format, go to ' + 'https://users.nber.org/~taxsim/taxsim32/')) + parser.add_argument('INPUT', nargs='?', default='', + help=('INPUT is name of file that contains ' + 'tc --dump output.')) + parser.add_argument('OUTPUT', nargs='?', default='', + help=('OUTPUT is name of file that will contain ' + 'output formatted like TAXSIM-32 output.')) + args = parser.parse_args() + # check INPUT filename + if args.INPUT == '': + sys.stderr.write('ERROR: must specify INPUT file name\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + if not os.path.isfile(args.INPUT): + emsg = 'INPUT file named {} does not exist'.format(args.INPUT) + sys.stderr.write('ERROR: {}\n'.format(emsg)) + return 1 + # check OUTPUT filename + if args.OUTPUT == '': + sys.stderr.write('ERROR: must specify OUTPUT file name\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + if os.path.isfile(args.OUTPUT): + os.remove(args.OUTPUT) + # read INPUT file into a pandas DataFrame + tcvar = pd.read_csv(args.INPUT) + # write OUTPUT file using the pandas DataFrame + write_taxsim_formatted_output(args.OUTPUT, tcvar) + # return no-error exit code + return 0 +# end of main function code + + +def write_taxsim_formatted_output(filename, tcvar): + """ + Write contents of tcvar pandas DataFrame to filename using + Internet-TAXSIM 9.3 output format containing 28 variables. + """ + assert isinstance(tcvar, pd.DataFrame) + with open(filename, 'w') as output_file: + for idx in range(0, len(tcvar.index)): + odict4idx = extract_output(tcvar.xs(idx)) + outline = construct_output_line(odict4idx) + output_file.write(outline) + + +def extract_output(out): + """ + Extracts output for one filing unit in out and + returns extracted output as a dictionary. 
+ + Parameters + ---------- + out: pandas DataFrame row containing tc --dump output for one filing unit + + Returns + ------- + ovar: dictionary of output variables indexed from 1 to 28 + + Notes + ----- + The value of each output variable is stored in the ovar dictionary, + which is indexed as Internet-TAXSIM output variables are (where the + index begins with one). + """ + ovar = {} + ovar[1] = int(out['RECID']) # id for tax filing unit + ovar[2] = int(out['FLPDYR']) # year taxes are calculated + ovar[3] = 0 # state code is always zero + ovar[4] = out['iitax'] # federal income tax liability + ovar[5] = 0.0 # no state income tax calculation + ovar[6] = out['payrolltax'] # ee+er for OASDI+HI + ovar[7] = out['mtr_inctax'] # marginal federal income tax rate as percent + ovar[8] = 0.0 # no state income tax calculation + ovar[9] = out['mtr_paytax'] # marginal payroll tax rate as percent + ovar[10] = out['c00100'] # federal AGI + ovar[11] = out['e02300'] # UI benefits in AGI + ovar[12] = out['c02500'] # OASDI benefits in AGI + ovar[13] = 0.0 # always set zero-bracket amount to zero + pre_phase_out_pe = out['pre_c04600'] + post_phase_out_pe = out['c04600'] + phased_out_pe = pre_phase_out_pe - post_phase_out_pe + ovar[14] = post_phase_out_pe # post-phase-out personal exemption + ovar[15] = phased_out_pe # personal exemption that is phased out + # ovar[16] can be positive for non-itemizer: + ovar[16] = out['c21040'] # phased out itemized deduction + # ovar[17] is zero for non-itemizer: + ovar[17] = out['c04470'] # post-phase-out item deduction + ovar[18] = out['c04800'] # federal regular taxable income + # ovar[19] is regular tax on taxable income + ovar[19] = out['taxbc'] + ovar[20] = 0.0 # always set exemption surtax to zero + ovar[21] = 0.0 # always set general tax credit to zero + ovar[22] = out['c07220'] + out['odc'] # non-refundable child+odep credit + ovar[23] = out['c11070'] # refundable additional child tax credit + ovar[24] = out['c07180'] # child care credit + 
ovar[25] = out['eitc'] # federal EITC + ovar[26] = out['c62100'] # federal AMT taxable income + amt_liability = out['c09600'] # federal AMT liability + ovar[27] = amt_liability + # ovar[28] is federal income tax before credits; the Tax-Calculator + # out['c05800'] is this concept but includes AMT liability + # while Internet-TAXSIM ovar[28] explicitly excludes AMT liability, so + # we have the following: + ovar[28] = out['c05800'] - amt_liability + return ovar + + +OVAR_FMT = {1: '{:d}.', # add decimal point as in TAXSIM-32 output + 2: ' {:d}', + 3: ' {:d}', + 4: ' {:.2f}', + 5: ' {:.2f}', + 6: ' {:.2f}', + 7: ' {:.2f}', + 8: ' {:.2f}', + 9: ' {:.2f}', + 10: ' {:.2f}', + 11: ' {:.2f}', + 12: ' {:.2f}', + 13: ' {:.2f}', + 14: ' {:.2f}', + 15: ' {:.2f}', + 16: ' {:.2f}', + 17: ' {:.2f}', + 18: ' {:.2f}', + 19: ' {:.2f}', + 20: ' {:.2f}', + 21: ' {:.2f}', + 22: ' {:.2f}', + 23: ' {:.2f}', + 24: ' {:.2f}', + 25: ' {:.2f}', + 26: ' {:.2f}', + 27: ' {:.2f}', + 28: ' {:.2f}'} + + +def construct_output_line(odict): + """ + Construct an output line from a single-filing-unit odict dictionary. + + Parameters + ---------- + odict: dictionary of output variables indexed from 1 to len(odict). 
+ + Returns + ------- + output_line: string + + """ + outline = '' + for vnum in range(1, len(odict) + 1): + outline += OVAR_FMT[vnum].format(odict[vnum]) + outline += '\n' + return outline + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/taxcalc/validation/taxsim32/taxcalc.py b/taxcalc/validation/taxsim32/taxcalc.py new file mode 100644 index 000000000..d82a31600 --- /dev/null +++ b/taxcalc/validation/taxsim32/taxcalc.py @@ -0,0 +1,88 @@ +import argparse +import os +import sys +import shutil + + +usage_str = 'python taxcalc.py LYY_FILENAME [--save] [--help]' + +parser = argparse.ArgumentParser( + prog='', + usage=usage_str, + description=('Call Tax-Calculator tc CLI reading input data from ' + 'specified TAXSIM-32 input file and writing output ' + 'in TAXSIM-32 output format to a file with the ' + 'specified input file name plus the .out-taxcalc ' + 'extension.')) + +parser.add_argument('LYY_FILENAME', + help=('L is a letter that is valid taxsim_input.py L ' + 'input and YY is valid taxsim_input.py YEAR ,' + '(20YY) input.'), + default='') + +parser.add_argument('--save', + help=('Save intermediate files.'), + default=False, + action="store_true") + +args = parser.parse_args() + +CURR_PATH = os.path.abspath(os.path.dirname(__file__)) + +taxsim_in = args.LYY_FILENAME +save = args.save + +if os.path.exists(os.path.join(CURR_PATH, taxsim_in)) is False: + sys.exit("ERROR: LYY_FILENAME is not a valid path") + +taxsim_in_csv = taxsim_in + ".csv" +taxsim_out_csv = taxsim_in + ".out.csv" +L = taxsim_in[0] +YY = taxsim_in[1:3] + + +# prepare Tax-Calculator input file +def prep_tc_input(): + command = "python prepare_taxcalc_input.py " + taxsim_in + \ + " " + taxsim_in_csv + os.system(command) + + +# calculate Tax-Calculator output +def calc_tc_output(): + year = '20' + YY + command = "tc " + taxsim_in_csv + " " + year + \ + " --reform taxsim_emulation.json --dump" + os.system(command) + + file_temp = taxsim_in + "-" + YY + "-#-taxsim_emulation-#.csv" + 
file_temp_path = os.path.join(CURR_PATH, file_temp) + file_out_path = os.path.join(CURR_PATH, taxsim_out_csv) + shutil.move(file_temp_path, file_out_path) + + file_temp2 = taxsim_in + "-" + YY + "-#-taxsim_emulation-#-doc.text" + file_temp2_path = os.path.join(CURR_PATH, file_temp2) + os.remove(file_temp2) + + +# convert Tax-Calculator output to TAXSIM-32 format +def convert_to_taxsim(): + file_out = taxsim_in + ".out-taxcalc" + command = "python process_taxcalc_output.py " + taxsim_out_csv + \ + " " + file_out + os.system(command) + + +# delete intermediate input and output files if not saving +def del_int_files(): + if save is False: + os.remove(taxsim_in_csv) + os.remove(taxsim_out_csv) + + +if __name__ == "__main__": + prep_tc_input() + calc_tc_output() + convert_to_taxsim() + del_int_files() diff --git a/taxcalc/validation/taxsim32/taxsim_emulation.json b/taxcalc/validation/taxsim32/taxsim_emulation.json new file mode 100644 index 000000000..b9c271f0b --- /dev/null +++ b/taxcalc/validation/taxsim32/taxsim_emulation.json @@ -0,0 +1,33 @@ +// JSON "reform" file that specifies changes in current-law policy that +// are required to make Tax-Calculator work like TAXSIM-27. +// +// (1) AMT_child_em_c_age = 24 (rather than 18) +// Whether to set this parameter to 18 or 24 is arbitary because +// neither model has enough information to apply correctly the child +// AMT exemption rules. Information on full-time student status and +// whether taxpayers provide more than half of their support are required +// to apply the rules correctly. Tax-Calculator makes the arbitrary +// assumption that only those under 18 are required to use the child +// AMT exemption rules, while TAXSIM-27 makes the arbitrary assumption +// that all those under 24 are required to use the child AMT exemption. +// (This change was introduced for assumption set b and higher.) 
+// +// (2) EITC_excess_InvestIncome_rt = 1.0 (rather than 9e99) +// The rate at which the EITC amount is reduced per dollar of investment +// income in excess of the EITC investment income ceiling is infinity under +// current law (that is, any investment income in excess of the ceiling +// causes EITC ineligibility). However, TAXSIM-27 assumes it is one, so +// that the EITC amount is reduced a dollar for each dollar of excess +// investment income. This difference in the parameter value leads to +// many EITC differences in the randomly-generated validation samples, +// with some of the differences being in the thousands of dollars. This +// non-current-law assumption in TAXSIM-27 is presumably made to reduce +// the magnitude of model-calculated marginal tax rates with respect to +// investment income in cases where a marginal increase in investment +// income takes a filing unit above the ceiling. + +{ + "AMT_child_em_c_age": {"2013": 24}, + + "EITC_excess_InvestIncome_rt": {"2013": 1.0} +} diff --git a/taxcalc/validation/taxsim32/taxsim_input.py b/taxcalc/validation/taxsim32/taxsim_input.py new file mode 100644 index 000000000..35d8eaca7 --- /dev/null +++ b/taxcalc/validation/taxsim32/taxsim_input.py @@ -0,0 +1,285 @@ +""" +Generates random sample of tax filing units with attributes such that +generated file can be directly uploaded to Internet TAXSIM version 32. +""" +# CODING-STYLE CHECKS: +# pycodestyle taxsim_input.py +# pylint --disable=locally-disabled taxsim_input.py + +import argparse +import sys +import numpy as np +import pandas as pd + + +VALID_LETTERS = ['a', 'b', 'c'] + + +def main(): + """ + High-level logic. 
+ """ + # parse command-line arguments: + usage_str = 'python taxsim_input.py YEAR LETTER [OFFSET] [--help]' + parser = argparse.ArgumentParser( + prog='', + usage=usage_str, + description=('Generates random sample of tax filing units with ' + 'attributes and format such that the file can be ' + 'directly uploaded to Internet TAXSIM version 32. ' + 'For details on Internet TAXSIM version 32 INPUT ' + 'format, go to ' + 'https://users.nber.org/~taxsim/taxsim32/')) + parser.add_argument('YEAR', nargs='?', type=int, default=0, + help=('YEAR specifies calendar year assumed in ' + 'generated input data.')) + parser.add_argument('LETTER', nargs='?', default='', + help=('LETTER specifies assumption set ' + 'used to generate input data.')) + parser.add_argument('OFFSET', nargs='?', type=int, default=0, + help=('optional OFFSET alters the ' + 'random-number seed used to generate ' + 'sample of filing units. Default OFFSET ' + 'value is zero.')) + args = parser.parse_args() + # check YEAR value + if args.YEAR < 2013 or args.YEAR > 2023: + sys.stderr.write('ERROR: YEAR not in [2013,2023] range\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + # check LETTER value + if args.LETTER == '': + sys.stderr.write('ERROR: must specify LETTER\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + if args.LETTER not in VALID_LETTERS: + sys.stderr.write('ERROR: LETTER not in VALID_LETTERS, where\n') + sys.stderr.write(' VALID_LETTERS={}\n'.format(VALID_LETTERS)) + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + # check OFFSET value + if args.OFFSET < 0 or args.OFFSET > 999: + sys.stderr.write('ERROR: OFFSET not in [0,999] range\n') + sys.stderr.write('USAGE: {}\n'.format(usage_str)) + return 1 + # get dictionary containing assumption set + assump = assumption_set(args.YEAR, args.LETTER) + # generate sample as pandas DataFrame + sample = sample_dataframe(assump, args.YEAR, args.OFFSET) + # write sample to input file + header_col = ['taxsimid', 'year', 
'state', 'mstat', 'page', 'sage', + 'depx', 'dep13', 'dep17', 'dep18', 'pwages', 'swages', + 'dividends', 'intrec', 'stcg', 'ltcg', 'otherprop', + 'nonprop', 'pensions', 'gssi', 'ui', 'transfers', + 'rentpaid', 'proptax', 'otheritem', 'childcare', + 'mortgage', 'scorp', 'pbusinc', 'pprofinc', 'sbusinc', + 'sprofinc', 'idtl' + ] + filename = '{}{}.in'.format(args.LETTER, args.YEAR % 100) + sample.to_csv(filename, sep=' ', header=header_col, index=False) + # return no-error exit code + return 0 +# end of main function code + + +def assumption_set(year, letter): + """ + Return dictionary containing assumption parameters. + """ + adict = dict() + if letter in VALID_LETTERS: # <=========================================== + # basic assumption parameters for all ?YY.in samples: + adict['sample_size'] = 100000 + adict['year'] = year # TAXSIM ivar 2 + # demographic attributes: + adict['joint_frac'] = 0.60 # fraction of sample with joint MARS + adict['min_age'] = 17 # TAXSIM ivar 5 (primary taxpayer age) + adict['max_age'] = 77 # TAXSIM ivar 5 (primary taxpayer age) + adict['min_age_diff'] = -10 # min spouse age difference + adict['max_age_diff'] = 10 # max spouse age difference + adict['max_depx'] = 5 # TAXSIM ivar 7 (total number of dependents) + adict['max_dep13'] = 4 # TAXSIM ivar 8 (Child/Dependent Care Credit) + adict['max_dep17'] = 4 # TAXSIM ivar 9 (Child Credit) + adict['max_dep18'] = 4 # TAXSIM ivar 10 (EITC) + # labor income: + adict['max_pwages_yng'] = 500 # TAXSIM ivar 11 + adict['max_pwages_old'] = 30 # TAXSIM ivar 11 (65+ ==> old) + adict['max_swages_yng'] = 500 # TAXSIM ivar 12 + adict['max_swages_old'] = 30 # TAXSIM ivar 12 (65+ ==> old) + # non-labor income (all zeros): + adict['max_divinc'] = 0 # TAXSIM ivar 13 + adict['max_intinc'] = 0 # TAXSIM ivar 14 + adict['min_stcg'] = 0 # TAXSIM ivar 15 + adict['max_stcg'] = 0 # TAXSIM ivar 15 + adict['min_ltcg'] = 0 # TAXSIM ivar 16 + adict['max_ltcg'] = 0 # TAXSIM ivar 16 + adict['max_other_prop_inc'] = 0 # TAXSIM 
ivar 17 + adict['max_other_nonprop_inc'] = 0 # TAXSIM ivar 18 + adict['max_pnben'] = 0 # TAXSIM ivar 19 + adict['max_ssben'] = 0 # TAXSIM ivar 20 + adict['max_uiben'] = 0 # TAXSIM ivar 21 + # childcare expense amount (all zero): + adict['max_ccexp'] = 0 # TAXSIM ivar 26 + # itemized expense amounts (all zero): + adict['max_ided_proptax'] = 0 # TAXSIM ivar 24 + adict['max_ided_nopref'] = 0 # TAXSIM ivar 25 + adict['max_ided_mortgage'] = 0 # TAXSIM ivar 27 + adict['max_scorp_inc'] = 0 # TAXSIM ivar 28 + adict['max_pbus_inc'] = 0 # TAXSIM ivar 29 + adict['max_pprof_inc'] = 0 # TAXSIM ivar 30 + adict['max_sbus_inc'] = 0 # TAXSIM ivar 31 + adict['max_sprof_inc'] = 0 # TAXSIM ivar 32 + # end if letter in VALID_LETTERS + if letter in ['b', 'c']: # <============================================== + # non-labor income: + adict['max_divinc'] = 20 # TAXSIM ivar 13 + adict['max_intinc'] = 20 # TAXSIM ivar 14 + adict['min_stcg'] = -10 # TAXSIM ivar 15 + adict['max_stcg'] = 10 # TAXSIM ivar 15 + adict['min_ltcg'] = -10 # TAXSIM ivar 16 + adict['max_ltcg'] = 10 # TAXSIM ivar 16 + adict['max_other_prop_inc'] = 30 # TAXSIM ivar 17 + adict['max_other_nonprop_inc'] = 30 # TAXSIM ivar 18 + adict['max_pnben'] = 60 # TAXSIM ivar 19 + adict['max_ssben'] = 60 # TAXSIM ivar 20 + adict['max_uiben'] = 10 # TAXSIM ivar 21 + adict['max_scorp_inc'] = 350 # TAXSIM ivar 28 + adict['max_pbus_inc'] = 350 # TAXSIM ivar 29 + adict['max_pprof_inc'] = 1 # TAXSIM ivar 30 + adict['max_sbus_inc'] = 350 # TAXSIM ivar 31 + adict['max_sprof_inc'] = 1 # TAXSIM ivar 32 + if letter == 'c': # <===================================================== + # childcare expense amount: + adict['max_ccexp'] = 10 # TAXSIM ivar 26 + # itemized expense amounts: + adict['max_ided_proptax'] = 30 # TAXSIM ivar 24 + adict['max_ided_nopref'] = 10 # TAXSIM ivar 25 + adict['max_ided_mortgage'] = 40 # TAXSIM ivar 27 + return adict + + +def sample_dataframe(assump, year, offset): + """ + Construct DataFrame containing sample specified 
by assump and year+offset. + """ + # pylint: disable=too-many-locals + np.random.seed(123456789 + year + offset) + size = assump['sample_size'] + zero = np.zeros(size, dtype=np.int64) + sdict = dict() + # (01) RECID + sdict[1] = range(1, size + 1) + # (02) YEAR + sdict[2] = np.full_like(zero, assump['year'], dtype=np.int64) + # (03) STATE + sdict[3] = zero + # (04) MSTAT + urn = np.random.random(size) + mstat = np.where(urn < assump['joint_frac'], 2, 1) + sdict[4] = mstat + # (05) PAGE + sdict[5] = np.random.randint(assump['min_age'], + assump['max_age']+1, + size) + # (06) SAGE + age_diff = np.random.randint(assump['min_age_diff'], + assump['max_age_diff']+1, + size) + sage = sdict[5] + age_diff + sdict[6] = np.where(mstat == 2, np.maximum(sage, assump['min_age']), zero) + # (07-10) DEPX, DEP13, DEP17, DEP18 + depx = np.random.randint(0, assump['max_depx']+1, size) + d18 = np.random.randint(0, assump['max_dep18']+1, size) + dep18 = np.where(d18 <= depx, d18, depx) + d17 = np.random.randint(0, assump['max_dep17']+1, size) + dep17 = np.where(d17 <= dep18, d17, dep18) + d13 = np.random.randint(0, assump['max_dep13']+1, size) + dep13 = np.where(d13 <= dep17, d13, dep17) + sdict[7] = depx + sdict[8] = dep13 + sdict[9] = dep17 + sdict[10] = dep18 + # (11) PWAGES + pwages_yng = np.random.randint(0, assump['max_pwages_yng']+1, size) + pwages_old = np.random.randint(0, assump['max_pwages_old']+1, size) + sdict[11] = np.where(sdict[5] >= 65, pwages_old, pwages_yng) * 1000 + # (12) SWAGES + swages_yng = np.random.randint(0, assump['max_swages_yng']+1, size) + swages_old = np.random.randint(0, assump['max_swages_old']+1, size) + swages = np.where(sdict[6] >= 65, swages_old, swages_yng) * 1000 + sdict[12] = np.where(mstat == 2, swages, zero) + # (13) DIVIDENDS + sdict[13] = np.random.randint(0, assump['max_divinc']+1, size) * 1000 + # (14) INTREC + sdict[14] = np.random.randint(0, assump['max_intinc']+1, size) * 1000 + # (15) STCG + sdict[15] = 
np.random.randint(assump['min_stcg'], + assump['max_stcg']+1, + size) * 1000 + # (16) LTCG + sdict[16] = np.random.randint(assump['min_ltcg'], + assump['max_ltcg']+1, + size) * 1000 + # (17) OTHERPROP + sdict[17] = np.random.randint(0, + assump['max_other_prop_inc']+1, + size) * 1000 + # (18) NONPROP + sdict[18] = np.random.randint(0, + assump['max_other_nonprop_inc']+1, + size) * 1000 + # (19) PENSIONS + sdict[19] = np.random.randint(0, assump['max_pnben']+1, size) * 1000 + # (20) GSSI + sdict[20] = np.random.randint(0, assump['max_ssben']+1, size) * 1000 + # (21) UI + sdict[21] = np.random.randint(0, assump['max_uiben']+1, size) * 1000 + # (22) TRANSFERS (non-taxable in federal income tax) + sdict[22] = zero + # (23) RENTPAID (used only in some state income tax laws) + sdict[23] = zero + # (24) PROPTAX + sdict[24] = np.random.randint(0, + assump['max_ided_proptax']+1, + size) * 1000 + # (25) OTHERITEM + sdict[25] = np.random.randint(0, + assump['max_ided_nopref']+1, + size) * 1000 + # (26) CHILDCARE (TAXSIM-32 EXPECTS ZERO IF NO QUALIFYING CHILDRED) + ccexp = np.random.randint(0, assump['max_ccexp']+1, size) * 1000 + sdict[26] = np.where(dep13 > 0, ccexp, zero) + # (27) MORTGAGE + sdict[27] = np.random.randint(0, + assump['max_ided_mortgage']+1, + size) * 1000 + # (28) S-Corp income, QBI + sdict[28] = np.random.randint(0, + assump['max_scorp_inc']+1, + size) * 1000 + # (29) Primary Taxpayer's QBI + sdict[29] = np.random.randint(0, + assump['max_pbus_inc']+1, + size) * 1000 + # (30) Primary Taxpayer's SSTB + sdict[30] = np.random.randint(0, + assump['max_pprof_inc']+1, + size) + # (31) Spouse's QBI + sqbi = np.random.randint(0, + assump['max_sbus_inc']+1, + size) * 1000 + sdict[31] = np.where(mstat == 2, sqbi, zero) + # (32) Spouse's SSTB + spouse_sstb = np.random.randint(0, + assump['max_sprof_inc']+1, + size) + sdict[32] = np.where(mstat == 2, spouse_sstb, zero) + # (33) IDTL: variable to request intermediate calculations + sdict[33] = 2 + + smpl = 
pd.DataFrame(sdict) + return smpl + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/taxcalc/validation/taxsim32/tests_32.py b/taxcalc/validation/taxsim32/tests_32.py new file mode 100644 index 000000000..cb4d568d8 --- /dev/null +++ b/taxcalc/validation/taxsim32/tests_32.py @@ -0,0 +1,21 @@ +import os +import glob +import input_setup +import main_comparison + +CUR_PATH = os.path.abspath(os.path.dirname(__file__)) + +# setup input files +if not glob.glob(os.path.join(CUR_PATH, '*in.out-taxsim')): + input_setup.main() + +# run taxcalc/taxsim comparison +for assump_set in ('a', 'b', 'c'): + for year in (18, 19): + main_comparison.main(assump_set, year) + +# clean up all files except those retrieved from TAXSIM32 +for file in CUR_PATH: + for file in glob.glob('*.out*') and glob.glob('*.in*'): + if not file.endswith('taxsim'): + os.remove(file) \ No newline at end of file diff --git a/taxcalc/validation/tests.sh b/taxcalc/validation/tests_27.sh similarity index 85% rename from taxcalc/validation/tests.sh rename to taxcalc/validation/tests_27.sh index d84c4ee76..245397f69 100755 --- a/taxcalc/validation/tests.sh +++ b/taxcalc/validation/tests_27.sh @@ -1,3 +1,6 @@ +# taxcalc python package MUST be installed +# Headers from each .out-taxsim file MUST be removed + #!/bin/bash # Executes validation TESTS by calling test.sh scripts in subdirectories. echo "STARTING WITH VALIDATION TESTS : `date`"