-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #349 from andersonfrailey/reports
Automate All The Things
- Loading branch information
Showing
10 changed files
with
1,386 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# TaxData History | ||
|
||
This directory contains PDF files that detail the evolution of the TaxData | ||
outputs as we update the CBO projections and IRS SOI estimates used to create | ||
weights and growth factors as well as the underlying code that produce these | ||
outputs. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,293 @@ | ||
""" | ||
Script used to automatically generate PDF report comparing TaxData outputs | ||
after updates | ||
""" | ||
# flake8: noqa: E501 | ||
import argparse | ||
import pandas as pd | ||
import taxcalc as tc | ||
import altair as alt | ||
from report_utils import (run_calc, distplot, write_page, growth_scatter_plot, | ||
compare_vars, cbo_bar_chart, agg_liability_table) | ||
from pathlib import Path | ||
from datetime import datetime | ||
from collections import defaultdict | ||
from requests_html import HTMLSession | ||
|
||
CUR_PATH = Path(__file__).resolve().parent | ||
STAGE1_PATH = Path(CUR_PATH, "..", "puf_stage1") | ||
CBO_PATH = Path(STAGE1_PATH, "CBO_baseline.csv") | ||
SOI_PATH = Path(STAGE1_PATH, "SOI_estimates.csv") | ||
GROW_FACTORS_PATH = Path(STAGE1_PATH, "growfactors.csv") | ||
META_PATH = Path(CUR_PATH, "..", "tests", "records_metadata.json") | ||
CBO_URL = "https://raw.githubusercontent.com/PSLmodels/taxdata/master/puf_stage1/CBO_baseline.csv" | ||
SOI_URL = "https://raw.githubusercontent.com/PSLmodels/taxdata/master/puf_stage1/SOI_estimates.csv" | ||
META_URL = "https://raw.githubusercontent.com/PSLmodels/taxdata/master/tests/records_metadata.json" | ||
GROW_FACTORS_URL = "https://raw.githubusercontent.com/PSLmodels/taxdata/master/puf_stage1/growfactors.csv" | ||
|
||
PUF_PATH = Path(CUR_PATH, "..", "puf_data", "puf.csv") | ||
PUF_AVAILABLE = PUF_PATH.exists() | ||
|
||
TEMPLATE_PATH = Path(CUR_PATH, "report_template.md") | ||
|
||
CBO_LABELS = { | ||
"GDP": "GDP (Billions)", | ||
"TPY": "Personal Income (Billions)", | ||
"Wages": "Wages and Salaries (Billions)", | ||
"SCHC": "Proprietors Income, Non Farm with IVA & CCAdj (Billions)", | ||
"SCHF": "Proprietors Income, Farm with IVA & CCAdj (Billions)", | ||
"INTS": "Personal Interest Income (Billions)", | ||
"DIVS": "Personal Dividend Income (Billions)", | ||
"RENTS": "Rental Income with CCADJ (Billions)", | ||
"BOOK": "Corporate Profits with IVA & CCADJ (Billions)", | ||
"CPIU": "Consumer Pricing Index, All Urban Consumers (CPI-U) - 1982-84=100", | ||
"CGNS": "Capital Gains Realizations", | ||
"RETS": "IRS Projections of Individual Returns (Millions)", | ||
"SOCSEC": "Scheduled Social Security Benefits", | ||
"CPIM": "CPI Medical Care", | ||
"UCOMP": "Unemployment Compensation (Billions)" | ||
} | ||
|
||
|
||
def report(): | ||
""" | ||
Generate TaxData history report | ||
""" | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument( | ||
"prs", | ||
help=( | ||
"prs is a list of prs that were used for this report. " | ||
"Enter them as a string separated by commas" | ||
), | ||
default="", | ||
type=str | ||
) | ||
parser.add_argument( | ||
"--desc", | ||
help=( | ||
"File path to a text or markdown file with additonal information " | ||
"that will appear at the begining of the report" | ||
), | ||
default="", | ||
type=str | ||
) | ||
args = parser.parse_args() | ||
desc = args.desc | ||
if desc: | ||
desc = Path(args.desc).open("r").read() | ||
plot_paths = [] | ||
date = datetime.today().date() | ||
template_args = {"date": date, "desc": desc} | ||
pull_str = "* [#{}: {}]({})" | ||
_prs = args.prs.split(",") | ||
session = HTMLSession() | ||
prs = [] | ||
for pr in _prs: | ||
url = f"https://github.com/PSLmodels/taxdata/pull/{pr}" | ||
# extract PR title | ||
r = session.get(url) | ||
elm = r.html.find("span.js-issue-title")[0] | ||
title = elm.text | ||
prs.append(pull_str.format(pr, title, url)) | ||
template_args["prs"] = prs | ||
# CBO projection comparisons | ||
cbo_projections = [] | ||
cur_cbo = pd.read_csv(CBO_URL, index_col=0) | ||
new_cbo = pd.read_csv(CBO_PATH, index_col=0) | ||
cbo_years = new_cbo.columns.astype(int) | ||
last_year = cbo_years.max() | ||
first_year = last_year - 9 | ||
if new_cbo.equals(cur_cbo): | ||
cbo_projections.append("No changes to CBO projections.") | ||
else: | ||
# we"re only going to include the final ten years in our bar chart | ||
keep_years = [str(year) for year in range(first_year, last_year + 1)] | ||
cur_cbo = cur_cbo.filter(keep_years, axis=1).transpose().reset_index() | ||
cur_cbo["Projections"] = "Current" | ||
new_cbo = new_cbo.filter(keep_years, axis=1).transpose().reset_index() | ||
new_cbo["Projections"] = "New" | ||
cbo_data = pd.concat([cur_cbo, new_cbo], axis=0) | ||
for col in cbo_data.columns: | ||
if col == "index" or col == "Projections": | ||
continue | ||
chart = cbo_bar_chart( | ||
cbo_data, col, CBO_LABELS[col] | ||
) | ||
img_path = Path(CUR_PATH, f"{col}.png") | ||
chart.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
cbo_projections.append( | ||
f"![]({str(img_path)})" + "{.center}" | ||
) | ||
template_args["cbo_projections"] = cbo_projections | ||
|
||
# changes in data availability | ||
cur_meta = pd.read_json(META_URL, orient="index") | ||
new_meta = pd.read_json(META_PATH, orient="index") | ||
puf_added, puf_removed = compare_vars(cur_meta, new_meta, "puf") | ||
cps_added, cps_removed = compare_vars(cur_meta, new_meta, "cps") | ||
template_args["puf_added"] = puf_added | ||
template_args["puf_removed"] = puf_removed | ||
template_args["cps_added"] = cps_added | ||
template_args["cps_removed"] = cps_removed | ||
|
||
# growth rate changes | ||
growth_rate_projections = [] | ||
cur_grow = pd.read_csv(GROW_FACTORS_URL) | ||
new_grow = pd.read_csv(GROW_FACTORS_PATH) | ||
if new_grow.equals(cur_grow): | ||
growth_rate_projections.append("No changes to growth rate projections") | ||
else: | ||
new_grow = new_grow[ | ||
(new_grow["YEAR"] >= first_year) & (new_grow["YEAR"] <= last_year) | ||
] | ||
cur_grow = cur_grow[ | ||
(cur_grow["YEAR"] >= first_year) & (cur_grow["YEAR"] <= last_year) | ||
] | ||
new_grow["Growth Factors"] = "New" | ||
cur_grow["Growth Factors"] = "Current" | ||
growth_data = pd.concat([new_grow, cur_grow]) | ||
rows = list(growth_data.columns) | ||
rows.remove("YEAR"), | ||
rows.remove("Growth Factors") | ||
n = len(rows) // 3 | ||
chart1 = growth_scatter_plot(growth_data, rows[:n]) | ||
img_path = Path(CUR_PATH, "growth_factors1.png") | ||
chart1.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
growth_rate_projections.append( | ||
f"![]({str(img_path)})" + "{.center}" | ||
) | ||
chart2 = growth_scatter_plot(growth_data, rows[n: 2 * n]) | ||
img_path = Path(CUR_PATH, "growth_factors2.png") | ||
chart2.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
growth_rate_projections.append( | ||
f"![]({str(img_path)})" + "{.center}" | ||
) | ||
chart3 = growth_scatter_plot(growth_data, rows[2 * n:]) | ||
img_path = Path(CUR_PATH, "growth_factors3.png") | ||
chart3.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
growth_rate_projections.append( | ||
f"![]({str(img_path)})" + "{.center}" | ||
) | ||
template_args["growth_rate_projections"] = growth_rate_projections | ||
|
||
# compare tax calculator projections | ||
calcs = [] | ||
calc_labels = [] | ||
# baseline CPS calculator | ||
base_cps = tc.Calculator( | ||
records=tc.Records.cps_constructor(), policy=tc.Policy() | ||
) | ||
base_cps.advance_to_year(first_year) | ||
base_cps.calc_all() | ||
calcs.append(base_cps) | ||
calc_labels.append("Current CPS") | ||
# updated CPS calculator | ||
cps = pd.read_csv( | ||
Path(CUR_PATH, "..", "cps_data", "pycps", "cps.csv.gz"), index_col=None | ||
) | ||
cps_weights = pd.read_csv( | ||
Path(CUR_PATH, "..", "cps_stage2", "cps_weights.csv.gz"), | ||
index_col=None | ||
) | ||
new_cps = tc.Calculator( | ||
records=tc.Records( | ||
data=cps, weights=cps_weights, adjust_ratios=None, start_year=2014 | ||
), | ||
policy=tc.Policy() | ||
) | ||
new_cps.advance_to_year(first_year) | ||
new_cps.calc_all() | ||
calcs.append(new_cps) | ||
calc_labels.append("New CPS") | ||
# distribution plots | ||
dist_vars = [ | ||
("c00100", "AGI Distribution"), | ||
("combined", "Tax Liability Distribution") | ||
] | ||
dist_plots = [] | ||
for var, title in dist_vars: | ||
plot = distplot( | ||
calcs, calc_labels, var, title=title | ||
) | ||
img_path = Path(CUR_PATH, f"{var}_dist.png") | ||
plot.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
dist_plots.append( | ||
f"![]({str(img_path)})" + "{.center}" | ||
) | ||
template_args["dist_plots"] = dist_plots | ||
|
||
# aggregate totals | ||
aggs = defaultdict(list) | ||
var_list = [ | ||
"payrolltax", "iitax", "combined", "standard", "c04470" | ||
] | ||
for year in range(first_year, tc.Policy.LAST_BUDGET_YEAR + 1): | ||
base_aggs = run_calc(base_cps, year, var_list) | ||
new_aggs = run_calc(new_cps, year, var_list) | ||
aggs["Tax Liability"].append(base_aggs["payrolltax"]) | ||
aggs["Tax"].append("Current Payroll") | ||
aggs["Year"].append(year) | ||
aggs["Tax Liability"].append(new_aggs["payrolltax"]) | ||
aggs["Tax"].append("New Payroll") | ||
aggs["Year"].append(year) | ||
aggs["Tax Liability"].append(base_aggs["iitax"]) | ||
aggs["Tax"].append("Current Income") | ||
aggs["Year"].append(year) | ||
aggs["Tax Liability"].append(new_aggs["iitax"]) | ||
aggs["Tax"].append("New Income") | ||
aggs["Year"].append(year) | ||
aggs["Tax Liability"].append(base_aggs["combined"]) | ||
aggs["Tax"].append("Current Combined") | ||
aggs["Year"].append(year) | ||
aggs["Tax Liability"].append(new_aggs["combined"]) | ||
aggs["Tax"].append("New Combined") | ||
aggs["Year"].append(year) | ||
agg_df = pd.DataFrame(aggs) | ||
|
||
title = "Aggregate Tax Liability by Year" | ||
agg_chart = alt.Chart(agg_df, title=title).mark_line().encode( | ||
x=alt.X( | ||
"Year:O", axis=alt.Axis(labelAngle=0, titleFontSize=20, | ||
labelFontSize=15) | ||
), | ||
y=alt.Y( | ||
"Tax Liability", title="Tax Liability (Billions)", | ||
axis=alt.Axis(titleFontSize=20, labelFontSize=15) | ||
), | ||
color=alt.Color( | ||
"Tax", | ||
legend=alt.Legend( | ||
symbolSize=150, labelFontSize=15, titleFontSize=20 | ||
) | ||
) | ||
).properties( | ||
width=800, | ||
height=350 | ||
).configure_title( | ||
fontSize=24 | ||
) | ||
img_path = Path(CUR_PATH, "agg_plot.png") | ||
agg_chart.save(str(img_path)) | ||
plot_paths.append(img_path) | ||
template_args["agg_plot"] = f"![]({str(img_path)})" + "{.center}" | ||
|
||
# create tax liability tables | ||
template_args["combined_table"] = agg_liability_table(agg_df, "Combined") | ||
template_args["payroll_table"] = agg_liability_table(agg_df, "Payroll") | ||
template_args["income_table"] = agg_liability_table(agg_df, "Income") | ||
|
||
# write report and delete images used | ||
output_path = Path(CUR_PATH, "reports", f"taxdata_report_{date}.pdf") | ||
write_page(output_path, TEMPLATE_PATH, **template_args) | ||
for path in plot_paths: | ||
path.unlink() | ||
|
||
|
||
if __name__ == "__main__": | ||
report() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
% TaxData Updates | ||
% Generated on {{ date }} | ||
|
||
This report summarizes changes made in the TaxData repository that affect | ||
tax liability projections made by [Tax-Calculator](https://pslmodels.github.io/Tax-Calculator/). | ||
All tables and graphs in this report are automatically generated. | ||
|
||
{{ desc }} | ||
|
||
# Pull Requests and Issues | ||
|
||
This report corresponds with the issues and pull requests listed below. | ||
|
||
{% for item in prs %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
# Changes in Variable Availability | ||
|
||
### PUF | ||
|
||
##### _Added Variables_ | ||
|
||
{% for item in puf_added %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
##### _Removed Variables_ | ||
|
||
{% for item in puf_removed %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
### CPS | ||
|
||
##### _Added Variables_ | ||
|
||
{% for item in cps_added %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
##### _Removed Variables_ | ||
|
||
{% for item in cps_removed %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
# CBO Projections | ||
|
||
{% for item in cbo_projections %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
\pagebreak | ||
|
||
# Growth Rate Projections | ||
|
||
{% for item in growth_rate_projections %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
\pagebreak | ||
|
||
# Distribution of Select Variables | ||
|
||
{% for item in dist_plots %} | ||
{{ item }} | ||
{% endfor %} | ||
|
||
# Aggregate Tax Liability | ||
|
||
{{ agg_plot }} | ||
|
||
## Combined Tax Liability by Year (Billions) | ||
|
||
{{ combined_table }} | ||
|
||
## Income Tax Liability by Year (Billions) | ||
|
||
{{ income_table }} | ||
|
||
## Payroll Tax Liability by Year (Billions) | ||
|
||
{{ payroll_table }} |
Oops, something went wrong.