Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add quintile graph to tc --graphs output #1880

Merged
merged 2 commits into from
Feb 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions docs/index.htmx
Original file line number Diff line number Diff line change
Expand Up @@ -673,8 +673,9 @@ the <kbd>ref3.json</kbd> file, the content of which is:
<p>The output options illustrated in the following examples generate
tables of the post-reform level and the reform-induced change in tax
liability by income deciles as well as graphs of marginal and average
tax rates by income percentile and percentage change in aftertax
income by income decile. These tables and graphs are meant to provide
tax rates by income percentile and graphs of percentage change in
aftertax income by income deciles and by income quintiles.
These tables and graphs are meant to provide
a quick glance at the impact of a reform. Any serious analysis of a
reform will involve generating custom tables and graphs using
<a href="#partdump">partial dump</a> output. One of many examples
Expand Down Expand Up @@ -739,7 +740,7 @@ Tax-Calculator startup automatically extrapolated your data to 2024.

(8)$ ls cps-24*
cps-24-#-ref3-#-atr.html cps-24-#-ref3-#-doc.text cps-24-#-ref3-#.csv
cps-24-#-ref3-#-dec.html cps-24-#-ref3-#-mtr.html
cps-24-#-ref3-#-dec.html cps-24-#-ref3-#-mtr.html cps-24-#-ref3-#-qin.html
</pre></p>

<p>Example (8) is like example (7) except we ask for 2024 static
Expand Down
44 changes: 43 additions & 1 deletion taxcalc/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
create_diagnostic_table,
ce_aftertax_expanded_income,
mtr_graph_data, atr_graph_data, xtr_graph_plot,
dec_graph_data, dec_graph_plot)
dec_graph_data, dec_graph_plot,
qin_graph_data, qin_graph_plot)
# import pdb


Expand Down Expand Up @@ -967,6 +968,47 @@ def decile_graph(self, calc):
title='')
return fig

def quintile_graph(self, calc):
    """
    Build a bar graph of the percentage change in aftertax expanded
    income, by expanded-income quintile and by subgroups of the top
    quintile, caused by moving from the policy in self (the baseline)
    to the policy in calc (the reform).

    The returned figure can be written to an HTML file with the
    write_graph_file utility function, or displayed immediately in an
    interactive or notebook session as described in the documentation
    of the xtr_graph_plot utility function.

    Parameters
    ----------
    calc : Calculator object
        the reform Calculator; self is the baseline Calculator.  Both
        objects must already have calculated taxes for the current
        year before this method is called.

    Returns
    -------
    graph that is a bokeh.plotting figure object
    """
    # baseline and reform must be comparable Calculator objects
    assert isinstance(calc, Calculator)
    assert calc.current_year == self.current_year
    assert calc.array_len == self.array_len
    # a decile-level table is requested on purpose: qin_graph_data
    # aggregates the decile rows into quintile rows itself
    decile_diffs = self.difference_table(calc,
                                         groupby='weighted_deciles',
                                         income_measure='expanded_income',
                                         tax_to_diff='combined')
    graph_data = qin_graph_data(decile_diffs, year=self.current_year)
    # empty label/title strings make qin_graph_plot fall back to the
    # labels and title stored in graph_data
    return qin_graph_plot(graph_data,
                          width=850,
                          height=500,
                          xlabel='',
                          ylabel='',
                          title='')

@staticmethod
def read_json_param_objects(reform, assump):
"""
Expand Down
2 changes: 1 addition & 1 deletion taxcalc/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def __init__(self,
adjust_ratios=PUF_RATIOS_FILENAME,
benefits=None,
start_year=PUFCSV_YEAR):
# pylint: disable=too-many-arguments
# pylint: disable=too-many-arguments,too-many-locals
self.__data_year = start_year
# read specified data
self._read_data(data, exact_calculations)
Expand Down
9 changes: 9 additions & 0 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,15 @@ def write_graph_files(self):
else:
reason = 'No graph because sum of weights is not positive'
TaxCalcIO.write_empty_graph_file(dec_fname, dec_title, reason)
# income-change-by-quintile graph
qin_fname = self._output_filename.replace('.csv', '-qin.html')
qin_title = 'Income Change by Income Quintile'
if pos_wght_sum:
fig = self.calc_base.quintile_graph(self.calc)
write_graph_file(fig, qin_fname, qin_title)
else:
reason = 'No graph because sum of weights is not positive'
TaxCalcIO.write_empty_graph_file(qin_fname, qin_title, reason)
# average-tax-rate graph
atr_fname = self._output_filename.replace('.csv', '-atr.html')
atr_title = 'ATR by Income Percentile'
Expand Down
3 changes: 3 additions & 0 deletions taxcalc/tests/test_taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,9 @@ def test_graphs(reformfile1):
fname = output_filename.replace('.csv', '-mtr.html')
if os.path.isfile(fname):
os.remove(fname)
fname = output_filename.replace('.csv', '-qin.html')
if os.path.isfile(fname):
os.remove(fname)


def test_ceeu_output1(lumpsumreformfile):
Expand Down
169 changes: 168 additions & 1 deletion taxcalc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@
'all',
'90-95', '95-99', 'Top 1%']

# Bar labels used by qin_graph_data, listed in bar-index order: the five
# income quintiles, then the 'all' row, then the four detail groups that
# subdivide the top quintile.
QUINTILE_ROW_NAMES = ['0-20', '20-40', '40-60', '60-80', '80-100',
'all',
'80-90', '90-95', '95-99', 'Top 1%']

WEBAPP_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999,
199999, 499999, 1000000, 9e99]

Expand Down Expand Up @@ -1319,7 +1323,8 @@ def dec_graph_data(diff_table, year):
"""
# construct dictionary containing the bar data required by dec_graph_plot
bars = dict()
for idx in range(0, 14): # the ten income deciles, all, plus top details
nbins = len(DECILE_ROW_NAMES)
for idx in range(0, nbins):
info = dict()
info['label'] = DECILE_ROW_NAMES[idx]
info['value'] = diff_table['pc_aftertaxinc'][idx]
Expand Down Expand Up @@ -1447,6 +1452,168 @@ def dec_graph_plot(data,
return fig


def qin_graph_data(diff_table, year):
    """
    Assemble the bar data and default labels used by the qin_graph_plot
    utility function.

    The decile-plus-details rows of the 'pc_aftertaxinc' column are
    collapsed into one value per entry in QUINTILE_ROW_NAMES: five
    quintiles, the 'all' row, and four top-quintile detail groups.

    Parameters
    ----------
    diff_table : a Pandas DataFrame object returned from the
        Calculator class difference_table method

    year : integer
        specifies calendar year of the data in the diff_table

    Returns
    -------
    dictionary object suitable for passing to qin_graph_plot utility function
    """
    pct_change = diff_table['pc_aftertaxinc']
    # each quintile value is the simple mean of its two decile values
    # NOTE(review): this mean is unweighted; an exact quintile percentage
    # change would presumably weight each decile by its baseline income
    values = [0.5 * (pct_change[2 * qin] + pct_change[2 * qin + 1])
              for qin in range(0, 5)]
    # rows 10-13 of the table are 'all', 90-95, 95-99 and Top 1%, while
    # row 8 (the ninth decile) doubles as the 80-90 detail group
    values.extend(pct_change[row] for row in (10, 8, 11, 12, 13))
    assert len(values) == len(QUINTILE_ROW_NAMES)
    # construct dictionary containing the bar data required by qin_graph_plot
    bars = dict()
    for idx, value in enumerate(values):
        label = QUINTILE_ROW_NAMES[idx]
        if label == 'all':
            # the 'all' row is shown as a zero-length separator bar
            label = '---------'
            value = 0
        bars[idx] = {'label': label, 'value': value}
    # bundle the bar data with the auto-generated labels and title
    xlabel = 'Reform-Induced Percentage Change in After-Tax Expanded Income'
    ylabel = 'Expanded Income Percentile Group'
    title_str = 'Change in After-Tax Income by Income Percentile Group'
    return {
        'bars': bars,
        'xlabel': xlabel,
        'ylabel': ylabel,
        'title': '{} for {}'.format(title_str, year),
    }


def qin_graph_plot(data,
                   width=850,
                   height=500,
                   xlabel='',
                   ylabel='',
                   title=''):
    """
    Plot horizontal-bar quintile graph using data returned from the
    qin_graph_data function.

    Parameters
    ----------
    data : dictionary object returned from qin_graph_data() utility function

    width : integer
        width of plot expressed in pixels

    height : integer
        height of plot expressed in pixels

    xlabel : string
        x-axis label; if '', then use label generated by qin_graph_data

    ylabel : string
        y-axis label; if '', then use label generated by qin_graph_data

    title : string
        graph title; if '', then use title generated by qin_graph_data

    Returns
    -------
    bokeh.plotting figure object containing a raster graphics plot

    Notes
    -----
    USAGE EXAMPLE::

      gdata = qin_graph_data(...)
      gplot = qin_graph_plot(gdata)

    THEN when working interactively in a Python notebook::

      bp.show(gplot)

    OR when executing script using Python command-line interpreter::

      bio.output_file('graph-name.html', title='Change in After-Tax Income')
      bio.show(gplot)  [OR bio.save(gplot) WILL JUST WRITE FILE TO DISK]

    WILL VISUALIZE GRAPH IN BROWSER AND WRITE GRAPH TO SPECIFIED HTML FILE

    To convert the visualized graph into a PNG-formatted file, click on
    the "Save" icon on the Toolbar (located in the top-right corner of
    the visualized graph) and a PNG-formatted file will be written to your
    Download directory.

    The ONLY output option the bokeh.plotting figure has is HTML format,
    which (as described above) can be converted into a PNG-formatted
    raster graphics file.  There is no option to make the bokeh.plotting
    figure generate a vector graphics file such as an EPS file.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    if title == '':
        title = data['title']
    bar_keys = sorted(data['bars'].keys())
    bar_labels = [data['bars'][key]['label'] for key in bar_keys]
    fig = bp.figure(plot_width=width, plot_height=height, title=title,
                    y_range=bar_labels)
    fig.title.text_font_size = '12pt'
    fig.outline_line_color = None
    fig.axis.axis_line_color = None
    fig.axis.minor_tick_line_color = None
    fig.axis.axis_label_text_font_size = '12pt'
    fig.axis.axis_label_text_font_style = 'normal'
    fig.axis.major_label_text_font_size = '12pt'
    if xlabel == '':
        xlabel = data['xlabel']
    fig.xaxis.axis_label = xlabel
    fig.xaxis[0].formatter = PrintfTickFormatter(format='%+.1f%%')
    if ylabel == '':
        ylabel = data['ylabel']
    fig.yaxis.axis_label = ylabel
    fig.ygrid.grid_line_color = None
    # plot vertical line at zero spanning exactly the bars being drawn
    # (was a hard-coded 14, the bar count of the decile graph)
    fig.line(x=[0, 0], y=[0, len(bar_keys)],
             line_width=1, line_color='black')
    # plot bars
    barheight = 0.8
    # top-quintile detail bars are drawn thinner than the quintile bars;
    # the factors presumably reflect each group's population share
    # relative to a full quintile -- TODO confirm
    detail_height_factor = {
        '80-90': 0.50,
        '90-95': 0.25,
        '95-99': 0.20,
        'Top 1%': 0.05,
    }
    bcolor = 'blue'
    for yidx, key in enumerate(bar_keys):
        bval = data['bars'][key]['value']
        blabel = data['bars'][key]['label']
        bheight = barheight * detail_height_factor.get(blabel, 1.0)
        if blabel == '80-90':
            # this bar and every bar plotted after it are drawn in red
            bcolor = 'red'
        fig.rect(x=(bval / 2.0),   # x-coordinate of center of the rectangle
                 y=(yidx + 0.5),   # y-coordinate of center of the rectangle
                 width=abs(bval),  # width of the rectangle
                 height=bheight,   # height of the rectangle
                 color=bcolor)
    return fig


def nonsmall_diffs(linelist1, linelist2, small=0.0):
"""
Return True if line lists differ significantly; otherwise return False.
Expand Down