PSLmodels · martinholmer · Feb 16, 2018 · Feb 15, 2018 · Feb 15, 2018
diff --git a/docs/index.htmx b/docs/index.htmx
@@ -673,8 +673,9 @@ the <kbd>ref3.json</kbd> file, the content of which is:
 <p>The output options illustrated in the following examples generate
 tables of the post-reform level and the reform-induced change in tax
 liability by income deciles as well as graphs of marginal and average
-tax rates by income percentile and percentage change in aftertax
-income by income decile.  These tables and graphs are meant to provide
+tax rates by income percentile and graphs of percentage change in
+aftertax income by income deciles and by income quintiles.
+These tables and graphs are meant to provide
 a quick glance at the impact of a reform.  Any serious analysis of a
 reform will involve generating custom tables and graphs using
 <a href="#partdump">partial dump</a> output.  One of many examples
@@ -739,7 +740,7 @@ Tax-Calculator startup automatically extrapolated your data to 2024.
 
 (8)$ ls cps-24*
 cps-24-#-ref3-#-atr.html   cps-24-#-ref3-#-doc.text   cps-24-#-ref3-#.csv
-cps-24-#-ref3-#-dec.html   cps-24-#-ref3-#-mtr.html
+cps-24-#-ref3-#-dec.html   cps-24-#-ref3-#-mtr.html   cps-24-#-ref3-#-qin.html
 </pre></p>
 
 <p>Example (8) is like example (7) except we ask for 2024 static

diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py
@@ -37,7 +37,8 @@
                            create_diagnostic_table,
                            ce_aftertax_expanded_income,
                            mtr_graph_data, atr_graph_data, xtr_graph_plot,
-                           dec_graph_data, dec_graph_plot)
+                           dec_graph_data, dec_graph_plot,
+                           qin_graph_data, qin_graph_plot)
 # import pdb
 
 
@@ -967,6 +968,47 @@ def decile_graph(self, calc):
                              title='')
         return fig
 
+    def quintile_graph(self, calc):
+        """
+        Create graph that shows percentage change in aftertax expanded
+        income (from going from policy in self to policy in calc) for
+        each expanded-income quintile and subgroups of the top quintile.
+        The graph can be written to an HTML file (using the
+        write_graph_file utility function) or shown on the screen
+        immediately in an interactive or notebook session (following
+        the instructions in the documentation of the xtr_graph_plot
+        utility function).
+
+        Parameters
+        ----------
+        calc : Calculator object
+            calc represents the reform while self represents the baseline,
+            where both self and calc have calculated taxes for this year
+            before being used by this method
+
+        Returns
+        -------
+        graph that is a bokeh.plotting figure object
+        """
+        # check that two Calculator objects are comparable
+        assert isinstance(calc, Calculator)
+        assert calc.current_year == self.current_year
+        assert calc.array_len == self.array_len
+        diff_table = self.difference_table(calc,
+                                           groupby='weighted_deciles',
+                                           income_measure='expanded_income',
+                                           tax_to_diff='combined')
+        # data function aggregates decile rows of diff_table into quintiles
+        data = qin_graph_data(diff_table, year=self.current_year)
+        # construct figure from data
+        fig = qin_graph_plot(data,
+                             width=850,
+                             height=500,
+                             xlabel='',
+                             ylabel='',
+                             title='')
+        return fig
+
     @staticmethod
     def read_json_param_objects(reform, assump):
         """

diff --git a/taxcalc/records.py b/taxcalc/records.py
@@ -116,7 +116,7 @@ def __init__(self,
                  adjust_ratios=PUF_RATIOS_FILENAME,
                  benefits=None,
                  start_year=PUFCSV_YEAR):
-        # pylint: disable=too-many-arguments
+        # pylint: disable=too-many-arguments,too-many-locals
         self.__data_year = start_year
         # read specified data
         self._read_data(data, exact_calculations)

diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
@@ -651,6 +651,15 @@ def write_graph_files(self):
         else:
             reason = 'No graph because sum of weights is not positive'
             TaxCalcIO.write_empty_graph_file(dec_fname, dec_title, reason)
+        # income-change-by-quintile graph
+        qin_fname = self._output_filename.replace('.csv', '-qin.html')
+        qin_title = 'Income Change by Income Quintile'
+        if pos_wght_sum:
+            fig = self.calc_base.quintile_graph(self.calc)
+            write_graph_file(fig, qin_fname, qin_title)
+        else:
+            reason = 'No graph because sum of weights is not positive'
+            TaxCalcIO.write_empty_graph_file(qin_fname, qin_title, reason)
         # average-tax-rate graph
         atr_fname = self._output_filename.replace('.csv', '-atr.html')
         atr_title = 'ATR by Income Percentile'

diff --git a/taxcalc/tests/test_taxcalcio.py b/taxcalc/tests/test_taxcalcio.py
@@ -686,6 +686,9 @@ def test_graphs(reformfile1):
     fname = output_filename.replace('.csv', '-mtr.html')
     if os.path.isfile(fname):
         os.remove(fname)
+    fname = output_filename.replace('.csv', '-qin.html')
+    if os.path.isfile(fname):
+        os.remove(fname)
 
 
 def test_ceeu_output1(lumpsumreformfile):

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
@@ -115,6 +115,10 @@
                     'all',
                     '90-95', '95-99', 'Top 1%']
 
+QUINTILE_ROW_NAMES = ['0-20', '20-40', '40-60', '60-80', '80-100',
+                      'all',
+                      '80-90', '90-95', '95-99', 'Top 1%']
+
 WEBAPP_INCOME_BINS = [-9e99, 0, 9999, 19999, 29999, 39999, 49999, 74999, 99999,
                       199999, 499999, 1000000, 9e99]
 
@@ -1319,7 +1323,8 @@ def dec_graph_data(diff_table, year):
     """
     # construct dictionary containing the bar data required by dec_graph_plot
     bars = dict()
-    for idx in range(0, 14):  # the ten income deciles, all, plus top details
+    nbins = len(DECILE_ROW_NAMES)
+    for idx in range(0, nbins):
         info = dict()
         info['label'] = DECILE_ROW_NAMES[idx]
         info['value'] = diff_table['pc_aftertaxinc'][idx]
@@ -1447,6 +1452,168 @@ def dec_graph_plot(data,
     return fig
 
 
+def qin_graph_data(diff_table, year):
+    """
+    Prepare data needed by qin_graph_plot utility function.
+
+    Parameters
+    ----------
+    diff_table : a Pandas DataFrame object returned from the
+        Calculator class difference_table method
+
+    year : integer
+        specifies calendar year of the data in the diff_table
+
+    Returns
+    -------
+    dictionary object suitable for passing to qin_graph_plot utility function
+    """
+    # aggregate decile rows into quintiles (unweighted mean of decile pairs)
+    qdiff = dict()
+    for qin in range(0, 5):
+        dec = 2 * qin  # index of the lower decile in this quintile
+        qdiff[qin] = 0.5 * (diff_table['pc_aftertaxinc'][dec] +
+                            diff_table['pc_aftertaxinc'][dec + 1])
+    qdiff[5] = diff_table['pc_aftertaxinc'][10]  # all
+    qdiff[6] = diff_table['pc_aftertaxinc'][8]  # 80-90 detail
+    qdiff[7] = diff_table['pc_aftertaxinc'][11]  # 90-95 detail
+    qdiff[8] = diff_table['pc_aftertaxinc'][12]  # 95-99 detail
+    qdiff[9] = diff_table['pc_aftertaxinc'][13]  # Top 1% detail
+    assert len(qdiff) == len(QUINTILE_ROW_NAMES)
+    # construct dictionary containing the bar data required by qin_graph_plot
+    bars = dict()
+    nbins = len(qdiff)
+    for idx in range(0, nbins):
+        info = dict()
+        info['label'] = QUINTILE_ROW_NAMES[idx]
+        info['value'] = qdiff[idx]
+        if info['label'] == 'all':  # blank out the 'all' row
+            info['label'] = '---------'
+            info['value'] = 0
+        bars[idx] = info
+    # construct dictionary containing bar data and auto-generated labels
+    data = dict()
+    data['bars'] = bars
+    xlabel = 'Reform-Induced Percentage Change in After-Tax Expanded Income'
+    data['xlabel'] = xlabel
+    ylabel = 'Expanded Income Percentile Group'
+    data['ylabel'] = ylabel
+    title_str = 'Change in After-Tax Income by Income Percentile Group'
+    data['title'] = '{} for {}'.format(title_str, year)
+    return data
+
+
+def qin_graph_plot(data,
+                   width=850,
+                   height=500,
+                   xlabel='',
+                   ylabel='',
+                   title=''):
+    """
+    Plot stacked quintile graph using data returned from the
+    qin_graph_data function.
+
+    Parameters
+    ----------
+    data : dictionary object returned from qin_graph_data() utility function
+
+    width : integer
+        width of plot expressed in pixels
+
+    height : integer
+        height of plot expressed in pixels
+
+    xlabel : string
+        x-axis label; if '', then use label generated by qin_graph_data
+
+    ylabel : string
+        y-axis label; if '', then use label generated by qin_graph_data
+
+    title : string
+        graph title; if '', then use title generated by qin_graph_data
+
+    Returns
+    -------
+    bokeh.plotting figure object containing a raster graphics plot
+
+    Notes
+    -----
+    USAGE EXAMPLE::
+
+      gdata = qin_graph_data(...)
+      gplot = qin_graph_plot(gdata)
+
+    THEN when working interactively in a Python notebook::
+
+      bp.show(gplot)
+
+    OR when executing script using Python command-line interpreter::
+
+      bio.output_file('graph-name.html', title='Change in After-Tax Income')
+      bio.show(gplot)  [OR bio.save(gplot) WILL JUST WRITE FILE TO DISK]
+
+    WILL VISUALIZE GRAPH IN BROWSER AND WRITE GRAPH TO SPECIFIED HTML FILE
+
+    To convert the visualized graph into a PNG-formatted file, click on
+    the "Save" icon on the Toolbar (located in the top-right corner of
+    the visualized graph) and a PNG-formatted file will be written to your
+    Download directory.
+
+    The ONLY output option the bokeh.plotting figure has is HTML format,
+    which (as described above) can be converted into a PNG-formatted
+    raster graphics file.  There is no option to make the bokeh.plotting
+    figure generate a vector graphics file such as an EPS file.
+    """
+    # pylint: disable=too-many-arguments,too-many-locals
+    if title == '':
+        title = data['title']
+    bar_keys = sorted(data['bars'].keys())
+    bar_labels = [data['bars'][key]['label'] for key in bar_keys]
+    fig = bp.figure(plot_width=width, plot_height=height, title=title,
+                    y_range=bar_labels)
+    fig.title.text_font_size = '12pt'
+    fig.outline_line_color = None
+    fig.axis.axis_line_color = None
+    fig.axis.minor_tick_line_color = None
+    fig.axis.axis_label_text_font_size = '12pt'
+    fig.axis.axis_label_text_font_style = 'normal'
+    fig.axis.major_label_text_font_size = '12pt'
+    if xlabel == '':
+        xlabel = data['xlabel']
+    fig.xaxis.axis_label = xlabel
+    fig.xaxis[0].formatter = PrintfTickFormatter(format='%+.1f%%')
+    if ylabel == '':
+        ylabel = data['ylabel']
+    fig.yaxis.axis_label = ylabel
+    fig.ygrid.grid_line_color = None
+    # plot vertical line at zero spanning all bars (one y unit per bar)
+    fig.line(x=[0, 0], y=[0, len(bar_keys)], line_width=1, line_color='black')
+    # plot bars
+    barheight = 0.8
+    bcolor = 'blue'
+    yidx = 0
+    for idx in bar_keys:
+        bval = data['bars'][idx]['value']
+        blabel = data['bars'][idx]['label']
+        bheight = barheight
+        if blabel == '80-90':
+            bheight *= 0.50
+            bcolor = 'red'  # this and all later (detail) bars are red
+        elif blabel == '90-95':
+            bheight *= 0.25
+        elif blabel == '95-99':
+            bheight *= 0.20
+        elif blabel == 'Top 1%':
+            bheight *= 0.05
+        fig.rect(x=(bval / 2.0),   # x-coordinate of center of the rectangle
+                 y=(yidx + 0.5),   # y-coordinate of center of the rectangle
+                 width=abs(bval),  # width of the rectangle
+                 height=bheight,   # height of the rectangle
+                 color=bcolor)
+        yidx += 1
+    return fig
+
+
 def nonsmall_diffs(linelist1, linelist2, small=0.0):
     """
     Return True if line lists differ significantly; otherwise return False.