Merge pull request #1902 from martinholmer/fix-tables

Improve handling of non-positive incomes in tables and graphs
PSLmodels · Mar 8, 2018 · 8fc3308
2 parents f55d018 + eb3ce9d
commit 8fc3308
Show file tree

Hide file tree

Showing 4 changed files with 154 additions and 331 deletions.
diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py
@@ -38,8 +38,7 @@
                            ce_aftertax_expanded_income,
                            mtr_graph_data, atr_graph_data, xtr_graph_plot,
                            dec_graph_data, dec_graph_plot,
-                           pch_graph_data, pch_graph_plot,
-                           qin_graph_data, qin_graph_plot)
+                           pch_graph_data, pch_graph_plot)
 # import pdb
 
 
@@ -991,7 +990,7 @@ def pch_graph(self, calc):
                              title='')
         return fig
 
-    def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
+    def decile_graph(self, calc, hide_negative_incomes=True):
         """
         Create graph that shows percentage change in aftertax expanded
         income (from going from policy in self to policy in calc) for
@@ -1001,8 +1000,6 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
         immediately in an interactive or notebook session (following
         the instructions in the documentation of the xtr_graph_plot
         utility function).
-        Note that some deciles may contain filing units with negative
-        or zero baseline (self) expanded income.
 
         Parameters
         ----------
@@ -1011,11 +1008,16 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
             where both self and calc have calculated taxes for this year
             before being used by this method
 
-        set_bottom_decile_result_to_zero : boolean
-            specify whether or not bottom decile (which contains filing
-            units with non-positive expanded income) result is shown in the
-            graph (default value is True; set to False to show the bottom
-            decile result)
+        hide_negative_incomes : boolean
+            if True (which is the default), the bottom table bin containing
+            filing units with non-positive expanded_income is not shown in
+            the graph and the table bin containing filing units with positive
+            expanded_income in the bottom decile is shown with its bar width
+            adjusted to the number of weighted filing units in the bottom
+            decile that have positive expanded_income; if False, the bottom
+            table bin containing filing units with non-positive expanded_income
+            is shown, which may be misleading because its percentage change,
+            although calculated correctly, has a negative divisor.
 
         Returns
         -------
@@ -1030,9 +1032,8 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
                                            income_measure='expanded_income',
                                            tax_to_diff='combined')
         # construct data for graph
-        data = dec_graph_data(diff_table, year=self.current_year)
-        if set_bottom_decile_result_to_zero:
-            data['bars'][0]['value'] = 0
+        data = dec_graph_data(diff_table, year=self.current_year,
+                              hide_negative_incomes=hide_negative_incomes)
         # construct figure from data
         fig = dec_graph_plot(data,
                              width=850,
@@ -1042,56 +1043,6 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
                              title='')
         return fig
 
-    def quintile_graph(self, calc, set_bottom_quintile_result_to_zero=True):
-        """Create graph that shows percentage change in aftertax expanded
-        income (from going from policy in self to policy in calc) for
-        each expanded-income quintile and subgroups of the top quintile.
-        The graph can be written to an HTML file (using the
-        write_graph_file utility function) or shown on the screen
-        immediately in an interactive or notebook session (following
-        the instructions in the documentation of the xtr_graph_plot
-        utility function).
-        Note that some quintiles may contain filing units with negative
-        or zero baseline (self) expanded income.
-
-        Parameters
-        ----------
-        calc : Calculator object
-            calc represents the reform while self represents the baseline,
-            where both self and calc have calculated taxes for this year
-            before being used by this method
-
-        set_bottom_quintile_result_to_zero : boolean
-            specify whether or not bottom quintile (which contains filing
-            units with non-positive expanded income) result is shown in the
-            graph (default value is True; set to False to show the bottom
-            quintile result)
-
-        Returns
-        -------
-        graph that is a bokeh.plotting figure object
-        """
-        # check that two Calculator objects are comparable
-        assert isinstance(calc, Calculator)
-        assert calc.current_year == self.current_year
-        assert calc.array_len == self.array_len
-        diff_table = self.difference_table(calc,
-                                           groupby='weighted_deciles',
-                                           income_measure='expanded_income',
-                                           tax_to_diff='combined')
-        # construct data for graph
-        data = qin_graph_data(diff_table, year=self.current_year)
-        if set_bottom_quintile_result_to_zero:
-            data['bars'][0]['value'] = 0
-        # construct figure from data
-        fig = qin_graph_plot(data,
-                             width=850,
-                             height=500,
-                             xlabel='',
-                             ylabel='',
-                             title='')
-        return fig
-
     @staticmethod
     def read_json_param_objects(reform, assump):
         """

diff --git a/taxcalc/consumption.json b/taxcalc/consumption.json
@@ -94,7 +94,7 @@
 
     "_BEN_vet_value": {
         "long_name": "Consumption value of veterans benefits",
-        "description": "Consumption value per dollar of veterans benefits, some of which are in-kind benefits (about 40% are in-kind medical benefits).",
+        "description": "Consumption value per dollar of veterans benefits, some of which are in-kind benefits (about 40% are in-kind medical benefits and overall about 51% are in-kind benefits).",
         "section_1": "",
         "section_2": "",
         "notes": "",

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
@@ -87,17 +87,17 @@ def test_create_tables(cps_subsample):
                                    tax_to_diff='combined')
     assert isinstance(diff, pd.DataFrame)
     expected = [0.00,
-                0.02,
-                0.58,
-                0.72,
-                0.67,
-                0.78,
-                0.77,
-                0.64,
-                0.56,
-                0.17,
-                0.53]
-    tabcol = 'perc_aftertax'
+                -0.02,
+                -0.58,
+                -0.72,
+                -0.67,
+                -0.78,
+                -0.77,
+                -0.64,
+                -0.56,
+                -0.17,
+                -0.53]
+    tabcol = 'pc_aftertaxinc'
     if not np.allclose(diff[tabcol].values, expected,
                        atol=0.005, rtol=0.0, equal_nan=True):
         test_failure = True
@@ -112,19 +112,19 @@ def test_create_tables(cps_subsample):
                                    tax_to_diff='iitax')
     assert isinstance(diff, pd.DataFrame)
     expected = [0.00,
-                0.02,
-                0.58,
-                0.72,
-                0.67,
-                0.78,
-                0.77,
-                0.64,
-                0.56,
-                0.22,
-                0.08,
-                0.06,
-                0.53]
-    tabcol = 'perc_aftertax'
+                -0.02,
+                -0.58,
+                -0.72,
+                -0.67,
+                -0.78,
+                -0.77,
+                -0.64,
+                -0.56,
+                -0.22,
+                -0.08,
+                -0.06,
+                -0.53]
+    tabcol = 'pc_aftertaxinc'
     if not np.allclose(diff[tabcol].values, expected,
                        atol=0.005, rtol=0.0, equal_nan=True):
         test_failure = True
@@ -139,26 +139,26 @@ def test_create_tables(cps_subsample):
                                    tax_to_diff='iitax')
     assert isinstance(diff, pd.DataFrame)
     expected = [0.00,
-                0.01,
-                0.03,
-                0.24,
-                0.78,
-                0.66,
-                0.76,
-                0.67,
-                0.78,
-                0.77,
-                0.64,
-                0.56,
-                0.22,
-                0.08,
-                0.08,
-                0.07,
-                0.05,
-                0.03,
+                -0.01,
+                -0.03,
+                -0.24,
+                -0.78,
+                -0.66,
+                -0.76,
+                -0.67,
+                -0.78,
+                -0.77,
+                -0.64,
+                -0.56,
+                -0.22,
+                -0.08,
+                -0.08,
+                -0.07,
+                -0.05,
+                -0.03,
                 0.00,
-                0.53]
-    tabcol = 'perc_aftertax'
+                -0.53]
+    tabcol = 'pc_aftertaxinc'
     if not np.allclose(diff[tabcol].values, expected,
                        atol=0.005, rtol=0.0, equal_nan=True):
         test_failure = True
@@ -172,7 +172,8 @@ def test_create_tables(cps_subsample):
                                    income_measure='expanded_income',
                                    tax_to_diff='combined')
     assert isinstance(diff, pd.DataFrame)
-    expected = [171711,
+    expected = [0,
+                171711,
                 15725179,
                 26767322,
                 33151429,
@@ -193,7 +194,8 @@ def test_create_tables(cps_subsample):
         print('diff', tabcol)
         for val in diff[tabcol].values:
             print('{:.0f},'.format(val))
-    expected = [0.03,
+    expected = [0.00,
+                0.03,
                 2.94,
                 5.00,
                 6.20,
@@ -214,28 +216,30 @@ def test_create_tables(cps_subsample):
         print('diff', tabcol)
         for val in diff[tabcol].values:
             print('{:.2f},'.format(val))
-    expected = [0.02,
-                0.63,
-                0.72,
-                0.69,
-                0.77,
-                0.75,
-                0.75,
-                0.62,
-                0.59,
-                0.28,
-                0.53,
-                0.52,
-                0.23,
-                0.06]
-    tabcol = 'perc_aftertax'
+    expected = [0.00,
+                -0.02,
+                -0.63,
+                -0.72,
+                -0.69,
+                -0.77,
+                -0.75,
+                -0.75,
+                -0.62,
+                -0.59,
+                -0.28,
+                -0.53,
+                -0.52,
+                -0.23,
+                -0.06]
+    tabcol = 'pc_aftertaxinc'
     if not np.allclose(diff[tabcol].values, expected,
                        atol=0.005, rtol=0.0, equal_nan=True):
         test_failure = True
         print('diff', tabcol)
         for val in diff[tabcol].values:
             print('{:.2f},'.format(val))
-    expected = [-0.02,
+    expected = [0.00,
+                -0.02,
                 -0.63,
                 -0.72,
                 -0.69,
@@ -264,7 +268,8 @@ def test_create_tables(cps_subsample):
                                      income_measure='expanded_income',
                                      result_type='weighted_sum')
     assert isinstance(dist, pd.DataFrame)
-    expected = [-58122959,
+    expected = [0,
+                -58122959,
                 -69644449,
                 -67116585,
                 47133880,
@@ -285,7 +290,8 @@ def test_create_tables(cps_subsample):
         print('dist', tabcol)
         for val in dist[tabcol].values:
             print('{:.0f},'.format(val))
-    expected = [1202,
+    expected = [0,
+                1202,
                 13625,
                 22333,
                 27220,
@@ -306,7 +312,8 @@ def test_create_tables(cps_subsample):
         print('dist', tabcol)
         for val in dist[tabcol].values:
             print('{:.0f},'.format(val))
-    expected = [795716514,
+    expected = [0,
+                795716514,
                 2643384899,
                 3946422611,
                 5277286335,
@@ -327,7 +334,8 @@ def test_create_tables(cps_subsample):
         print('dist', tabcol)
         for val in dist[tabcol].values:
             print('{:.0f},'.format(val))
-    expected = [782122416,
+    expected = [0,
+                782122416,
                 2478134056,
                 3682019346,
                 4789142820,
@@ -707,17 +715,9 @@ def test_add_quantile_bins():
     default_labels = set(range(1, 101))
     for lab in bin_labels:
         assert lab in default_labels
-    # custom labels
     dfb = add_quantile_bins(dfx, 'expanded_income', 100,
                             weight_by_income_measure=True)
     assert 'bins' in dfb
-    custom_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
-    dfb = add_quantile_bins(dfx, 'expanded_income', 10,
-                            labels=custom_labels)
-    assert 'bins' in dfb
-    bin_labels = dfb['bins'].unique()
-    for lab in bin_labels:
-        assert lab in custom_labels
 
 
 def test_dist_table_sum_row(cps_subsample):
@@ -988,10 +988,10 @@ def test_dec_qin_graph_plots(cps_subsample):
     assert calc1.current_year == calc2.current_year
     calc1.calc_all()
     calc2.calc_all()
-    fig_dec = calc1.decile_graph(calc2)
-    assert fig_dec
-    fig_qin = calc1.quintile_graph(calc2)
-    assert fig_qin
+    fig_dec_hide = calc1.decile_graph(calc2)
+    assert fig_dec_hide
+    fig_dec_show = calc1.decile_graph(calc2, hide_negative_incomes=False)
+    assert fig_dec_show
 
 
 def test_nonsmall_diffs():