Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of non-positive incomes in tables and graphs #1902

Merged
merged 8 commits into from
Mar 8, 2018
77 changes: 14 additions & 63 deletions taxcalc/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
ce_aftertax_expanded_income,
mtr_graph_data, atr_graph_data, xtr_graph_plot,
dec_graph_data, dec_graph_plot,
pch_graph_data, pch_graph_plot,
qin_graph_data, qin_graph_plot)
pch_graph_data, pch_graph_plot)
# import pdb


Expand Down Expand Up @@ -991,7 +990,7 @@ def pch_graph(self, calc):
title='')
return fig

def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
def decile_graph(self, calc, hide_negative_incomes=True):
"""
Create graph that shows percentage change in aftertax expanded
income (from going from policy in self to policy in calc) for
Expand All @@ -1001,8 +1000,6 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
immediately in an interactive or notebook session (following
the instructions in the documentation of the xtr_graph_plot
utility function).
Note that some deciles may contain filing units with negative
or zero baseline (self) expanded income.
Parameters
----------
Expand All @@ -1011,11 +1008,16 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
where both self and calc have calculated taxes for this year
before being used by this method
set_bottom_decile_result_to_zero : boolean
specify whether or not bottom decile (which contains filing
units with non-positive expanded income) result is shown in the
graph (default value is True; set to False to show the bottom
decile result)
hide_negative_incomes : boolean
if True (which is the default), the bottom table bin containing
filing units with non-positive expanded_income is not shown in
the graph, and the table bin containing filing units with positive
expanded_income in the bottom decile is shown with its bar width
adjusted to the number of weighted filing units in the bottom decile
that have positive expanded_income; if False, the bottom table bin
containing filing units with non-positive expanded_income is shown,
which may be misleading because the percentage change, although
correctly calculated, is computed with a negative divisor.
Returns
-------
Expand All @@ -1030,9 +1032,8 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
income_measure='expanded_income',
tax_to_diff='combined')
# construct data for graph
data = dec_graph_data(diff_table, year=self.current_year)
if set_bottom_decile_result_to_zero:
data['bars'][0]['value'] = 0
data = dec_graph_data(diff_table, year=self.current_year,
hide_negative_incomes=hide_negative_incomes)
# construct figure from data
fig = dec_graph_plot(data,
width=850,
Expand All @@ -1042,56 +1043,6 @@ def decile_graph(self, calc, set_bottom_decile_result_to_zero=True):
title='')
return fig

def quintile_graph(self, calc, set_bottom_quintile_result_to_zero=True):
"""Create graph that shows percentage change in aftertax expanded
income (from going from policy in self to policy in calc) for
each expanded-income quintile and subgroups of the top quintile.
The graph can be written to an HTML file (using the
write_graph_file utility function) or shown on the screen
immediately in an interactive or notebook session (following
the instructions in the documentation of the xtr_graph_plot
utility function).
Note that some quintiles may contain filing units with negative
or zero baseline (self) expanded income.
Parameters
----------
calc : Calculator object
calc represents the reform while self represents the baseline,
where both self and calc have calculated taxes for this year
before being used by this method
set_bottom_quintile_result_to_zero : boolean
specify whether or not bottom quintile (which contains filing
units with non-positive expanded income) result is shown in the
graph (default value is True; set to False to show the bottom
quintile result)
Returns
-------
graph that is a bokeh.plotting figure object
"""
# check that two Calculator objects are comparable
assert isinstance(calc, Calculator)
assert calc.current_year == self.current_year
assert calc.array_len == self.array_len
diff_table = self.difference_table(calc,
groupby='weighted_deciles',
income_measure='expanded_income',
tax_to_diff='combined')
# construct data for graph
data = qin_graph_data(diff_table, year=self.current_year)
if set_bottom_quintile_result_to_zero:
data['bars'][0]['value'] = 0
# construct figure from data
fig = qin_graph_plot(data,
width=850,
height=500,
xlabel='',
ylabel='',
title='')
return fig

@staticmethod
def read_json_param_objects(reform, assump):
"""
Expand Down
2 changes: 1 addition & 1 deletion taxcalc/consumption.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@

"_BEN_vet_value": {
"long_name": "Consumption value of veterans benefits",
"description": "Consumption value per dollar of veterans benefits, some of which are in-kind benefits (about 40% are in-kind medical benefits).",
"description": "Consumption value per dollar of veterans benefits, some of which are in-kind benefits (about 40% are in-kind medical benefits and overall about 51% are in-kind benefits).",
"section_1": "",
"section_2": "",
"notes": "",
Expand Down
154 changes: 77 additions & 77 deletions taxcalc/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ def test_create_tables(cps_subsample):
tax_to_diff='combined')
assert isinstance(diff, pd.DataFrame)
expected = [0.00,
0.02,
0.58,
0.72,
0.67,
0.78,
0.77,
0.64,
0.56,
0.17,
0.53]
tabcol = 'perc_aftertax'
-0.02,
-0.58,
-0.72,
-0.67,
-0.78,
-0.77,
-0.64,
-0.56,
-0.17,
-0.53]
tabcol = 'pc_aftertaxinc'
if not np.allclose(diff[tabcol].values, expected,
atol=0.005, rtol=0.0, equal_nan=True):
test_failure = True
Expand All @@ -112,19 +112,19 @@ def test_create_tables(cps_subsample):
tax_to_diff='iitax')
assert isinstance(diff, pd.DataFrame)
expected = [0.00,
0.02,
0.58,
0.72,
0.67,
0.78,
0.77,
0.64,
0.56,
0.22,
0.08,
0.06,
0.53]
tabcol = 'perc_aftertax'
-0.02,
-0.58,
-0.72,
-0.67,
-0.78,
-0.77,
-0.64,
-0.56,
-0.22,
-0.08,
-0.06,
-0.53]
tabcol = 'pc_aftertaxinc'
if not np.allclose(diff[tabcol].values, expected,
atol=0.005, rtol=0.0, equal_nan=True):
test_failure = True
Expand All @@ -139,26 +139,26 @@ def test_create_tables(cps_subsample):
tax_to_diff='iitax')
assert isinstance(diff, pd.DataFrame)
expected = [0.00,
0.01,
0.03,
0.24,
0.78,
0.66,
0.76,
0.67,
0.78,
0.77,
0.64,
0.56,
0.22,
0.08,
0.08,
0.07,
0.05,
0.03,
-0.01,
-0.03,
-0.24,
-0.78,
-0.66,
-0.76,
-0.67,
-0.78,
-0.77,
-0.64,
-0.56,
-0.22,
-0.08,
-0.08,
-0.07,
-0.05,
-0.03,
0.00,
0.53]
tabcol = 'perc_aftertax'
-0.53]
tabcol = 'pc_aftertaxinc'
if not np.allclose(diff[tabcol].values, expected,
atol=0.005, rtol=0.0, equal_nan=True):
test_failure = True
Expand All @@ -172,7 +172,8 @@ def test_create_tables(cps_subsample):
income_measure='expanded_income',
tax_to_diff='combined')
assert isinstance(diff, pd.DataFrame)
expected = [171711,
expected = [0,
171711,
15725179,
26767322,
33151429,
Expand All @@ -193,7 +194,8 @@ def test_create_tables(cps_subsample):
print('diff', tabcol)
for val in diff[tabcol].values:
print('{:.0f},'.format(val))
expected = [0.03,
expected = [0.00,
0.03,
2.94,
5.00,
6.20,
Expand All @@ -214,28 +216,30 @@ def test_create_tables(cps_subsample):
print('diff', tabcol)
for val in diff[tabcol].values:
print('{:.2f},'.format(val))
expected = [0.02,
0.63,
0.72,
0.69,
0.77,
0.75,
0.75,
0.62,
0.59,
0.28,
0.53,
0.52,
0.23,
0.06]
tabcol = 'perc_aftertax'
expected = [0.00,
-0.02,
-0.63,
-0.72,
-0.69,
-0.77,
-0.75,
-0.75,
-0.62,
-0.59,
-0.28,
-0.53,
-0.52,
-0.23,
-0.06]
tabcol = 'pc_aftertaxinc'
if not np.allclose(diff[tabcol].values, expected,
atol=0.005, rtol=0.0, equal_nan=True):
test_failure = True
print('diff', tabcol)
for val in diff[tabcol].values:
print('{:.2f},'.format(val))
expected = [-0.02,
expected = [0.00,
-0.02,
-0.63,
-0.72,
-0.69,
Expand Down Expand Up @@ -264,7 +268,8 @@ def test_create_tables(cps_subsample):
income_measure='expanded_income',
result_type='weighted_sum')
assert isinstance(dist, pd.DataFrame)
expected = [-58122959,
expected = [0,
-58122959,
-69644449,
-67116585,
47133880,
Expand All @@ -285,7 +290,8 @@ def test_create_tables(cps_subsample):
print('dist', tabcol)
for val in dist[tabcol].values:
print('{:.0f},'.format(val))
expected = [1202,
expected = [0,
1202,
13625,
22333,
27220,
Expand All @@ -306,7 +312,8 @@ def test_create_tables(cps_subsample):
print('dist', tabcol)
for val in dist[tabcol].values:
print('{:.0f},'.format(val))
expected = [795716514,
expected = [0,
795716514,
2643384899,
3946422611,
5277286335,
Expand All @@ -327,7 +334,8 @@ def test_create_tables(cps_subsample):
print('dist', tabcol)
for val in dist[tabcol].values:
print('{:.0f},'.format(val))
expected = [782122416,
expected = [0,
782122416,
2478134056,
3682019346,
4789142820,
Expand Down Expand Up @@ -707,17 +715,9 @@ def test_add_quantile_bins():
default_labels = set(range(1, 101))
for lab in bin_labels:
assert lab in default_labels
# custom labels
dfb = add_quantile_bins(dfx, 'expanded_income', 100,
weight_by_income_measure=True)
assert 'bins' in dfb
custom_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
dfb = add_quantile_bins(dfx, 'expanded_income', 10,
labels=custom_labels)
assert 'bins' in dfb
bin_labels = dfb['bins'].unique()
for lab in bin_labels:
assert lab in custom_labels


def test_dist_table_sum_row(cps_subsample):
Expand Down Expand Up @@ -988,10 +988,10 @@ def test_dec_qin_graph_plots(cps_subsample):
assert calc1.current_year == calc2.current_year
calc1.calc_all()
calc2.calc_all()
fig_dec = calc1.decile_graph(calc2)
assert fig_dec
fig_qin = calc1.quintile_graph(calc2)
assert fig_qin
fig_dec_hide = calc1.decile_graph(calc2)
assert fig_dec_hide
fig_dec_show = calc1.decile_graph(calc2, hide_negative_incomes=False)
assert fig_dec_show


def test_nonsmall_diffs():
Expand Down
Loading