From 7827df27322288f6529c450b52bfac305c163997 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Fri, 23 Sep 2016 15:03:11 -0400
Subject: [PATCH 01/33] generalize add_weighted_decile_bins

---
 taxcalc/utils.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 550381fe0..4a22a2255 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -5,10 +5,10 @@
 from collections import defaultdict, OrderedDict
 
 
-STATS_COLUMNS = ['_expanded_income', 'c00100', '_standard', 'c04470', 'c04600',
-                 'c04800', 'c05200', 'c62100', 'c09600', 'c05800', 'c09200',
-                 '_refund', 'c07100', '_iitax', '_payrolltax', '_combined',
-                 's006']
+STATS_COLUMNS = ['_expanded_income', 'c00100', '_standard',
+                 'c04470', 'c04600', 'c04800', 'c05200', 'c62100', 'c09600',
+                 'c05800', 'c09200', '_refund', 'c07100', '_iitax',
+                 '_payrolltax', '_combined', 's006']
 
 # each entry in this array corresponds to the same entry in the array
 # TABLE_LABELS below. this allows us to use TABLE_LABELS to map a
@@ -72,6 +72,11 @@ def weighted_mean(agg, col_name):
             float(agg['s006'].sum() + EPSILON))
 
 
+def wage_weighted(agg, col_name):
+    return (float((agg[col_name] * agg['s006'] * agg['e00200']).sum()) /
+            float((agg['s006']*agg['e00200']).sum() + EPSILON))
+
+
 def weighted_sum(agg, col_name):
     return (agg[col_name] * agg['s006']).sum()
 
@@ -91,7 +96,7 @@ def weighted_share_of_total(agg, col_name, total):
 
 
 def add_weighted_decile_bins(df, income_measure='_expanded_income',
-                             labels=None):
+                             num_bins=10, labels=None):
     """
     Add a column of income bins based on each 10% of the income_measure,
     weighted by s006.
@@ -107,11 +112,12 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
     # Max value of cum sum of weights
     max_ = df['cumsum_weights'].values[-1]
     # Create 10 bins and labels based on this cumulative weight
-    bins = [0] + list(np.arange(1, 11) * (max_ / 10.0))
+    bin_edges = [0] + list(np.arange(1, (num_bins+1)) *
+                           (max_ / float(num_bins)))
     if not labels:
-        labels = range(1, 11)
+        labels = range(1, (num_bins+1))
     #  Groupby weighted deciles
-    df['bins'] = pd.cut(df['cumsum_weights'], bins, labels=labels)
+    df['bins'] = pd.cut(df['cumsum_weights'], bins=bin_edges, labels=labels)
     return df
 
 

From 289c9b680197167d0433c2951b8ad0ad0ed9e6ff Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Fri, 23 Sep 2016 16:26:59 -0400
Subject: [PATCH 02/33] test modified

---
 taxcalc/tests/test_utils.py |  7 ++++---
 taxcalc/utils.py            | 15 +++++++++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 7fe33c7c2..b3776f11a 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -316,13 +316,14 @@ def test_add_income_bins_raises():
 
 def test_add_weighted_decile_bins():
     df = DataFrame(data=data, columns=['_expanded_income', 's006', 'label'])
-    df = add_weighted_decile_bins(df)
-    assert 'bins' in df
+    df = add_weighted_decile_bins(df, num_bins=100)
     bin_labels = df['bins'].unique()
-    default_labels = set(range(1, 11))
+    default_labels = set(range(1, 101))
     for lab in bin_labels:
         assert lab in default_labels
     # Custom labels
+    df = add_weighted_decile_bins(df, complex_weight=True)
+    assert 'bins' in df
     custom_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
     df = add_weighted_decile_bins(df, labels=custom_labels)
     assert 'bins' in df
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 4a22a2255..b4a06d7a2 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -96,7 +96,7 @@ def weighted_share_of_total(agg, col_name, total):
 
 
 def add_weighted_decile_bins(df, income_measure='_expanded_income',
-                             num_bins=10, labels=None):
+                             num_bins=10, labels=None, complex_weight=False):
     """
     Add a column of income bins based on each 10% of the income_measure,
     weighted by s006.
@@ -105,10 +105,17 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
 
     This function will server as a 'grouper' later on.
     """
-    # First, sort by income_measure
+    # First, weight income measure by s006 if desired
+    if complex_weight:
+        df['s006_weighted'] = np.multiply(df[income_measure].values,
+                                          df['s006'].values)
+    # Next, sort by income_measure
     df.sort(income_measure, inplace=True)
-    # Next, do a cumulative sum by the weights
-    df['cumsum_weights'] = np.cumsum(df['s006'].values)
+    # Do a cumulative sum by the desired weights
+    if complex_weight:
+        df['cumsum_weights'] = np.cumsum(df['s006_weighted'].values)
+    else:
+        df['cumsum_weights'] = np.cumsum(df['s006'].values)
     # Max value of cum sum of weights
     max_ = df['cumsum_weights'].values[-1]
     # Create 10 bins and labels based on this cumulative weight

From 245539c5e10027b1332795628ca1ebe6b05a2663 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Sun, 25 Sep 2016 23:53:37 -0400
Subject: [PATCH 03/33] get_data_mtr

---
 taxcalc/utils.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index b4a06d7a2..3e58a502d 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -243,6 +243,17 @@ def results(obj):
     return DataFrame(data=np.column_stack(arrays), columns=STATS_COLUMNS)
 
 
+def exp_results(c):
+    RES_COLUMNS = STATS_COLUMNS + ['e00200'] + ['MARS']
+    outputs = []
+    for col in RES_COLUMNS:
+        if hasattr(c.policy, col):
+            outputs.append(getattr(c.policy, col))
+        else:
+            outputs.append(getattr(c.records, col))
+    return DataFrame(data=np.column_stack(outputs), columns=RES_COLUMNS)
+
+
 def weighted_avg_allcols(df, cols, income_measure='_expanded_income'):
     diff = DataFrame(df.groupby('bins', as_index=False).apply(weighted_mean,
                                                               income_measure),
@@ -612,3 +623,66 @@ def f(x):
     out = out.applymap(fstring.format)
     out.to_csv(ascii_filename, header=False, index=False,
                delim_whitespace=True, sep='\t')
+
+
+def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
+                 tab='e00200', mtr_measure='IIT', complex_weight=False):
+    df_x = exp_results(calcX)
+    df_y = exp_results(calcY)
+
+    a, mtr_iit_x, mtr_combined_x = calcX.mtr()
+    a, mtr_iit_y, mtr_combined_y = calcY.mtr()
+    df_x['mtr_iit'] = mtr_iit_x
+    df_y['mtr_iit'] = mtr_iit_y
+    df_x['mtr_combined'] = mtr_combined_x
+    df_y['mtr_combined'] = mtr_combined_y
+
+    df_y[tab] = df_x[tab]
+
+    if complex_weight:
+        df_x = add_weighted_decile_bins(df_x, tab, 100, complex_weight=True)
+        df_y = add_weighted_decile_bins(df_y, tab, 100, complex_weight=True)
+    else:
+        df_x = add_weighted_decile_bins(df_x, tab, 100)
+        df_y = add_weighted_decile_bins(df_y, tab, 100)
+
+    if MARS == 'ALL':
+        df_filtered_x = df_x.copy()
+        df_filtered_y = df_y.copy()
+    else:
+        df_filtered_x = df_x[(df_x['MARS'] == MARS)].copy()
+        df_filtered_y = df_y[(df_y['MARS'] == MARS)].copy()
+
+    gp_x = df_filtered_x.groupby('bins', as_index=False)
+    gp_y = df_filtered_y.groupby('bins', as_index=False)
+
+    if mtr_measure == 'combined':
+        wgtpct_x = gp_x.apply(weights, 'mtr_combined')
+        wgtpct_y = gp_y.apply(weights, 'mtr_combined')
+    elif mtr_measure == 'IIT':
+        wgtpct_x = gp_x.apply(weights, 'mtr_iit')
+        wgtpct_y = gp_y.apply(weights, 'mtr_iit')
+
+    wpct_x = DataFrame(data=wgtpct_x, columns=['w_mtr'])
+    wpct_y = DataFrame(data=wgtpct_y, columns=['w_mtr'])
+
+    wpct_x['bins'] = np.arange(1, 101)
+    wpct_y['bins'] = np.arange(1, 101)
+
+    rsltx = pd.merge(df_filtered_x[['bins']], wpct_x, how='left')
+    rslty = pd.merge(df_filtered_y[['bins']], wpct_y, how='left')
+
+    df_filtered_x['w_mtr'] = rsltx['w_mtr'].values
+    df_filtered_y['w_mtr'] = rslty['w_mtr'].values
+
+    df_filtered_x.drop_duplicates(subset='bins', inplace=True)
+    df_filtered_y.drop_duplicates(subset='bins', inplace=True)
+
+    df_filtered_x = df_filtered_x['w_mtr']
+    df_filtered_y = df_filtered_y['w_mtr']
+
+    merged = pd.concat([df_filtered_x, df_filtered_y], axis=1,
+                       ignore_index=True)
+    merged.columns = ['base', 'reform']
+
+    return merged

From b779bdad40086acda9b18ff7ec7f96512d027c53 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 00:12:08 -0400
Subject: [PATCH 04/33] typo

---
 taxcalc/utils.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 3e58a502d..500853517 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -626,7 +626,8 @@ def f(x):
 
 
 def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
-                 tab='e00200', mtr_measure='IIT', complex_weight=False):
+                 income_measure='e00200', mtr_measure='IIT',
+                 complex_weight=False):
     df_x = exp_results(calcX)
     df_y = exp_results(calcY)
 
@@ -637,14 +638,16 @@ def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
     df_x['mtr_combined'] = mtr_combined_x
     df_y['mtr_combined'] = mtr_combined_y
 
-    df_y[tab] = df_x[tab]
+    df_y[income_measure] = df_x[income_measure]
 
     if complex_weight:
-        df_x = add_weighted_decile_bins(df_x, tab, 100, complex_weight=True)
-        df_y = add_weighted_decile_bins(df_y, tab, 100, complex_weight=True)
+        df_x = add_weighted_decile_bins(df_x, income_measure, 100,
+                                        complex_weight=True)
+        df_y = add_weighted_decile_bins(df_y, income_measure, 100,
+                                        complex_weight=True)
     else:
-        df_x = add_weighted_decile_bins(df_x, tab, 100)
-        df_y = add_weighted_decile_bins(df_y, tab, 100)
+        df_x = add_weighted_decile_bins(df_x, income_measure, 100)
+        df_y = add_weighted_decile_bins(df_y, income_measure, 100)
 
     if MARS == 'ALL':
         df_filtered_x = df_x.copy()

From 9a06bccd82e58ad167bc3773be538f69e122795c Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 12:58:40 -0400
Subject: [PATCH 05/33] styles added

---
 taxcalc/styles.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++
 taxcalc/utils.py  | 17 ++++++++-
 2 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 taxcalc/styles.py

diff --git a/taxcalc/styles.py b/taxcalc/styles.py
new file mode 100644
index 000000000..67c75c351
--- /dev/null
+++ b/taxcalc/styles.py
@@ -0,0 +1,97 @@
+from bokeh.plotting import figure
+
+DATETIME_FORMAT = dict(
+    microseconds=["%m/%d %X"],
+    milliseconds=["%X"],
+    seconds=["%X"],
+    minsec=["%X"],
+    minutes=["%H:%M"],
+    hourmin=["%H:%M"],
+    hours=["%H:%M"],
+    days=["%m/%d"],
+)
+
+FONT = "Helvetica"
+FONT_SIZE = "10pt"
+
+NODATA_COLOR = "#eeeeee"
+GRAY = "#CCCCCC"
+DARK_GRAY = "#6B6B73"
+BLUE = '#21bfd5'
+RED = '#FF6666'
+GREEN = '#32CD32'
+PURPLE = '#C5007C'
+
+AXIS_FORMATS = dict(
+    minor_tick_in=None,
+    minor_tick_out=None,
+    major_tick_in=None,
+    major_label_text_font=FONT,
+    major_label_text_font_size="8pt",
+    axis_label_text_font=FONT,
+    axis_label_text_font_style="italic",
+    axis_label_text_font_size="8pt",
+
+    axis_line_color=DARK_GRAY,
+    major_tick_line_color=DARK_GRAY,
+    major_label_text_color=DARK_GRAY,
+
+    major_tick_line_cap="round",
+    axis_line_cap="round",
+    axis_line_width=1,
+    major_tick_line_width=1,
+)
+PLOT_FORMATS = dict(
+    toolbar_location=None,
+    outline_line_color="#FFFFFF",
+    title_text_font=FONT,
+    title_text_align='center',
+    title_text_color=DARK_GRAY,
+    title_text_font_size="9pt",
+    title_text_baseline='bottom',
+    min_border_left=0,
+    min_border_right=10,
+    min_border_top=5,
+    min_border_bottom=0,
+)
+LINE_FORMATS = dict(
+    line_cap='round',
+    line_join='round',
+    line_width=2
+)
+
+FONT_PROPS_SM = dict(
+    text_font=FONT,
+    text_font_size='8pt',
+)
+
+FONT_PROPS_MD = dict(
+    text_font=FONT,
+    text_font_size='10pt',
+)
+
+FONT_PROPS_LG = dict(
+    text_font=FONT,
+    text_font_size='12pt',
+)
+
+BLANK_AXIS = dict(
+    minor_tick_in=None,
+    minor_tick_out=None,
+    major_tick_in=None,
+    major_label_text_font=FONT,
+    major_label_text_font_size="8pt",
+    axis_label_text_font=FONT,
+    axis_label_text_font_style="italic",
+    axis_label_text_font_size="8pt",
+
+    axis_line_color='white',
+    major_tick_line_color='white',
+    major_label_text_color='white',
+    axis_label_text_color='white',
+
+    major_tick_line_cap="round",
+    axis_line_cap="round",
+    axis_line_width=1,
+    major_tick_line_width=1,
+)
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 500853517..8f699426f 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -3,7 +3,22 @@
 import pandas as pd
 from pandas import DataFrame
 from collections import defaultdict, OrderedDict
-
+from bokeh.models import Plot, Range1d, ImageURL, DataRange1d
+from bokeh.embed import components
+from bokeh.layouts import layout
+from bokeh.plotting import figure, hplot, vplot, output_file, show
+from bokeh.models import (ColumnDataSource, LogAxis, LinearAxis, Rect,
+                          FactorRange, CategoricalAxis, Line, Text, Square,
+                          HoverTool)
+
+from styles import (PLOT_FORMATS,
+                    AXIS_FORMATS,
+                    FONT_PROPS_SM,
+                    DARK_GRAY,
+                    GREEN,
+                    PURPLE,
+                    RED,
+                    BLUE)
 
 STATS_COLUMNS = ['_expanded_income', 'c00100', '_standard',
                  'c04470', 'c04600', 'c04800', 'c05200', 'c62100', 'c09600',

From b4bd5331c2130174bebfd41a1cc7a0650450fc61 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 13:22:54 -0400
Subject: [PATCH 06/33] add plot function

---
 taxcalc/utils.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 8f699426f..391fd0818 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -704,3 +704,32 @@ def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
     merged.columns = ['base', 'reform']
 
     return merged
+
+
+def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
+             plot_width=425, plot_height=250, loc='top_left'):
+
+    QQ = figure(plot_width=plot_width, plot_height=plot_height, title=title)
+
+    QQ.line((source.reset_index()).index,
+            (source.reset_index()).base, line_color=BLUE, line_width=0.8,
+            line_alpha=.8, legend="Base")
+
+    QQ.line((source.reset_index()).index,
+            (source.reset_index()).reform, line_color=RED, line_width=0.8,
+            line_alpha=1, legend="Reform")
+
+    QQ.legend.label_text_font = "times"
+    QQ.legend.label_text_font_style = "italic"
+    QQ.legend.location = loc
+
+    QQ.legend.label_width = 2
+    QQ.legend.label_height = 2
+    QQ.legend.label_standoff = 2
+    QQ.legend.glyph_width = 14
+    QQ.legend.glyph_height = 14
+    QQ.legend.legend_spacing = 5
+    QQ.legend.legend_padding = 5
+    QQ.yaxis.axis_label = ylab
+    QQ.xaxis.axis_label = xlab
+    return show(QQ)

From 7d9c905edf29d15577be44745edc574fa2d1d132 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 15:27:32 -0400
Subject: [PATCH 07/33] add test suite

---
 taxcalc/tests/test_utils.py | 10 ++++++++++
 taxcalc/utils.py            | 38 ++++++++++++++++++-------------------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index b3776f11a..e95bf6977 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -582,3 +582,13 @@ def test_ascii_output_function(csvfile, asciifile):
     assert filecmp.cmp(output_test.name, asciifile.name)
     output_test.close()
     os.remove(output_test.name)
+
+
+def test_mtr_plot():
+    pol = Policy()
+    recs = Records(data=TAXDATA, weights=WEIGHTS, start_year=2009)
+    behv = Behavior()
+    calc = Calculator(policy=pol, records=recs, behavior=behv)
+    calc.calc_all()
+    source = get_mtr_data(calc, calc, weights=weighted_mean)
+    plot = mtr_plot(source)
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 391fd0818..e0a1fe975 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -126,7 +126,7 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
                                           df['s006'].values)
     # Next, sort by income_measure
     df.sort(income_measure, inplace=True)
-    # Do a cumulative sum by the desired weights
+    # Do a cumulative sum
     if complex_weight:
         df['cumsum_weights'] = np.cumsum(df['s006_weighted'].values)
     else:
@@ -640,7 +640,7 @@ def f(x):
                delim_whitespace=True, sep='\t')
 
 
-def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
+def get_mtr_data(calcX, calcY, weights, MARS='ALL',
                  income_measure='e00200', mtr_measure='IIT',
                  complex_weight=False):
     df_x = exp_results(calcX)
@@ -709,27 +709,27 @@ def get_mtr_data(calcX, calcY, MARS='ALL', weights='weighted_mean',
 def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
              plot_width=425, plot_height=250, loc='top_left'):
 
-    QQ = figure(plot_width=plot_width, plot_height=plot_height, title=title)
+    PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 
-    QQ.line((source.reset_index()).index,
+    PP.line((source.reset_index()).index,
             (source.reset_index()).base, line_color=BLUE, line_width=0.8,
             line_alpha=.8, legend="Base")
 
-    QQ.line((source.reset_index()).index,
+    PP.line((source.reset_index()).index,
             (source.reset_index()).reform, line_color=RED, line_width=0.8,
             line_alpha=1, legend="Reform")
 
-    QQ.legend.label_text_font = "times"
-    QQ.legend.label_text_font_style = "italic"
-    QQ.legend.location = loc
-
-    QQ.legend.label_width = 2
-    QQ.legend.label_height = 2
-    QQ.legend.label_standoff = 2
-    QQ.legend.glyph_width = 14
-    QQ.legend.glyph_height = 14
-    QQ.legend.legend_spacing = 5
-    QQ.legend.legend_padding = 5
-    QQ.yaxis.axis_label = ylab
-    QQ.xaxis.axis_label = xlab
-    return show(QQ)
+    PP.legend.label_text_font = "times"
+    PP.legend.label_text_font_style = "italic"
+    PP.legend.location = loc
+
+    PP.legend.label_width = 2
+    PP.legend.label_height = 2
+    PP.legend.label_standoff = 2
+    PP.legend.glyph_width = 14
+    PP.legend.glyph_height = 14
+    PP.legend.legend_spacing = 5
+    PP.legend.legend_padding = 5
+    PP.yaxis.axis_label = ylab
+    PP.xaxis.axis_label = xlab
+    return PP

From 58fbfa54ba13a8f7878329f786149c810adbc40a Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 16:46:10 -0400
Subject: [PATCH 08/33] add document

---
 taxcalc/utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index e0a1fe975..460ebcbc2 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -643,9 +643,44 @@ def f(x):
 def get_mtr_data(calcX, calcY, weights, MARS='ALL',
                  income_measure='e00200', mtr_measure='IIT',
                  complex_weight=False):
+    """
+    This function prepares the MTR data for two calculators.
+
+    Parameters
+    ----------
+    calcX : a Tax-Calculator Calculator object that refers to the baseline
+
+    calcY : a Tax-Calculator Calculator object that refers to the reform
+
+    weights : function
+        options for input: weighted_count_lt_zero, weighted_count_gt_zero,
+            weighted_count, weighted_mean, wage_weighted, weighted_sum,
+            weighted_perc_inc, weighted_perc_dec, weighted_share_of_total
+        Choose the weighting function that is applied to each income bin
+
+    MARS : Integer
+        options for input: 1, 2, 3, 4
+        Choose different filing status
+
+    income_measure : String object
+        options for input: '_expanded_income', '_iitax'
+        classifier of income bins/deciles
+
+    mtr_measure : String object
+        options for input: 'IIT', 'combined'
+
+    complex_weight : Boolean
+        The cumulated sum will be carried out based on weighted income measure
+        if this option is true
+    Returns
+    -------
+    DataFrame object
+    """
+    # Get output columns
     df_x = exp_results(calcX)
     df_y = exp_results(calcY)
 
+    # Calculate MTR
     a, mtr_iit_x, mtr_combined_x = calcX.mtr()
     a, mtr_iit_y, mtr_combined_y = calcY.mtr()
     df_x['mtr_iit'] = mtr_iit_x
@@ -655,6 +690,7 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
 
     df_y[income_measure] = df_x[income_measure]
 
+    # Complex weighted bins or not
     if complex_weight:
         df_x = add_weighted_decile_bins(df_x, income_measure, 100,
                                         complex_weight=True)
@@ -664,6 +700,7 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
         df_x = add_weighted_decile_bins(df_x, income_measure, 100)
         df_y = add_weighted_decile_bins(df_y, income_measure, 100)
 
+    # Select either all filers or one filing status
     if MARS == 'ALL':
         df_filtered_x = df_x.copy()
         df_filtered_y = df_y.copy()
@@ -684,6 +721,7 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     wpct_x = DataFrame(data=wgtpct_x, columns=['w_mtr'])
     wpct_y = DataFrame(data=wgtpct_y, columns=['w_mtr'])
 
+    # Add bin labels
     wpct_x['bins'] = np.arange(1, 101)
     wpct_y['bins'] = np.arange(1, 101)
 
@@ -708,7 +746,36 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
 
 def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
              plot_width=425, plot_height=250, loc='top_left'):
+    """
+    This function plots the base and reform MTR lines from get_mtr_data().
+
+    Parameters
+    ----------
+    source : DataFrame which can be obtained using get_mtr_data() function
+
+    xlab : String object
+        Name for X axis
 
+    ylab : String object
+        Name for Y axis
+
+    title : String object
+        Caption for the plot
+
+    plot_width : Numeric (Usually integer)
+        Width of the plot
+
+    plot_height : Numeric (Usually integer)
+        Height of the plot
+
+    loc : String object
+        Options for input: "top_right", "top_left", "bottom_left",
+            "bottom_right"
+        Choose the location of the legend label
+    Returns
+    -------
+    Figure Object (Use show() option to visualize)
+    """
     PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 
     PP.line((source.reset_index()).index,

From 9751f25105ba948fdd90d34e9acc46c2281d41c3 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Mon, 26 Sep 2016 17:22:50 -0400
Subject: [PATCH 09/33] PEP8

---
 taxcalc/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 381fcf6d0..c0b009a98 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -89,7 +89,7 @@ def weighted_mean(agg, col_name):
 
 def wage_weighted(agg, col_name):
     return (float((agg[col_name] * agg['s006'] * agg['e00200']).sum()) /
-            float((agg['s006']*agg['e00200']).sum() + EPSILON))
+            float((agg['s006'] * agg['e00200']).sum() + EPSILON))
 
 
 def weighted_sum(agg, col_name):
@@ -134,10 +134,10 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
     # Max value of cum sum of weights
     max_ = df['cumsum_weights'].values[-1]
     # Create 10 bins and labels based on this cumulative weight
-    bin_edges = [0] + list(np.arange(1, (num_bins+1)) *
+    bin_edges = [0] + list(np.arange(1, (num_bins + 1)) *
                            (max_ / float(num_bins)))
     if not labels:
-        labels = range(1, (num_bins+1))
+        labels = range(1, (num_bins + 1))
     #  Groupby weighted deciles
     df['bins'] = pd.cut(df['cumsum_weights'], bins=bin_edges, labels=labels)
     return df

From c7700ed3a150500c12896af6fe658543930ac5fc Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Tue, 4 Oct 2016 14:23:40 -0400
Subject: [PATCH 10/33] rm styles.py, and used simple color options

---
 taxcalc/styles.py | 97 -----------------------------------------------
 taxcalc/utils.py  | 14 ++-----
 2 files changed, 3 insertions(+), 108 deletions(-)
 delete mode 100644 taxcalc/styles.py

diff --git a/taxcalc/styles.py b/taxcalc/styles.py
deleted file mode 100644
index 67c75c351..000000000
--- a/taxcalc/styles.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from bokeh.plotting import figure
-
-DATETIME_FORMAT = dict(
-    microseconds=["%m/%d %X"],
-    milliseconds=["%X"],
-    seconds=["%X"],
-    minsec=["%X"],
-    minutes=["%H:%M"],
-    hourmin=["%H:%M"],
-    hours=["%H:%M"],
-    days=["%m/%d"],
-)
-
-FONT = "Helvetica"
-FONT_SIZE = "10pt"
-
-NODATA_COLOR = "#eeeeee"
-GRAY = "#CCCCCC"
-DARK_GRAY = "#6B6B73"
-BLUE = '#21bfd5'
-RED = '#FF6666'
-GREEN = '#32CD32'
-PURPLE = '#C5007C'
-
-AXIS_FORMATS = dict(
-    minor_tick_in=None,
-    minor_tick_out=None,
-    major_tick_in=None,
-    major_label_text_font=FONT,
-    major_label_text_font_size="8pt",
-    axis_label_text_font=FONT,
-    axis_label_text_font_style="italic",
-    axis_label_text_font_size="8pt",
-
-    axis_line_color=DARK_GRAY,
-    major_tick_line_color=DARK_GRAY,
-    major_label_text_color=DARK_GRAY,
-
-    major_tick_line_cap="round",
-    axis_line_cap="round",
-    axis_line_width=1,
-    major_tick_line_width=1,
-)
-PLOT_FORMATS = dict(
-    toolbar_location=None,
-    outline_line_color="#FFFFFF",
-    title_text_font=FONT,
-    title_text_align='center',
-    title_text_color=DARK_GRAY,
-    title_text_font_size="9pt",
-    title_text_baseline='bottom',
-    min_border_left=0,
-    min_border_right=10,
-    min_border_top=5,
-    min_border_bottom=0,
-)
-LINE_FORMATS = dict(
-    line_cap='round',
-    line_join='round',
-    line_width=2
-)
-
-FONT_PROPS_SM = dict(
-    text_font=FONT,
-    text_font_size='8pt',
-)
-
-FONT_PROPS_MD = dict(
-    text_font=FONT,
-    text_font_size='10pt',
-)
-
-FONT_PROPS_LG = dict(
-    text_font=FONT,
-    text_font_size='12pt',
-)
-
-BLANK_AXIS = dict(
-    minor_tick_in=None,
-    minor_tick_out=None,
-    major_tick_in=None,
-    major_label_text_font=FONT,
-    major_label_text_font_size="8pt",
-    axis_label_text_font=FONT,
-    axis_label_text_font_style="italic",
-    axis_label_text_font_size="8pt",
-
-    axis_line_color='white',
-    major_tick_line_color='white',
-    major_label_text_color='white',
-    axis_label_text_color='white',
-
-    major_tick_line_cap="round",
-    axis_line_cap="round",
-    axis_line_width=1,
-    major_tick_line_width=1,
-)
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index c0b009a98..001bf35b0 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -6,20 +6,12 @@
 from bokeh.models import Plot, Range1d, ImageURL, DataRange1d
 from bokeh.embed import components
 from bokeh.layouts import layout
+from bokeh.palettes import Blues4, Reds4
 from bokeh.plotting import figure, hplot, vplot, output_file, show
 from bokeh.models import (ColumnDataSource, LogAxis, LinearAxis, Rect,
                           FactorRange, CategoricalAxis, Line, Text, Square,
                           HoverTool)
 
-from styles import (PLOT_FORMATS,
-                    AXIS_FORMATS,
-                    FONT_PROPS_SM,
-                    DARK_GRAY,
-                    GREEN,
-                    PURPLE,
-                    RED,
-                    BLUE)
-
 STATS_COLUMNS = ['_expanded_income', 'c00100', '_standard',
                  'c04470', 'c04600', 'c04800', 'c05200', 'c62100', 'c09600',
                  'c05800', 'c09200', '_refund', 'c07100', '_iitax',
@@ -779,11 +771,11 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
     PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 
     PP.line((source.reset_index()).index,
-            (source.reset_index()).base, line_color=BLUE, line_width=0.8,
+            (source.reset_index()).base, line_color=Blues4[0], line_width=0.8,
             line_alpha=.8, legend="Base")
 
     PP.line((source.reset_index()).index,
-            (source.reset_index()).reform, line_color=RED, line_width=0.8,
+            (source.reset_index()).reform, line_color=Reds4[1], line_width=0.8,
             line_alpha=1, legend="Reform")
 
     PP.legend.label_text_font = "times"

From 04d591de871bce4a0ecad2f96418e076efee88a7 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Tue, 4 Oct 2016 14:31:48 -0400
Subject: [PATCH 11/33] add bokeh to all test yml

---
 .travis.yml     | 2 +-
 environment.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 1dc81883e..df43bf681 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,7 @@ install:
   - export PATH="$HOME/miniconda/bin:$PATH"
   - conda config --set always_yes yes --set changeps1 no
   - conda update conda
-  - conda create -n taxcalcdev python=$TRAVIS_PYTHON_VERSION pytest setuptools pandas=0.18 toolz six mock
+  - conda create -n taxcalcdev python=$TRAVIS_PYTHON_VERSION pytest setuptools pandas=0.18 toolz six mock bokeh
   - source activate taxcalcdev
   - if [ $TRAVIS_EVENT_TYPE != 'cron' ]; then conda install numba; fi
   - pip install pytest-pep8
diff --git a/environment.yml b/environment.yml
index 69bc4f6ae..cb0939920 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,6 +6,7 @@ dependencies:
 - numpy =1.10.4
 - pandas =0.18.0
 - numba
+- bokeh
 - toolz
 - six
 - ipython

From eeb3d27656d3224a82b7b87de53ff02a527a189e Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Tue, 4 Oct 2016 15:04:52 -0400
Subject: [PATCH 12/33] order matters?

---
 taxcalc/tests/test_utils.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 666a94334..1f6f96f57 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -526,6 +526,16 @@ def test_expand_2D_accept_None_additional_row():
     npt.assert_allclose(pol.II_brk2, exp_2020)
 
 
+def test_mtr_plot():
+    pol = Policy()
+    recs = Records(data=TAXDATA, weights=WEIGHTS, start_year=2009)
+    behv = Behavior()
+    calc = Calculator(policy=pol, records=recs, behavior=behv)
+    calc.calc_all()
+    source = get_mtr_data(calc, calc, weights=weighted_mean)
+    plot = mtr_plot(source)
+
+
 def test_multiyear_diagnostic_table():
     pol = Policy()
     recs = Records(data=TAXDATA, weights=WEIGHTS, start_year=2009)
@@ -584,16 +594,6 @@ def test_ascii_output_function(csvfile, asciifile):
     os.remove(output_test.name)
 
 
-def test_mtr_plot():
-    pol = Policy()
-    recs = Records(data=TAXDATA, weights=WEIGHTS, start_year=2009)
-    behv = Behavior()
-    calc = Calculator(policy=pol, records=recs, behavior=behv)
-    calc.calc_all()
-    source = get_mtr_data(calc, calc, weights=weighted_mean)
-    plot = mtr_plot(source)
-
-
 def test_string_to_number():
     assert string_to_number(None) == 0
     assert string_to_number('') == 0

From e326f43e6b169cfe74f4eb5555ed65d55ba09160 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Tue, 4 Oct 2016 16:26:18 -0400
Subject: [PATCH 13/33] more tests

---
 taxcalc/tests/test_utils.py | 12 +++++++++++-
 taxcalc/utils.py            |  5 +----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 9ebd3f58d..33d1958a8 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -517,12 +517,22 @@ def test_expand_2D_accept_None_additional_row():
     npt.assert_allclose(pol.II_brk2, exp_2020)
 
 
+def test_get_mtr_data(records_2009):
+    pol = Policy()
+    behv = Behavior()
+    calc = Calculator(policy=pol, records=records_2009, behavior=behv)
+    calc.calc_all()
+    source = get_mtr_data(calc, calc, weights=wage_weighted, MARS=1,
+                          mtr_measure='combined')
+
+
 def test_mtr_plot(records_2009):
     pol = Policy()
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weights=weighted_mean)
+    source = get_mtr_data(calc, calc, weights=wage_weighted,
+                          complex_weight=True)
     plot = mtr_plot(source)
 
 
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 001bf35b0..9b62ee716 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -254,10 +254,7 @@ def exp_results(c):
     RES_COLUMNS = STATS_COLUMNS + ['e00200'] + ['MARS']
     outputs = []
     for col in RES_COLUMNS:
-        if hasattr(c.policy, col):
-            outputs.append(getattr(c.policy, col))
-        else:
-            outputs.append(getattr(c.records, col))
+        outputs.append(getattr(c.records, col))
     return DataFrame(data=np.column_stack(outputs), columns=RES_COLUMNS)
 
 

From ab3c5f5ea842adf4bbced6a4d6a35ea6decea90f Mon Sep 17 00:00:00 2001
From: talumbau <tj.alumbaugh@continuum.io>
Date: Wed, 5 Oct 2016 13:33:41 -0500
Subject: [PATCH 14/33] try/except on import of bokeh

 - handle functions that require bokeh with `@requires_bokeh` decorator
---
 taxcalc/tests/test_utils.py | 14 ++++++++++++++
 taxcalc/utils.py            | 38 +++++++++++++++++++++++++++++--------
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 33d1958a8..99b0e2a8b 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -536,6 +536,20 @@ def test_mtr_plot(records_2009):
     plot = mtr_plot(source)
 
 
+def test_mtr_plot_force_no_bokeh(records_2009):
+    import taxcalc
+    taxcalc.utils.BOKEH_AVAILABLE = False
+    pol = Policy()
+    behv = Behavior()
+    calc = Calculator(policy=pol, records=records_2009, behavior=behv)
+    calc.calc_all()
+    source = get_mtr_data(calc, calc, weights=wage_weighted,
+                          complex_weight=True)
+    with pytest.raises(RuntimeError):
+        plot = mtr_plot(source)
+    taxcalc.utils.BOKEH_AVAILABLE = True
+
+
 def test_multiyear_diagnostic_table(records_2009):
     pol = Policy()
     behv = Behavior()
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 9b62ee716..749911165 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -3,14 +3,16 @@
 import pandas as pd
 from pandas import DataFrame
 from collections import defaultdict, OrderedDict
-from bokeh.models import Plot, Range1d, ImageURL, DataRange1d
-from bokeh.embed import components
-from bokeh.layouts import layout
-from bokeh.palettes import Blues4, Reds4
-from bokeh.plotting import figure, hplot, vplot, output_file, show
-from bokeh.models import (ColumnDataSource, LogAxis, LinearAxis, Rect,
-                          FactorRange, CategoricalAxis, Line, Text, Square,
-                          HoverTool)
+
+try:
+    import bokeh
+    BOKEH_AVAILABLE = True
+    from bokeh.palettes import Blues4, Reds4
+    from bokeh.plotting import figure
+
+except ImportError:
+    BOKEH_AVAILABLE = False
+#
 
 STATS_COLUMNS = ['_expanded_income', 'c00100', '_standard',
                  'c04470', 'c04600', 'c04800', 'c05200', 'c62100', 'c09600',
@@ -733,6 +735,26 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     return merged
 
 
+def requires_bokeh(fn):
+    """
+    Decorator for functions that require bokeh.
+    If BOKEH_AVAILABLE=True, this does nothing.
+    If BOKEH_AVAILABLE=False, we raise an exception and tell the caller
+    that they must install bokeh in order to use the function.
+    """
+    def wrapped_f(*args, **kwargs):
+        if BOKEH_AVAILABLE:
+            return fn(*args, **kwargs)
+        else:
+            msg = ("`bokeh` is not installed. Please install "
+                   "`bokeh` to use this package (`conda install "
+                   "bokeh`)")
+            raise RuntimeError(msg)
+
+    return wrapped_f
+
+
+@requires_bokeh
 def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
              plot_width=425, plot_height=250, loc='top_left'):
     """

From 0f7e9b2d1f8b6b3a448d475437c0965d753f63a9 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Thu, 6 Oct 2016 20:36:29 -0400
Subject: [PATCH 15/33] Removed Two Un-supported Groupby

---
 taxcalc/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 749911165..1d85fe6e9 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -644,9 +644,9 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     calcY : a Tax-Calculator Records object that refers to the reform
 
     weights : String object
-        options for input: 'weighted_count_lt_zero', 'weighted_count_gt_zero',
-            'weighted_count', 'weighted_mean', 'wage_weighted', 'weighted_sum',
-            'weighted_perc_inc', 'weighted_perc_dec', 'weighted_share_of_total'
+        options for input: weighted_count_lt_zero, weighted_count_gt_zero,
+            weighted_mean, wage_weighted, weighted_sum,
+            weighted_perc_inc, weighted_perc_dec
         Choose different weight measure
 
     MARS : Integer
@@ -654,7 +654,7 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
         Choose different filling status
 
     income_measure : String object
-        options for input: '_expanded_income', '_iitax'
+        options for input: '_expanded_income', 'c00100', 'e00200'
         classifier of income bins/deciles
 
     mtr_measure : String object

From 483cba2d986fcfb94c06a443d8e9e67933d5d657 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Fri, 7 Oct 2016 00:04:12 -0400
Subject: [PATCH 16/33] output_file under command line

---
 taxcalc/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 1d85fe6e9..ebab37cbe 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -785,7 +785,8 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
         Choose the location of the legend label
     Returns
     -------
-    Figure Object (Use show() option to visualize)
+    Figure Object (Use show(FIGURE_NAME) option to visualize)
+        Note that, when using command line, output file needs to be first specified using command output_file("FILE_NAME.html")
     """
     PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 

From a9538096fdd547eb098535c4731e1acc4a49d2ba Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Fri, 7 Oct 2016 15:12:35 -0400
Subject: [PATCH 17/33] PEP8 and more docs

---
 taxcalc/utils.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index ebab37cbe..dc976f7ab 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -699,9 +699,11 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
         df_filtered_x = df_x[(df_x['MARS'] == MARS)].copy()
         df_filtered_y = df_y[(df_y['MARS'] == MARS)].copy()
 
+    # Split into groups by 'bins'
     gp_x = df_filtered_x.groupby('bins', as_index=False)
     gp_y = df_filtered_y.groupby('bins', as_index=False)
 
+    # Apply desired weights to mtr
     if mtr_measure == 'combined':
         wgtpct_x = gp_x.apply(weights, 'mtr_combined')
         wgtpct_y = gp_y.apply(weights, 'mtr_combined')
@@ -716,15 +718,18 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     wpct_x['bins'] = np.arange(1, 101)
     wpct_y['bins'] = np.arange(1, 101)
 
+    # Merge two dataframes
     rsltx = pd.merge(df_filtered_x[['bins']], wpct_x, how='left')
     rslty = pd.merge(df_filtered_y[['bins']], wpct_y, how='left')
 
     df_filtered_x['w_mtr'] = rsltx['w_mtr'].values
     df_filtered_y['w_mtr'] = rslty['w_mtr'].values
 
+    # Get rid of duplicated bins
     df_filtered_x.drop_duplicates(subset='bins', inplace=True)
     df_filtered_y.drop_duplicates(subset='bins', inplace=True)
 
+    # Prepare cleaned mtr data and concatenate into one datafram
     df_filtered_x = df_filtered_x['w_mtr']
     df_filtered_y = df_filtered_y['w_mtr']
 
@@ -758,7 +763,8 @@ def wrapped_f(*args, **kwargs):
 def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
              plot_width=425, plot_height=250, loc='top_left'):
     """
-    This function prepares the MTR data for two calculators.
+    This function generates marginal tax rate plot.
+    Source data can be obtained from get_mtr_data function.
 
     Parameters
     ----------
@@ -786,7 +792,8 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
     Returns
     -------
     Figure Object (Use show(FIGURE_NAME) option to visualize)
-        Note that, when using command line, output file needs to be first specified using command output_file("FILE_NAME.html")
+        Note that, when using command line, output file needs to be
+        first specified using command output_file("FILE_NAME.html")
     """
     PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 

From df58a477f5b01b080c5f8e1f491a76e3c10fea53 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 11 Oct 2016 13:43:32 -0400
Subject: [PATCH 18/33] Import output_file, show

---
 taxcalc/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index dc976f7ab..bc2457110 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -8,7 +8,7 @@
     import bokeh
     BOKEH_AVAILABLE = True
     from bokeh.palettes import Blues4, Reds4
-    from bokeh.plotting import figure
+    from bokeh.plotting import figure, output_file, show
 
 except ImportError:
     BOKEH_AVAILABLE = False

From db5cad04f4750b6aecd4e98e81d74634ca7573fb Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 11 Oct 2016 17:35:14 -0400
Subject: [PATCH 19/33] fixed document typo, and deprecation error

---
 taxcalc/utils.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index bc2457110..b6e2b4518 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -119,7 +119,7 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
         df['s006_weighted'] = np.multiply(df[income_measure].values,
                                           df['s006'].values)
     # Next, sort by income_measure
-    df.sort(income_measure, inplace=True)
+    df.sort_values(by=income_measure, inplace=True)
     # Do a cumulative sum
     if complex_weight:
         df['cumsum_weights'] = np.cumsum(df['s006_weighted'].values)
@@ -632,7 +632,7 @@ def f(x):
 
 
 def get_mtr_data(calcX, calcY, weights, MARS='ALL',
-                 income_measure='e00200', mtr_measure='IIT',
+                 income_measure='e00200', mtr_measure='_iitax',
                  complex_weight=False):
     """
     This function prepares the MTR data for two calculators.
@@ -658,7 +658,8 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
         classifier of income bins/deciles
 
     mtr_measure : String object
-        options for input: '_iitax', '_payrolltax', '_combined'
+        options for input: '_iitax', '_combined'
+        Choose different marginal tax rate measure
 
     complex_weight : Boolean
         The cumulated sum will be carried out based on weighted income measure
@@ -704,10 +705,10 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     gp_y = df_filtered_y.groupby('bins', as_index=False)
 
     # Apply desired weights to mtr
-    if mtr_measure == 'combined':
+    if mtr_measure == '_combined':
         wgtpct_x = gp_x.apply(weights, 'mtr_combined')
         wgtpct_y = gp_y.apply(weights, 'mtr_combined')
-    elif mtr_measure == 'IIT':
+    elif mtr_measure == '_iitax':
         wgtpct_x = gp_x.apply(weights, 'mtr_iit')
         wgtpct_y = gp_y.apply(weights, 'mtr_iit')
 
@@ -814,8 +815,8 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
     PP.legend.label_standoff = 2
     PP.legend.glyph_width = 14
     PP.legend.glyph_height = 14
-    PP.legend.legend_spacing = 5
-    PP.legend.legend_padding = 5
+    PP.legend.spacing = 5
+    PP.legend.padding = 5
     PP.yaxis.axis_label = ylab
     PP.xaxis.axis_label = xlab
     return PP

From 5c1256d7aecdf5293a3b7c19250822c92133ca46 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 11 Oct 2016 17:39:34 -0400
Subject: [PATCH 20/33] _combined instead of combined in test suite

---
 taxcalc/tests/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 99b0e2a8b..6fa612145 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -523,7 +523,7 @@ def test_get_mtr_data(records_2009):
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
     source = get_mtr_data(calc, calc, weights=wage_weighted, MARS=1,
-                          mtr_measure='combined')
+                          mtr_measure='_combined')
 
 
 def test_mtr_plot(records_2009):

From b41a061d47a81899fde4edb4f4b2d93bacc76bbf Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Wed, 12 Oct 2016 10:08:53 -0400
Subject: [PATCH 21/33] partially revert deprecation fix

---
 taxcalc/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index b6e2b4518..a69a09e56 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -815,8 +815,8 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
     PP.legend.label_standoff = 2
     PP.legend.glyph_width = 14
     PP.legend.glyph_height = 14
-    PP.legend.spacing = 5
-    PP.legend.padding = 5
+    PP.legend.legend_spacing = 5
+    PP.legend.legend_padding = 5
     PP.yaxis.axis_label = ylab
     PP.xaxis.axis_label = xlab
     return PP

From deecd730fda98cc0ad38745420bc40efb38818d6 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Mon, 17 Oct 2016 14:10:28 -0400
Subject: [PATCH 22/33] modified documentation for weighting options

---
 taxcalc/utils.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index a69a09e56..04927a255 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -631,7 +631,7 @@ def f(x):
                delim_whitespace=True, sep='\t')
 
 
-def get_mtr_data(calcX, calcY, weights, MARS='ALL',
+def get_mtr_data(calcX, calcY, weighting, MARS='ALL',
                  income_measure='e00200', mtr_measure='_iitax',
                  complex_weight=False):
     """
@@ -643,11 +643,18 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
 
     calcY : a Tax-Calculator Records object that refers to the reform
 
-    weights : String object
-        options for input: weighted_count_lt_zero, weighted_count_gt_zero,
-            weighted_mean, wage_weighted, weighted_sum,
-            weighted_perc_inc, weighted_perc_dec
-        Choose different weight measure
+    weighting : String that coincides with tax-calculator defined function
+        options for input:
+            weighted_mean: Averaging marginal tax rate by the
+                weight of each record. This option would be
+                helpful if you are interested in the MTR after
+                taking weights into consideration.
+            wage_weighted: Averaging marginal tax rate by the
+                product of weight and wage of each record. This
+                option would be helpful if you are interested in
+                the MTR after taking both weights and wages
+                into consideration.
+        Choose different weighting method
 
     MARS : Integer
         options for input: 1, 2, 3, 4
@@ -704,13 +711,13 @@ def get_mtr_data(calcX, calcY, weights, MARS='ALL',
     gp_x = df_filtered_x.groupby('bins', as_index=False)
     gp_y = df_filtered_y.groupby('bins', as_index=False)
 
-    # Apply desired weights to mtr
+    # Apply desired weighting method to mtr
     if mtr_measure == '_combined':
-        wgtpct_x = gp_x.apply(weights, 'mtr_combined')
-        wgtpct_y = gp_y.apply(weights, 'mtr_combined')
+        wgtpct_x = gp_x.apply(weighting, 'mtr_combined')
+        wgtpct_y = gp_y.apply(weighting, 'mtr_combined')
     elif mtr_measure == '_iitax':
-        wgtpct_x = gp_x.apply(weights, 'mtr_iit')
-        wgtpct_y = gp_y.apply(weights, 'mtr_iit')
+        wgtpct_x = gp_x.apply(weighting, 'mtr_iit')
+        wgtpct_y = gp_y.apply(weighting, 'mtr_iit')
 
     wpct_x = DataFrame(data=wgtpct_x, columns=['w_mtr'])
     wpct_y = DataFrame(data=wgtpct_y, columns=['w_mtr'])

From b2e09dd827faa8c173633838fee7e843d92de2c4 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Mon, 17 Oct 2016 14:15:39 -0400
Subject: [PATCH 23/33] test_suite change

---
 taxcalc/tests/test_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 6fa612145..083d3e43e 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -522,7 +522,7 @@ def test_get_mtr_data(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weights=wage_weighted, MARS=1,
+    source = get_mtr_data(calc, calc, weighting=wage_weighted, MARS=1,
                           mtr_measure='_combined')
 
 
@@ -531,7 +531,7 @@ def test_mtr_plot(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weights=wage_weighted,
+    source = get_mtr_data(calc, calc, weighting=wage_weighted,
                           complex_weight=True)
     plot = mtr_plot(source)
 
@@ -543,7 +543,7 @@ def test_mtr_plot_force_no_bokeh(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weights=wage_weighted,
+    source = get_mtr_data(calc, calc, weighting=wage_weighted,
                           complex_weight=True)
     with pytest.raises(RuntimeError):
         plot = mtr_plot(source)

From 4723564947c041d10589848a16e8aa031fba8ad3 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 18 Oct 2016 14:08:46 -0400
Subject: [PATCH 24/33] weighting of string type

---
 taxcalc/utils.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 04927a255..fb5838ec0 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -631,7 +631,7 @@ def f(x):
                delim_whitespace=True, sep='\t')
 
 
-def get_mtr_data(calcX, calcY, weighting, MARS='ALL',
+def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                  income_measure='e00200', mtr_measure='_iitax',
                  complex_weight=False):
     """
@@ -643,13 +643,13 @@ def get_mtr_data(calcX, calcY, weighting, MARS='ALL',
 
     calcY : a Tax-Calculator Records object that refers to the reform
 
-    weighting : String that coincides with tax-calculator defined function
+    weighting : String object
         options for input:
-            weighted_mean: Averaging marginal tax rate by the
+            'weighted_mean': Averaging marginal tax rate by the
                 weight of each record. This option would be
                 helpful if you are interested in the MTR after
                 taking weights into consideration.
-            wage_weighted: Averaging marginal tax rate by the
+            'wage_weighted': Averaging marginal tax rate by the
                 product of weight and wage of each record. This
                 option would be helpful if you are interested in
                 the MTR after taking both weights and wages
@@ -711,10 +711,19 @@ def get_mtr_data(calcX, calcY, weighting, MARS='ALL',
     gp_x = df_filtered_x.groupby('bins', as_index=False)
     gp_y = df_filtered_y.groupby('bins', as_index=False)
 
+    # Extract proper weighting method
+    if weighting == 'weighted_mean':
+        weighting_method = weighted_mean
+    elif weighting == 'wage_weighted':
+        weighting_method = wage_weighted
+    else:
+        msg = 'weighting option "{}" is not valid'
+        raise ValueError(msg.format(weighting))
+
     # Apply desired weighting method to mtr
     if mtr_measure == '_combined':
-        wgtpct_x = gp_x.apply(weighting, 'mtr_combined')
-        wgtpct_y = gp_y.apply(weighting, 'mtr_combined')
+        wgtpct_x = gp_x.apply(weighting_method, 'mtr_combined')
+        wgtpct_y = gp_y.apply(weighting_method, 'mtr_combined')
     elif mtr_measure == '_iitax':
         wgtpct_x = gp_x.apply(weighting, 'mtr_iit')
         wgtpct_y = gp_y.apply(weighting, 'mtr_iit')

From fb5549377ab73cae34e9a09fef48415b573f51bc Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 18 Oct 2016 14:10:01 -0400
Subject: [PATCH 25/33] modify test suite

---
 taxcalc/tests/test_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 083d3e43e..ecbe89fd7 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -522,7 +522,7 @@ def test_get_mtr_data(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weighting=wage_weighted, MARS=1,
+    source = get_mtr_data(calc, calc, weighting, MARS=1,
                           mtr_measure='_combined')
 
 
@@ -531,7 +531,7 @@ def test_mtr_plot(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weighting=wage_weighted,
+    source = get_mtr_data(calc, calc, weighting='wage_weighted',
                           complex_weight=True)
     plot = mtr_plot(source)
 
@@ -543,7 +543,7 @@ def test_mtr_plot_force_no_bokeh(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weighting=wage_weighted,
+    source = get_mtr_data(calc, calc, weighting,
                           complex_weight=True)
     with pytest.raises(RuntimeError):
         plot = mtr_plot(source)

From fc83012629e60acdc73d57e1b297d42df76d5755 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 18 Oct 2016 14:19:19 -0400
Subject: [PATCH 26/33] forgot to update '_iitax' case

---
 taxcalc/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index fb5838ec0..63bf3ead5 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -725,8 +725,8 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
         wgtpct_x = gp_x.apply(weighting_method, 'mtr_combined')
         wgtpct_y = gp_y.apply(weighting_method, 'mtr_combined')
     elif mtr_measure == '_iitax':
-        wgtpct_x = gp_x.apply(weighting, 'mtr_iit')
-        wgtpct_y = gp_y.apply(weighting, 'mtr_iit')
+        wgtpct_x = gp_x.apply(weighting_method, 'mtr_iit')
+        wgtpct_y = gp_y.apply(weighting_method, 'mtr_iit')
 
     wpct_x = DataFrame(data=wgtpct_x, columns=['w_mtr'])
     wpct_y = DataFrame(data=wgtpct_y, columns=['w_mtr'])

From 0d512d9ac04f65e86a43642e0f88660e893d3ca9 Mon Sep 17 00:00:00 2001
From: "Sean.Wang" <GoFroggyRun@users.noreply.github.com>
Date: Tue, 18 Oct 2016 14:20:37 -0400
Subject: [PATCH 27/33] update test suite

---
 taxcalc/tests/test_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index ecbe89fd7..0e43479ab 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -522,8 +522,7 @@ def test_get_mtr_data(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weighting, MARS=1,
-                          mtr_measure='_combined')
+    source = get_mtr_data(calc, calc, MARS=1, mtr_measure='_combined')
 
 
 def test_mtr_plot(records_2009):
@@ -543,7 +542,7 @@ def test_mtr_plot_force_no_bokeh(records_2009):
     behv = Behavior()
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
-    source = get_mtr_data(calc, calc, weighting,
+    source = get_mtr_data(calc, calc, weighting='weighted_mean',
                           complex_weight=True)
     with pytest.raises(RuntimeError):
         plot = mtr_plot(source)

From 829bbb98ec2fdf3327da1d2dfe5420d7e02d863f Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Wed, 19 Oct 2016 20:30:27 -0400
Subject: [PATCH 28/33] more documentation

---
 taxcalc/utils.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 384837417..2437f30f4 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -632,7 +632,7 @@ def f(x):
 
 
 def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
-                 income_measure='e00200', mtr_measure='_iitax',
+                 income_measure='e00200', mtr_measure='_combined',
                  complex_weight=False):
     """
     This function prepares the MTR data for two calculators.
@@ -656,16 +656,25 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                 into consideration.
         Choose different weighting method
 
-    MARS : Integer
-        options for input: 1, 2, 3, 4
+    MARS : Integer or String
+        options for input: 'ALL', 1, 2, 3, 4
         Choose different filling status
 
     income_measure : String object
-        options for input: '_expanded_income', 'c00100', 'e00200'
+        options for input:
+            '_expanded_income': The sum of adjusted gross income, non-taxable
+                interest income, non-taxable social security benefits and
+                employer share of FICA.
+            'c00100': Adjusted gross income
+            'e00200': Salaries and wages.
         classifier of income bins/deciles
 
     mtr_measure : String object
-        options for input: '_iitax', '_combined'
+        options for input:
+            '_iitax': Marginal individual income tax rates.
+            '_combined': Marginal combined tax rates, which is
+                the sum of marginal payroll tax rates and marginal individual
+                income tax rates.
         Choose different marginal tax rate measure
 
     complex_weight : Boolean

From 0d458e0ba491649acb122fc4c6b683f08fe14bd1 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Wed, 19 Oct 2016 20:54:53 -0400
Subject: [PATCH 29/33] complex weight doc

---
 taxcalc/utils.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 2437f30f4..e52417f51 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -654,7 +654,7 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                 option would be helpful if you are interested in
                 the MTR after taking both weights and wages
                 into consideration.
-        Choose different weighting method
+        Choose different weighting methods
 
     MARS : Integer or String
         options for input: 'ALL', 1, 2, 3, 4
@@ -671,15 +671,18 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
 
     mtr_measure : String object
         options for input:
-            '_iitax': Marginal individual income tax rates.
+            '_iitax': Marginal individual income tax rate.
             '_combined': Marginal combined tax rates, which is
-                the sum of marginal payroll tax rates and marginal individual
-                income tax rates.
-        Choose different marginal tax rate measure
+                the sum of marginal payroll tax rate and marginal individual
+                income tax rate.
+        Choose different marginal tax rate measures
 
     complex_weight : Boolean
-        The cumulated sum will be carried out based on weighted income measure
-        if this option is true
+        If this option is true, the desired income measure will be weighted by
+        s006. And thus this will allow users to obtain aggregate activity for
+        selected income measure. For example, if income measure is 'e00200' and
+        this option is true, then the bin (or x-axis in the plot)
+        is the (percentile of) economic activity.
     Returns
     -------
     DataFrame object
@@ -755,7 +758,7 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
     df_filtered_x.drop_duplicates(subset='bins', inplace=True)
     df_filtered_y.drop_duplicates(subset='bins', inplace=True)
 
-    # Prepare cleaned mtr data and concatenate into one datafram
+    # Prepare cleaned mtr data and concatenate into one dataframe
     df_filtered_x = df_filtered_x['w_mtr']
     df_filtered_y = df_filtered_y['w_mtr']
 

From 25d7c5d0786db4ffec23b8e76515e54a5fab7be1 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Thu, 20 Oct 2016 22:02:16 -0400
Subject: [PATCH 30/33] document and NA

---
 taxcalc/utils.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index e52417f51..847752ffe 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -678,11 +678,11 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
         Choose different marginal tax rate measures
 
     complex_weight : Boolean
-        If this option is true, the desired income measure will be weighted by
-        s006. And thus this will allow users to obtain aggregate activity for
-        selected income measure. For example, if income measure is 'e00200' and
-        this option is true, then the bin (or x-axis in the plot)
-        is the (percentile of) economic activity.
+        If this option is true, for each record, s006 (weight) will be weighted
+        by the desired income measure. And thus this will allow users to obtain
+        aggregate activity for selected income measure. For example, if income
+        measure is 'e00200' and this option is true, then the bin (or x-axis in
+        the plot) is the (percentile of) total economic activity.
     Returns
     -------
     DataFrame object
@@ -765,7 +765,9 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
     merged = pd.concat([df_filtered_x, df_filtered_y], axis=1,
                        ignore_index=True)
     merged.columns = ['base', 'reform']
-
+    merged.index = (merged.reset_index()).index
+    if complex_weight:
+        merged = merged[1:]
     return merged
 
 

From 94bf8e12ef29cb830b14897810415170f0b0fa7b Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Thu, 20 Oct 2016 22:17:01 -0400
Subject: [PATCH 31/33] name change

---
 taxcalc/tests/test_utils.py |  6 +++---
 taxcalc/utils.py            | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/taxcalc/tests/test_utils.py b/taxcalc/tests/test_utils.py
index 0e43479ab..04a1bcd3e 100644
--- a/taxcalc/tests/test_utils.py
+++ b/taxcalc/tests/test_utils.py
@@ -314,7 +314,7 @@ def test_add_weighted_decile_bins():
     for lab in bin_labels:
         assert lab in default_labels
     # Custom labels
-    df = add_weighted_decile_bins(df, complex_weight=True)
+    df = add_weighted_decile_bins(df, weight_by_income_measure=True)
     assert 'bins' in df
     custom_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
     df = add_weighted_decile_bins(df, labels=custom_labels)
@@ -531,7 +531,7 @@ def test_mtr_plot(records_2009):
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
     source = get_mtr_data(calc, calc, weighting='wage_weighted',
-                          complex_weight=True)
+                          weight_by_income_measure=True)
     plot = mtr_plot(source)
 
 
@@ -543,7 +543,7 @@ def test_mtr_plot_force_no_bokeh(records_2009):
     calc = Calculator(policy=pol, records=records_2009, behavior=behv)
     calc.calc_all()
     source = get_mtr_data(calc, calc, weighting='weighted_mean',
-                          complex_weight=True)
+                          weight_by_income_measure=True)
     with pytest.raises(RuntimeError):
         plot = mtr_plot(source)
     taxcalc.utils.BOKEH_AVAILABLE = True
diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 847752ffe..8d6a3314c 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -105,7 +105,8 @@ def weighted_share_of_total(agg, col_name, total):
 
 
 def add_weighted_decile_bins(df, income_measure='_expanded_income',
-                             num_bins=10, labels=None, complex_weight=False):
+                             num_bins=10, labels=None,
+                             weight_by_income_measure=False):
     """
     Add a column of income bins based on each 10% of the income_measure,
     weighted by s006.
@@ -115,13 +116,13 @@ def add_weighted_decile_bins(df, income_measure='_expanded_income',
     This function will server as a 'grouper' later on.
     """
     # First, weight income measure by s006 if desired
-    if complex_weight:
+    if weight_by_income_measure:
         df['s006_weighted'] = np.multiply(df[income_measure].values,
                                           df['s006'].values)
     # Next, sort by income_measure
     df.sort_values(by=income_measure, inplace=True)
     # Do a cumulative sum
-    if complex_weight:
+    if weight_by_income_measure:
         df['cumsum_weights'] = np.cumsum(df['s006_weighted'].values)
     else:
         df['cumsum_weights'] = np.cumsum(df['s006'].values)
@@ -633,7 +634,7 @@ def f(x):
 
 def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                  income_measure='e00200', mtr_measure='_combined',
-                 complex_weight=False):
+                 weight_by_income_measure=False):
     """
     This function prepares the MTR data for two calculators.
 
@@ -677,7 +678,7 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                 income tax rate.
         Choose different marginal tax rate measures
 
-    complex_weight : Boolean
+    weight_by_income_measure : Boolean
         If this option is true, for each record, s006 (weight) will be weighted
         by the desired income measure. And thus this will allow users to obtain
         aggregate activity for selected income measure. For example, if income
@@ -702,11 +703,11 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
     df_y[income_measure] = df_x[income_measure]
 
     # Complex weighted bins or not
-    if complex_weight:
+    if weight_by_income_measure:
         df_x = add_weighted_decile_bins(df_x, income_measure, 100,
-                                        complex_weight=True)
+                                        weight_by_income_measure=True)
         df_y = add_weighted_decile_bins(df_y, income_measure, 100,
-                                        complex_weight=True)
+                                        weight_by_income_measure=True)
     else:
         df_x = add_weighted_decile_bins(df_x, income_measure, 100)
         df_y = add_weighted_decile_bins(df_y, income_measure, 100)
@@ -766,7 +767,7 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
                        ignore_index=True)
     merged.columns = ['base', 'reform']
     merged.index = (merged.reset_index()).index
-    if complex_weight:
+    if weight_by_income_measure:
         merged = merged[1:]
     return merged
 

From e3822391496477f597d1572ef4ff1e31dda7a7a1 Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Sun, 23 Oct 2016 23:14:19 -0400
Subject: [PATCH 32/33] more docstring

---
 taxcalc/utils.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 8d6a3314c..0c0e926f3 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -680,10 +680,13 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
 
     weight_by_income_measure : Boolean
         If this option is true, for each record, s006 (weight) will be weighted
-        by the desired income measure. And thus this will allow users to obtain
-        aggregate activity for selected income measure. For example, if income
-        measure is 'e00200' and this option is true, then the bin (or x-axis in
-        the plot) is the (percentile of) total economic activity.
+        by the chosen income measure. (Note that this option is not
+        about 'weighted' vs 'unweighted', but rather about what to
+        weight s006 by.) This allows users to investigate different
+        aggregated targets (via the choice of income_measure).
+        For example, if income measure is 'e00200' and this option is true,
+        then the bin (or x-axis in the plot) is the (percentile of) total
+        wages and salaries.
     Returns
     -------
     DataFrame object
@@ -824,8 +827,11 @@ def mtr_plot(source, xlab='Percentile', ylab='Avg. MTR', title='MTR plot',
     Returns
     -------
     Figure Object (Use show(FIGURE_NAME) option to visualize)
+        The default output is in HTML format. To obtain a PNG copy, use the
+        'Save' option on the Toolbar (usually located on the top-right corner
+        of the plot).
         Note that, when using command line, output file needs to be
-        first specified using command output_file("FILE_NAME.html")
+        first specified using command output_file("FILE_NAME.html").
     """
     PP = figure(plot_width=plot_width, plot_height=plot_height, title=title)
 

From 89bbd4829b4c4f7ac3c372db944c3b2ef62f17fe Mon Sep 17 00:00:00 2001
From: Sean <zhwa@umich.edu>
Date: Tue, 25 Oct 2016 14:04:18 -0400
Subject: [PATCH 33/33] switched order

---
 taxcalc/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/taxcalc/utils.py b/taxcalc/utils.py
index 0c0e926f3..a3d799aa8 100644
--- a/taxcalc/utils.py
+++ b/taxcalc/utils.py
@@ -691,13 +691,13 @@ def get_mtr_data(calcX, calcY, weighting='weighted_mean', MARS='ALL',
     -------
     DataFrame object
     """
+    # Calculate MTR
+    a, mtr_iit_x, mtr_combined_x = calcX.mtr()
+    a, mtr_iit_y, mtr_combined_y = calcY.mtr()
     # Get output columns
     df_x = exp_results(calcX)
     df_y = exp_results(calcY)
 
-    # Calculate MTR
-    a, mtr_iit_x, mtr_combined_x = calcX.mtr()
-    a, mtr_iit_y, mtr_combined_y = calcY.mtr()
     df_x['mtr_iit'] = mtr_iit_x
     df_y['mtr_iit'] = mtr_iit_y
     df_x['mtr_combined'] = mtr_combined_x