-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexperiment_3_mondrian_kernel_vs_forest.py
108 lines (85 loc) · 4.22 KB
/
experiment_3_mondrian_kernel_vs_forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import matplotlib.pylab as plt
import numpy as np
from mondrian_kernel import evaluate_all_lifetimes
from utils import load_CPU, initialize_plotting, remove_chartjunk, tableau20
def plot_mondrian_kernel_vs_mondrian_forest(lifetime_max, res):
    """Plot training and test set error of Mondrian kernel and Mondrian forest.

    Both methods are based on the same set of M Mondrian samples.

    Args:
        lifetime_max: upper limit of the (log-scaled) lifetime axis.
        res: dictionary returned by the evaluate_all_lifetimes procedure in
            mondrian_kernel.py. The entries 'times', 'forest_train',
            'forest_test', 'kernel_train' and 'kernel_test' are read.
    """
    times = res['times']
    forest_train = res['forest_train']
    forest_test = res['forest_test']
    kernel_train = res['kernel_train']
    kernel_test = res['kernel_test']

    # set up error plot
    fig = plt.figure(figsize=(7, 4))
    # integer position spec: string specs such as '111' are rejected by
    # current matplotlib releases
    ax = fig.add_subplot(111)
    remove_chartjunk(ax)

    # raw strings keep the LaTeX backslashes without invalid-escape warnings
    ax.set_xlabel(r'lifetime $\lambda$')
    ax.set_ylabel(r'relative error [\%]')
    # the on/off flag is passed positionally because its keyword name changed
    # between matplotlib versions (b -> visible)
    ax.yaxis.grid(True, which='major', linestyle='dotted', lw=0.5,
                  color='black', alpha=0.3)
    ax.set_xscale('log')
    ax.set_xlim((1e-8, lifetime_max))
    ax.set_ylim((0, 25))

    # (values, tableau20 colour index, legend label, extra line options);
    # test curves are drawn with lw=2, train curves keep the default width
    curves = (
        (forest_test, 6, '"M. forest" (test)', {'lw': 2}),
        (forest_train, 7, '"M. forest" (train)', {}),
        (kernel_test, 4, 'M. kernel (test)', {'lw': 2}),
        (kernel_train, 5, 'M. kernel (train)', {}),
    )
    for values, colour_index, label, extra in curves:
        ax.plot(times, values, drawstyle="steps-post", ls='-',
                color=tableau20(colour_index), label=label,
                rasterized=False, **extra)

    ax.legend(bbox_to_anchor=[1.15, 1.05], frameon=False)
def plot_kernel_vs_forest_weights(y, res):
    """Plot the weights learned by Mondrian kernel and Mondrian forest.

    Both methods are based on the same set of M Mondrian samples. The left
    panel scatters the two weight vectors against each other; the right panel
    shows histograms of both weight vectors and of the mean-centred targets.

    Args:
        y: training targets; their mean is subtracted before histogramming
            (presumably a 1-D numeric array -- confirm against load_CPU).
        res: dictionary returned by the evaluate_all_lifetimes procedure in
            mondrian_kernel.py. The entries 'w_forest' and 'w_kernel' are read.
    """
    w_forest = res['w_forest']
    w_kernel = res['w_kernel']

    # plot weights against each other
    fig1 = plt.figure(figsize=(8, 4))
    # integer position specs: string specs such as '121' are rejected by
    # current matplotlib releases
    ax1 = fig1.add_subplot(121)
    ax1.set_xlabel('weights learned by "Mondrian forest"')
    ax1.set_ylabel('weights learned by Mondrian kernel')
    ax1.scatter(w_forest, w_kernel, marker='.', color=tableau20(16))

    # dashed identity line spanning the autoscaled range of both axes
    xl = ax1.get_xlim()
    yl = ax1.get_ylim()
    lims = [
        np.min([xl, yl]),  # min of both axes
        np.max([xl, yl]),  # max of both axes
    ]
    ax1.plot(lims, lims, '--', color='black', alpha=0.75, zorder=0)
    # drawing the line may change the limits: restore x, use a fixed y-range
    ax1.set_xlim(xl)
    ax1.set_ylim((-60, 60))

    # plot histogram of weight values (and training targets)
    ax2 = fig1.add_subplot(122)
    ax2.set_xlabel('values')
    ax2.set_ylabel('value frequency')
    bins = np.linspace(-100, 20, 50)
    # (data, tableau20 colour index, legend label); raw strings keep the
    # LaTeX backslashes without invalid-escape warnings
    histograms = (
        (w_forest, 6, r'M. forest weights $\mathbf{w}$'),
        (w_kernel, 4, r'M. kernel weights $\mathbf{w}$'),
        (y - np.mean(y), 8, r'training targets $\mathbf{y}$'),
    )
    for data, colour_index, label in histograms:
        # density=True replaces the removed normed=True keyword
        ax2.hist(data, bins=bins, histtype='stepfilled', density=True,
                 color=tableau20(colour_index), alpha=0.5, label=label)
    ax2.set_ylim((0.0, 0.16))
    ax2.legend(frameon=False, loc='upper left')

    fig1.tight_layout()
def main():
    """Run Mondrian kernel and Mondrian forest on the CPU data and plot both."""
    # fix the seed of numpy's global random number generator
    np.random.seed(9879846)

    # data set
    X, y, X_test, y_test = load_CPU()

    # experiment parameters
    num_trees = 50                  # number of Mondrian trees to use
    terminal_lifetime = 1 * 1e-5    # terminal lifetime
    plotted_lifetime = 2 * 1e-6     # lifetime for which weights should be plotted
    ridge_delta = 0.0001            # ridge regression delta

    # evaluate both Mondrian kernel and Mondrian forest up to the terminal lifetime
    res = evaluate_all_lifetimes(
        X, y, X_test, y_test, num_trees, terminal_lifetime, ridge_delta,
        mondrian_kernel=True,
        mondrian_forest=True,
        weights_from_lifetime=plotted_lifetime,
    )

    # error curves first, then the learned weights
    plot_mondrian_kernel_vs_mondrian_forest(terminal_lifetime, res)
    plot_kernel_vs_forest_weights(y, res)
if __name__ == "__main__":
    # script entry point: set up plotting before any figure is created,
    # run the experiment, then display the figures it produced
    initialize_plotting()
    main()
    plt.show()