ensemble_learning_example.py
"""
This example shows how you can search for the best models in each layer in a
stacking ensemble.
We want to create a stacking ensemble with 3 layers:
- a top layer with one model
- a middle layer with multiple models
- a bottom layer with multiple models
We also want to know how many models should be used in the middle and bottom layer.
For that we can use the helper function "get_combinations". It works as follows:
input = [1, 2 , 3]
output = get_combinations(input, comb_len=2)
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]
Instead of numbers we insert models into "input". This way we get each combination
with more than 2 elements. Only 1 model per layer would not make much sense.
The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
"""
import itertools
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from mlxtend.classifier import StackingClassifier
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    ExtraTreesClassifier,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from hyperactive import Hyperactive
data = load_breast_cancer()
X, y = data.data, data.target
# define models that are used in search space
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()
mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()
lr = LogisticRegression()
rc = RidgeClassifier()
def stacking(opt):
    # instantiate the model combinations the optimizer selected for each layer
    lvl_1_ = opt["lvl_1"]()
    lvl_0_ = opt["lvl_0"]()
    top_ = opt["top"]()

    # nest two mlxtend StackingClassifiers to build the 3-layer ensemble
    stack_lvl_0 = StackingClassifier(classifiers=lvl_0_, meta_classifier=top_)
    stack_lvl_1 = StackingClassifier(classifiers=lvl_1_, meta_classifier=stack_lvl_0)
    scores = cross_val_score(stack_lvl_1, X, y, cv=3)

    return scores.mean()
# helper function to create search space dimensions
def get_combinations(models, comb_len=2):
    def _list_in_list_of_lists(list_, list_of_lists):
        for list__ in list_of_lists:
            if set(list_) == set(list__):
                return True
        return False

    # collect every combination of at least comb_len models (order does not matter)
    comb_list = []
    for i in range(0, len(models) + 1):
        for subset in itertools.permutations(models, i):
            if len(subset) < comb_len:
                continue
            if _list_in_list_of_lists(subset, comb_list):
                continue
            comb_list.append(list(subset))

    # wrap each combination in a function, so it can be used as a search space value
    comb_list_f = []
    for idx, comb_ in enumerate(comb_list):
        # bind comb_ as a default argument, so every function returns its own
        # combination instead of the last one in the loop (late-binding closure)
        def _func_(comb_=comb_):
            return comb_

        _func_.__name__ = str(idx) + "___" + str(comb_)
        comb_list_f.append(_func_)

    return comb_list_f
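# Minimal sanity check of get_combinations, mirroring the docstring example
# (illustrative only, not needed for the actual search below).
for comb_func in get_combinations([1, 2, 3], comb_len=2):
    print(comb_func())  # -> [1, 2], [1, 3], [2, 3], [1, 2, 3]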
# wrap single models in functions, so the "top"-layer dimension has the same
# callable format as the combinations returned by get_combinations
def lr_f():
    return lr

def dtc_f():
    return dtc

def gnb_f():
    return gnb

def rc_f():
    return rc
models_0 = [gpc, dtc, mlp, gnb, knn]
models_1 = [gbc, rfc, etc]
stack_lvl_0_clfs = get_combinations(models_0)
stack_lvl_1_clfs = get_combinations(models_1)
print("\n stack_lvl_0_clfs \n", stack_lvl_0_clfs, "\n")
# each dimension is a list of functions; Hyperactive selects one per iteration
# and passes it to the objective function via "opt"
search_space = {
    "lvl_1": stack_lvl_1_clfs,
    "lvl_0": stack_lvl_0_clfs,
    "top": [lr_f, dtc_f, gnb_f, rc_f],
}
"""
hyper = Hyperactive()
hyper.add_search(stacking, search_space, n_iter=3)
hyper.run()
"""