"""
This script describes how to save time during the optimization by
using a pretrained model. It is similar to the transer learning example,
but here you do the training and model creation of the pretrained model
yourself.
The problem is that most of the optimization time is "waisted" by
training the model. The time to find a new position to explore by
Hyperactive is very small compared to the training time of
neural networks. This means, that we can do more optimization
if we keep the training time as little as possible.
The idea of pretrained neural architecture search is to pretrain a complete model one time.
In the next step we remove the layers that should be optimized
and make the remaining layers not-trainable.
This results in a partial, pretrained, not-trainable model that will be
used during the Hyperactive optimization.
You can now add layers to the partial model in the objective function
and add the parameters or layers that will be optimized by Hyperactive.
With each iteration of the optimization run we are only training
the added layers of the model. This saves a lot of training time.
"""
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import (
    Dense,
    Conv2D,
    MaxPooling2D,
    Flatten,
    Activation,
    Dropout,
)
from keras.datasets import cifar10
from keras.utils import to_categorical

from hyperactive import Hyperactive

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# to make the example quick
X_train = X_train[0:1000]
y_train = y_train[0:1000]
X_test = X_test[0:1000]
y_test = y_test[0:1000]
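
# NOTE (not in the original example): scaling the raw uint8 pixel values to
# [0, 1] is common practice for CIFAR-10 and usually speeds up convergence.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0
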
# create model and train it
model = Sequential()
model.add(Conv2D(64, (3, 3), padding="same", input_shape=X_train.shape[1:]))
model.add(Activation("relu"))
model.add(Conv2D(32, (3, 3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(32, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(200))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation("softmax"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=5, batch_size=500)
model_pretrained = model
n_layers = len(model_pretrained.layers)

# remove the layers that should be optimized, keeping only the first 9
# pretrained layers (each pass of the loop pops the last layer)
for i in range(n_layers - 9):
    model_pretrained.pop()

# set remaining layers to not-trainable
for layer in model_pretrained.layers:
    layer.trainable = False

model_pretrained.summary()
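
# The summary printed above should report 0 trainable parameters, since every
# remaining layer of the partial model has been frozen.
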
def cnn(opt):
    # clone the frozen base model; clone_model creates fresh, randomly
    # initialized weights, so the pretrained weights are copied over explicitly
    model = keras.models.clone_model(model_pretrained)
    model.set_weights(model_pretrained.get_weights())

    # add the layers that Hyperactive optimizes in this iteration
    model = opt["conv_layer.0"](model)
    model.add(Flatten())
    model.add(Dense(opt["neurons.0"]))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation("softmax"))

    model.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    model.fit(X_train, y_train, epochs=5, batch_size=500)
    model.summary()

    _, score = model.evaluate(x=X_test, y=y_test)
    return score


# conv1, conv2 and conv3 are functions that add layers. We want to find out
# which of these functions is best.
def conv1(model):
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    return model


def conv2(model):
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation("relu"))
    return model


def conv3(model):
    return model


search_space = {
    "conv_layer.0": [conv1, conv2, conv3],
    "neurons.0": list(range(100, 1000, 100)),
}
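
# Optional sanity check (not part of the original example): the objective
# function can be called directly with a hand-built parameter dictionary
# before handing it to Hyperactive, e.g.:
#   score = cnn({"conv_layer.0": conv3, "neurons.0": 200})
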
hyper = Hyperactive()
hyper.add_search(cnn, search_space, n_iter=3)
hyper.run()
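
# After the run, the best parameter set can be inspected. This assumes
# Hyperactive's best_para() accessor, which returns the parameter dictionary
# of the best-scoring iteration:
print(hyper.best_para(cnn))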