-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVGG.py
144 lines (129 loc) · 6.12 KB
/
VGG.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
import tensorflow.keras.backend as K
from tensorflow.keras.regularizers import l2
'''
{ Summary }
Build a VGG16, VGG19 from scratch
{ Arguments }
input_shape:
input shape of input image, e.g. (32,32,3) for cifar10
n_classes:
number of classes in your dataset
alpha: 0.0~1.0
This parameter is not in the original paper. It is intentionally introduced so that
you can control the model size conveniently. If you use 0.5, the number of filters in each
Conv2D layer will be halved.
n_downsampling: 0~5
In the original VGG, there are 5 max-pooling layers with pool size (2,2). Therefore your
feature map will be 32 times smaller (in each spatial dimension) than your original data
before entering the final dense layers. However, for images with smaller spatial resolutions,
say cifar10, you don't want to downsample them too aggressively. You can control how many
downsampling steps are performed in the model. If you set it to 3, the first 2 max-pooling
layers will be skipped.
last_dense_units:
There are two dense layers in the final part of VGG. The default 4096 is designed for ImageNet,
which contains 1000 classes. For datasets with fewer classes, e.g. cifar10 with only 10 classes,
you should consider using a smaller value, e.g. 128.
{ Returns }
Model:
A Keras model instance
{ Suggested Models }
for smaller dataset, such as cifar10, cifar100, try reduced vgg16 or vgg19:
model = vgg16(input_shape = (32,32,3), n_classes = 10, alpha = 0.5, n_downsampling = 3, last_dense_units = 128)
model = vgg19(input_shape = (32,32,3), n_classes = 10, alpha = 0.5, n_downsampling = 3, last_dense_units = 128)
for Tiny ImageNet, (64x64x3), try:
model = vgg16(input_shape = (64,64,3), n_classes = 200, alpha = 0.5, n_downsampling = 4, last_dense_units = 512)
model = vgg19(input_shape = (64,64,3), n_classes = 200, alpha = 0.5, n_downsampling = 4, last_dense_units = 512)
for ImageNet, (224x224x3), try original VGG16 or VGG19:
model = vgg16(input_shape = (224,224,3), n_classes = 1000, alpha = 1.0, n_downsampling = 5, last_dense_units = 4096)
model = vgg19(input_shape = (224,224,3), n_classes = 1000, alpha = 1.0, n_downsampling = 5, last_dense_units = 4096)
'''
class ConvBlock:
    """Namespace for the two layer groups the VGG builders are assembled from."""

    @staticmethod
    def conv(x_in, filters):
        """Apply a 3x3 convolution, batch normalization, and ReLU to ``x_in``.

        Args:
            x_in: Keras tensor to transform.
            filters: Number of output channels of the convolution.

        Returns:
            The Keras tensor produced by the ReLU layer.
        """
        convolution = layers.Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',  # stride 1 + 'same' padding keeps height and width
            kernel_initializer='he_uniform',
            kernel_regularizer=l2(4e-5),
        )
        features = convolution(x_in)
        normalized = layers.BatchNormalization()(features)
        return layers.ReLU()(normalized)

    @staticmethod
    def maxpool(x_in):
        """Apply a 2x2 max pooling layer with 'same' padding to ``x_in``.

        Args:
            x_in: Keras tensor to pool.

        Returns:
            The pooled Keras tensor.
        """
        pooling = layers.MaxPooling2D(pool_size=(2, 2), padding='same')
        return pooling(x_in)
def vgg16(input_shape=(224, 224, 3), n_classes=1000, alpha=1.0, n_downsampling=5, last_dense_units=4096):
    """Build a VGG16-style classifier as a Keras functional model.

    The network has five convolutional stages (2, 2, 3, 3, 3 conv blocks with
    base widths 64, 128, 256, 512, 512), each conv block being
    ``ConvBlock.conv`` (3x3 convolution + batch normalization + ReLU),
    followed by two ReLU dense layers and a softmax output layer.

    This function only builds and returns the model. It does not print a
    summary or write a diagram to disk; callers that want either should call
    ``model.summary()`` / ``plot_model(...)`` themselves.

    Args:
        input_shape: Shape of one input sample, e.g. ``(32, 32, 3)``.
        n_classes: Number of units in the final softmax layer.
        alpha: Width multiplier; every conv layer gets
            ``int(base_filters * alpha)`` filters. Note that the truncation
            yields 0 filters for the first stage once ``alpha < 1/64``.
        n_downsampling: How many of the five stages end with a 2x2 max
            pooling, counted from the last stage backwards. With 3, the
            first two stages are not pooled.
        last_dense_units: Width of each of the two hidden dense layers.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to class
        probabilities.

    Raises:
        AssertionError: If ``n_downsampling`` is outside ``0..5``.
    """
    assert 0 <= n_downsampling <= 5, 'n_downsampling must be between 0 and 5'

    # (base filter count, number of conv blocks) for each of the five stages.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    # One flag per stage: only the last `n_downsampling` stages are pooled.
    dim_reduction = [False] * (5 - n_downsampling) + [True] * n_downsampling

    x_in = layers.Input(shape=input_shape)
    x = x_in
    for (base_filters, n_convs), pool_after in zip(stages, dim_reduction):
        filters = int(base_filters * alpha)
        for _ in range(n_convs):
            x = ConvBlock.conv(x, filters)
        if pool_after:
            x = ConvBlock.maxpool(x)

    x = layers.Flatten()(x)
    x = layers.Dense(last_dense_units, activation='relu')(x)
    x = layers.Dense(last_dense_units, activation='relu')(x)
    x_out = layers.Dense(n_classes, activation='softmax')(x)
    return Model(inputs=x_in, outputs=x_out)
def vgg19(input_shape = (224,224,3), n_classes = 1000, alpha = 1.0, n_downsampling = 5, last_dense_units = 4096):
    """Build a VGG19-style classifier as a Keras functional model.

    Five convolutional stages (2, 2, 4, 4, 4 conv blocks with base widths
    64, 128, 256, 512, 512) are followed by two ReLU dense layers and a
    softmax output layer. Each conv block is ``ConvBlock.conv``.

    Args:
        input_shape: Shape of one input sample, e.g. ``(32, 32, 3)``.
        n_classes: Number of units in the final softmax layer.
        alpha: Width multiplier; every conv layer gets
            ``int(base_filters * alpha)`` filters.
        n_downsampling: How many of the five stages end with a 2x2 max
            pooling, counted from the last stage backwards.
        last_dense_units: Width of each of the two hidden dense layers.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to class
        probabilities.

    Raises:
        AssertionError: If ``n_downsampling`` is outside ``0..5``.
    """
    assert 0 <= n_downsampling <= 5

    # (base filter count, number of conv blocks) for each of the five stages.
    stage_table = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))
    # One flag per stage: only the last `n_downsampling` stages are pooled.
    pool_flags = [False] * (5 - n_downsampling) + [True] * (n_downsampling)

    x_in = layers.Input(shape=input_shape)
    tensor = x_in
    for (base_width, depth), pool_after in zip(stage_table, pool_flags):
        for _ in range(depth):
            tensor = ConvBlock.conv(tensor, int(base_width * alpha))
        if pool_after:
            tensor = ConvBlock.maxpool(tensor)

    # Classifier head: flatten, two hidden dense layers, softmax output.
    tensor = layers.Flatten()(tensor)
    for _ in range(2):
        tensor = layers.Dense(last_dense_units, activation='relu')(tensor)
    x_out = layers.Dense(n_classes, activation='softmax')(tensor)

    return Model(inputs=x_in, outputs=x_out)
if __name__ == '__main__':
    # test for cifar10: build a reduced VGG16, print its summary and save a
    # diagram of the architecture to model.png
    cifar10_kwargs = dict(
        input_shape=(32, 32, 3),
        n_classes=10,
        alpha=0.5,
        n_downsampling=3,
        last_dense_units=100,
    )
    model = vgg16(**cifar10_kwargs)
    model.summary()
    plot_model(model, to_file='model.png', show_shapes=True)