This repository has been archived by the owner on Nov 28, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 40
/
shufflenet.py
executable file
·338 lines (271 loc) · 11.9 KB
/
shufflenet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
from keras import backend as K
from keras.applications.imagenet_utils import _obtain_input_shape
from keras.models import Model
from keras.engine.topology import get_source_inputs
from keras.layers import Activation, Add, Concatenate, GlobalAveragePooling2D,GlobalMaxPooling2D, Input, Dense
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, BatchNormalization, Lambda
from keras.applications.mobilenet import DepthwiseConv2D
import numpy as np
def ShuffleNet(include_top=True, input_tensor=None, scale_factor=1.0, pooling='max',
               input_shape=(224,224,3), groups=1, load_model=None, num_shuffle_units=[3, 7, 3],
               bottleneck_ratio=0.25, classes=1000):
    """
    ShuffleNet implementation for Keras 2.

    ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
    Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun
    https://arxiv.org/pdf/1707.01083.pdf

    Note that only TensorFlow is supported for now, therefore it only works
    with the data format `image_data_format='channels_last'` in your Keras
    config at `~/.keras/keras.json`.

    Parameters
    ----------
    include_top: bool(True)
        whether to include the fully-connected layer at the top of the network.
    input_tensor:
        optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
    scale_factor: float(1.0)
        scales the number of output channels; must be a multiple of 1/4.
    input_shape: tuple
        input image shape, default (224, 224, 3).
    pooling: str('max')
        Global pooling applied after the last stage: 'avg' or 'max'.
    groups: int(1)
        number of groups per channel; must be one of {1, 2, 3, 4, 8}.
    load_model: str or None
        optional path to a weights file loaded with `by_name=True`.
    num_shuffle_units: list([3,7,3])
        number of stages (list length) and the number of shufflenet units in a
        stage beginning with stage 2 (stage 1 is the fixed conv+maxpool stem);
        each stage gets `num_shuffle_units[i] + 1` units because the first
        (strided) unit of a stage is added implicitly.
    bottleneck_ratio: float(0.25)
        ratio of bottleneck channels to output channels. E.g. 1:4 means the
        output feature map is 4 times the width of the bottleneck feature map.
    classes: int(1000)
        number of classes to predict.

    Returns
    -------
    A Keras model instance.

    Raises
    ------
    RuntimeError: if the backend is not TensorFlow.
    ValueError: for an unsupported `groups`, `pooling`, or `scale_factor`.

    References
    ----------
    - [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices]
      (http://www.arxiv.org/pdf/1707.01083.pdf)
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported, '
                           'as other backends do not support ')

    # Encode the hyper-parameters into the model name for traceability.
    name = "ShuffleNet_%.2gX_g%d_br_%.2g_%s" % (scale_factor, groups, bottleneck_ratio, "".join([str(x) for x in num_shuffle_units]))

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      require_flatten=include_top,
                                      data_format=K.image_data_format())

    # Stage-2 output width per group count, taken from Table 1 of the paper.
    out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}
    if groups not in out_dim_stage_two:
        raise ValueError("Invalid number of groups.")

    if pooling not in ['max','avg']:
        raise ValueError("Invalid value for pooling.")

    if not (float(scale_factor) * 4).is_integer():
        raise ValueError("Invalid value for scale_factor. Should be x over 4.")

    # Channels double at each stage after stage 2: [stem, s2, s2*2, s2*4, ...].
    exp = np.insert(np.arange(0, len(num_shuffle_units), dtype=np.float32), 0, 0)
    out_channels_in_stage = 2 ** exp
    out_channels_in_stage *= out_dim_stage_two[groups]  # calculate output channels for each stage
    out_channels_in_stage[0] = 24  # first stage has always 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stage 1: fixed 3x3/2 conv stem followed by 3x3/2 max pooling.
    x = Conv2D(filters=out_channels_in_stage[0], kernel_size=(3, 3), padding='same',
               use_bias=False, strides=(2, 2), activation="relu", name="conv1")(img_input)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same', name="maxpool1")(x)

    # Stages 2..N: stacks of shuffle units.
    for stage in range(0, len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = _block(x, out_channels_in_stage, repeat=repeat,
                   bottleneck_ratio=bottleneck_ratio,
                   groups=groups, stage=stage + 2)

    if pooling == 'avg':
        x = GlobalAveragePooling2D(name="global_pool")(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name="global_pool")(x)

    if include_top:
        x = Dense(units=classes, name="fc")(x)
        x = Activation('softmax', name='softmax')(x)

    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs=inputs, outputs=x, name=name)

    if load_model is not None:
        # BUG FIX: previously passed '' (an empty path) to load_weights, so the
        # `load_model` argument was silently ignored and loading always failed.
        model.load_weights(load_model, by_name=True)

    return model
def _block(x, channel_map, bottleneck_ratio, repeat=1, groups=1, stage=1):
    """
    Build one ShuffleNet stage: a bottleneck block of `repeat + 1` shuffle units.

    Parameters
    ----------
    x:
        Input tensor with `channels_last` data format.
    channel_map: list
        Number of output channels for each stage (indexed by `stage - 2`).
    bottleneck_ratio: float
        Ratio of bottleneck channels to output channels (e.g. 1:4 means the
        output feature map is 4 times the width of the bottleneck feature map).
    repeat: int(1)
        Number of stride-1 shuffle units appended after the strided one.
    groups: int(1)
        Number of groups per channel.
    stage: int(1)
        Stage number, used for layer naming and channel lookup.

    Returns
    -------
    Output tensor of the stage.
    """
    # The first unit of a stage downsamples (stride 2) and widens the feature
    # map from the previous stage's channel count to this stage's.
    x = _shuffle_unit(x,
                      in_channels=channel_map[stage - 2],
                      out_channels=channel_map[stage - 1],
                      strides=2,
                      groups=groups,
                      bottleneck_ratio=bottleneck_ratio,
                      stage=stage,
                      block=1)

    # The remaining `repeat` units keep resolution and width (stride 1);
    # block numbering continues at 2.
    for block_idx in range(2, repeat + 2):
        x = _shuffle_unit(x,
                          in_channels=channel_map[stage - 1],
                          out_channels=channel_map[stage - 1],
                          strides=1,
                          groups=groups,
                          bottleneck_ratio=bottleneck_ratio,
                          stage=stage,
                          block=block_idx)

    return x
def _shuffle_unit(inputs, in_channels, out_channels, groups, bottleneck_ratio, strides=2, stage=1, block=1):
    """
    Create one ShuffleNet unit: 1x1 grouped conv -> channel shuffle ->
    3x3 depthwise conv -> 1x1 grouped conv, with a residual add (stride 1)
    or an average-pool shortcut concatenated on the channel axis (stride 2).

    Parameters
    ----------
    inputs:
        Input tensor with `channels_last` data format.
    in_channels:
        number of input channels.
    out_channels:
        number of output channels.
    groups: int(1)
        number of groups per channel.
    bottleneck_ratio: float
        bottleneck ratio implies the ratio of bottleneck channels to output channels.
        For example, bottleneck ratio = 1 : 4 means the output feature map is 4 times
        the width of the bottleneck feature map.
    strides:
        An integer or tuple/list of 2 integers,
        specifying the strides of the convolution along the width and height.
    stage: int(1)
        stage number (used in layer names).
    block: int(1)
        block number (used in layer names).

    Returns
    -------
    Output tensor of the shuffle unit.
    """
    if K.image_data_format() == 'channels_last':
        bn_axis = -1
    else:
        bn_axis = 1

    prefix = 'stage%d/block%d' % (stage, block)

    # default: 1/4 of the output channel of a ShuffleNet Unit
    bottleneck_channels = int(out_channels * bottleneck_ratio)

    # Stage 2's first unit consumes the 24-channel stem output, which is too
    # thin to split into groups, so its first 1x1 conv is not grouped
    # (Sec. 3.2 of the paper). Computed once and reused below — the original
    # recomputed this identical expression at the call site.
    groups = (1 if stage == 2 and block == 1 else groups)

    x = _group_conv(inputs, in_channels, out_channels=bottleneck_channels,
                    groups=groups,
                    name='%s/1x1_gconv_1' % prefix)
    x = BatchNormalization(axis=bn_axis, name='%s/bn_gconv_1' % prefix)(x)
    x = Activation('relu', name='%s/relu_gconv_1' % prefix)(x)

    x = Lambda(channel_shuffle, arguments={'groups': groups}, name='%s/channel_shuffle' % prefix)(x)
    # NOTE: layer name says "1x1_dwconv_1" but this is a 3x3 depthwise conv;
    # the name is kept as-is so existing weight files still load by name.
    x = DepthwiseConv2D(kernel_size=(3, 3), padding="same", use_bias=False,
                        strides=strides, name='%s/1x1_dwconv_1' % prefix)(x)
    x = BatchNormalization(axis=bn_axis, name='%s/bn_dwconv_1' % prefix)(x)

    # With stride 2 the shortcut is concatenated, so the conv branch only
    # produces out_channels - in_channels to hit out_channels in total.
    x = _group_conv(x, bottleneck_channels, out_channels=out_channels if strides == 1 else out_channels - in_channels,
                    groups=groups, name='%s/1x1_gconv_2' % prefix)
    x = BatchNormalization(axis=bn_axis, name='%s/bn_gconv_2' % prefix)(x)

    if strides < 2:
        ret = Add(name='%s/add' % prefix)([x, inputs])
    else:
        avg = AveragePooling2D(pool_size=3, strides=2, padding='same', name='%s/avg_pool' % prefix)(inputs)
        ret = Concatenate(bn_axis, name='%s/concat' % prefix)([x, avg])

    ret = Activation('relu', name='%s/relu_out' % prefix)(ret)

    return ret
def _group_conv(x, in_channels, out_channels, groups, kernel=1, stride=1, name=''):
    """
    Grouped convolution: slice the input channels into `groups` equal parts,
    convolve each slice independently, and concatenate the results.

    Parameters
    ----------
    x:
        Input tensor with `channels_last` data format.
    in_channels:
        number of input channels.
    out_channels:
        number of output channels; must be divisible by `groups`.
    groups:
        number of groups per channel.
    kernel: int(1)
        An integer or tuple/list of 2 integers, specifying the
        width and height of the 2D convolution window.
        Can be a single integer to specify the same value for
        all spatial dimensions.
    stride: int(1)
        An integer or tuple/list of 2 integers,
        specifying the strides of the convolution along the width and height.
        Can be a single integer to specify the same value for all spatial dimensions.
    name: str
        A string that specifies the layer name.

    Returns
    -------
    Output tensor of the (grouped) convolution.
    """
    if groups == 1:
        # Degenerate case: a grouped conv with one group is a plain conv.
        return Conv2D(filters=out_channels, kernel_size=kernel, padding='same',
                      use_bias=False, strides=stride, name=name)(x)

    # number of input channels per group
    ig = in_channels // groups
    group_list = []

    assert out_channels % groups == 0

    for i in range(groups):
        offset = i * ig
        # BUG FIX: bind `offset`/`ig` as default arguments. A plain closure
        # late-binds the loop variable, so if the Lambda is ever re-invoked
        # (e.g. when the model is rebuilt from a saved config) every slice
        # would use the final offset instead of its own.
        group = Lambda(lambda z, off=offset, size=ig: z[:, :, :, off: off + size],
                       name='%s/g%d_slice' % (name, i))(x)
        group_list.append(Conv2D(int(0.5 + out_channels / groups), kernel_size=kernel, strides=stride,
                                 use_bias=False, padding='same', name='%s_/g%d' % (name, i))(group))
    return Concatenate(name='%s/concat' % name)(group_list)
def channel_shuffle(x, groups):
    """
    Interleave channels across groups so information can flow between the
    group convolutions that precede and follow this op.

    Implemented as reshape -> transpose -> reshape on the channel axis.

    Parameters
    ----------
    x:
        Input tensor with `channels_last` data format.
    groups: int
        number of groups per channel.

    Returns
    -------
    channel shuffled output tensor

    Examples
    --------
    Example for a 1D Array with 3 groups
    >>> d = np.array([0,1,2,3,4,5,6,7,8])
    >>> x = np.reshape(d, (3,3))
    >>> x = np.transpose(x, [1,0])
    >>> x = np.reshape(x, (9,))
    '[0 1 2 3 4 5 6 7 8] --> [0 3 6 1 4 7 2 5 8]'
    """
    height, width, in_channels = x.shape.as_list()[1:]
    group_size = in_channels // groups

    # Split channels into (groups, group_size), swap those two axes, then
    # flatten back — channels end up interleaved across the original groups.
    shuffled = K.reshape(x, [-1, height, width, groups, group_size])
    shuffled = K.permute_dimensions(shuffled, (0, 1, 2, 4, 3))
    return K.reshape(shuffled, [-1, height, width, in_channels])