Training Container for NAS RL Suggestion in v1alpha2 (#614)
* Add training container in v1alpha2

* Modify runTrial
andreyvelich authored and k8s-ci-robot committed Jun 4, 2019
1 parent cb25807 commit 32d3401
Showing 5 changed files with 319 additions and 0 deletions.
32 changes: 32 additions & 0 deletions examples/v1alpha2/NAS-training-containers/RL-cifar10/Dockerfile
@@ -0,0 +1,32 @@
ARG cuda_version=9.0
ARG cudnn_version=7
FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel

# Install system packages
RUN apt-get update && apt-get install -y software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
      bzip2 \
      g++ \
      git \
      graphviz \
      libgl1-mesa-glx \
      libhdf5-dev \
      openmpi-bin \
      python3.5 \
      python3-pip \
      python3-setuptools \
      python3-dev \
      wget && \
    rm -rf /var/lib/apt/lists/*


ADD . /app
WORKDIR /app

RUN pip3 install --upgrade pip
RUN pip3 install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /app

ENTRYPOINT ["python3.5", "-u", "RunTrial.py"]
67 changes: 67 additions & 0 deletions examples/v1alpha2/NAS-training-containers/RL-cifar10/ModelConstructor.py
@@ -0,0 +1,67 @@
import numpy as np
from keras.models import Model
from keras import backend as K
import json
from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, Dropout
from op_library import concat, conv, sp_conv, dw_conv, reduction


class ModelConstructor(object):
    def __init__(self, arc_json, nn_json):
        self.arch = json.loads(arc_json)
        nn_config = json.loads(nn_json)
        self.num_layers = nn_config['num_layers']
        self.input_sizes = nn_config['input_sizes']
        self.output_size = nn_config['output_sizes'][-1]
        self.embedding = nn_config['embedding']

    def build_model(self):
        # a list of the output tensors of all layers
        all_layers = [0 for _ in range(self.num_layers + 1)]
        # a list of the dimensions of all layers
        all_dims = [0 for _ in range(self.num_layers + 1)]

        # ================= Stacking layers =================
        # Input Layer. Layer 0
        input_layer = Input(shape=self.input_sizes)
        all_layers[0] = input_layer

        # Intermediate Layers. Starting from layer 1.
        for l in range(1, self.num_layers + 1):
            input_layers = list()
            # The first element of each per-layer list selects the operation;
            # the remaining elements are skip-connection bits.
            opt = self.arch[l - 1][0]
            opt_config = self.embedding[str(opt)]
            skip = self.arch[l - 1][1:l + 1]

            # set up the connection to the previous layer first
            input_layers.append(all_layers[l - 1])

            # then add skip connections
            for i in range(l - 1):
                if l > 1 and skip[i] == 1:
                    input_layers.append(all_layers[i])

            layer_input = concat(input_layers)
            if opt_config['opt_type'] == 'convolution':
                layer_output = conv(layer_input, opt_config)
            elif opt_config['opt_type'] == 'separable_convolution':
                layer_output = sp_conv(layer_input, opt_config)
            elif opt_config['opt_type'] == 'depthwise_convolution':
                layer_output = dw_conv(layer_input, opt_config)
            elif opt_config['opt_type'] == 'reduction':
                layer_output = reduction(layer_input, opt_config)

            all_layers[l] = layer_output

        # Final Layer
        # Global Average Pooling, then Fully connected with softmax.
        avgpooled = GlobalAveragePooling2D()(all_layers[self.num_layers])
        dropped = Dropout(0.4)(avgpooled)
        logits = Dense(units=self.output_size,
                       activation='softmax')(dropped)

        # Encapsulate the model
        self.model = Model(inputs=input_layer, outputs=logits)

        return self.model
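
To make the expected inputs concrete, here is a minimal driver (illustrative only, not part of the commit; the architecture and embedding values are invented, and it assumes it is run from the RL-cifar10 directory). In each per-layer list, the first element selects an operation from the embedding and the remaining bits mark skip connections from earlier layers:

import json

from ModelConstructor import ModelConstructor

# Invented 3-layer architecture: [op_index, skip_bit_0, skip_bit_1, ...]
arch = [
    [0],           # layer 1: op 0, no skip slots yet
    [1, 1],        # layer 2: op 1, skip connection from the input (layer 0)
    [2, 0, 1],     # layer 3: op 2, skip connection from layer 1 only
]

# Invented search-space embedding matching the opt_types in op_library.py.
nn_config = {
    "num_layers": 3,
    "input_sizes": [32, 32, 3],
    "output_sizes": [10],
    "embedding": {
        "0": {"opt_type": "convolution", "num_filter": 64,
              "filter_size": 3, "stride": 1},
        "1": {"opt_type": "separable_convolution", "num_filter": 64,
              "filter_size": 3, "stride": 1, "depth_multiplier": 1},
        "2": {"opt_type": "reduction", "reduction_type": "max_pooling",
              "pool_size": 2},
    },
}

# ModelConstructor takes both arguments as JSON strings.
model = ModelConstructor(json.dumps(arch), json.dumps(nn_config)).build_model()
model.summary()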
77 changes: 77 additions & 0 deletions examples/v1alpha2/NAS-training-containers/RL-cifar10/RunTrial.py
@@ -0,0 +1,77 @@
import keras
import numpy as np
from keras.datasets import cifar10
from ModelConstructor import ModelConstructor
from keras.utils import to_categorical
from keras.utils import multi_gpu_model
from keras.preprocessing.image import ImageDataGenerator
import argparse
import time

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='TrainingContainer')
    parser.add_argument('--architecture', type=str, default="", metavar='N',
                        help='architecture of the neural network')
    parser.add_argument('--nn_config', type=str, default="", metavar='N',
                        help='configurations and search space embeddings')
    parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                        help='number of epochs for which each child network is trained')
    parser.add_argument('--num_gpus', type=int, default=1, metavar='N',
                        help='number of GPUs used for training')
    args = parser.parse_args()

    # The flags arrive as single-quoted strings; rewrite them into valid JSON.
    arch = args.architecture.replace("\'", "\"")
    print(">>> arch received by trial")
    print(arch)

    nn_config = args.nn_config.replace("\'", "\"")
    print(">>> nn_config received by trial")
    print(nn_config)

    num_epochs = args.num_epochs
    print(">>> num_epochs received by trial")
    print(num_epochs)

    num_gpus = args.num_gpus
    print(">>> num_gpus received by trial:")
    print(num_gpus)

    print("\n>>> Constructing Model...")
    constructor = ModelConstructor(arch, nn_config)
    test_model = constructor.build_model()
    print(">>> Model Constructed Successfully\n")

    if num_gpus > 1:
        test_model = multi_gpu_model(test_model, gpus=num_gpus)

    test_model.summary()
    test_model.compile(loss=keras.losses.categorical_crossentropy,
                       optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
                       metrics=['accuracy'])

    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    augmentation = ImageDataGenerator(
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

    aug_data_flow = augmentation.flow(x_train, y_train, batch_size=128)

    print(">>> Data Loaded. Training starts.")
    for e in range(num_epochs):
        print("\nTotal Epoch {}/{}".format(e + 1, num_epochs))
        history = test_model.fit_generator(generator=aug_data_flow,
                                           steps_per_epoch=int(len(x_train) / 128) + 1,
                                           epochs=1, verbose=1,
                                           validation_data=(x_test, y_test))
        print("Training-Accuracy={}".format(history.history['acc'][-1]))
        print("Training-Loss={}".format(history.history['loss'][-1]))
        print("Validation-Accuracy={}".format(history.history['val_acc'][-1]))
        print("Validation-Loss={}".format(history.history['val_loss'][-1]))
141 changes: 141 additions & 0 deletions examples/v1alpha2/NAS-training-containers/RL-cifar10/op_library.py
@@ -0,0 +1,141 @@
import numpy as np
from keras import backend as K
from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, \
    SeparableConv2D, DepthwiseConv2D


def concat(inputs):
    n = len(inputs)
    if n == 1:
        return inputs[0]

    total_dim = list()
    for x in inputs:
        total_dim.append(K.int_shape(x))
    total_dim = np.asarray(total_dim)
    max_dim = max(total_dim[:, 1])

    padded_input = [0 for _ in range(n)]

    # Zero-pad every smaller input up to the largest spatial dimension so
    # that tensors of different sizes can be concatenated channel-wise.
    for i in range(n):
        if total_dim[i][1] < max_dim:
            diff = max_dim - total_dim[i][1]
            half_diff = int(diff / 2)
            if diff % 2 == 0:
                padded_input[i] = ZeroPadding2D(padding=(half_diff, half_diff))(inputs[i])
            else:
                padded_input[i] = ZeroPadding2D(padding=((half_diff, half_diff + 1),
                                                         (half_diff, half_diff + 1)))(inputs[i])
        else:
            padded_input[i] = inputs[i]

    result = concatenate(inputs=padded_input, axis=-1)
    return result


def conv(x, config):
    parameters = {
        "num_filter": 64,
        "filter_size": 3,
        "stride": 1,
    }
    for k in parameters.keys():
        if k in config:
            parameters[k] = int(config[k])

    activated = Activation('relu')(x)

    conved = Conv2D(
        filters=parameters['num_filter'],
        kernel_size=parameters['filter_size'],
        strides=parameters['stride'],
        padding='same')(activated)

    result = BatchNormalization()(conved)

    return result


def sp_conv(x, config):
    parameters = {
        "num_filter": 64,
        "filter_size": 3,
        "stride": 1,
        "depth_multiplier": 1,
    }
    for k in parameters.keys():
        if k in config:
            parameters[k] = int(config[k])

    activated = Activation('relu')(x)

    conved = SeparableConv2D(
        filters=parameters['num_filter'],
        kernel_size=parameters['filter_size'],
        strides=parameters['stride'],
        depth_multiplier=parameters['depth_multiplier'],
        padding='same')(activated)

    result = BatchNormalization()(conved)

    return result


def dw_conv(x, config):
    parameters = {
        "filter_size": 3,
        "stride": 1,
        "depth_multiplier": 1,
    }
    for k in parameters.keys():
        if k in config:
            parameters[k] = int(config[k])

    activated = Activation('relu')(x)

    conved = DepthwiseConv2D(
        kernel_size=parameters['filter_size'],
        strides=parameters['stride'],
        depth_multiplier=parameters['depth_multiplier'],
        padding='same')(activated)

    result = BatchNormalization()(conved)

    return result


def reduction(x, config):
    # Handle the extreme case where the input has spatial dimension 1x1 and
    # cannot be reduced further; replace the reduction layer with identity.
    # Such a situation is quite likely to appear, though.
    dim = K.int_shape(x)
    if dim[1] == 1 or dim[2] == 1:
        print("WARNING: One or more dimensions of the input of the reduction layer is 1. "
              "It cannot be further reduced. An identity layer will be used instead.")
        return x

    parameters = {
        'reduction_type': "max_pooling",
        'pool_size': 2,
        'stride': None,
    }

    if 'reduction_type' in config:
        parameters['reduction_type'] = config['reduction_type']
    if 'pool_size' in config:
        parameters['pool_size'] = int(config['pool_size'])
    if 'stride' in config:
        parameters['stride'] = int(config['stride'])

    if parameters['reduction_type'] == 'max_pooling':
        result = MaxPooling2D(
            pool_size=parameters['pool_size'],
            strides=parameters['stride']
        )(x)
    elif parameters['reduction_type'] == 'avg_pooling':
        result = AveragePooling2D(
            pool_size=parameters['pool_size'],
            strides=parameters['stride']
        )(x)

    return result
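
A quick illustration of concat's shape alignment (assuming the pinned Keras/TensorFlow from requirements.txt are installed and this runs next to op_library.py; the shapes are arbitrary):

from keras import backend as K
from keras.layers import Input

from op_library import concat

a = Input(shape=(32, 32, 8))
b = Input(shape=(16, 16, 16))

# b is zero-padded from 16x16 up to 32x32, then the two tensors are
# joined along the channel axis: 8 + 16 = 24 channels.
merged = concat([a, b])
print(K.int_shape(merged))  # (None, 32, 32, 24)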
2 changes: 2 additions & 0 deletions examples/v1alpha2/NAS-training-containers/RL-cifar10/requirements.txt
@@ -0,0 +1,2 @@
tensorflow-gpu==1.12.0
keras==2.2.4
