diff --git a/nnvm/python/nnvm/testing/init.py b/nnvm/python/nnvm/testing/init.py
new file mode 100644
index 000000000000..3ce7e40ef87f
--- /dev/null
+++ b/nnvm/python/nnvm/testing/init.py
@@ -0,0 +1,110 @@
+"""Initializer of parameters."""
+import numpy as np
+
+class Initializer(object):
+    """The base class of an initializer."""
+    def __init__(self, **kwargs):
+        self._kwargs = kwargs
+
+    def __call__(self, desc, arr):
+        """Initialize an array.
+
+        Parameters
+        ----------
+        desc : str
+            Initialization pattern descriptor.
+
+        arr : NDArray
+            The array to be initialized.
+        """
+        if desc.endswith('weight'):
+            self._init_weight(desc, arr)
+        elif desc.endswith('bias'):
+            self._init_bias(desc, arr)
+        elif desc.endswith('gamma'):
+            self._init_gamma(desc, arr)
+        elif desc.endswith('beta'):
+            self._init_beta(desc, arr)
+        elif desc.endswith('mean'):
+            self._init_mean(desc, arr)
+        elif desc.endswith('var'):
+            self._init_var(desc, arr)
+        else:
+            self._init_default(desc, arr)
+
+    def _init_bias(self, _, arr):
+        arr[:] = 0.0
+
+    def _init_gamma(self, _, arr):
+        arr[:] = 1.0
+
+    def _init_beta(self, _, arr):
+        arr[:] = 0.0
+
+    def _init_mean(self, _, arr):
+        arr[:] = 0.0
+
+    def _init_var(self, _, arr):
+        arr[:] = 1.0
+
+    def _init_weight(self, name, arr):
+        """Abstract method to initialize weight."""
+        raise NotImplementedError("Must override it")
+
+    def _init_default(self, name, _):
+        raise ValueError(
+            'Unknown initialization pattern for %s. '
+            'Default initialization is now limited to '
+            '"weight", "bias", "gamma" (1.0), and "beta" (0.0). '
+            'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)
+
+
+class Xavier(Initializer):
+    """ "Xavier" initialization for weights.
+
+    Parameters
+    ----------
+    rnd_type : str, optional
+        Random generator type, can be ``'gaussian'`` or ``'uniform'``.
+
+    factor_type : str, optional
+        Can be ``'avg'``, ``'in'``, or ``'out'``.
+
+    magnitude : float, optional
+        Scale of random number.
+    """
+    def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
+        super(Xavier, self).__init__(rnd_type=rnd_type,
+                                     factor_type=factor_type,
+                                     magnitude=magnitude)
+        self.rnd_type = rnd_type
+        self.factor_type = factor_type
+        self.magnitude = float(magnitude)
+
+    def _init_weight(self, name, arr):
+        shape = arr.shape
+        hw_scale = 1.
+        if len(shape) < 2:
+            raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at'
+                             ' least 2D.'.format(name))
+        if len(shape) > 2:
+            hw_scale = np.prod(shape[2:])
+        fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
+        factor = 1.
+        if self.factor_type == "avg":
+            factor = (fan_in + fan_out) / 2.0
+        elif self.factor_type == "in":
+            factor = fan_in
+        elif self.factor_type == "out":
+            factor = fan_out
+        else:
+            raise ValueError("Incorrect factor type")
+        # Hack for mobilenet: depthwise conv weights have less connectivity,
+        # so use the 3x3 kernel size as the factor instead of the fan.
+        if "depthwise" in name:
+            factor = 3 * 3
+        scale = np.sqrt(self.magnitude / factor)
+        if self.rnd_type == "uniform":
+            arr[:] = np.random.uniform(-scale, scale, size=arr.shape)
+        else:
+            raise ValueError("Unknown random type")
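For reference, a minimal standalone sketch of the arithmetic `Xavier._init_weight` performs for a typical NCHW `conv2d` weight with `factor_type="avg"` and `magnitude=3`; the weight shape below is an illustrative assumption, not taken from the patch:

```python
import numpy as np

# Hypothetical conv2d weight shape: (out_channels, in_channels, kh, kw).
shape = (64, 32, 3, 3)
hw_scale = np.prod(shape[2:])          # kh * kw = 9
fan_in = shape[1] * hw_scale           # 32 * 9 = 288
fan_out = shape[0] * hw_scale          # 64 * 9 = 576
factor = (fan_in + fan_out) / 2.0      # factor_type="avg" -> 432
scale = np.sqrt(3.0 / factor)          # magnitude=3 -> ~0.083
weight = np.random.uniform(-scale, scale, size=shape)
```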
diff --git a/nnvm/python/nnvm/testing/mobilenet.py b/nnvm/python/nnvm/testing/mobilenet.py
index f5667b649c43..bebd7ddc6802 100644
--- a/nnvm/python/nnvm/testing/mobilenet.py
+++ b/nnvm/python/nnvm/testing/mobilenet.py
@@ -30,7 +30,7 @@ def separable_conv_block(data, name, depthwise_channels,
     # depthwise convolution + bn + relu
     conv1 = sym.conv2d(data=data, channels=depthwise_channels,
                        groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
-                       padding=padding, use_bias=False, layout="NCHW", name=name + "_conv1")
+                       padding=padding, use_bias=False, layout="NCHW", name=name + "_depthwise_conv1")
     bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
     act1 = sym.relu(data=bn1, name=name + "_relu1")
     # pointwise convolution + bn + relu
diff --git a/nnvm/python/nnvm/testing/utils.py b/nnvm/python/nnvm/testing/utils.py
index 5d2146971172..fcc008e61fc3 100644
--- a/nnvm/python/nnvm/testing/utils.py
+++ b/nnvm/python/nnvm/testing/utils.py
@@ -5,9 +5,10 @@
 import tvm
 from ..compiler import graph_util
 from .. import graph
+from .init import Xavier
 
-
-def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32"):
+def create_workload(net, batch_size, image_shape=(3, 224, 224),
+                    dtype="float32", initializer=None, seed=0):
     """Helper function to create benchmark workload for input network
 
     Parameters
@@ -24,6 +25,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
     dtype : str, optional
         The data type
 
+    initializer : Initializer, optional
+        The initializer used to generate the parameters.
+
+    seed : int, optional
+        The seed used in initialization.
+
     Returns
     -------
     net : nnvm.Symbol
@@ -38,15 +45,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
     g = graph.create(net)
     input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
     shape_dict = dict(zip(g.index.input_names, input_shapes))
+    np.random.seed(seed)
+    initializer = initializer if initializer else Xavier(magnitude=3)
     for k, v in shape_dict.items():
         if k == "data":
             continue
-        # Specially generate non-negative parameters.
-        if k.endswith("gamma"):
-            init = np.random.uniform(0.9, 1, size=v)
-        elif k.endswith("var"):
-            init = np.random.uniform(0.9, 1, size=v)
-        else:
-            init = np.random.uniform(-0.1, 0.1, size=v)
-        params[k] = tvm.nd.array(init.astype(dtype), ctx=tvm.cpu(0))
+        init_value = np.zeros(v).astype(dtype)
+        initializer(k, init_value)
+        params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0))
     return net, params
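A hedged usage sketch of the new `create_workload` signature; the small conv/dense symbol here is illustrative only, built with the same nnvm symbol calls the testing networks use:

```python
import nnvm.symbol as sym
from nnvm.testing.init import Xavier
from nnvm.testing.utils import create_workload

# Illustrative network: conv -> flatten -> dense -> softmax.
data = sym.Variable("data")
body = sym.conv2d(data=data, channels=8, kernel_size=(3, 3),
                  padding=(1, 1), name="conv0")
body = sym.flatten(data=body)
net = sym.softmax(sym.dense(data=body, units=10, name="fc"))

# With a fixed seed, repeated calls yield identical parameters;
# omitting `initializer` falls back to Xavier(magnitude=3).
net, params = create_workload(net, batch_size=1, image_shape=(3, 224, 224),
                              initializer=Xavier(factor_type="in"),
                              seed=0)
```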
""" import tvm +import numpy as np from tvm.contrib import nvcc, graph_runtime import nnvm.compiler import nnvm.testing @@ -64,6 +65,7 @@ def tvm_callback_cuda_compile(code): graph, lib, params = nnvm.compiler.build( net, target, shape={"data": data_shape}, params=params) + ###################################################################### # Run the Compiled Module # ----------------------- @@ -74,10 +76,11 @@ def tvm_callback_cuda_compile(code): # This example runs on the same machine. # # Note that the code below no longer depends on NNVM, and only relies TVM's runtime to run(deploy). - +data = np.random.uniform(-1, 1, size=data_shape).astype("float32") module = graph_runtime.create(graph, lib, ctx) # set input module.set_input(**params) +module.set_input("data", data) # run module.run() # get output