[TEST] Xavier initialization for benchmarks (apache#54)

* [TEST] Xavier initialization for benchmarks

* remove additional line
tqchen committed May 26, 2018
1 parent 4fcb5d0 commit c6c69fc
Showing 4 changed files with 129 additions and 12 deletions.
110 changes: 110 additions & 0 deletions nnvm/python/nnvm/testing/init.py
@@ -0,0 +1,110 @@
"""Initializer of parameters."""
import numpy as np

class Initializer(object):
    """The base class of an initializer."""
    def __init__(self, **kwargs):
        self._kwargs = kwargs

    def __call__(self, desc, arr):
        """Initialize an array.

        Parameters
        ----------
        desc : str
            Initialization pattern descriptor.

        arr : NDArray
            The array to be initialized.
        """
        if desc.endswith('weight'):
            self._init_weight(desc, arr)
        elif desc.endswith('bias'):
            self._init_bias(desc, arr)
        elif desc.endswith('gamma'):
            self._init_gamma(desc, arr)
        elif desc.endswith('beta'):
            self._init_beta(desc, arr)
        elif desc.endswith('mean'):
            self._init_mean(desc, arr)
        elif desc.endswith('var'):
            self._init_var(desc, arr)
        else:
            self._init_default(desc, arr)

    def _init_bias(self, _, arr):
        arr[:] = 0.0

    def _init_gamma(self, _, arr):
        arr[:] = 1.0

    def _init_beta(self, _, arr):
        arr[:] = 0.0

    def _init_mean(self, _, arr):
        arr[:] = 0.0

    def _init_var(self, _, arr):
        arr[:] = 1.0

    def _init_weight(self, name, arr):
        """Abstract method to initialize weight."""
        raise NotImplementedError("Must override _init_weight")

    def _init_default(self, name, _):
        raise ValueError(
            'Unknown initialization pattern for %s. '
            'Default initialization is now limited to '
            '"weight", "bias", "gamma" (1.0), and "beta" (0.0). '
            'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)


class Xavier(Initializer):
    """ "Xavier" initialization for weights.

    Parameters
    ----------
    rnd_type : str, optional
        Random generator type, can be ``'gaussian'`` or ``'uniform'``.

    factor_type : str, optional
        Can be ``'avg'``, ``'in'``, or ``'out'``.

    magnitude : float, optional
        Scale of random number.
    """
    def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3):
        super(Xavier, self).__init__(rnd_type=rnd_type,
                                     factor_type=factor_type,
                                     magnitude=magnitude)
        self.rnd_type = rnd_type
        self.factor_type = factor_type
        self.magnitude = float(magnitude)

    def _init_weight(self, name, arr):
        shape = arr.shape
        hw_scale = 1.
        if len(shape) < 2:
            raise ValueError('Xavier initializer cannot be applied to vector {0}. '
                             'It requires at least 2D.'.format(name))
        if len(shape) > 2:
            hw_scale = np.prod(shape[2:])
        fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale
        factor = 1.
        if self.factor_type == "avg":
            factor = (fan_in + fan_out) / 2.0
        elif self.factor_type == "in":
            factor = fan_in
        elif self.factor_type == "out":
            factor = fan_out
        else:
            raise ValueError("Incorrect factor type")
        # Hack for mobilenet: depthwise convolutions have less connectivity,
        # so fix the factor at the 3x3 kernel size.
        if "depthwise" in name:
            factor = 3 * 3
        scale = np.sqrt(self.magnitude / factor)
        if self.rnd_type == "uniform":
            arr[:] = np.random.uniform(-scale, scale, size=arr.shape)
        else:
            raise ValueError("Unknown random type")
2 changes: 1 addition & 1 deletion nnvm/python/nnvm/testing/mobilenet.py
@@ -30,7 +30,7 @@ def separable_conv_block(data, name, depthwise_channels,
     # depthwise convolution + bn + relu
     conv1 = sym.conv2d(data=data, channels=depthwise_channels,
                        groups=depthwise_channels, kernel_size=kernel_size, strides=strides,
-                       padding=padding, use_bias=False, layout="NCHW", name=name + "_conv1")
+                       padding=padding, use_bias=False, layout="NCHW", name=name + "_depthwise_conv1")
     bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
     act1 = sym.relu(data=bn1, name=name + "_relu1")
     # pointwise convolution + bn + relu
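(This rename is what makes the new initializer's depthwise special case fire: Xavier._init_weight checks for "depthwise" in the parameter name, so mobilenet's depthwise conv weights now get the factor = 3 * 3 treatment instead of the ordinary fan-in/fan-out scaling.)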
24 changes: 14 additions & 10 deletions nnvm/python/nnvm/testing/utils.py
@@ -5,9 +5,10 @@
 import tvm
 from ..compiler import graph_util
 from .. import graph
+from . init import Xavier


-def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32"):
+def create_workload(net, batch_size, image_shape=(3, 224, 224),
+                    dtype="float32", initializer=None, seed=0):
     """Helper function to create benchmark workload for input network

     Parameters
@@ -24,6 +25,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
     dtype : str, optional
         The data type

+    initializer : Initializer
+        The initializer used
+
+    seed : int
+        The seed used in initialization.
+
     Returns
     -------
     net : nnvm.Symbol
@@ -38,15 +45,12 @@ def create_workload(net, batch_size, image_shape=(3, 224, 224), dtype="float32")
     g = graph.create(net)
     input_shapes, _ = graph_util.infer_shape(g, data=data_shape)
     shape_dict = dict(zip(g.index.input_names, input_shapes))
+    np.random.seed(seed)
+    initializer = initializer if initializer else Xavier(magnitude=3)
     for k, v in shape_dict.items():
         if k == "data":
             continue
-        # Specially generate non-negative parameters.
-        if k.endswith("gamma"):
-            init = np.random.uniform(0.9, 1, size=v)
-        elif k.endswith("var"):
-            init = np.random.uniform(0.9, 1, size=v)
-        else:
-            init = np.random.uniform(-0.1, 0.1, size=v)
-        params[k] = tvm.nd.array(init.astype(dtype), ctx=tvm.cpu(0))
+        init_value = np.zeros(v).astype(dtype)
+        initializer(k, init_value)
+        params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0))
     return net, params
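
A hedged usage sketch of the updated helper follows; the tiny two-layer symbol is illustrative, not one of the bundled benchmark networks:

import nnvm.symbol as sym
from nnvm.testing.utils import create_workload

# Build a toy classifier so create_workload has parameters to fill.
data = sym.Variable("data")
net = sym.flatten(data=data)
net = sym.dense(data=net, units=10, name="fc1")
net = sym.softmax(net)

# fc1_weight is filled by the Xavier initializer (seeded, hence reproducible);
# fc1_bias is zero-initialized by the suffix dispatch in init.py.
net, params = create_workload(net, batch_size=1, image_shape=(3, 224, 224),
                              dtype="float32", seed=0)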
5 changes: 4 additions & 1 deletion nnvm/tutorials/imagenet_inference_gpu.py
@@ -8,6 +8,7 @@
 To begin with, we import nnvm (for compilation) and TVM (for deployment).
 """
 import tvm
+import numpy as np
 from tvm.contrib import nvcc, graph_runtime
 import nnvm.compiler
 import nnvm.testing
@@ -64,6 +65,7 @@ def tvm_callback_cuda_compile(code):
 graph, lib, params = nnvm.compiler.build(
     net, target, shape={"data": data_shape}, params=params)

+
######################################################################
# Run the Compiled Module
# -----------------------
@@ -74,10 +76,11 @@ def tvm_callback_cuda_compile(code):
 # This example runs on the same machine.
 #
 # Note that the code below no longer depends on NNVM, and only relies on TVM's runtime to run (deploy).
-
+data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
 module = graph_runtime.create(graph, lib, ctx)
 # set input
 module.set_input(**params)
+module.set_input("data", data)
 # run
 module.run()
 # get output
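The rest of the tutorial is collapsed in the diff above; for context, reading the result back with TVM's graph runtime typically looks like the sketch below (the 1000-class output shape is an assumption for the ImageNet model used here):

# Assumed: batch_size and a 1000-class output, as in the tutorial's model.
out_shape = (batch_size, 1000)
out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()
top1 = np.argmax(out[0])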
