This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Pre nn patch (#6201)
* pre-nn patch

* ifx

* fix

* fix
piiswrong authored May 13, 2017
1 parent 1ac8ca2 commit b005306
Showing 26 changed files with 896 additions and 418 deletions.
2 changes: 1 addition & 1 deletion cpp-package/example/inception_bn.cpp
@@ -23,7 +23,7 @@ Symbol ConvFactoryBN(Symbol data, int num_filter,
Symbol conv = Convolution("conv_" + name + suffix, data,
conv_w, conv_b, kernel,
num_filter, stride, Shape(1, 1), pad);
Symbol bn = BatchNorm("bn_" + name + suffix, conv, BN_GAMMA, BN_BETA);
Symbol bn = BatchNorm("bn_" + name + suffix, conv, Symbol(), Symbol(), Symbol(), Symbol());
return Activation("relu_" + name + suffix, bn, "relu");
}

6 changes: 4 additions & 2 deletions cpp-package/example/resnet.cpp
@@ -48,7 +48,8 @@ Symbol getConv(const std::string & name, Symbol data,
kernel, num_filter, stride, Shape(1, 1),
pad, 1, 512);

Symbol bn = BatchNorm(name + "_bn", conv, BN_GAMMA, BN_BETA, 2e-5, bn_momentum, false);
Symbol bn = BatchNorm(name + "_bn", conv, Symbol(), Symbol(), Symbol(),
Symbol(), 2e-5, bn_momentum, false);

if (with_relu) {
return Activation(name + "_relu", bn, "relu");
@@ -108,7 +109,8 @@ Symbol ResNetSymbol(int num_class, int num_level = 3, int num_block = 9,
Symbol data = Symbol::Variable("data");
Symbol data_label = Symbol::Variable("data_label");

Symbol zscore = BatchNorm("zscore", data, BN_GAMMA, BN_BETA, 0.001, bn_momentum);
Symbol zscore = BatchNorm("zscore", data, Symbol(), Symbol(), Symbol(),
Symbol(), 0.001, bn_momentum);

Symbol conv = getConv("conv0", zscore, num_filter,
Shape(3, 3), Shape(1, 1), Shape(1, 1),
34 changes: 4 additions & 30 deletions include/mxnet/ndarray.h
@@ -283,33 +283,13 @@ class NDArray {
* \param end end index in first dim
* \return sliced NDArray
*/
inline NDArray Slice(index_t begin, index_t end) const {
NDArray ret = *this;
CHECK(!is_none()) << "NDArray is not initialized";
CHECK_GE(shape_[0], end) << "Slice end index out of range";
size_t length = shape_.ProdShape(1, shape_.ndim());
ret.offset_ += begin * length;
ret.shape_[0] = end - begin;
return ret;
}
NDArray Slice(index_t begin, index_t end) const;
/*!
* \brief Index a NDArray
* \param idx the index
* \return idx-th sub array NDArray
*/
inline NDArray At(index_t idx) const {
NDArray ret = *this;
CHECK(!is_none()) << "NDArray is not initialized";
CHECK_GT(shape_[0], idx) << "index out of range";
size_t length = shape_.ProdShape(1, shape_.ndim());
ret.offset_ += idx * length;
if (shape_.ndim() > 1) {
ret.shape_ = TShape(shape_.data()+1, shape_.data()+shape_.ndim());
} else {
ret.shape_ = mshadow::Shape1(1);
}
return ret;
}
NDArray At(index_t idx) const;
/*!
* \brief Create a NDArray that shares memory with current one
* The new array must have smaller memory size than the current array.
@@ -337,13 +317,7 @@ class NDArray {
* \param shape new shape
* \return NDArray in new shape
*/
inline NDArray Reshape(const TShape &shape) const {
CHECK_GE(shape_.Size(), shape.Size())
<< "NDArray.Reshape: target shape size is different from current shape";
NDArray ret = *this;
ret.shape_ = shape;
return ret;
}
NDArray Reshape(const TShape &shape) const;
/*!
* \brief Allocate the space if it is delayed allocated.
* This is an internal function used by system that normal user should not use
@@ -566,7 +540,7 @@ void SamplePoisson(real_t lambda, NDArray *out);
/*!
* \brief Sample negative binomial distribution for each elements of out.
* \param k failure limit
* \param p success probability
* \param out output NDArray.
*/
void SampleNegBinomial(int32_t k, real_t p, NDArray *out);
5 changes: 5 additions & 0 deletions python/mxnet/_ctypes/ndarray.py
@@ -100,6 +100,7 @@ def _make_ndarray_function(handle, name):
kwarg_names.append(name)
#signature.append('is_train=False')
signature.append('out=None')
signature.append('name=None')
signature.append('**kwargs')
signature = ndsignature + signature

@@ -120,6 +121,10 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name))
kwargs['%s'] = np.dtype(kwargs['%s']).name"""%(
dtype_name, dtype_name, dtype_name))
code.append("""
try:
kwargs.pop('name')
except:
pass
out = kwargs.pop('out', None)
keys = list(kwargs.keys())
vals = [str(i) for i in kwargs.values()]""")
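
With this change every generated NDArray op accepts a `name` keyword and silently drops it before dispatching to the C API, so call sites written against the symbolic API (where every node is named) can be reused imperatively. A minimal sketch of the resulting behaviour, assuming `mx.nd.sum` is one of the generated ops:

import mxnet as mx

a = mx.nd.ones((2, 3))
# 'name' is accepted and popped by the generated wrapper; it has no effect here.
s1 = mx.nd.sum(a, name='total')
s2 = mx.nd.sum(a)
print(s1.asnumpy(), s2.asnumpy())  # both print [ 6.]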
6 changes: 3 additions & 3 deletions python/mxnet/callback.py
@@ -145,9 +145,9 @@ def __call__(self, param):
name_value = param.eval_metric.get_name_value()
if self.auto_reset:
param.eval_metric.reset()
for name, value in name_value:
logging.info('Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-%s=%f',
param.epoch, count, speed, name, value)
msg = 'Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec'
msg += '\t%s=%f'*len(name_value)
logging.info(msg, param.epoch, count, speed, *sum(name_value, ()))
else:
logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
param.epoch, count, speed)
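
The Speedometer callback now folds all metric (name, value) pairs into a single log record instead of one record per metric; `sum(name_value, ())` flattens the list of pairs into one argument tuple for the `%`-style format string. A small sketch with made-up values:

import logging
logging.basicConfig(level=logging.INFO)

name_value = [('accuracy', 0.91), ('cross-entropy', 0.34)]  # illustrative values
msg = 'Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec'
msg += '\t%s=%f' * len(name_value)
# sum(name_value, ()) -> ('accuracy', 0.91, 'cross-entropy', 0.34)
logging.info(msg, 3, 100, 1250.0, *sum(name_value, ()))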
13 changes: 7 additions & 6 deletions python/mxnet/context.py
@@ -73,16 +73,17 @@ def device_type(self):
"""
return Context.devtype2str[self.device_typeid]

def __hash__(self):
"""Compute hash value of context for dictionary lookup"""
return hash((self.device_typeid, self.device_id))

def __eq__(self, other):
"""Compares two contexts. Two contexts are equal if they
have the same device type and device id.
"""
if not isinstance(other, Context):
return False
if self.device_typeid == other.device_typeid and \
self.device_id == other.device_id:
return True
return False
return isinstance(other, Context) and \
self.device_typeid == other.device_typeid and \
self.device_id == other.device_id

def __str__(self):
return '%s(%d)' % (self.device_type, self.device_id)
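
Defining `__hash__` next to the simplified `__eq__` makes `Context` usable as a dictionary key or set element, with equal contexts collapsing to the same entry. A short sketch of what this enables:

import mxnet as mx

# Equal contexts now hash equally, so they map to the same dict key / set entry.
buffers = {mx.cpu(0): 'pinned', mx.gpu(0): 'device'}
assert mx.cpu(0) == mx.Context('cpu', 0)
assert buffers[mx.Context('cpu', 0)] == 'pinned'
assert len({mx.gpu(0), mx.gpu(0)}) == 1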
20 changes: 12 additions & 8 deletions python/mxnet/contrib/autograd.py
@@ -51,25 +51,29 @@ def __exit__(self, ptype, value, trace):
set_is_training(self._prev)


def train():
"""Returns a training TrainingStateScope
def train_section():
"""Returns a training scope context to be used in 'with' statement
and captures training code.
Example::
with autograd.train():
with autograd.train_section():
y = model(x)
compute_gradient([y])
metric.update(...)
optim.step(...)
"""
return TrainingStateScope(True)


def test():
"""Returns a testing TrainingStateScope.
def test_section():
"""Returns a testing scope context to be used in 'with' statement
and captures testing code.
Example::
with autograd.train():
with autograd.train_section():
y = model(x)
compute_gradient([y])
with autograd.test():
with autograd.test_section():
# testing, IO, gradient updates...
"""
return TrainingStateScope(False)
@@ -146,7 +150,7 @@ def wrapped(*args):
assert isinstance(x, NDArray), "type of autograd input should NDArray."
grads = [zeros_like(x) for x in variables]
mark_variables(variables, grads)
with train():
with train_section():
outputs = func(*args)
compute_gradient([outputs] if isinstance(outputs, NDArray) else outputs)
return grads, outputs
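
For reference, a minimal sketch of the renamed scope in use, mirroring the pattern in `wrapped` above (the array values are illustrative):

import mxnet as mx
from mxnet.contrib import autograd

x = mx.nd.ones((2, 3))
dx = mx.nd.zeros_like(x)            # buffer that receives the gradient
autograd.mark_variables([x], [dx])
with autograd.train_section():      # formerly autograd.train()
    y = x * 2
    autograd.compute_gradient([y])
print(dx.asnumpy())                 # gradient of y w.r.t. x, written into dx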
97 changes: 49 additions & 48 deletions python/mxnet/initializer.py
@@ -9,6 +9,7 @@
from .base import string_types
from .ndarray import NDArray, load
from . import random
from . import registry

# inherit str for backward compatibility
class InitDesc(str):
@@ -29,54 +30,11 @@ def __new__(cls, name, attrs=None, global_init=None):
ret.global_init = global_init
return ret

_INITIALIZER_REGISTRY = {}

def register(klass):
"""Registers a custom initializer.
Custom initializers can be created by extending `mx.init.Initializer` and implementing the
required functions like `_init_weight` and `_init_bias`. The created initializer must be
registered using `mx.init.register` before it can be used.
Parameters
----------
klass : class
A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer.
Example
-------
>>> # Create and register a custom initializer that
... # initializes weights to 0.1 and biases to 1.
...
>>> @mx.init.register
... class CustomInit(mx.init.Initializer):
... def __init__(self):
... super(CustomInit, self).__init__()
... def _init_weight(self, _, arr):
... arr[:] = 0.1
... def _init_bias(self, _, arr):
... arr[:] = 1
...
>>> # Module is an instance of 'mxnet.module.Module'
...
>>> module.init_params(CustomInit())
"""
assert issubclass(klass, Initializer), "Can only register subclass of Initializer"
name = klass.__name__.lower()
if name in _INITIALIZER_REGISTRY:
warnings.warn(
"\033[91mNew initializer %s.%s is overriding existing initializer %s.%s\033[0m"%(
klass.__module__, klass.__name__,
_INITIALIZER_REGISTRY[name].__module__,
_INITIALIZER_REGISTRY[name].__name__),
UserWarning, stacklevel=2)
_INITIALIZER_REGISTRY[name] = klass
return klass

class Initializer(object):
"""The base class of an initializer."""
def __init__(self, **kwargs):
self.kwargs = kwargs
self._kwargs = kwargs

def dumps(self):
"""Saves the initializer to string
@@ -97,7 +55,7 @@ def dumps(self):
>>> init.dumps()
'["xavier", {"rnd_type": "uniform", "magnitude": 2.34, "factor_type": "in"}]'
"""
return json.dumps([self.__class__.__name__.lower(), self.kwargs])
return json.dumps([self.__class__.__name__.lower(), self._kwargs])

def __call__(self, desc, arr):
"""Initialize an array
@@ -120,8 +78,7 @@ def __call__(self, desc, arr):

if init:
# when calling Variable initializer
klass, kwargs = json.loads(init)
_INITIALIZER_REGISTRY[klass.lower()](**kwargs)._init_weight(desc, arr)
create(init)._init_weight(desc, arr)
else:
# register nnvm::FSetInputVariableAttrs in the backend for new patterns
# don't add new cases here.
@@ -223,6 +180,48 @@ def _init_default(self, name, _):
'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name)


# pylint: disable=invalid-name
_register = registry.get_register_func(Initializer, 'initializer')
alias = registry.get_alias_func(Initializer, 'initializer')
create = registry.get_create_func(Initializer, 'initializer')
# pylint: enable=invalid-name

def register(klass):
"""Registers a custom initializer.
Custom initializers can be created by extending `mx.init.Initializer` and implementing the
required functions like `_init_weight` and `_init_bias`. The created initializer must be
registered using `mx.init.register` before it can be called by name.
Parameters
----------
klass : class
A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer.
Example
-------
>>> # Create and register a custom initializer that
... # initializes weights to 0.1 and biases to 1.
...
>>> @mx.init.register
... @alias('myinit')
... class CustomInit(mx.init.Initializer):
... def __init__(self):
... super(CustomInit, self).__init__()
... def _init_weight(self, _, arr):
... arr[:] = 0.1
... def _init_bias(self, _, arr):
... arr[:] = 1
...
>>> # Module is an instance of 'mxnet.module.Module'
...
>>> module.init_params("custominit")
>>> # module.init_params("myinit")
>>> # module.init_params(CustomInit())
"""
return _register(klass)


class Load(object):
"""Initializes variables by loading data from file or dict.
@@ -312,6 +311,7 @@ def __call__(self, name, arr):
'add a ".*" pattern at the and with default Initializer.')

@register
@alias("zeros")
class Zero(Initializer):
"""Initializes weights to zero.
@@ -336,6 +336,7 @@ def _init_weight(self, _, arr):
arr[:] = 0

@register
@alias("ones")
class One(Initializer):
"""Initializes weights to one.
@@ -561,9 +562,9 @@ class MSRAPrelu(Xavier):
initial slope of any PReLU (or similar) nonlinearities.
"""
def __init__(self, factor_type="avg", slope=0.25):
self.kwargs = {'factor_type': factor_type, 'slope': slope}
magnitude = 2. / (1 + slope ** 2)
super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude)
self._kwargs = {'factor_type': factor_type, 'slope': slope}

@register
class Bilinear(Initializer):
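
With the registry in place, initializers can be looked up by name or alias and round-tripped through `dumps()`, which is what the new `create(init)` call in `__call__` relies on. A brief sketch, assuming `mx.init` remains the usual alias for `mxnet.initializer` and that `create` accepts both registered names and `dumps()` JSON strings:

import mxnet as mx
from mxnet import initializer

zero = initializer.create('zeros')     # alias registered on Zero above
xavier = mx.init.Xavier(rnd_type='uniform', magnitude=2.34)
s = xavier.dumps()                     # '["xavier", {...}]' built from self._kwargs
same = initializer.create(s)           # rebuilt from the JSON string
print(type(zero).__name__, type(same).__name__)  # Zero Xavier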
