From b005306064369528d1295687326eb41ecc481251 Mon Sep 17 00:00:00 2001 From: Eric Junyuan Xie Date: Sat, 13 May 2017 06:14:42 -0700 Subject: [PATCH] Pre nn patch (#6201) * pre-nn patch * ifx * fix * fix --- cpp-package/example/inception_bn.cpp | 2 +- cpp-package/example/resnet.cpp | 6 +- include/mxnet/ndarray.h | 34 +- python/mxnet/_ctypes/ndarray.py | 5 + python/mxnet/callback.py | 6 +- python/mxnet/context.py | 13 +- python/mxnet/contrib/autograd.py | 20 +- python/mxnet/initializer.py | 97 ++--- python/mxnet/metric.py | 483 +++++++++++++++++------ python/mxnet/module/executor_group.py | 41 +- python/mxnet/ndarray.py | 28 +- python/mxnet/optimizer.py | 51 +-- python/mxnet/registry.py | 141 +++++++ python/mxnet/symbol.py | 29 +- src/c_api/c_api.cc | 26 +- src/c_api/c_api_ndarray.cc | 4 +- src/ndarray/autograd.cc | 52 ++- src/ndarray/autograd.h | 4 +- src/ndarray/ndarray.cc | 142 ++++--- src/operator/batch_norm.cc | 2 + src/operator/convolution-inl.h | 44 ++- src/operator/pooling.cu | 23 +- src/operator/tensor/elemwise_unary_op.cc | 4 + tests/python/unittest/test_autograd.py | 8 +- tests/python/unittest/test_metric.py | 22 ++ tests/python/unittest/test_ndarray.py | 27 +- 26 files changed, 896 insertions(+), 418 deletions(-) create mode 100644 python/mxnet/registry.py create mode 100644 tests/python/unittest/test_metric.py diff --git a/cpp-package/example/inception_bn.cpp b/cpp-package/example/inception_bn.cpp index b65611215b7a..a4ed75a0a855 100644 --- a/cpp-package/example/inception_bn.cpp +++ b/cpp-package/example/inception_bn.cpp @@ -23,7 +23,7 @@ Symbol ConvFactoryBN(Symbol data, int num_filter, Symbol conv = Convolution("conv_" + name + suffix, data, conv_w, conv_b, kernel, num_filter, stride, Shape(1, 1), pad); - Symbol bn = BatchNorm("bn_" + name + suffix, conv, BN_GAMMA, BN_BETA); + Symbol bn = BatchNorm("bn_" + name + suffix, conv, Symbol(), Symbol(), Symbol(), Symbol()); return Activation("relu_" + name + suffix, bn, "relu"); } diff --git a/cpp-package/example/resnet.cpp b/cpp-package/example/resnet.cpp index 5d3131223ef3..c09b2c2fa485 100644 --- a/cpp-package/example/resnet.cpp +++ b/cpp-package/example/resnet.cpp @@ -48,7 +48,8 @@ Symbol getConv(const std::string & name, Symbol data, kernel, num_filter, stride, Shape(1, 1), pad, 1, 512); - Symbol bn = BatchNorm(name + "_bn", conv, BN_GAMMA, BN_BETA, 2e-5, bn_momentum, false); + Symbol bn = BatchNorm(name + "_bn", conv, Symbol(), Symbol(), Symbol(), + Symbol(), 2e-5, bn_momentum, false); if (with_relu) { return Activation(name + "_relu", bn, "relu"); @@ -108,7 +109,8 @@ Symbol ResNetSymbol(int num_class, int num_level = 3, int num_block = 9, Symbol data = Symbol::Variable("data"); Symbol data_label = Symbol::Variable("data_label"); - Symbol zscore = BatchNorm("zscore", data, BN_GAMMA, BN_BETA, 0.001, bn_momentum); + Symbol zscore = BatchNorm("zscore", data, Symbol(), Symbol(), Symbol(), + Symbol(), 0.001, bn_momentum); Symbol conv = getConv("conv0", zscore, num_filter, Shape(3, 3), Shape(1, 1), Shape(1, 1), diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index 31318329c0e1..ea38909d07f1 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -283,33 +283,13 @@ class NDArray { * \param end end index in first dim * \return sliced NDArray */ - inline NDArray Slice(index_t begin, index_t end) const { - NDArray ret = *this; - CHECK(!is_none()) << "NDArray is not initialized"; - CHECK_GE(shape_[0], end) << "Slice end index out of range"; - size_t length = shape_.ProdShape(1, shape_.ndim()); - ret.offset_ += 
begin * length; - ret.shape_[0] = end - begin; - return ret; - } + NDArray Slice(index_t begin, index_t end) const; /*! * \brief Index a NDArray * \param idx the index * \return idx-th sub array NDArray */ - inline NDArray At(index_t idx) const { - NDArray ret = *this; - CHECK(!is_none()) << "NDArray is not initialized"; - CHECK_GT(shape_[0], idx) << "index out of range"; - size_t length = shape_.ProdShape(1, shape_.ndim()); - ret.offset_ += idx * length; - if (shape_.ndim() > 1) { - ret.shape_ = TShape(shape_.data()+1, shape_.data()+shape_.ndim()); - } else { - ret.shape_ = mshadow::Shape1(1); - } - return ret; - } + NDArray At(index_t idx) const; /*! * \brief Create a NDArray that shares memory with current one * The new array must have smaller memory size than the current array. @@ -337,13 +317,7 @@ class NDArray { * \param shape new shape * \return NDArray in new shape */ - inline NDArray Reshape(const TShape &shape) const { - CHECK_GE(shape_.Size(), shape.Size()) - << "NDArray.Reshape: target shape size is different from current shape"; - NDArray ret = *this; - ret.shape_ = shape; - return ret; - } + NDArray Reshape(const TShape &shape) const; /*! * \brief Allocate the space if it is delayed allocated. * This is an internal function used by system that normal user should not use @@ -566,7 +540,7 @@ void SamplePoisson(real_t lambda, NDArray *out); /*! * \brief Sample negative binomial distribution for each elements of out. * \param k failure limit - * \param p success probability + * \param p success probability * \param out output NDArray. */ void SampleNegBinomial(int32_t k, real_t p, NDArray *out); diff --git a/python/mxnet/_ctypes/ndarray.py b/python/mxnet/_ctypes/ndarray.py index c81945184780..786b134befa6 100644 --- a/python/mxnet/_ctypes/ndarray.py +++ b/python/mxnet/_ctypes/ndarray.py @@ -100,6 +100,7 @@ def _make_ndarray_function(handle, name): kwarg_names.append(name) #signature.append('is_train=False') signature.append('out=None') + signature.append('name=None') signature.append('**kwargs') signature = ndsignature + signature @@ -120,6 +121,10 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) kwargs['%s'] = np.dtype(kwargs['%s']).name"""%( dtype_name, dtype_name, dtype_name)) code.append(""" + try: + kwargs.pop('name') + except: + pass out = kwargs.pop('out', None) keys = list(kwargs.keys()) vals = [str(i) for i in kwargs.values()]""") diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index 28300805b5be..b585ce82b525 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -145,9 +145,9 @@ def __call__(self, param): name_value = param.eval_metric.get_name_value() if self.auto_reset: param.eval_metric.reset() - for name, value in name_value: - logging.info('Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-%s=%f', - param.epoch, count, speed, name, value) + msg = 'Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec' + msg += '\t%s=%f'*len(name_value) + logging.info(msg, param.epoch, count, speed, *sum(name_value, ())) else: logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec", param.epoch, count, speed) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 3580edae3d11..9822a6d86708 100644 --- a/python/mxnet/context.py +++ b/python/mxnet/context.py @@ -73,16 +73,17 @@ def device_type(self): """ return Context.devtype2str[self.device_typeid] + def __hash__(self): + """Compute hash value of context for dictionary lookup""" + return hash((self.device_typeid, self.device_id)) + def __eq__(self, other): """Compares two contexts. 
Two contexts are equal if they have the same device type and device id. """ - if not isinstance(other, Context): - return False - if self.device_typeid == other.device_typeid and \ - self.device_id == other.device_id: - return True - return False + return isinstance(other, Context) and \ + self.device_typeid == other.device_typeid and \ + self.device_id == other.device_id def __str__(self): return '%s(%d)' % (self.device_type, self.device_id) diff --git a/python/mxnet/contrib/autograd.py b/python/mxnet/contrib/autograd.py index a6d7f32e2fb1..40ab289c8f4c 100644 --- a/python/mxnet/contrib/autograd.py +++ b/python/mxnet/contrib/autograd.py @@ -51,25 +51,29 @@ def __exit__(self, ptype, value, trace): set_is_training(self._prev) -def train(): - """Returns a training TrainingStateScope +def train_section(): + """Returns a training scope context to be used in 'with' statement + and captures training code. Example:: - with autograd.train(): + with autograd.train_section(): y = model(x) compute_gradient([y]) + metric.update(...) + optim.step(...) """ return TrainingStateScope(True) -def test(): - """Returns a testing TrainingStateScope. +def test_section(): + """Returns a testing scope context to be used in 'with' statement + and captures testing code. Example:: - with autograd.train(): + with autograd.train_section(): y = model(x) compute_gradient([y]) - with autograd.test(): + with autograd.test_section(): # testing, IO, gradient updates... """ return TrainingStateScope(False) @@ -146,7 +150,7 @@ def wrapped(*args): assert isinstance(x, NDArray), "type of autograd input should NDArray." grads = [zeros_like(x) for x in variables] mark_variables(variables, grads) - with train(): + with train_section(): outputs = func(*args) compute_gradient([outputs] if isinstance(outputs, NDArray) else outputs) return grads, outputs diff --git a/python/mxnet/initializer.py b/python/mxnet/initializer.py index 13076ff44fb7..5cc2ede3f3ed 100755 --- a/python/mxnet/initializer.py +++ b/python/mxnet/initializer.py @@ -9,6 +9,7 @@ from .base import string_types from .ndarray import NDArray, load from . import random +from . import registry # inherit str for backward compatibility class InitDesc(str): @@ -29,54 +30,11 @@ def __new__(cls, name, attrs=None, global_init=None): ret.global_init = global_init return ret -_INITIALIZER_REGISTRY = {} - -def register(klass): - """Registers a custom initializer. - - Custom initializers can be created by extending `mx.init.Initializer` and implementing the - required functions like `_init_weight` and `_init_bias`. The created initializer must be - registered using `mx.init.register` before it can be used. - - Parameters - ---------- - klass : class - A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer. - - Example - ------- - >>> # Create and register a custom initializer that - ... # initializes weights to 0.1 and biases to 1. - ... - >>> @mx.init.register - ... class CustomInit(mx.init.Initializer): - ... def __init__(self): - ... super(CustomInit, self).__init__() - ... def _init_weight(self, _, arr): - ... arr[:] = 0.1 - ... def _init_bias(self, _, arr): - ... arr[:] = 1 - ... - >>> # Module is an instance of 'mxnet.module.Module' - ... 
- >>> module.init_params(CustomInit()) - """ - assert issubclass(klass, Initializer), "Can only register subclass of Initializer" - name = klass.__name__.lower() - if name in _INITIALIZER_REGISTRY: - warnings.warn( - "\033[91mNew initializer %s.%s is overriding existing initializer %s.%s\033[0m"%( - klass.__module__, klass.__name__, - _INITIALIZER_REGISTRY[name].__module__, - _INITIALIZER_REGISTRY[name].__name__), - UserWarning, stacklevel=2) - _INITIALIZER_REGISTRY[name] = klass - return klass class Initializer(object): """The base class of an initializer.""" def __init__(self, **kwargs): - self.kwargs = kwargs + self._kwargs = kwargs def dumps(self): """Saves the initializer to string @@ -97,7 +55,7 @@ def dumps(self): >>> init.dumps() '["xavier", {"rnd_type": "uniform", "magnitude": 2.34, "factor_type": "in"}]' """ - return json.dumps([self.__class__.__name__.lower(), self.kwargs]) + return json.dumps([self.__class__.__name__.lower(), self._kwargs]) def __call__(self, desc, arr): """Initialize an array @@ -120,8 +78,7 @@ def __call__(self, desc, arr): if init: # when calling Variable initializer - klass, kwargs = json.loads(init) - _INITIALIZER_REGISTRY[klass.lower()](**kwargs)._init_weight(desc, arr) + create(init)._init_weight(desc, arr) else: # register nnvm::FSetInputVariableAttrs in the backend for new patterns # don't add new cases here. @@ -223,6 +180,48 @@ def _init_default(self, name, _): 'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name) +# pylint: disable=invalid-name +_register = registry.get_register_func(Initializer, 'initializer') +alias = registry.get_alias_func(Initializer, 'initializer') +create = registry.get_create_func(Initializer, 'initializer') +# pylint: enable=invalid-name + +def register(klass): + """Registers a custom initializer. + + Custom initializers can be created by extending `mx.init.Initializer` and implementing the + required functions like `_init_weight` and `_init_bias`. The created initializer must be + registered using `mx.init.register` before it can be called by name. + + Parameters + ---------- + klass : class + A subclass of `mx.init.Initializer` that needs to be registered as a custom initializer. + + Example + ------- + >>> # Create and register a custom initializer that + ... # initializes weights to 0.1 and biases to 1. + ... + >>> @mx.init.register + ... @alias('myinit') + ... class CustomInit(mx.init.Initializer): + ... def __init__(self): + ... super(CustomInit, self).__init__() + ... def _init_weight(self, _, arr): + ... arr[:] = 0.1 + ... def _init_bias(self, _, arr): + ... arr[:] = 1 + ... + >>> # Module is an instance of 'mxnet.module.Module' + ... + >>> module.init_params("custominit") + >>> # module.init_params("myinit") + >>> # module.init_params(CustomInit()) + """ + return _register(klass) + + class Load(object): """Initializes variables by loading data from file or dict. @@ -312,6 +311,7 @@ def __call__(self, name, arr): 'add a ".*" pattern at the and with default Initializer.') @register +@alias("zeros") class Zero(Initializer): """Initializes weights to zero. @@ -336,6 +336,7 @@ def _init_weight(self, _, arr): arr[:] = 0 @register +@alias("ones") class One(Initializer): """Initializes weights to one. @@ -561,9 +562,9 @@ class MSRAPrelu(Xavier): initial slope of any PReLU (or similar) nonlinearities. """ def __init__(self, factor_type="avg", slope=0.25): - self.kwargs = {'factor_type': factor_type, 'slope': slope} magnitude = 2. 
/ (1 + slope ** 2) super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude) + self._kwargs = {'factor_type': factor_type, 'slope': slope} @register class Bilinear(Initializer): diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index 1bc7d9ae423b..17a0b20d106a 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -1,11 +1,17 @@ # coding: utf-8 -# pylint: disable=no-member +# pylint: disable=no-member, too-many-lines """Online evaluation metric module.""" from __future__ import absolute_import import math +from collections import OrderedDict + import numpy + +from .base import numeric_types, string_types from . import ndarray +from . import registry + def check_label_shapes(labels, preds, shape=0): if shape == 0: @@ -17,6 +23,7 @@ def check_label_shapes(labels, preds, shape=0): raise ValueError("Shape of labels {} does not match shape of " "predictions {}".format(label_shape, pred_shape)) + class EvalMetric(object): """Base class for all evaluation metrics. @@ -25,13 +32,64 @@ class EvalMetric(object): This is a base class that provides common metric interfaces. One should not use this class directly, but instead create new metric classes that extend it. - """ - def __init__(self, name, num=None): - self.name = name - self.num = num + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + """ + def __init__(self, name, output_names=None, + label_names=None, **kwargs): + self.name = str(name) + self.output_names = output_names + self.label_names = label_names + self._kwargs = kwargs self.reset() + def __str__(self): + return "EvalMetric: {}".format(dict(self.get_name_value())) + + def get_config(self): + """Save configurations of metric. Can be recreated + from configs with metric.create(**config) + """ + config = self._kwargs.copy() + config.update({ + 'metric': self.__class__.__name__, + 'name': self.name, + 'output_names': self.output_names, + 'label_names': self.label_names}) + return config + + def update_dict(self, label, pred): + """Update the internal evaluation with named label and pred + + Parameters + ---------- + labels : OrderedDict of str -> NDArray + name to array mapping for labels. + + preds : list of NDArray + name to array mapping of predicted outputs. + """ + if self.output_names is not None: + pred = [pred[name] for name in self.output_names] + else: + pred = pred.values() + + if self.label_names is not None: + label = [label[name] for name in self.label_names] + else: + label = label.values() + + self.update(label, pred) + def update(self, labels, preds): """Updates the internal evaluation result. @@ -47,12 +105,8 @@ def update(self, labels, preds): def reset(self): """Resets the internal evaluation result to initial state.""" - if self.num is None: - self.num_inst = 0 - self.sum_metric = 0.0 - else: - self.num_inst = [0] * self.num - self.sum_metric = [0.0] * self.num + self.num_inst = 0 + self.sum_metric = 0.0 def get(self): """Gets the current evaluation result. @@ -64,16 +118,10 @@ def get(self): values : list of float Value of the evaluations. 
""" - if self.num is None: - if self.num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, self.sum_metric / self.num_inst) + if self.num_inst == 0: + return (self.name, float('nan')) else: - names = ['%s_%d'%(self.name, i) for i in range(self.num)] - values = [x / y if y != 0 else float('nan') \ - for x, y in zip(self.sum_metric, self.num_inst)] - return (names, values) + return (self.name, self.sum_metric / self.num_inst) def get_name_value(self): """Returns zipped name and value pairs. @@ -88,15 +136,75 @@ def get_name_value(self): name = [name] if not isinstance(value, list): value = [value] - return zip(name, value) + return list(zip(name, value)) - def __str__(self): - return "EvalMetric: {}".format(dict(self.get_name_value())) +# pylint: disable=invalid-name +register = registry.get_register_func(EvalMetric, 'metric') +alias = registry.get_alias_func(EvalMetric, 'metric') +_create = registry.get_create_func(EvalMetric, 'metric') +# pylint: enable=invalid-name + + +def create(metric, *args, **kwargs): + """Creates evaluation metric from metric names or instances of EvalMetric + or a custom metric function. + + Parameters + ---------- + metric : str or callable + Specifies the metric to create. + This argument must be one of the below: + - Name of a metric. + - An instance of `EvalMetric`. + - A list, each element of which is a metric or a metric name. + - An evaluation function that computes custom metric for a given batch of + labels and predictions. + *args : list + Additional arguments to metric constructor. + Only used when metric is str. + **kwargs : dict + Additional arguments to metric constructor. + Only used when metric is str + Examples + -------- + >>> def custom_metric(label, pred): + ... return np.mean(np.abs(label - pred)) + ... + >>> metric1 = mx.metric.create('acc') + >>> metric2 = mx.metric.create(custom_metric) + >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) + """ + if callable(metric): + return CustomMetric(metric, *args, **kwargs) + elif isinstance(metric, list): + composite_metric = CompositeEvalMetric() + for child_metric in metric: + composite_metric.add(create(child_metric, *args, **kwargs)) + return composite_metric + + return _create(metric, *args, **kwargs) + + +@register +@alias('composite') class CompositeEvalMetric(EvalMetric): """Manages multiple evaluation metrics. + Parameters + ---------- + metrics : list of EvalMetric + List of child metrics. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] @@ -111,12 +219,13 @@ class CompositeEvalMetric(EvalMetric): (['accuracy', 'f1'], [0.6666666666666666, 0.8]) """ - def __init__(self, **kwargs): - super(CompositeEvalMetric, self).__init__('composite') - try: - self.metrics = kwargs['metrics'] - except KeyError: - self.metrics = [] + def __init__(self, metrics=None, name='composite', + output_names=None, label_names=None): + super(CompositeEvalMetric, self).__init__( + 'composite', output_names=output_names, label_names=label_names) + if metrics is None: + metrics = [] + self.metrics = [create(i) for i in metrics] def add(self, metric): """Adds a child metric. 
@@ -126,7 +235,7 @@ def add(self, metric): metric A metric instance. """ - self.metrics.append(metric) + self.metrics.append(create(metric)) def get_metric(self, index): """Returns a child metric. @@ -142,6 +251,17 @@ def get_metric(self, index): return ValueError("Metric index {} is out of range 0 and {}".format( index, len(self.metrics))) + def update_dict(self, labels, preds): + if self.label_names is not None: + labels = OrderedDict([i for i in labels.items() + if i[0] in self.label_names]) + if self.output_names is not None: + preds = OrderedDict([i for i in preds.items() + if i[0] in self.output_names]) + + for metric in self.metrics: + metric.update_dict(labels, preds) + def update(self, labels, preds): """Updates the internal evaluation result. @@ -175,20 +295,46 @@ def get(self): Value of the evaluations. """ names = [] - results = [] + values = [] for metric in self.metrics: - result = metric.get() - names.append(result[0]) - results.append(result[1]) - return (names, results) + name, value = metric.get() + if isinstance(name, string_types): + name = [name] + if isinstance(value, numeric_types): + value = [value] + names.extend(name) + values.extend(value) + return (names, values) + + def get_config(self): + config = super(CompositeEvalMetric, self).get_config() + config.update({'metrics': [i.get_config() for i in self.metrics]}) + return config + ######################## # CLASSIFICATION METRICS ######################## + +@register +@alias('acc') class Accuracy(EvalMetric): """Computes accuracy classification score. + Parameters + ---------- + axis : int, default=1 + The axis that represents classes + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] @@ -198,9 +344,12 @@ class Accuracy(EvalMetric): >>> print acc.get() ('accuracy', 0.6666666666666666) """ - - def __init__(self): - super(Accuracy, self).__init__('accuracy') + def __init__(self, axis=1, name='accuracy', + output_names=None, label_names=None): + super(Accuracy, self).__init__( + name, axis=axis, + output_names=output_names, label_names=label_names) + self.axis = axis def update(self, labels, preds): """Updates the internal evaluation result. @@ -217,7 +366,7 @@ def update(self, labels, preds): for label, pred_label in zip(labels, preds): if pred_label.shape != label.shape: - pred_label = ndarray.argmax_channel(pred_label) + pred_label = ndarray.argmax(pred_label, axis=self.axis) pred_label = pred_label.asnumpy().astype('int32') label = label.asnumpy().astype('int32') @@ -226,6 +375,9 @@ def update(self, labels, preds): self.sum_metric += (pred_label.flat == label.flat).sum() self.num_inst += len(pred_label.flat) + +@register +@alias('top_k_accuracy', 'top_k_acc') class TopKAccuracy(EvalMetric): """Computes top k predictions accuracy. @@ -239,6 +391,14 @@ class TopKAccuracy(EvalMetric): ---------- top_k : int Whether targets are in top k predictions. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. 
+ label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. Examples -------- @@ -252,12 +412,12 @@ class TopKAccuracy(EvalMetric): ('top_k_accuracy', 0.3) """ - def __init__(self, **kwargs): - super(TopKAccuracy, self).__init__('top_k_accuracy') - try: - self.top_k = kwargs['top_k'] - except KeyError: - self.top_k = 1 + def __init__(self, top_k=1, name='top_k_accuracy', + output_names=None, label_names=None): + super(TopKAccuracy, self).__init__( + name, top_k=top_k, + output_names=output_names, label_names=label_names) + self.top_k = top_k assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1' self.name += '_%d' % self.top_k @@ -290,6 +450,8 @@ def update(self, labels, preds): self.sum_metric += (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() self.num_inst += num_samples + +@register class F1(EvalMetric): """Computes the F1 score of a binary classification problem. @@ -307,6 +469,17 @@ class F1(EvalMetric): This F1 score only supports binary classification. + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] @@ -317,8 +490,10 @@ class F1(EvalMetric): ('f1', 0.8) """ - def __init__(self): - super(F1, self).__init__('f1') + def __init__(self, name='f1', + output_names=None, label_names=None): + super(F1, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -371,6 +546,7 @@ def update(self, labels, preds): self.num_inst += 1 +@register class Perplexity(EvalMetric): """Computes perplexity. @@ -406,6 +582,14 @@ class Perplexity(EvalMetric): The axis from prediction that was used to compute softmax. By default use the last axis. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. Examples -------- @@ -416,8 +600,11 @@ class Perplexity(EvalMetric): >>> print perp.get() ('Perplexity', 1.7710976285155853) """ - def __init__(self, ignore_label, axis=-1): - super(Perplexity, self).__init__('Perplexity') + def __init__(self, ignore_label, axis=-1, name='perplexity', + output_names=None, label_names=None): + super(Perplexity, self).__init__( + name, ignore_label=ignore_label, + output_names=output_names, label_names=label_names) self.ignore_label = ignore_label self.axis = axis @@ -463,6 +650,8 @@ def get(self): # REGRESSION METRICS #################### + +@register class MAE(EvalMetric): """Computes Mean Absolute Error (MAE) loss. @@ -471,6 +660,17 @@ class MAE(EvalMetric): .. math:: \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. 
+ label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -481,8 +681,10 @@ class MAE(EvalMetric): ('mae', 0.5) """ - def __init__(self): - super(MAE, self).__init__('mae') + def __init__(self, name='mae', + output_names=None, label_names=None): + super(MAE, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -508,6 +710,7 @@ def update(self, labels, preds): self.num_inst += 1 # numpy.prod(label.shape) +@register class MSE(EvalMetric): """Computes Mean Squared Error (MSE) loss. @@ -516,6 +719,17 @@ class MSE(EvalMetric): .. math:: \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -525,8 +739,10 @@ class MSE(EvalMetric): >>> print mean_squared_error.get() ('mse', 0.375) """ - def __init__(self): - super(MSE, self).__init__('mse') + def __init__(self, name='mse', + output_names=None, label_names=None): + super(MSE, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -551,6 +767,8 @@ def update(self, labels, preds): self.sum_metric += ((label - pred)**2.0).mean() self.num_inst += 1 # numpy.prod(label.shape) + +@register class RMSE(EvalMetric): """Computes Root Mean Squred Error (RMSE) loss. @@ -559,6 +777,17 @@ class RMSE(EvalMetric): .. math:: \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -568,8 +797,10 @@ class RMSE(EvalMetric): >>> print root_mean_squared_error.get() ('rmse', 0.612372457981) """ - def __init__(self): - super(RMSE, self).__init__('rmse') + def __init__(self, name='rmse', + output_names=None, label_names=None): + super(RMSE, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -594,6 +825,9 @@ def update(self, labels, preds): self.sum_metric += numpy.sqrt(((label - pred)**2.0).mean()) self.num_inst += 1 + +@register +@alias('ce') class CrossEntropy(EvalMetric): """Computes Cross Entropy loss. @@ -607,6 +841,14 @@ class CrossEntropy(EvalMetric): eps : float Cross Entropy loss is undefined for predicted value is 0 or 1, so predicted values are added with the small constant. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. 
+ label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. Examples -------- @@ -617,8 +859,11 @@ class CrossEntropy(EvalMetric): >>> print ce.get() ('cross-entropy', 0.57159948348999023) """ - def __init__(self, eps=1e-8): - super(CrossEntropy, self).__init__('cross-entropy') + def __init__(self, eps=1e-8, name='cross-entropy', + output_names=None, label_names=None): + super(CrossEntropy, self).__init__( + name, eps=eps, + output_names=output_names, label_names=label_names) self.eps = eps def update(self, labels, preds): @@ -645,21 +890,52 @@ def update(self, labels, preds): self.sum_metric += (-numpy.log(prob + self.eps)).sum() self.num_inst += label.shape[0] -class Torch(EvalMetric): - """Dummy metric for torch criterions.""" - def __init__(self, name='torch'): - super(Torch, self).__init__(name) + +@register +class Loss(EvalMetric): + """Dummy metric for directly printing loss. + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + """ + def __init__(self, name='loss', + output_names=None, label_names=None): + super(Loss, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, _, preds): for pred in preds: - self.sum_metric += pred.asnumpy().mean() - self.num_inst += 1 + self.sum_metric += ndarray.sum(pred).asscalar() + self.num_inst += pred.size + + +@register +class Torch(Loss): + """Dummy metric for torch criterions.""" + def __init__(self, name='torch', + output_names=None, label_names=None): + super(Torch, self).__init__( + name, output_names=output_names, label_names=label_names) + + +@register +class Caffe(Loss): + """Dummy metric for caffe criterions.""" + def __init__(self, name='caffe', + output_names=None, label_names=None): + super(Caffe, self).__init__( + name, output_names=output_names, label_names=label_names) -class Caffe(Torch): - """Dummy metric for caffe criterions""" - def __init__(self): - super(Caffe, self).__init__('caffe') +@register class CustomMetric(EvalMetric): """Computes a customized evaluation metric. @@ -676,6 +952,14 @@ class CustomMetric(EvalMetric): If true, the prediction outputs can have extra outputs. This is useful in RNN, where the states are also produced in outputs for forwarding. (the default is False). + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. 
Examples -------- @@ -687,12 +971,16 @@ class CustomMetric(EvalMetric): >>> print eval_metrics.get() ('custom()', 6.0) """ - def __init__(self, feval, name=None, allow_extra_outputs=False): + def __init__(self, feval, name=None, allow_extra_outputs=False, + output_names=None, label_names=None): if name is None: name = feval.__name__ if name.find('<') != -1: name = 'custom(%s)' % name - super(CustomMetric, self).__init__(name) + super(CustomMetric, self).__init__( + name, feval=feval, + allow_extra_outputs=allow_extra_outputs, + output_names=output_names, label_names=label_names) self._feval = feval self._allow_extra_outputs = allow_extra_outputs @@ -723,6 +1011,10 @@ def update(self, labels, preds): self.sum_metric += reval self.num_inst += 1 + def get_config(self): + raise NotImplementedError("CustomMetric cannot be serialized") + + # pylint: disable=invalid-name def np(numpy_feval, name=None, allow_extra_outputs=False): """Creates a custom evaluation metric that receives its inputs as numpy arrays. @@ -757,56 +1049,3 @@ def feval(label, pred): feval.__name__ = numpy_feval.__name__ return CustomMetric(feval, name, allow_extra_outputs) # pylint: enable=invalid-name - -def create(metric, **kwargs): - """Creates evaluation metric from metric names or instances of EvalMetric - or a custom metric function. - - Parameters - ---------- - metric : str or callable - Specifies the metric to create. - This argument must be one of the below: - - - Name of a metric. - - An instance of `EvalMetric`. - - A list, each element of which is a metric or a metric name. - - An evaluation function that computes custom metric for a given batch of - labels and predictions. - - Examples - -------- - >>> def custom_metric(label, pred): - ... return np.mean(np.abs(label - pred)) - ... - >>> metric1 = mx.metric.create('acc') - >>> metric2 = mx.metric.create(custom_metric) - >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) - """ - - if callable(metric): - return CustomMetric(metric) - elif isinstance(metric, EvalMetric): - return metric - elif isinstance(metric, list): - composite_metric = CompositeEvalMetric() - for child_metric in metric: - composite_metric.add(create(child_metric, **kwargs)) - return composite_metric - - metrics = { - 'acc': Accuracy, - 'accuracy': Accuracy, - 'ce': CrossEntropy, - 'f1': F1, - 'mae': MAE, - 'mse': MSE, - 'rmse': RMSE, - 'top_k_accuracy': TopKAccuracy - } - - try: - return metrics[metric.lower()](**kwargs) - except: - raise ValueError("Metric must be either callable or in {}".format( - metrics.keys())) diff --git a/python/mxnet/module/executor_group.py b/python/mxnet/module/executor_group.py index e8fe360e030d..2903399a4b96 100644 --- a/python/mxnet/module/executor_group.py +++ b/python/mxnet/module/executor_group.py @@ -3,6 +3,8 @@ """Executor group is a convenient tool for managing a group of executors.""" import logging +from collections import OrderedDict + import numpy as np from .. 
import context as ctx @@ -197,10 +199,14 @@ def __init__(self, symbol, contexts, workload, data_shapes, label_shapes, param_ self.data_shapes = None self.label_shapes = None + self.data_names = None + self.label_names = None self.data_layouts = None self.label_layouts = None + self.output_names = self.symbol.list_outputs() self.output_layouts = [DataDesc.get_batch_axis(self.symbol[name].attr('__layout__')) - for name in self.symbol.list_outputs()] + for name in self.output_names] + self.num_outputs = len(self.symbol.list_outputs()) self.bind_exec(data_shapes, label_shapes, shared_group) @@ -302,6 +308,9 @@ def bind_exec(self, data_shapes, label_shapes, shared_group=None, reshape=False) self.data_shapes = data_shapes self.label_shapes = label_shapes + self.data_names = [i.name for i in self.data_shapes] + if label_shapes is not None: + self.label_names = [i.name for i in self.label_shapes] self._collect_arrays() def reshape(self, data_shapes, label_shapes): @@ -370,10 +379,8 @@ def forward(self, data_batch, is_train=None): if is_train is None: is_train = self.for_training - if self.label_arrays is not None: - assert not is_train or data_batch.label - if data_batch.label: - _load_label(data_batch, self.label_arrays, self.label_layouts) + if self.label_arrays is not None and data_batch.label: + _load_label(data_batch, self.label_arrays, self.label_layouts) for exec_ in self.execs: exec_.forward(is_train=is_train) @@ -391,8 +398,10 @@ def get_output_shapes(self): concat_shapes.append((key, tuple(the_shape))) return concat_shapes - def get_outputs(self, merge_multi_context=True): + def get_outputs(self, merge_multi_context=True, begin=0, end=None): """Get outputs of the previous forward computation. + If begin or end is specified, return [begin, end)-th outputs, + otherwise return all outputs. Parameters ---------- @@ -401,6 +410,10 @@ def get_outputs(self, merge_multi_context=True): will be collected from multiple devices. A `True` value indicate that we should merge the collected results so that they look like from a single executor. + begin : int + starting index of returned outputs in all outputs + end : int or None + ending index (excluded) of returned outputs. Returns ------- @@ -408,8 +421,10 @@ def get_outputs(self, merge_multi_context=True): is like ``[[out1_dev1, out1_dev2], [out2_dev1, out2_dev2]]``. All the output elements are `NDArray`. """ + if end is None: + end = self.num_outputs outputs = [[exec_.outputs[i] for exec_ in self.execs] - for i in range(len(self.execs[0].outputs))] + for i in range(begin, end)] if merge_multi_context: outputs = _merge_multi_context(outputs, self.output_layouts) return outputs @@ -508,7 +523,9 @@ def backward(self, out_grads=None): exec_.backward(out_grads=out_grads_slice) def update_metric(self, eval_metric, labels): - """Accumulate the performance according to `eval_metric` on all devices. + """Accumulate the performance according to `eval_metric` on all devices + by comparing outputs from [begin, end) to labels. By default use all + outputs. Parameters ---------- @@ -516,6 +533,10 @@ def update_metric(self, eval_metric, labels): The metric used for evaluation. labels : list of NDArray Typically comes from `label` of a `DataBatch`. + begin : int + Starting index of used outputs. + end : int or None + Ending index of used outputs. 
""" for texec, islice in zip(self.execs, self.slices): labels_slice = [] @@ -532,7 +553,9 @@ def update_metric(self, eval_metric, labels): else: labels_slice.append(label) - eval_metric.update(labels_slice, texec.outputs) + labels = OrderedDict(zip(self.label_names, labels_slice)) + preds = OrderedDict(zip(self.output_names, texec.outputs)) + eval_metric.update_dict(labels, preds) def _bind_ith_exec(self, i, data_shapes, label_shapes, shared_group): """Internal utility function to bind the i-th executor. diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py index 067126f8221f..53afb0639abf 100644 --- a/python/mxnet/ndarray.py +++ b/python/mxnet/ndarray.py @@ -506,9 +506,13 @@ def reshape(self, shape): shape : tuple of int The new shape should not change the array size, namely ``np.prod(new_shape)`` should be equal to ``np.prod(self.shape)``. - One shape dimension can be -1. In this case, the value is inferred + + One dimension can be -1. In this case, the value is inferred from the length of the array and remaining dimensions. + 0 Dimensions in shape will be copied from original shape, i.e. + if x.shape == (3, 4, 5), x.reshape((0, 20)).shape will be (3, 20). + Returns ------- @@ -538,22 +542,6 @@ def reshape(self, shape): """ handle = NDArrayHandle() - # Infer the correct size for dim == -1 - shape = list(shape) - for index, element in enumerate(shape): - if element == -1: - remainder = list(self.shape) - for i, e in enumerate(shape): # pylint: disable=invalid-name - if i != index and e == -1: - raise ValueError('Only one dimension can be inferred.') - try: - remainder.remove(e) - except ValueError: - pass - shape[index] = np.product(remainder) - # We have already gone through the whole shape, break - break - # Actual reshape check_call(_LIB.MXNDArrayReshape(self.handle, len(shape), @@ -959,7 +947,7 @@ def empty(shape, ctx=None, dtype=mx_real_t): ctx = Context.default_ctx return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype)) -def zeros(shape, ctx=None, dtype=mx_real_t): +def zeros(shape, ctx=None, dtype=mx_real_t, **kwargs): """Returns a new array filled with all zeros, with the given shape and type. Parameters @@ -985,13 +973,14 @@ def zeros(shape, ctx=None, dtype=mx_real_t): >>> mx.nd.zeros((1,2), mx.gpu(0), 'float16').asnumpy() array([[ 0., 0.]], dtype=float16) """ + # pylint: disable= unused-argument if ctx is None: ctx = Context.default_ctx # pylint: disable= no-member, protected-access return _internal._zeros(shape=shape, ctx=ctx, dtype=dtype) # pylint: enable= no-member, protected-access -def ones(shape, ctx=None, dtype=mx_real_t): +def ones(shape, ctx=None, dtype=mx_real_t, **kwargs): """Returns a new array filled with all ones, with the given shape and type. 
Parameters @@ -1018,6 +1007,7 @@ def ones(shape, ctx=None, dtype=mx_real_t): >>> mx.nd.ones((1,2), dtype='float16').asnumpy() array([[ 1., 1.]], dtype=float16) """ + # pylint: disable= unused-argument if ctx is None: ctx = Context.default_ctx # pylint: disable= no-member, protected-access diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index df30fb071b5c..d2d394076e89 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -328,11 +328,6 @@ class SGD(Optimizer): def __init__(self, momentum=0.0, **kwargs): super(SGD, self).__init__(**kwargs) self.momentum = momentum - self.kwargs = {'rescale_grad': self.rescale_grad} - if self.momentum > 0: - self.kwargs['momentum'] = self.momentum - if self.clip_gradient: - self.kwargs['clip_gradient'] = self.clip_gradient def create_state(self, index, weight): if self.momentum == 0.0: @@ -347,12 +342,18 @@ def update(self, index, weight, grad, state): wd = self._get_wd(index) self._update_count(index) + kwargs = {'rescale_grad': self.rescale_grad} + if self.momentum > 0: + kwargs['momentum'] = self.momentum + if self.clip_gradient: + kwargs['clip_gradient'] = self.clip_gradient + if state is not None: sgd_mom_update(weight, grad, state, out=weight, - lr=lr, wd=wd, **self.kwargs) + lr=lr, wd=wd, **kwargs) else: sgd_update(weight, grad, out=weight, - lr=lr, wd=wd, **self.kwargs) + lr=lr, wd=wd, **kwargs) @register class DCASGD(Optimizer): @@ -506,10 +507,7 @@ def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, super(Adam, self).__init__(learning_rate=learning_rate, **kwargs) self.beta1 = beta1 self.beta2 = beta2 - self.kwargs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon, - 'rescale_grad': self.rescale_grad} - if self.clip_gradient: - self.kwargs['clip_gradient'] = self.clip_gradient + self.epsilon = epsilon def create_state(self, index, weight): return (zeros(weight.shape, weight.context, dtype=weight.dtype), # mean @@ -526,9 +524,15 @@ def update(self, index, weight, grad, state): coef1 = 1. - self.beta1**t coef2 = 1. 
- self.beta2**t lr *= math.sqrt(coef2)/coef1 + + kwargs = {'beta1': self.beta1, 'beta2': self.beta2, 'epsilon': self.epsilon, + 'rescale_grad': self.rescale_grad} + if self.clip_gradient: + kwargs['clip_gradient'] = self.clip_gradient + mean, var = state adam_update(weight, grad, mean, var, out=weight, - lr=lr, wd=wd, **self.kwargs) + lr=lr, wd=wd, **kwargs) @register class AdaGrad(Optimizer): @@ -606,15 +610,8 @@ def __init__(self, learning_rate=0.001, gamma1=0.9, gamma2=0.9, self.gamma1 = gamma1 self.gamma2 = gamma2 self.centered = centered + self.epsilon = epsilon self.clip_weights = clip_weights - self.kwargs = {'gamma1': gamma1, 'epsilon': epsilon, - 'rescale_grad': self.rescale_grad} - if self.centered: - self.kwargs['gamma2'] = gamma2 - if self.clip_gradient: - self.kwargs['clip_gradient'] = self.clip_gradient - if self.clip_weights: - self.kwargs['clip_weights'] = self.clip_weights def create_state(self, index, weight): if self.centered: @@ -631,14 +628,24 @@ def update(self, index, weight, grad, state): lr = self._get_lr(index) wd = self._get_wd(index) self._update_count(index) + + kwargs = {'gamma1': self.gamma1, 'epsilon': self.epsilon, + 'rescale_grad': self.rescale_grad} + if self.centered: + kwargs['gamma2'] = self.gamma2 + if self.clip_gradient: + kwargs['clip_gradient'] = self.clip_gradient + if self.clip_weights: + kwargs['clip_weights'] = self.clip_weights + if not self.centered: (n, ) = state rmsprop_update( - weight, grad, n, out=weight, lr=lr, wd=wd, **self.kwargs) + weight, grad, n, out=weight, lr=lr, wd=wd, **kwargs) else: n, g, delta = state rmspropalex_update(weight, grad, n, g, delta, out=weight, - lr=lr, wd=wd, **self.kwargs) + lr=lr, wd=wd, **kwargs) @register class AdaDelta(Optimizer): diff --git a/python/mxnet/registry.py b/python/mxnet/registry.py new file mode 100644 index 000000000000..fdd095e1ebb5 --- /dev/null +++ b/python/mxnet/registry.py @@ -0,0 +1,141 @@ +# coding: utf-8 +# pylint: disable=no-member + +"""Registry for serializable objects.""" +from __future__ import absolute_import + +import json +import warnings + +from .base import string_types + +_REGISTRY = {} + + +def get_register_func(base_class, nickname): + """Get registrator function. + + Parameters + ---------- + base_class : type + base class for classes that will be reigstered + nickname : str + nickname of base_class for logging + + Returns + ------- + a registrator function + """ + if base_class not in _REGISTRY: + _REGISTRY[base_class] = {} + registry = _REGISTRY[base_class] + + def register(klass, name=None): + """Register functions""" + assert issubclass(klass, base_class), \ + "Can only register subclass of %s"%base_class.__name__ + if name is None: + name = klass.__name__.lower() + if name in registry: + warnings.warn( + "\033[91mNew %s %s.%s registered with name %s is" + "overriding existing %s %s.%s\033[0m"%( + nickname, klass.__module__, klass.__name__, name, + nickname, registry[name].__module__, registry[name].__name__), + UserWarning, stacklevel=2) + registry[name] = klass + return klass + + register.__doc__ = "Register %s to the %s factory"%(nickname, nickname) + return register + + +def get_alias_func(base_class, nickname): + """Get registrator function that allow aliases. 
+ + Parameters + ---------- + base_class : type + base class for classes that will be reigstered + nickname : str + nickname of base_class for logging + + Returns + ------- + a registrator function + """ + register = get_register_func(base_class, nickname) + + def alias(*aliases): + """alias registrator""" + def reg(klass): + """registrator function""" + for name in aliases: + register(klass, name) + return klass + return reg + return alias + + +def get_create_func(base_class, nickname): + """Get creator function + + Parameters + ---------- + base_class : type + base class for classes that will be reigstered + nickname : str + nickname of base_class for logging + + Returns + ------- + a creator function + """ + if base_class not in _REGISTRY: + _REGISTRY[base_class] = {} + registry = _REGISTRY[base_class] + + def create(*args, **kwargs): + """Create instance from config""" + if len(args): + name = args[0] + args = args[1:] + else: + name = kwargs.pop(nickname) + + if isinstance(name, base_class): + assert len(args) == 0 and len(kwargs) == 0, \ + "%s is already an instance. Additional arguments are invalid"%(nickname) + return name + + if isinstance(name, dict): + return create(**name) + + assert isinstance(name, string_types), "%s must be of string type"%nickname + + if name.startswith('['): + assert not args and not kwargs + name, kwargs = json.loads(name) + return create(name, **kwargs) + elif name.startswith('{'): + assert not args and not kwargs + kwargs = json.loads(name) + return create(**kwargs) + + name = name.lower() + assert name in registry, \ + "%s is not registered. Please register with %s.register first"%( + str(name), nickname) + return registry[name](*args, **kwargs) + + create.__doc__ = """Create a %s instance from config. + +Parameters +---------- +%s : str or %s instance + class name of desired instance. If is a instance, + it will be returned directly. +**kwargs : dict + arguments to be passed to constructor"""%(nickname, nickname, base_class.__name__) + + return create diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index b2fdf595fed9..13cfd9da183e 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -47,8 +47,11 @@ class Symbol(SymbolBase): def __repr__(self): """Get a string representation of the symbol.""" name = self.name - return '<%s %s>' % (self.__class__.__name__, - 'Grouped' if name is None else name) + if name is None: + name = ', '.join([i.name for i in self]) + return '<%s group [%s]>' % (self.__class__.__name__, name) + else: + return '<%s %s>' % (self.__class__.__name__, name) def __iter__(self): """Returns a generator object of symbol. @@ -1432,6 +1435,24 @@ def eval(self, ctx=cpu(), **kwargs): """ return self.bind(ctx, kwargs).forward() + def reshape(self, shape): + """Shorthand for mxnet.sym.reshape. + + Parameters + ---------- + shape : tuple of int + The new shape should not change the array size, namely + ``np.prod(new_shape)`` should be equal to ``np.prod(self.shape)``. + One shape dimension can be -1. In this case, the value is inferred + from the length of the array and remaining dimensions. + + + Returns + ------- + Symbol + A reshaped symbol. 
+ """ + return reshape(self, shape=shape) def var(name, attr=None, shape=None, lr_mult=None, wd_mult=None, dtype=None, init=None, **kwargs): @@ -1485,7 +1506,9 @@ def var(name, attr=None, shape=None, lr_mult=None, wd_mult=None, dtype=None, ini if dtype is not None: attr['__dtype__'] = str(_DTYPE_NP_TO_MX[_numpy.dtype(dtype).type]) if init is not None: - attr['__init__'] = init.dumps() + if not isinstance(init, string_types): + init = init.dumps() + attr['__init__'] = init for k, v in kwargs.items(): if k.startswith('__') and k.endswith('__'): attr[k] = str(v) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 154eb1bc7969..7e5194620b39 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -296,8 +296,32 @@ MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle, NDArrayHandle *out) { NDArray *ptr = new NDArray(); API_BEGIN(); + NDArray *arr = static_cast(handle); TShape new_shape(dims, dims+ndim); - *ptr = static_cast(handle)->Reshape(new_shape); + int size = 1; + int pos = -1; + for (int i = 0; i < ndim; ++i) { + int dim = dims[i]; + if (dim == -1) { + CHECK_EQ(pos, -1) + << "Invalid new shape " << new_shape + << ": more than one dimensions are -1"; + pos = i; + } else { + if (dim == 0) { + CHECK_LT(i, arr->shape().ndim()) + << "Invalid new shape " << new_shape + << ": 0 dimension exceeds original shape " << arr->shape(); + dim = arr->shape()[i]; + } + size *= dim; + new_shape[i] = dim; + } + } + if (pos >= 0) { + new_shape[pos] = arr->shape().Size() / size; + } + *ptr = arr->Reshape(new_shape); *out = ptr; API_END_HANDLE_ERROR(delete ptr); } diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index a51c7a84805c..c633e8609cd4 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -86,6 +86,8 @@ void SetNDInputsOutputs(const nnvm::Op* op, *num_outputs = num_visible_outputs; ndoutputs.resize(infered_num_outputs); } else { + CHECK(!AutogradRuntime::Get()->IsTraining()) + << "Cannot assign to NDArray or specify 'out' when training with autograd"; CHECK(*num_outputs == infered_num_outputs || *num_outputs == num_visible_outputs) << "Expecting " << infered_num_outputs << " (all) or " << num_visible_outputs << " (visible only) outputs, got " @@ -372,7 +374,7 @@ int MXImperativeInvoke(AtomicSymbolCreator creator, if (fn) { if (AutogradRuntime::Get()->IsTraining()) { - AutogradRuntime::Get()->RecordImperativeFCompute(fn, op, + AutogradRuntime::Get()->RecordImperativeFCompute(op, attrs, &ndinputs, &ndoutputs); } PushFCompute(fn, op, attrs, ctx, read_vars, write_vars, diff --git a/src/ndarray/autograd.cc b/src/ndarray/autograd.cc index 69514297584d..ff7049a10c6e 100644 --- a/src/ndarray/autograd.cc +++ b/src/ndarray/autograd.cc @@ -67,8 +67,7 @@ void AutogradRuntime::MarkVariables( } } -void AutogradRuntime::RecordImperativeFCompute(FCompute fn, - const nnvm::Op* op, +void AutogradRuntime::RecordImperativeFCompute(const nnvm::Op* op, const nnvm::NodeAttrs& attrs, std::vector *p_inputs, std::vector *p_outputs) { @@ -109,9 +108,16 @@ AGNodePtr AutogradRuntime::RecordOp(const nnvm::Op* op, ag_node->opr = opr; for (uint32_t i = 0; i < outputs.size(); ++i) { - outputs[i].entry_.clear(); - ag_node->outputs.push_back(outputs[i]); - outputs[i].entry_ = AGNodeEntry{ag_node, i, 0}; + if (outputs[i].entry_.ag_node == nullptr || + !outputs[i].entry_.ag_node->out_grads.size()) { + outputs[i].entry_.clear(); + ag_node->outputs.push_back(outputs[i]); + outputs[i].entry_ = AGNodeEntry{ag_node, i, 0}; + } else { + NDArray copy = outputs[i]; + copy.entry_.clear(); + 
ag_node->outputs.push_back(copy); + } } for (size_t i = 0; i < inputs.size(); ++i) { @@ -130,6 +136,7 @@ AGNodePtr AutogradRuntime::RecordOp(const nnvm::Op* op, } void AutogradRuntime::ComputeGradient(const std::vector& outputs) { + static auto& fmutate_inputs = nnvm::Op::GetAttr("FMutateInputs"); std::vector heads; Symbol sym; NodeEntryMap feed_dict; @@ -139,29 +146,44 @@ void AutogradRuntime::ComputeGradient(const std::vector& outputs) { << "computation history. Did you forget to set is_training?"; heads.emplace_back(i.entry_); sym.outputs.emplace_back(i.entry_.nn_entry()); - feed_dict.insert({i.entry_.nn_entry(), i}); } + std::unordered_set mutable_set; + std::vector vlist; std::vector args, args_grad; + std::vector aux_states; std::vector grad_reqs; std::unordered_map> saved_opr; AGDFSVisit(heads, [&](const AGNodePtr& n) { - if (n->opr != nullptr) { - saved_opr.insert({n->nn_node.get(), n->opr}); - } else if (n->nn_node->is_variable()) { - args.push_back(n->outputs[0]); - args_grad.push_back(n->out_grads[0]); - grad_reqs.push_back(n->grad_req); + if (n->nn_node->is_variable()) { + vlist.push_back(n); + } else { + if (n->opr != nullptr) { + saved_opr.insert({n->nn_node.get(), n->opr}); + } + if (fmutate_inputs.count(n->nn_node->op())) { + for (uint32_t i : fmutate_inputs[n->nn_node->op()](n->nn_node->attrs)) { + mutable_set.insert(n->inputs[i].ag_node.get()); + } + } } - for (const auto& i : n->inputs) { - feed_dict.insert({i.nn_entry(), i.ag_node->outputs[i.index]}); + for (uint32_t i = 0; i < n->outputs.size(); ++i) { + feed_dict.insert({NodeEntry{n->nn_node, i, 0}, n->outputs[i]}); } }); + for (const auto& n : vlist) { + if (mutable_set.count(n.get())) { + aux_states.push_back(n->outputs[0]); + } else { + args.push_back(n->outputs[0]); + args_grad.push_back(n->out_grads[0]); + grad_reqs.push_back(n->grad_req); + } + } if (args.size()) { std::map ctx_map; - std::vector aux_states; auto exec = new exec::GraphExecutor(); // (TODO) too hack here exec->saved_opr_ = saved_opr; diff --git a/src/ndarray/autograd.h b/src/ndarray/autograd.h index c4ad0c99bc1c..6a18851de9e3 100644 --- a/src/ndarray/autograd.h +++ b/src/ndarray/autograd.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -65,8 +66,7 @@ class AutogradRuntime { const std::vector& grad_reqs, const std::vector& gradients); /*! \brief record imperative operator which is executed by fcompute. 
*/ - void RecordImperativeFCompute(FCompute fn, - const nnvm::Op* op, + void RecordImperativeFCompute(const nnvm::Op* op, const nnvm::NodeAttrs& attrs, std::vector* p_inputs, std::vector* p_outputs); diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 3b1eed9940a4..c19a82b164c4 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -11,6 +11,7 @@ #include #include #include "./ndarray_function.h" +#include "./autograd.h" #if MXNET_USE_OPENCV #include @@ -22,6 +23,73 @@ DMLC_REGISTRY_ENABLE(::mxnet::NDArrayFunctionReg); namespace mxnet { +NDArray NDArray::Reshape(const TShape &shape) const { + using namespace autograd; + CHECK_GE(shape_.Size(), shape.Size()) + << "NDArray.Reshape: target shape size is different from current shape"; + NDArray ret = *this; + ret.shape_ = shape; + if (AutogradRuntime::Get()->IsTraining()) { + // fake a Reshape op + ret.entry_.clear(); + const nnvm::Op* op = nnvm::Op::Get("Reshape"); + nnvm::NodeAttrs attrs; + attrs.op = op; + std::ostringstream os; + os << shape; + attrs.dict.insert({"shape", os.str()}); + op->attr_parser(&attrs); + std::vector inputs, outputs; + inputs.emplace_back(*this); + outputs.emplace_back(std::move(ret)); + AutogradRuntime::Get()->RecordImperativeFCompute( + op, attrs, &inputs, &outputs); + return outputs[0]; + } else { + return ret; + } +} + + +NDArray NDArray::Slice(index_t begin, index_t end) const { + using namespace autograd; + NDArray ret = *this; + CHECK(!is_none()) << "NDArray is not initialized"; + CHECK_GE(shape_[0], end) << "Slice end index out of range"; + size_t length = shape_.ProdShape(1, shape_.ndim()); + ret.offset_ += begin * length; + ret.shape_[0] = end - begin; + if (AutogradRuntime::Get()->IsTraining()) { + // fake a slice_axis op + ret.entry_.clear(); + const nnvm::Op* op = nnvm::Op::Get("slice_axis"); + nnvm::NodeAttrs attrs; + attrs.op = op; + attrs.dict.insert({"axis", "0"}); + attrs.dict.insert({"begin", std::to_string(begin)}); + attrs.dict.insert({"end", std::to_string(end)}); + op->attr_parser(&attrs); + std::vector inputs, outputs; + inputs.emplace_back(*this); + outputs.emplace_back(std::move(ret)); + AutogradRuntime::Get()->RecordImperativeFCompute( + op, attrs, &inputs, &outputs); + return outputs[0]; + } else { + return ret; + } +} + + +NDArray NDArray::At(index_t idx) const { + NDArray ret = this->Slice(idx, idx+1); + if (shape_.ndim() > 1) { + return ret.Reshape(TShape(shape_.data()+1, shape_.data()+shape_.ndim())); + } else { + return ret; + } +} + /*! * \brief run a ternary operation * \param lhs left operand @@ -545,63 +613,6 @@ NDArray &NDArray::operator/=(const real_t &src) { return ScalarOpApply(this, src); } -/*! 
- * \brief Get a broadcasted NDArray - * \param src the source ndarray - * \param dim dimension to broadcast - * \param size size after broadcasting - */ -void Broadcast(const NDArray& src, int dim, int size, NDArray *out) { - CHECK(0 <= dim && dim < static_cast(src.shape().ndim())) - << "Broadcast dimension out of bound."; - CHECK(src.shape()[dim] == 1) << "Cannot broadcast a dimension that is not 1."; - TShape new_shape = src.shape(); - new_shape[dim] = size; - if (out->is_none()) { - *out = NDArray(new_shape, src.ctx(), true, src.dtype()); - } else { - CHECK(out->ctx() == src.ctx()) << "target context mismatch"; - CHECK(out->shape() == new_shape) - << "invalid target shape: " << out->shape() << " should be: " << new_shape; - } - std::vector const_vars; - const_vars.push_back(src.var()); - size_t before = src.shape().ProdShape(0, dim); - size_t after = src.shape().ProdShape(dim + 1, src.shape().ndim()); - - // important: callback must always capture by value - NDArray ret = *out; - switch (src.ctx().dev_mask()) { - case cpu::kDevMask: { - Engine::Get()->PushSync([src, ret, before, size, after](RunContext ctx) { - ret.CheckAndAlloc(); - NDArray inter_in = src.Reshape(mshadow::Shape2(before, after)); - NDArray inter_out = ret.Reshape(mshadow::Shape3(before, size, after)); - TBlob tmp = inter_out.data(); - ndarray::EvalBroadcast(inter_in.data(), &tmp, size, ctx); - }, src.ctx(), const_vars, {ret.var()}, - FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME); - break; - } -#if MXNET_USE_CUDA - case gpu::kDevMask: { - Engine::Get()->PushSync([src, ret, before, size, after](RunContext ctx) { - ret.CheckAndAlloc(); - NDArray inter_in = src.Reshape(mshadow::Shape2(before, after)); - NDArray inter_out = ret.Reshape(mshadow::Shape3(before, size, after)); - TBlob tmp = inter_out.data(); - ndarray::EvalBroadcast(inter_in.data(), &tmp, size, ctx); - // Wait GPU kernel to complete - ctx.get_stream()->Wait(); - }, src.ctx(), const_vars, {ret.var()}, - FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME); - break; - } -#endif - default: LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR; - } -} - void NDArray::Save(dmlc::Stream *strm) const { // save shape shape_.Save(strm); @@ -857,23 +868,6 @@ void Imdecode(NDArray *ret, NDArray mean, size_t index, #endif // MXNET_USE_OPENCV } -MXNET_REGISTER_NDARRAY_FUN(_broadcast) -.set_type_mask(kAcceptEmptyMutateTarget | kNDArrayArgBeforeScalar) -.set_body([](NDArray **u, real_t *s, NDArray **out, - int num_params, char **param_keys, char **param_vals) { - Broadcast(*u[0], - static_cast(s[0]), - static_cast(s[1]), - out[0]); - }) -.set_num_use_vars(1) -.set_num_scalars(2) -.set_num_mutate_vars(1) -.describe("Broadcast array in the given axis to the given size") -.add_argument("src", "NDArray-or-Symbol", "source ndarray") -.add_argument("axis", "int", "axis to broadcast") -.add_argument("size", "int", "size of broadcast"); - MXNET_REGISTER_NDARRAY_FUN(_imdecode) .set_type_mask(kAcceptEmptyMutateTarget | kNDArrayArgBeforeScalar) .set_body([](NDArray **u, real_t *s, NDArray **out, diff --git a/src/operator/batch_norm.cc b/src/operator/batch_norm.cc index 74f43b60b217..92457e41002e 100644 --- a/src/operator/batch_norm.cc +++ b/src/operator/batch_norm.cc @@ -80,6 +80,8 @@ then set ``gamma`` to 1 and its gradient to 0. 
.add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization") .add_argument("gamma", "NDArray-or-Symbol", "gamma array") .add_argument("beta", "NDArray-or-Symbol", "beta array") +.add_argument("moving_mean", "NDArray-or-Symbol", "running mean of input") +.add_argument("moving_var", "NDArray-or-Symbol", "running variance of input") .add_arguments(BatchNormParam::__FIELDS__()); NNVM_REGISTER_OP(BatchNorm) diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h index 8bd2ff5c9d6e..0036befcdb6a 100644 --- a/src/operator/convolution-inl.h +++ b/src/operator/convolution-inl.h @@ -368,13 +368,26 @@ class ConvolutionProp : public OperatorProperty { << "incorrect stride size: " << param_.stride; CHECK_GT(param_.dilate.Size(), 0U) \ << "incorrect dilate size: " << param_.dilate; - CHECK(dilated_ksize_x <= AddPad(dshape[2], param_.pad[0])) - << "kernel size exceed input"; Shape<3> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1; + oshape[2] = dshape[2] ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : 0; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCW, param_.layout.value())); + // Perform incomplete shape inference. Fill in the missing values in data shape. + // 1) We can always fill in the batch_size. + // 2) We can back-calculate the input height/width if the corresponding stride is 1. + oshape = ConvertLayout((*out_shape)[0].get<3>(), param_.layout.value(), kNCW); + dshape[0] = oshape[0]; + if (oshape[2] && param_.stride[0] == 1) { + dshape[2] = oshape[2] + dilated_ksize_x - 1 - 2 * param_.pad[0]; + } + SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, + ConvertLayout(dshape, kNCW, param_.layout.value())); + // Check whether the kernel sizes are valid + if (dshape[2] != 0) { + CHECK_LE(dilated_ksize_x, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; + } return true; } else if (param_.kernel.ndim() == 2) { // 2d conv @@ -406,18 +419,20 @@ class ConvolutionProp : public OperatorProperty { Shape<4> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1; - oshape[3] = (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1; + oshape[2] = dshape[2] ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : 0; + oshape[3] = dshape[3] ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : 0; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input height/width if the corresponding stride is 1. 
       oshape = ConvertLayout((*out_shape)[0].get<4>(), param_.layout.value(), kNCHW);
       dshape[0] = oshape[0];
-      if (param_.stride[0] == 1) {
+      if (oshape[2] && param_.stride[0] == 1) {
         dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param_.pad[0];
       }
-      if (param_.stride[1] == 1) {
+      if (oshape[3] && param_.stride[1] == 1) {
         dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param_.pad[1];
       }
       SHAPE_ASSIGN_CHECK(*in_shape, conv::kData,
@@ -464,22 +479,25 @@ class ConvolutionProp : public OperatorProperty {
       Shape<5> oshape;
       oshape[0] = dshape[0];
       oshape[1] = param_.num_filter;
-      oshape[2] = (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1;
-      oshape[3] = (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1;
-      oshape[4] = (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1;
+      oshape[2] = dshape[2] ?
+          (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : 0;
+      oshape[3] = dshape[3] ?
+          (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : 0;
+      oshape[4] = dshape[4] ?
+          (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : 0;
       SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value()));
       // Perform incomplete shape inference. Fill in the missing values in data shape.
       // 1) We can always fill in the batch_size.
       // 2) We can back-calculate the input depth/height/width if the corresponding stride is 1.
       oshape = ConvertLayout((*out_shape)[0].get<5>(), param_.layout.value(), kNCDHW);
       dshape[0] = oshape[0];
-      if (param_.stride[0] == 1) {
+      if (oshape[2] && param_.stride[0] == 1) {
        dshape[2] = oshape[2] + dilated_ksize_d - 1 - 2 * param_.pad[0];
       }
-      if (param_.stride[1] == 1) {
+      if (oshape[3] && param_.stride[1] == 1) {
         dshape[3] = oshape[3] + dilated_ksize_y - 1 - 2 * param_.pad[1];
       }
-      if (param_.stride[2] == 1) {
+      if (oshape[4] && param_.stride[2] == 1) {
         dshape[4] = oshape[4] + dilated_ksize_x - 1 - 2 * param_.pad[2];
       }
       SHAPE_ASSIGN_CHECK(*in_shape, conv::kData,
diff --git a/src/operator/pooling.cu b/src/operator/pooling.cu
index c420852b1c8d..bc7716b946af 100644
--- a/src/operator/pooling.cu
+++ b/src/operator/pooling.cu
@@ -15,25 +15,25 @@ namespace op {

 template<>
 Operator *CreateOp<gpu>(PoolingParam param, int dtype) {
+  Operator *op = NULL;
 #if MXNET_USE_CUDNN == 1
-  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
-    if (!param.cudnn_off) {
+  if (!param.cudnn_off && param.kernel.ndim() > 1) {
+    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
       switch (param.pool_type) {
         case pool_enum::kMaxPooling:
-          return new CuDNNPoolingOp<DType>(param);
+          op = new CuDNNPoolingOp<DType>(param);
+          break;
         case pool_enum::kAvgPooling:
-          return new CuDNNPoolingOp<DType>(param);
+          op = new CuDNNPoolingOp<DType>(param);
+          break;
         case pool_enum::kSumPooling:
           LOG(WARNING) << "Sum pooling is not supported by cudnn, MXNet sum pooling is applied.";
-          return new PoolingOp<gpu, DType>(param);
-        default:
-          LOG(FATAL) << "unknown pooling type";
-          return NULL;
+          break;
       }
-    }
-  });
+    });
+  }
+  if (op) return op;
 #endif  // MXNET_USE_CUDNN
-  Operator *op = NULL;
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
     if (pool_enum::kMaxPooling == param.pool_type
         || pool_enum::kAvgPooling == param.pool_type
@@ -48,4 +48,3 @@ Operator *CreateOp<gpu>(PoolingParam param, int dtype) {

 }  // namespace op
 }  // namespace mxnet
-
diff --git a/src/operator/tensor/elemwise_unary_op.cc b/src/operator/tensor/elemwise_unary_op.cc
index e5a8ed019768..ce29a2fdb308 100644
--- a/src/operator/tensor/elemwise_unary_op.cc
+++ b/src/operator/tensor/elemwise_unary_op.cc
@@ -96,6 +96,10 @@ MXNET_OPERATOR_REGISTER_UNARY(make_loss)
 .describe(R"code(Stops gradient computation.
 .. note:: ``make_loss`` is deprecated, use ``MakeLoss``.
 )code" ADD_FILELINE)
+.set_attr<nnvm::FListOutputNames>("FListOutputNames",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::string>{"loss"};
+  })
 .set_attr<FCompute>("FCompute<cpu>", IdentityCompute<cpu>)
 .set_attr<nnvm::FGradient>("FGradient",
   [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py
index 1abff65c5064..c84438d72363 100644
--- a/tests/python/unittest/test_autograd.py
+++ b/tests/python/unittest/test_autograd.py
@@ -1,5 +1,5 @@
 import mxnet.ndarray as nd
-from mxnet.contrib.autograd import grad, grad_and_loss, train, test
+from mxnet.contrib.autograd import *
 from mxnet.test_utils import *

 def autograd_assert(*args, **kwargs):
@@ -73,16 +73,18 @@ def f_with_mode(a, b, mode):

     autograd_assert(a, b, False,
         argnum=[0, 1], func=f_with_mode, grad_func=f_mul_grad)

+
 def test_training():
     x = nd.ones((10, 10))
-    with train():
+    with train_section():
         y = nd.Dropout(x, p=0.5)
         assert not (y.asnumpy() == x.asnumpy()).all()
-    with test():
+    with test_section():
         y = nd.Dropout(x, p=0.5)
         assert (y.asnumpy() == x.asnumpy()).all()

+
 if __name__ == "__main__":
     test_training()
     test_unary_func()
diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py
new file mode 100644
index 000000000000..98740b05ee32
--- /dev/null
+++ b/tests/python/unittest/test_metric.py
@@ -0,0 +1,22 @@
+import mxnet as mx
+import json
+
+def check_metric(metric, *args, **kwargs):
+    metric = mx.metric.create(metric, *args, **kwargs)
+    str_metric = json.dumps(metric.get_config())
+    metric2 = mx.metric.create(str_metric)
+
+    assert metric.get_config() == metric2.get_config()
+
+
+def test_metrics():
+    check_metric('acc', axis=0)
+    check_metric('f1')
+    check_metric('perplexity', -1)
+    composite = mx.metric.create(['acc', 'f1'])
+    check_metric(composite)
+
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule()
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 8673673cfdf7..7f0a1d2b6301 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -124,6 +124,7 @@ def test_ndarray_reshape():
     true_res = mx.nd.array([[1, 2, 3, 4],
                             [5, 6, 7, 8]])
     assert same(tensor.reshape((2, -1)).asnumpy(), true_res.asnumpy())
+    assert same(tensor.reshape((0, -1)).asnumpy(), true_res.asnumpy())
     true_res = mx.nd.array([[1, 2],
                             [3, 4],
                             [5, 6],
@@ -618,27 +619,5 @@ def test_iter():


 if __name__ == '__main__':
-    test_broadcast_binary()
-    test_ndarray_setitem()
-    test_ndarray_crop()
-    test_ndarray_concatenate()
-    test_broadcast()
-    test_ndarray_elementwise()
-    test_ndarray_elementwisesum()
-    test_ndarray_slice()
-    test_ndarray_pickle()
-    test_ndarray_saveload()
-    test_ndarray_copy()
-    test_ndarray_negate()
-    test_ndarray_scalar()
-    test_clip()
-    test_dot()
-    test_ndarray_choose()
-    test_ndarray_onehot()
-    test_ndarray_fill()
-    test_reduce()
-    test_arange()
-    test_order()
-    test_ndarray_equal()
-    test_take()
-    test_iter()
+    import nose
+    nose.runmodule()
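
Usage note for the MXNDArrayReshape change above: a 0 in the target shape now keeps the corresponding dimension of the source array, and a single -1 is inferred from the remaining elements. A minimal Python sketch of the front-end behaviour, mirroring the new assertion in test_ndarray_reshape; only the plain NDArray.reshape call is assumed, nothing beyond this patch:

import mxnet as mx

a = mx.nd.array([[1, 2, 3, 4], [5, 6, 7, 8]])   # shape (2, 4)
b = a.reshape((0, -1))   # 0 keeps dim 0 of a, -1 is inferred -> shape (2, 4)
c = a.reshape((-1, 2))   # -1 is inferred from the total size -> shape (4, 2)
print(b.shape, c.shape)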
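The convolution-inl.h hunks switch InferShape to incomplete inference: an unknown (0) spatial dimension no longer trips the kernel-size check, it simply propagates as 0, and with stride 1 the input height/width can be back-calculated from a known output. A sketch of the visible effect through the symbolic API; infer_shape_partial and the exact returned shapes are assumptions, not part of this patch:

import mxnet as mx

data = mx.sym.Variable('data')
conv = mx.sym.Convolution(data=data, kernel=(3, 3), num_filter=8, stride=(1, 1), pad=(1, 1))
# Leave height/width unknown (0): batch size and channels still propagate.
arg_shapes, out_shapes, _ = conv.infer_shape_partial(data=(4, 3, 0, 0))
print(out_shapes)   # expected to be [(4, 8, 0, 0)] -- unknown dims stay 0 instead of raising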
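With the ndarray.cc changes, NDArray::Reshape and NDArray::Slice are recorded as fake Reshape/slice_axis nodes whenever a training section is active, so gradients can flow through reshaping and basic indexing done imperatively. A rough sketch using the contrib autograd front end already exercised in test_autograd.py; the head-gradient convention of grad_and_loss and the zero-padded slice gradient are assumptions here:

import mxnet.ndarray as nd
from mxnet.contrib import autograd

def f(x):
    # x[1:3] goes through NDArray::Slice, which is now recorded as slice_axis,
    # so only the selected rows should receive gradient.
    return x[1:3] * 2

grad_func = autograd.grad_and_loss(f)
grad_vals, loss = grad_func(nd.ones((4, 4)))
print(grad_vals[0].asnumpy())   # expect 2s in rows 1 and 2, 0s elsewhere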