From f07d35e897b5afd27e133074ea92bfba8f8cd78f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 15 Apr 2020 15:02:45 +0000 Subject: [PATCH 01/24] finish 5 changes --- python/mxnet/metric.py | 446 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 418 insertions(+), 28 deletions(-) diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index eb8f99a66d48..f790c2ded617 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -576,7 +576,9 @@ def update(self, labels, preds): num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) if num_dims == 1: - self.sum_metric += (pred_label.flat == label.flat).sum() + num_correct = (pred_label.flat == label.flat).sum() + self.sum_metric += num_correct + self.global_sum_metric += num_correct elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) @@ -594,9 +596,19 @@ class _BinaryClassificationMetrics(object): True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. + + Parameters + ---------- + beta : float, default 1 + weight of precision in harmonic mean. + threshold : float, default 0.5 + threshold for deciding whether the predictions are positive or negative. + """ - def __init__(self): + def __init__(self, threshold=0.5, beta=1): + self.threshold = threshold + self.beta = beta self.true_positives = 0 self.false_negatives = 0 self.false_positives = 0 @@ -619,9 +631,19 @@ def update_binary_stats(self, label, pred): """ pred = pred.asnumpy() label = label.asnumpy().astype('int32') - pred_label = numpy.argmax(pred, axis=1) - - check_label_shapes(label, pred) + if len(pred.shape) == 1: # assume each value refers to confidence(positive) + pass + elif pred.shape[-1] > 2: + raise ValueError("%s currently only supports binary classification." + % self.__class__.__name__) + elif pred.shape[-1] == 1: # classify positive when confidence(positive) > threshold + pred = pred.flat + else: + pred = pred.reshape(-1, 2)[:, 1] + pred_label = pred > self.threshold + label = label.flat + + check_label_shapes(label, pred_label) if len(numpy.unique(label)) > 2: raise ValueError("%s currently only supports binary classification." % self.__class__.__name__) @@ -674,14 +696,14 @@ def global_recall(self): @property def fscore(self): if self.precision + self.recall > 0: - return 2 * self.precision * self.recall / (self.precision + self.recall) + return (1 + self.beta ** 2) * self.precision * self.recall / (self.beta ** 2 * self.precision + self.recall) else: return 0. @property def global_fscore(self): if self.global_precision + self.global_recall > 0: - return 2 * self.global_precision * self.global_recall / (self.global_precision + self.global_recall) + return (1 + self.beta ** 2) * self.global_precision * self.global_recall / (self.beta ** 2 * self.global_precision + self.global_recall) else: return 0. @@ -723,6 +745,20 @@ def global_total_examples(self): return self.global_false_negatives + self.global_false_positives + \ self.global_true_negatives + self.global_true_positives + @property + def accuracy(self): + if self.total_examples > 0: + return float(self.true_positives + self.true_negatives) / self.total_examples + else: + return 0. + + @property + def global_accuracy(self): + if self.global_total_examples > 0: + return float(self.global_true_positives + self.global_true_negatives) / self.global_total_examples + else: + return 0. 
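# Illustrative sketch: a plain-numpy walk-through of the threshold/beta behaviour that
# _BinaryClassificationMetrics gains above. The arrays `probs` and `labels` are made-up
# values; the formulas mirror the fscore/accuracy properties in the diff.
import numpy as np

probs = np.array([0.2, 0.8, 0.4, 0.6])       # confidence(positive) per sample
labels = np.array([0, 1, 1, 1])
threshold, beta = 0.5, 2.0

pred = (probs > threshold).astype(int)       # classify positive when confidence > threshold
tp = int(((pred == 1) & (labels == 1)).sum())
fp = int(((pred == 1) & (labels == 0)).sum())
fn = int(((pred == 0) & (labels == 1)).sum())

precision = tp / (tp + fp) if tp + fp else 0.0
recall = tp / (tp + fn) if tp + fn else 0.0
f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
fbeta = ((1 + beta ** 2) * precision * recall /
         (beta ** 2 * precision + recall)) if precision + recall else 0.0
print(precision, recall, f1, fbeta)          # 1.0 0.667 0.8 0.714 -- beta > 1 favours recall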
+ def local_reset_stats(self): self.false_positives = 0 self.false_negatives = 0 @@ -768,6 +804,8 @@ class F1(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + threshold : float, default 0.5 + threshold for postive confidence value. average : str, default 'macro' Strategy to be used for aggregating across mini-batches. "macro": average the F1 scores for each batch. @@ -784,9 +822,106 @@ class F1(EvalMetric): """ def __init__(self, name='f1', - output_names=None, label_names=None, average="macro"): + output_names=None, label_names=None, threshold=0.5, average="macro"): + self.average = average + self.metrics = _BinaryClassificationMetrics(threshold=threshold) + EvalMetric.__init__(self, name=name, + output_names=output_names, label_names=label_names, + has_global_stats=True) + + def update(self, labels, preds): + """Updates the internal evaluation result. + + Parameters + ---------- + labels : list of `NDArray` + The labels of the data. + + preds : list of `NDArray` + Predicted values. + """ + labels, preds = check_label_shapes(labels, preds, True) + + for label, pred in zip(labels, preds): + self.metrics.update_binary_stats(label, pred) + + if self.average == "macro": + self.sum_metric += self.metrics.fscore + self.global_sum_metric += self.metrics.global_fscore + self.num_inst += 1 + self.global_num_inst += 1 + self.metrics.reset_stats() + else: + self.sum_metric = self.metrics.fscore * self.metrics.total_examples + self.global_sum_metric = self.metrics.global_fscore * self.metrics.global_total_examples + self.num_inst = self.metrics.total_examples + self.global_num_inst = self.metrics.global_total_examples + + def reset(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 + self.metrics.reset_stats() + + def reset_local(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0 + self.metrics.local_reset_stats() + +@register +class Fbeta(EvalMetric): + """Computes the Fbeta score of a binary classification problem. + + The Fbeta score is equivalent to harmonic mean of the precision and recall, + where the best value is 1.0 and the worst value is 0.0. The formula for Fbeta score is:: + + Fbeta = (1 + beta ** 2) * (precision * recall) / (beta ** 2 * precision + recall) + + The formula for precision and recall is:: + + precision = true_positives / (true_positives + false_positives) + recall = true_positives / (true_positives + false_negatives) + + .. note:: + + This Fbeta score only supports binary classification. + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + beta : float, default 1 + weight of precision in harmonic mean. + threshold : float, default 0.5 + threshold for deciding whether the predictions are positive or negative. + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average the F1 scores for each batch. + "micro": compute a single F1 score across all batches. 
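# Illustrative sketch: how the `average` option aggregates F1/Fbeta across mini-batches.
# "macro" averages one score per batch, "micro" pools the confusion counts before scoring.
# The two hypothetical batches below are chosen only to show that the results can differ.
import numpy as np

def f1_from_counts(tp, fp, fn):
    p = tp / (tp + fp) if tp + fp else 0.0
    r = tp / (tp + fn) if tp + fn else 0.0
    return 2 * p * r / (p + r) if p + r else 0.0

batches = [dict(tp=1, fp=0, fn=3), dict(tp=5, fp=1, fn=0)]

macro = np.mean([f1_from_counts(**b) for b in batches])          # (0.4 + 0.909) / 2 ~= 0.655
micro = f1_from_counts(sum(b['tp'] for b in batches),
                       sum(b['fp'] for b in batches),
                       sum(b['fn'] for b in batches))            # ~= 0.75
print(macro, micro)   # macro weights each batch equally, micro weights each sample equally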
+ + Examples + -------- + >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] + >>> labels = [mx.nd.array([0., 1., 1.])] + >>> fbeta = mx.metric.Fbeta(beta=2) + >>> fbeta.update(preds = predicts, labels = labels) + >>> print fbeta.get() + ('fbeta', 0.9090909090909091) + """ + + def __init__(self, name='fbeta', + output_names=None, label_names=None, beta=1, threshold=0.5, average="macro"): self.average = average - self.metrics = _BinaryClassificationMetrics() + self.metrics = _BinaryClassificationMetrics(threshold=threshold, beta=beta) EvalMetric.__init__(self, name=name, output_names=output_names, label_names=label_names, has_global_stats=True) @@ -834,6 +969,76 @@ def reset_local(self): self.metrics.local_reset_stats() +@register +class BinaryAccuracy(EvalMetric): + """Computes the accuracy of a binary classification problem. + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + threshold : float, default 0.5 + threshold for deciding whether the predictions are positive or negative. + + Examples + -------- + >>> predicts = [mx.nd.array([0.7, 1, 0.55])] + >>> labels = [mx.nd.array([0., 1., 0.])] + >>> bacc = mx.metric.BinaryAccuracy(threshold=0.6) + >>> bacc.update(preds = predicts, labels = labels) + >>> print bacc.get() + ('binary_accuracy', 0.6666666666666666) + """ + + def __init__(self, name='binary_accuracy', + output_names=None, label_names=None, threshold=0.5): + self.metrics = _BinaryClassificationMetrics(threshold=threshold) + EvalMetric.__init__(self, name=name, + output_names=output_names, label_names=label_names, + has_global_stats=True) + + def update(self, labels, preds): + """Updates the internal evaluation result. + + Parameters + ---------- + labels : list of `NDArray` + The labels of the data. + + preds : list of `NDArray` + Predicted values. + """ + labels, preds = check_label_shapes(labels, preds, True) + + for label, pred in zip(labels, preds): + self.metrics.update_binary_stats(label, pred) + + self.sum_metric = self.metrics.accuracy * self.metrics.total_examples + self.global_sum_metric = self.metrics.global_accuracy * self.metrics.global_total_examples + self.num_inst = self.metrics.total_examples + self.global_num_inst = self.metrics.global_total_examples + + def reset(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 + self.metrics.reset_stats() + + def reset_local(self): + """Resets the internal evaluation result to initial state.""" + self.sum_metric = 0. + self.num_inst = 0 + self.metrics.local_reset_stats() + + @register class MCC(EvalMetric): """Computes the Matthews Correlation Coefficient of a binary classification problem. @@ -1092,7 +1297,10 @@ class MAE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average MAE results for each batch. + "micro": compute a single MAE result across all batches. 
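# Illustrative sketch: the hand computation behind the BinaryAccuracy doctest above.
# With threshold=0.6 the scores [0.7, 1, 0.55] binarize to [1, 1, 0]; against labels
# [0, 1, 0] that is one true positive and one true negative out of three samples.
import numpy as np

scores = np.array([0.7, 1.0, 0.55])
labels = np.array([0, 1, 0])
pred = (scores > 0.6).astype(int)
print((pred == labels).mean())    # 0.666..., i.e. (TP + TN) / total_examples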
Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -1104,11 +1312,12 @@ class MAE(EvalMetric): """ def __init__(self, name='mae', - output_names=None, label_names=None): + output_names=None, label_names=None, average='macro'): super(MAE, self).__init__( name, output_names=output_names, label_names=label_names, has_global_stats=True) - + self.average = average + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1130,12 +1339,18 @@ def update(self, labels, preds): label = label.reshape(label.shape[0], 1) if len(pred.shape) == 1: pred = pred.reshape(pred.shape[0], 1) - - mae = numpy.abs(label - pred).mean() + + if self.average == "macro": + mae = numpy.abs(label - pred).mean() + num_inst = 1 + else: + num_inst = label.shape[0] + mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() + self.sum_metric += mae self.global_sum_metric += mae - self.num_inst += 1 # numpy.prod(label.shape) - self.global_num_inst += 1 # numpy.prod(label.shape) + self.num_inst += num_inst + self.global_num_inst += num_inst @register @@ -1157,7 +1372,10 @@ class MSE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average MSE results for each batch. + "micro": compute a single MSE result across all batches. Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -1168,11 +1386,12 @@ class MSE(EvalMetric): ('mse', 0.375) """ def __init__(self, name='mse', - output_names=None, label_names=None): + output_names=None, label_names=None, average="macro"): super(MSE, self).__init__( name, output_names=output_names, label_names=label_names, has_global_stats=True) - + self.average = average + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1195,11 +1414,16 @@ def update(self, labels, preds): if len(pred.shape) == 1: pred = pred.reshape(pred.shape[0], 1) - mse = ((label - pred)**2.0).mean() + if self.average == "macro": + mse = ((label - pred)**2.0).mean() + num_inst = 1 + else: + num_inst = label.shape[0] + mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() self.sum_metric += mse self.global_sum_metric += mse - self.num_inst += 1 # numpy.prod(label.shape) - self.global_num_inst += 1 # numpy.prod(label.shape) + self.num_inst += num_inst + self.global_num_inst += num_inst @register @@ -1221,7 +1445,10 @@ class RMSE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average RMSE results for each batch. + "micro": compute a single RSME result across all batches. Examples -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] @@ -1232,11 +1459,12 @@ class RMSE(EvalMetric): ('rmse', 0.612372457981) """ def __init__(self, name='rmse', - output_names=None, label_names=None): + output_names=None, label_names=None, average="macro"): super(RMSE, self).__init__( name, output_names=output_names, label_names=label_names, has_global_stats=True) - + self.average = average + def update(self, labels, preds): """Updates the internal evaluation result. 
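# Illustrative sketch: why the new `average` flag matters for MAE/MSE/RMSE. With equally
# sized batches the two modes agree, but with unequal batch sizes "macro" (mean of
# per-batch means) and "micro" (mean over all samples) diverge. Errors below are made up.
import numpy as np

batch_abs_errors = [np.array([1.0, 3.0]), np.array([2.0, 2.0, 2.0, 10.0])]

macro_mae = np.mean([b.mean() for b in batch_abs_errors])    # (2.0 + 4.0) / 2 = 3.0
micro_mae = np.concatenate(batch_abs_errors).mean()          # 20.0 / 6 ~= 3.33
print(macro_mae, micro_mae)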
@@ -1259,13 +1487,175 @@ def update(self, labels, preds): if len(pred.shape) == 1: pred = pred.reshape(pred.shape[0], 1) - rmse = numpy.sqrt(((label - pred)**2.0).mean()) + if self.average == "macro": + rmse = numpy.sqrt(((label - pred)**2.0).mean()) + num_inst = 1 + else: + num_inst = label.shape[0] + rmse = numpy.sqrt(((label - pred)**2.0).reshape(num_inst, -1).mean(axis=1)).sum() self.sum_metric += rmse self.global_sum_metric += rmse - self.num_inst += 1 - self.global_num_inst += 1 + self.num_inst += num_inst + self.global_num_inst += num_inst + + +@register +class MeanPairwiseDistance(EvalMetric): + """Computes Mean Pairwise Distance. + + The mean pairwise distance is given by + .. math:: + \\sqrt{\\frac{(\\sum_i^n (y_i - \\hat{y}_i)^p)^\\frac{1}{p}}{n}} + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + p : float, default 2 + calculating distance using the p-norm + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average MPD results for each batch. + "micro": compute a single MPD result across all batches. + Examples + -------- + >>> predicts = [mx.nd.array([[1., 2.], [3., 4.]])] + >>> labels = [mx.nd.array([[1., 0.], [4., 2.]])] + >>> mpd = mx.metric.MeanPairwiseDistance() + >>> mpd.update(labels = labels, preds = predicts) + >>> print mpd.get() + ('mpd', 2.1180338859558105) + """ + def __init__(self, name='mpd', + output_names=None, label_names=None, p=2, average="micro"): + super(MeanPairwiseDistance, self).__init__( + name, output_names=output_names, label_names=label_names, + has_global_stats=True) + self.average = average + self.p = p + + def update(self, labels, preds): + """Updates the internal evaluation result. + + Parameters + ---------- + labels : list of `NDArray` + The labels of the data. + + preds : list of `NDArray` + Predicted values. + """ + labels, preds = check_label_shapes(labels, preds, True) + + for label, pred in zip(labels, preds): + label = label.asnumpy() + pred = pred.asnumpy() + + label = label.reshape(label.shape[0], -1) + pred = pred.reshape(pred.shape[0], -1) + + pd = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) + if self.average == "macro": + pd = pd.mean() + num_inst = 1 + else: + pd = pd.sum() + num_inst = label.shape[0] + + self.sum_metric += pd + self.global_sum_metric += pd + self.num_inst += num_inst + self.global_num_inst += num_inst + + +@register +class MeanCosineSimilarity(EvalMetric): + """Computes Mean Cosine Similarity. + + The mean cosine similarity is given by + + .. math:: + cos\_sim(label, pred) = \frac{{label}.{pred}}{max(||label||.||pred||, eps)} + (calculating on the last dimension of label and pred.) + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + eps : float, default 1e-8 + small vale to avoid division by zero. + average : str, default 'micro' + Strategy to be used for aggregating across mini-batches. 
+ "macro": average RMSE results for each batch. + "micro": compute a single RSME result across all batches. + Examples + -------- + >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] + >>> labels = [mx.nd.array([[3., 4.], [2., 2.]])] + >>> mcs = mx.metric.MeanCosineSimilarity() + >>> mcs.update(labels = labels, preds = predicts) + >>> print mcs.get() + ('cos_sim', 0.8) + """ + def __init__(self, name='cos_sim', + output_names=None, label_names=None, eps=1e-8, average="micro"): + super(MeanCosineSimilarity, self).__init__( + name, output_names=output_names, label_names=label_names, + has_global_stats=True) + self.average = average + self.eps = eps + + def update(self, labels, preds): + """Updates the internal evaluation result. + + Parameters + ---------- + labels : list of `NDArray` + The labels of the data. + + preds : list of `NDArray` + Predicted values. + """ + labels, preds = check_label_shapes(labels, preds, True) + + for label, pred in zip(labels, preds): + label = label.asnumpy() + pred = pred.asnumpy() + + if len(label.shape) == 1: + label = label.reshape(1, label.shape[0]) + if len(pred.shape) == 1: + pred = pred.reshape(1, pred.shape[0]) + + sim = (label * pred).sum(axis=-1) + n_p = numpy.linalg.norm(pred, axis=-1) + n_l = numpy.linalg.norm(label, axis=-1) + sim = sim / numpy.maximum(n_l * n_p, self.eps) + if self.average == "macro": + sim = sim.mean() + num_inst = 1 + else: + sim = sim.sum() + num_inst = numpy.prod(label.shape[:-1]) + self.sum_metric += sim + self.global_sum_metric += sim + self.num_inst += num_inst + self.global_num_inst += num_inst + + @register @alias('ce') class CrossEntropy(EvalMetric): From 575f23b49dd0d5c7504b6dfd05314d10ada8b314 Mon Sep 17 00:00:00 2001 From: acphile Date: Thu, 16 Apr 2020 14:53:18 +0800 Subject: [PATCH 02/24] move metric.py to gluon, replace mx.metric with mx.gluon.metric in python/mxnet/ --- python/mxnet/__init__.py | 1 - .../contrib/svrg_optimization/svrg_module.py | 4 +- .../gluon/contrib/estimator/estimator.py | 2 +- .../gluon/contrib/estimator/event_handler.py | 4 +- python/mxnet/gluon/contrib/estimator/utils.py | 4 +- python/mxnet/{ => gluon}/metric.py | 58 +++++++++---------- python/mxnet/model.py | 2 +- python/mxnet/module/base_module.py | 4 +- 8 files changed, 39 insertions(+), 40 deletions(-) rename python/mxnet/{ => gluon}/metric.py (98%) diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 49f10aace531..284788fa2276 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -51,7 +51,6 @@ from . import random from . import optimizer from . import model -from . import metric from . import notebook from . 
import initializer # use mx.init as short for mx.initializer diff --git a/python/mxnet/contrib/svrg_optimization/svrg_module.py b/python/mxnet/contrib/svrg_optimization/svrg_module.py index eecb87cf25bb..fc5a6c224809 100644 --- a/python/mxnet/contrib/svrg_optimization/svrg_module.py +++ b/python/mxnet/contrib/svrg_optimization/svrg_module.py @@ -478,8 +478,8 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', if validation_metric is None: validation_metric = eval_metric - if not isinstance(eval_metric, mx.metric.EvalMetric): - eval_metric = mx.metric.create(eval_metric) + if not isinstance(eval_metric, mx.gluon.metric.EvalMetric): + eval_metric = mx.gluon.metric.create(eval_metric) ################################################################################ # training loop diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index ed8a53d7c3a6..c47e02b7213f 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -33,7 +33,7 @@ from ...trainer import Trainer from ...utils import split_and_load from ....context import Context, cpu, gpu, num_gpus -from ....metric import Loss as metric_loss +from ...metric import Loss as metric_loss from .batch_processor import BatchProcessor __all__ = ['Estimator'] diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py index 338c7f00e05e..5709a803a610 100644 --- a/python/mxnet/gluon/contrib/estimator/event_handler.py +++ b/python/mxnet/gluon/contrib/estimator/event_handler.py @@ -25,8 +25,8 @@ import numpy as np -from ....metric import CompositeEvalMetric, EvalMetric -from ....metric import Loss as metric_loss +from ...metric import CompositeEvalMetric, EvalMetric +from ...metric import Loss as metric_loss from .utils import _check_metrics __all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd', 'BatchBegin', 'BatchEnd', diff --git a/python/mxnet/gluon/contrib/estimator/utils.py b/python/mxnet/gluon/contrib/estimator/utils.py index d9126a2f6763..dc0c4bf8f081 100644 --- a/python/mxnet/gluon/contrib/estimator/utils.py +++ b/python/mxnet/gluon/contrib/estimator/utils.py @@ -20,7 +20,7 @@ """Gluon Estimator Utility Functions""" from ...loss import SoftmaxCrossEntropyLoss -from ....metric import Accuracy, EvalMetric, CompositeEvalMetric +from ...metric import Accuracy, EvalMetric, CompositeEvalMetric def _check_metrics(metrics): if isinstance(metrics, CompositeEvalMetric): @@ -31,7 +31,7 @@ def _check_metrics(metrics): metrics = metrics or [] if not all([isinstance(metric, EvalMetric) for metric in metrics]): raise ValueError("metrics must be a Metric or a list of Metric, " - "refer to mxnet.metric.EvalMetric: {}".format(metrics)) + "refer to mxnet.gluon.metric.EvalMetric: {}".format(metrics)) return metrics def _check_handler_metric_ref(handler, known_metrics): diff --git a/python/mxnet/metric.py b/python/mxnet/gluon/metric.py similarity index 98% rename from python/mxnet/metric.py rename to python/mxnet/gluon/metric.py index f790c2ded617..1c9073096adb 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/gluon/metric.py @@ -24,9 +24,9 @@ import numpy -from .base import numeric_types, string_types -from . import ndarray -from . import registry +from ..base import numeric_types, string_types +from .. import ndarray +from .. 
import registry def check_label_shapes(labels, preds, wrap=False, shape=False): @@ -256,9 +256,9 @@ def create(metric, *args, **kwargs): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label - pred)) ... - >>> metric1 = mx.metric.create('acc') - >>> metric2 = mx.metric.create(custom_metric) - >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) + >>> metric1 = mx.gluon.metric.create('acc') + >>> metric2 = mx.gluon.metric.create(custom_metric) + >>> metric3 = mx.gluon.metric.create([metric1, metric2, 'rmse']) """ if callable(metric): return CustomMetric(metric, *args, **kwargs) @@ -293,9 +293,9 @@ class CompositeEvalMetric(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> eval_metrics_1 = mx.metric.Accuracy() - >>> eval_metrics_2 = mx.metric.F1() - >>> eval_metrics = mx.metric.CompositeEvalMetric() + >>> eval_metrics_1 = mx.gluon.metric.Accuracy() + >>> eval_metrics_2 = mx.gluon.metric.F1() + >>> eval_metrics = mx.gluon.metric.CompositeEvalMetric() >>> for child_metric in [eval_metrics_1, eval_metrics_2]: >>> eval_metrics.add(child_metric) >>> eval_metrics.update(labels = labels, preds = predicts) @@ -460,7 +460,7 @@ class Accuracy(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> acc = mx.metric.Accuracy() + >>> acc = mx.gluon.metric.Accuracy() >>> acc.update(preds = predicts, labels = labels) >>> print acc.get() ('accuracy', 0.6666666666666666) @@ -535,7 +535,7 @@ class TopKAccuracy(EvalMetric): >>> top_k = 3 >>> labels = [mx.nd.array([2, 6, 9, 2, 3, 4, 7, 8, 9, 6])] >>> predicts = [mx.nd.array(np.random.rand(10, 10))] - >>> acc = mx.metric.TopKAccuracy(top_k=top_k) + >>> acc = mx.gluon.metric.TopKAccuracy(top_k=top_k) >>> acc.update(labels, predicts) >>> print acc.get() ('top_k_accuracy', 0.3) @@ -815,7 +815,7 @@ class F1(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0., 1., 1.])] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.8) @@ -912,7 +912,7 @@ class Fbeta(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0., 1., 1.])] - >>> fbeta = mx.metric.Fbeta(beta=2) + >>> fbeta = mx.gluon.metric.Fbeta(beta=2) >>> fbeta.update(preds = predicts, labels = labels) >>> print fbeta.get() ('fbeta', 0.9090909090909091) @@ -990,7 +990,7 @@ class BinaryAccuracy(EvalMetric): -------- >>> predicts = [mx.nd.array([0.7, 1, 0.55])] >>> labels = [mx.nd.array([0., 1., 0.])] - >>> bacc = mx.metric.BinaryAccuracy(threshold=0.6) + >>> bacc = mx.gluon.metric.BinaryAccuracy(threshold=0.6) >>> bacc.update(preds = predicts, labels = labels) >>> print bacc.get() ('binary_accuracy', 0.6666666666666666) @@ -1092,9 +1092,9 @@ class MCC(EvalMetric): [0.]*(false_positives + true_negatives) + [1.]*(false_negatives + true_positives) )] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> mcc = mx.metric.MCC() + >>> mcc = mx.gluon.metric.MCC() >>> mcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -1203,7 +1203,7 @@ class Perplexity(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> perp = mx.metric.Perplexity(ignore_label=None) + >>> perp = 
mx.gluon.metric.Perplexity(ignore_label=None) >>> perp.update(labels, predicts) >>> print perp.get() ('Perplexity', 1.7710976285155853) @@ -1305,7 +1305,7 @@ class MAE(EvalMetric): -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> mean_absolute_error = mx.metric.MAE() + >>> mean_absolute_error = mx.gluon.metric.MAE() >>> mean_absolute_error.update(labels = labels, preds = predicts) >>> print mean_absolute_error.get() ('mae', 0.5) @@ -1380,7 +1380,7 @@ class MSE(EvalMetric): -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> mean_squared_error = mx.metric.MSE() + >>> mean_squared_error = mx.gluon.metric.MSE() >>> mean_squared_error.update(labels = labels, preds = predicts) >>> print mean_squared_error.get() ('mse', 0.375) @@ -1453,7 +1453,7 @@ class RMSE(EvalMetric): -------- >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> root_mean_squared_error = mx.metric.RMSE() + >>> root_mean_squared_error = mx.gluon.metric.RMSE() >>> root_mean_squared_error.update(labels = labels, preds = predicts) >>> print root_mean_squared_error.get() ('rmse', 0.612372457981) @@ -1528,7 +1528,7 @@ class MeanPairwiseDistance(EvalMetric): -------- >>> predicts = [mx.nd.array([[1., 2.], [3., 4.]])] >>> labels = [mx.nd.array([[1., 0.], [4., 2.]])] - >>> mpd = mx.metric.MeanPairwiseDistance() + >>> mpd = mx.gluon.metric.MeanPairwiseDistance() >>> mpd.update(labels = labels, preds = predicts) >>> print mpd.get() ('mpd', 2.1180338859558105) @@ -1605,7 +1605,7 @@ class MeanCosineSimilarity(EvalMetric): -------- >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] >>> labels = [mx.nd.array([[3., 4.], [2., 2.]])] - >>> mcs = mx.metric.MeanCosineSimilarity() + >>> mcs = mx.gluon.metric.MeanCosineSimilarity() >>> mcs.update(labels = labels, preds = predicts) >>> print mcs.get() ('cos_sim', 0.8) @@ -1688,7 +1688,7 @@ class :math:`k`. 
-------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> ce = mx.metric.CrossEntropy() + >>> ce = mx.gluon.metric.CrossEntropy() >>> ce.update(labels, predicts) >>> print ce.get() ('cross-entropy', 0.57159948348999023) @@ -1760,7 +1760,7 @@ class NegativeLogLikelihood(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> nll_loss = mx.metric.NegativeLogLikelihood() + >>> nll_loss = mx.gluon.metric.NegativeLogLikelihood() >>> nll_loss.update(labels, predicts) >>> print nll_loss.get() ('nll-loss', 0.57159948348999023) @@ -1829,7 +1829,7 @@ class PearsonCorrelation(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([[1, 0], [0, 1], [0, 1]])] - >>> pr = mx.metric.PearsonCorrelation() + >>> pr = mx.gluon.metric.PearsonCorrelation() >>> pr.update(labels, predicts) >>> print pr.get() ('pearsonr', 0.42163704544016178) @@ -1957,9 +1957,9 @@ class PCC(EvalMetric): [0]*(false_positives + true_negatives) + [1]*(false_negatives + true_positives) )] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> pcc = mx.metric.PCC() + >>> pcc = mx.gluon.metric.PCC() >>> pcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -2129,7 +2129,7 @@ class CustomMetric(EvalMetric): >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] >>> feval = lambda x, y : (x + y).mean() - >>> eval_metrics = mx.metric.CustomMetric(feval=feval) + >>> eval_metrics = mx.gluon.metric.CustomMetric(feval=feval) >>> eval_metrics.update(labels, predicts) >>> print eval_metrics.get() ('custom()', 6.0) @@ -2209,7 +2209,7 @@ def np(numpy_feval, name=None, allow_extra_outputs=False): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label-pred)) ... - >>> metric = mx.metric.np(custom_metric) + >>> metric = mx.gluon.metric.np(custom_metric) """ def feval(label, pred): """Internal eval function.""" diff --git a/python/mxnet/model.py b/python/mxnet/model.py index fa247624975d..bd80ec01738b 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -30,7 +30,7 @@ from . import ndarray as nd from . import symbol as sym from . import optimizer as opt -from . import metric +from .gluon import metric from . import kvstore as kvs from .context import Context, cpu from .initializer import Uniform diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py index 053a00b3abba..9154aebb4b25 100644 --- a/python/mxnet/module/base_module.py +++ b/python/mxnet/module/base_module.py @@ -24,7 +24,7 @@ import warnings import numpy as np -from .. import metric +from ..gluon import metric from .. import ndarray from ..context import cpu @@ -231,7 +231,7 @@ def score(self, eval_data, eval_metric, num_batch=None, batch_end_callback=None, -------- >>> # An example of using score for prediction. 
>>> # Evaluate accuracy on val_dataiter - >>> metric = mx.metric.Accuracy() + >>> metric = mx.gluon.metric.Accuracy() >>> mod.score(val_dataiter, metric) >>> mod.score(val_dataiter, ['mse', 'acc']) """ From 89929959d5ac8ea9e0a781344b39e32358dfa883 Mon Sep 17 00:00:00 2001 From: acphile Date: Thu, 16 Apr 2020 17:04:09 +0800 Subject: [PATCH 03/24] fix importError --- python/mxnet/gluon/__init__.py | 2 ++ python/mxnet/gluon/block.py | 3 ++- python/mxnet/gluon/contrib/data/text.py | 2 +- python/mxnet/gluon/contrib/nn/basic_layers.py | 2 +- python/mxnet/gluon/data/dataloader.py | 2 +- python/mxnet/gluon/data/vision/datasets.py | 2 +- python/mxnet/gluon/nn/basic_layers.py | 2 +- 7 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/mxnet/gluon/__init__.py b/python/mxnet/gluon/__init__.py index 288937cf4a03..514087049edb 100644 --- a/python/mxnet/gluon/__init__.py +++ b/python/mxnet/gluon/__init__.py @@ -19,6 +19,8 @@ # pylint: disable=wildcard-import """Neural network module.""" +from . import metric + from .parameter import * from .block import * diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 10c11b85ba97..864db34420ee 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -28,7 +28,8 @@ import numpy as np from ..base import mx_real_t, MXNetError -from .. import symbol, ndarray, initializer, np_symbol, autograd, _deferred_compute as dc +from .. import symbol, ndarray, initializer, autograd, _deferred_compute as dc +from ..symbol.numpy import _symbol as np_symbol from ..symbol import Symbol from ..ndarray import NDArray from .. import name as _name diff --git a/python/mxnet/gluon/contrib/data/text.py b/python/mxnet/gluon/contrib/data/text.py index 0536ac585484..916b41880d45 100644 --- a/python/mxnet/gluon/contrib/data/text.py +++ b/python/mxnet/gluon/contrib/data/text.py @@ -29,7 +29,7 @@ from ...data import dataset from ...utils import download, check_sha1, _get_repo_file_url from ....contrib import text -from .... import nd, base +from .... import ndarray as nd, base class _LanguageModelDataset(dataset._DownloadedDataset): # pylint: disable=abstract-method def __init__(self, root, namespace, vocabulary): diff --git a/python/mxnet/gluon/contrib/nn/basic_layers.py b/python/mxnet/gluon/contrib/nn/basic_layers.py index bc7c3ce19e09..5df1a1e83660 100644 --- a/python/mxnet/gluon/contrib/nn/basic_layers.py +++ b/python/mxnet/gluon/contrib/nn/basic_layers.py @@ -24,7 +24,7 @@ 'PixelShuffle3D'] import warnings -from .... import nd, context +from .... import ndarray as nd, context from ...block import HybridBlock, Block from ...nn import Sequential, HybridSequential, BatchNorm diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py index d34148417355..e07a3a673a75 100644 --- a/python/mxnet/gluon/data/dataloader.py +++ b/python/mxnet/gluon/data/dataloader.py @@ -37,7 +37,7 @@ pass from . import sampler as _sampler -from ... import nd, context +from ... import ndarray as nd, context from ...util import is_np_shape, is_np_array, set_np from ... import numpy as _mx_np # pylint: disable=reimported diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py index bdcaff52a042..90990a8436d8 100644 --- a/python/mxnet/gluon/data/vision/datasets.py +++ b/python/mxnet/gluon/data/vision/datasets.py @@ -30,7 +30,7 @@ from .. import dataset from ...utils import download, check_sha1, _get_repo_file_url -from .... import nd, image, recordio, base +from .... 
import ndarray as nd, image, recordio, base from .... import numpy as _mx_np # pylint: disable=reimported from ....util import is_np_array diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py index 70b0a71841f1..c417b7752096 100644 --- a/python/mxnet/gluon/nn/basic_layers.py +++ b/python/mxnet/gluon/nn/basic_layers.py @@ -27,7 +27,7 @@ from .activations import Activation from ..block import Block, HybridBlock from ..utils import _indent -from ... import nd, sym +from ... import ndarray as nd, symbol as sym from ...util import is_np_array From 1b8f521d302bc5f1cd23d677e2c97f5d450ea012 Mon Sep 17 00:00:00 2001 From: acphile Date: Thu, 16 Apr 2020 17:11:45 +0800 Subject: [PATCH 04/24] replace mx.metric with mx.gluon.metric in tests/python --- tests/python/gpu/test_contrib_amp.py | 4 +- tests/python/tensorrt/lenet5_train.py | 2 +- tests/python/train/test_autograd.py | 4 +- tests/python/train/test_bucketing.py | 4 +- tests/python/train/test_mlp.py | 2 +- tests/python/train/test_sparse_fm.py | 2 +- .../unittest/test_contrib_svrg_module.py | 4 +- .../unittest/test_gluon_batch_processor.py | 4 +- tests/python/unittest/test_gluon_estimator.py | 24 ++++---- .../unittest/test_gluon_event_handler.py | 18 +++--- tests/python/unittest/test_loss.py | 60 +++++++++---------- tests/python/unittest/test_metric.py | 44 +++++++------- tests/python/unittest/test_metric_perf.py | 4 +- tests/python/unittest/test_module.py | 4 +- 14 files changed, 90 insertions(+), 90 deletions(-) diff --git a/tests/python/gpu/test_contrib_amp.py b/tests/python/gpu/test_contrib_amp.py index 527f8534969c..f3742629b804 100644 --- a/tests/python/gpu/test_contrib_amp.py +++ b/tests/python/gpu/test_contrib_amp.py @@ -325,7 +325,7 @@ def check_amp_convert_bucketing_module(): data_val = mx.rnn.BucketSentenceIter(val_sent, batch_size, buckets=buckets, invalid_label=invalid_label) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) # AMP conversion with cast_optional_params set to true @@ -333,7 +333,7 @@ def check_amp_convert_bucketing_module(): ''' result_model = amp.convert_bucketing_module(model, cast_optional_params=True) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) ''' diff --git a/tests/python/tensorrt/lenet5_train.py b/tests/python/tensorrt/lenet5_train.py index a0ea447de5a0..b04b3484de46 100755 --- a/tests/python/tensorrt/lenet5_train.py +++ b/tests/python/tensorrt/lenet5_train.py @@ -74,7 +74,7 @@ def train_lenet5(num_epochs, batch_size, train_iter, val_iter, test_iter): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.95, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/python/train/test_autograd.py b/tests/python/train/test_autograd.py index 712672cd0a9f..f8dbf3610a68 100644 --- a/tests/python/train/test_autograd.py +++ b/tests/python/train/test_autograd.py @@ -53,7 +53,7 @@ def get_net(): batch_size=batch_size, shuffle=True, flat=True, silent=False) def score(net, 
ctx_list): - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() val_data.reset() for batch in val_data: datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0) @@ -67,7 +67,7 @@ def score(net, ctx_list): def train(net, epoch, ctx_list): net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5}) - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for i in range(epoch): diff --git a/tests/python/train/test_bucketing.py b/tests/python/train/test_bucketing.py index a233e46e0992..f4b8f417a2cc 100644 --- a/tests/python/train/test_bucketing.py +++ b/tests/python/train/test_bucketing.py @@ -98,7 +98,7 @@ def sym_gen(seq_len): model.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -114,7 +114,7 @@ def sym_gen(seq_len): def test_bucket_module(): # This test forecasts random sequence of words to check bucketing. # We cannot guarantee the accuracy of such an impossible task, and comments out the following line. - # assert model.score(data_val, mx.metric.MSE())[0][1] < 350, "High mean square error." + # assert model.score(data_val, mx.gluon.metric.MSE())[0][1] < 350, "High mean square error." model = train_model() diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 1b8e06f53027..166fd8de28d8 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -64,7 +64,7 @@ def test_mlp(): softmax, X=train_dataiter, eval_data=val_dataiter, - eval_metric=mx.metric.np(accuracy), + eval_metric=mx.gluon.metric.np(accuracy), epoch_end_callback=mx.callback.do_checkpoint(prefix), ctx=[mx.cpu(i) for i in range(2)], num_epoch=num_epoch, diff --git a/tests/python/train/test_sparse_fm.py b/tests/python/train/test_sparse_fm.py index 99a22f54cbbd..d967e2954775 100644 --- a/tests/python/train/test_sparse_fm.py +++ b/tests/python/train/test_sparse_fm.py @@ -102,7 +102,7 @@ def fm(factor_size, feature_dim, init): else: raise AssertionError("Unsupported optimizer type '" + optimizer + "' specified") # use accuracy as the metric - metric = mx.metric.create('MSE') + metric = mx.gluon.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() diff --git a/tests/python/unittest/test_contrib_svrg_module.py b/tests/python/unittest/test_contrib_svrg_module.py index 79407d15fd7f..6c973952ba18 100644 --- a/tests/python/unittest/test_contrib_svrg_module.py +++ b/tests/python/unittest/test_contrib_svrg_module.py @@ -242,7 +242,7 @@ def create_module_with_sgd(): num_epoch = 10 # Use metric MSE - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") # Train with SVRGModule for e in range(num_epoch): @@ -299,7 +299,7 @@ def test_accumulate_kvstore(): def test_fit(): di, mod = setup() num_epoch = 100 - metric = mx.metric.create("mse") + metric = mx.gluon.metric.create("mse") mod.fit(di, eval_metric=metric, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),), num_epoch=num_epoch, kvstore='local') diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py index 8604713fc129..336d75237820 100644 
--- a/tests/python/unittest/test_gluon_batch_processor.py +++ b/tests/python/unittest/test_gluon_batch_processor.py @@ -52,7 +52,7 @@ def test_batch_processor_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) processor = BatchProcessor() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -83,7 +83,7 @@ def test_batch_processor_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) processor = BatchProcessor() diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index 2c00b1609112..66b1e94335c6 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -58,7 +58,7 @@ def test_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -87,7 +87,7 @@ def test_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -126,7 +126,7 @@ def test_initializer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # no initializer est = Estimator(net=net, loss=loss, @@ -166,7 +166,7 @@ def test_trainer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) # input no trainer with warnings.catch_warnings(record=True) as w: @@ -206,7 +206,7 @@ def test_metric(): est.fit(train_data=train_data, epochs=num_epochs) # input list of metrics - metrics = [mx.metric.Accuracy(), mx.metric.Accuracy()] + metrics = [mx.gluon.metric.Accuracy(), mx.gluon.metric.Accuracy()] est = Estimator(net=net, loss=loss, train_metrics=metrics, @@ -227,14 +227,14 @@ def test_metric(): loss=loss, trainer=trainer, context=ctx) - assert isinstance(est.train_metrics[0], mx.metric.Accuracy) + assert isinstance(est.train_metrics[0], mx.gluon.metric.Accuracy) def test_loss(): ''' test with invalid loss ''' net = _get_test_network() ctx = mx.cpu() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # input invalid loss @@ -250,7 +250,7 @@ def test_context(): ''' test with no context, list of context, invalid context ''' net = _get_test_network() loss = gluon.loss.L2Loss() - metrics = mx.metric.Accuracy() + metrics = mx.gluon.metric.Accuracy() # input no context est = Estimator(net=net, loss=loss, @@ -332,7 +332,7 @@ def test_default_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.metric.RMSE() + train_acc = mx.gluon.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, @@ -359,7 +359,7 @@ def test_default_handlers(): # handler with mixed metrics, some handler use metrics prepared by estimator # some handler use metrics user prepared - logging = LoggingHandler(metrics=[mx.metric.RMSE("val acc")]) + logging = 
LoggingHandler(metrics=[mx.gluon.metric.RMSE("val acc")]) with assert_raises(ValueError): est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[logging]) @@ -383,7 +383,7 @@ def test_val_net(): ctx = mx.cpu() loss = gluon.loss.L2Loss() val_loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -448,7 +448,7 @@ def test_val_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.metric.RMSE() + train_acc = mx.gluon.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, diff --git a/tests/python/unittest/test_gluon_event_handler.py b/tests/python/unittest/test_gluon_event_handler.py index c81d29157e7f..a18895be34d2 100644 --- a/tests/python/unittest/test_gluon_event_handler.py +++ b/tests/python/unittest/test_gluon_event_handler.py @@ -84,7 +84,7 @@ def test_checkpoint_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -130,7 +130,7 @@ def test_resume_checkpoint(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -155,7 +155,7 @@ def test_early_stopping(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) early_stopping = event_handler.EarlyStoppingHandler(monitor=acc, patience=0, @@ -179,7 +179,7 @@ def test_logging(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.logger.addHandler(logging.FileHandler(output_dir)) @@ -226,7 +226,7 @@ def epoch_end(self, estimator, *args, **kwargs): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) custom_handler = CustomStopHandler(3, 2) est.fit(test_data, event_handlers=[custom_handler], epochs=3) @@ -249,7 +249,7 @@ def test_logging_interval(): dataloader = _get_test_data(in_size=data_size) num_epochs = 1 ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, loss=ce_loss, @@ -273,7 +273,7 @@ def test_logging_interval(): ''' test case #2: log interval is 5 ''' old_stdout = sys.stdout sys.stdout = mystdout = StringIO() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() log_interval = 5 logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, @@ -299,7 +299,7 @@ def test_validation_handler_batch_axis(): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = 
mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.fit(test_data, epochs=3) @@ -315,7 +315,7 @@ def test_validation_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) val_handler = ValidationHandler(val_data=test_data, eval_fn=est.evaluate, diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py index a1a49c97d7f4..7f3df178ece4 100644 --- a/tests/python/unittest/test_loss.py +++ b/tests/python/unittest/test_loss.py @@ -79,9 +79,9 @@ def test_ce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 # tracked at: https://github.com/apache/incubator-mxnet/issues/11691 @with_seed() @@ -97,9 +97,9 @@ def test_bce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.random.uniform(-5, 5, shape=(10,)) label = mx.random.uniform(0, 1, shape=(10,)) @@ -142,8 +142,8 @@ def test_kl_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + eval_metric=mx.gluon.metric.Loss(), optimizer='adam') + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -159,9 +159,9 @@ def test_l2_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -177,9 +177,9 @@ def test_l1_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.1 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.1 @with_seed() @@ -222,9 +222,9 @@ def test_ctc_loss_train(): loss = mx.sym.make_loss(loss) mod = 
mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 10 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 10 @with_seed() @@ -243,12 +243,12 @@ def test_sample_weight_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam') + eval_metric=mx.gluon.metric.Loss(), optimizer='adam') data_iter = mx.io.NDArrayIter(data[10:], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] + score = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] assert score > 1 data_iter = mx.io.NDArrayIter(data[:10], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] + score = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] assert score < 0.05 @@ -266,13 +266,13 @@ def test_saveload(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.metric.Loss()) + eval_metric=mx.gluon.metric.Loss()) mod.save_checkpoint('test', 100, save_optimizer_states=True) mod = mx.mod.Module.load('test', 100, load_optimizer_states=True, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.metric.Loss()) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + eval_metric=mx.gluon.metric.Loss()) + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_huber_loss(): @@ -287,9 +287,9 @@ def test_huber_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -305,9 +305,9 @@ def test_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.06 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.06 @with_seed() @@ -323,9 +323,9 @@ def test_squared_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + 
initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -344,9 +344,9 @@ def test_triplet_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('pos','neg')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_sdml_loss(): @@ -453,9 +453,9 @@ def test_poisson_nllloss_mod(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=20, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.metric.Loss(), + initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_bce_loss_with_pos_weight(): @@ -474,9 +474,9 @@ def test_bce_loss_with_pos_weight(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'pos_w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.nd.random.uniform(-5, 5, shape=(N, 5)) label = mx.nd.array(np.random.randint(2, size=(N, 5)), dtype='float32') diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index e7273fba35d5..3408dd503d59 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -25,9 +25,9 @@ from copy import deepcopy def check_metric(metric, *args, **kwargs): - metric = mx.metric.create(metric, *args, **kwargs) + metric = mx.gluon.metric.create(metric, *args, **kwargs) str_metric = json.dumps(metric.get_config()) - metric2 = mx.metric.create(str_metric) + metric2 = mx.gluon.metric.create(str_metric) assert metric.get_config() == metric2.get_config() @@ -40,7 +40,7 @@ def test_metrics(): check_metric('pcc') check_metric('nll_loss') check_metric('loss') - composite = mx.metric.create(['acc', 'f1']) + composite = mx.gluon.metric.create(['acc', 'f1']) check_metric(composite) def _check_global_metric(metric, *args, **kwargs): @@ -76,7 +76,7 @@ def _compare_metric_result(m1, m2): shape = kwargs.pop('shape', (10,10)) use_same_shape = kwargs.pop('use_same_shape', False) - m1 = mx.metric.create(metric, *args, **kwargs) + m1 = mx.gluon.metric.create(metric, *args, **kwargs) m2 = deepcopy(m1) # check that global stats are not reset when calling # reset_local() @@ -121,7 +121,7 @@ def custom_metric(label, pred): _check_global_metric(['acc', 'f1'], shape=(10,2)) def test_nll_loss(): - metric = mx.metric.create('nll_loss') + metric = mx.gluon.metric.create('nll_loss') pred = 
mx.nd.array([[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]]) label = mx.nd.array([2, 1]) metric.update([label], [pred]) @@ -132,7 +132,7 @@ def test_nll_loss(): def test_acc(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label = mx.nd.array([0, 1, 1]) - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1) == label).sum().asscalar() / label.size @@ -142,7 +142,7 @@ def test_acc_2d_label(): # label maybe provided in 2d arrays in custom data iterator pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6], [0.8, 0.2], [0.3, 0.5], [0.6, 0.4]]) label = mx.nd.array([[0, 1, 1], [1, 0, 1]]) - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1).asnumpy() == label.asnumpy().ravel()).sum() / \ @@ -151,8 +151,8 @@ def test_acc_2d_label(): def test_loss_update(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) - metric1 = mx.metric.create('loss') - metric2 = mx.metric.create('loss') + metric1 = mx.gluon.metric.create('loss') + metric2 = mx.gluon.metric.create('loss') metric1.update(None, [pred]) metric2.update(None, pred) _, acc1 = metric1.get() @@ -160,8 +160,8 @@ def test_loss_update(): assert acc1 == acc2 def test_f1(): - microF1 = mx.metric.create("f1", average="micro") - macroF1 = mx.metric.F1(average="macro") + microF1 = mx.gluon.metric.create("f1", average="micro") + macroF1 = mx.gluon.metric.F1(average="macro") assert np.isnan(macroF1.get()[1]) assert np.isnan(microF1.get()[1]) @@ -207,8 +207,8 @@ def test_f1(): np.testing.assert_almost_equal(macroF1.get()[1], (fscore1 + fscore2) / 2.) def test_mcc(): - microMCC = mx.metric.create("mcc", average="micro") - macroMCC = mx.metric.MCC(average="macro") + microMCC = mx.gluon.metric.create("mcc", average="micro") + macroMCC = mx.gluon.metric.MCC(average="macro") assert np.isnan(microMCC.get()[1]) assert np.isnan(macroMCC.get()[1]) @@ -259,7 +259,7 @@ def test_perplexity(): label = mx.nd.array([0, 1, 1]) p = pred.asnumpy()[np.arange(label.size), label.asnumpy().astype('int32')] perplexity_expected = np.exp(-np.log(p).sum()/label.size) - metric = mx.metric.create('perplexity', -1) + metric = mx.gluon.metric.create('perplexity', -1) metric.update([label], [pred]) _, perplexity = metric.get() assert perplexity == perplexity_expected @@ -269,8 +269,8 @@ def test_pearsonr(): label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) - macro_pr = mx.metric.create('pearsonr', average='macro') - micro_pr = mx.metric.create('pearsonr', average='micro') + macro_pr = mx.gluon.metric.create('pearsonr', average='macro') + micro_pr = mx.gluon.metric.create('pearsonr', average='micro') assert np.isnan(macro_pr.get()[1]) assert np.isnan(micro_pr.get()[1]) @@ -317,18 +317,18 @@ def test_pcc(): [ 7, 3 ], [ 2, 5 ], ]) - met_pcc = mx.metric.create('pcc') + met_pcc = mx.gluon.metric.create('pcc') met_pcc.update(labels, preds) _, pcc = met_pcc.get() # pcc should agree with mcc for binary classification - met_mcc = mx.metric.create('mcc') + met_mcc = mx.gluon.metric.create('mcc') met_mcc.update(labels, preds) _, mcc = met_mcc.get() np.testing.assert_almost_equal(pcc, mcc) # pcc should agree with Pearson for binary classification - met_pear = mx.metric.create('pearsonr') + met_pear = 
mx.gluon.metric.create('pearsonr') met_pear.update(labels, [p.argmax(axis=1) for p in preds]) _, pear = met_pear.get() np.testing.assert_almost_equal(pcc, pear) @@ -391,18 +391,18 @@ def test_single_array_input(): pred = mx.nd.array([[1,2,3,4]]) label = pred + 0.1 - mse = mx.metric.create('mse') + mse = mx.gluon.metric.create('mse') mse.update(label, pred) _, mse_res = mse.get() np.testing.assert_almost_equal(mse_res, 0.01) - mae = mx.metric.create('mae') + mae = mx.gluon.metric.create('mae') mae.update(label, pred) mae.get() _, mae_res = mae.get() np.testing.assert_almost_equal(mae_res, 0.1) - rmse = mx.metric.create('rmse') + rmse = mx.gluon.metric.create('rmse') rmse.update(label, pred) rmse.get() _, rmse_res = rmse.get() diff --git a/tests/python/unittest/test_metric_perf.py b/tests/python/unittest/test_metric_perf.py index 36cbc685797c..058d4cb8217f 100644 --- a/tests/python/unittest/test_metric_perf.py +++ b/tests/python/unittest/test_metric_perf.py @@ -66,7 +66,7 @@ def data(self): def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs): """ Helper function for running one metric benchmark """ - metric = mx.metric.create(name, **kwargs) + metric = mx.gluon.metric.create(name, **kwargs) data_gen = data_gen_cls(n, c, pred_ctx, label_ctx) try: label, pred = data_gen.data() @@ -105,7 +105,7 @@ def test_metric_performance(): output_dims = [128, 1024, 8192] ctxs = [mx.cpu(), mx.gpu()] - print("\nmx.metric benchmarks", file=sys.stderr) + print("\nmx.gluon.metric benchmarks", file=sys.stderr) print( "{:15}{:10}{:12}{:12}{:15}{:15}{}".format( 'Metric', 'Data-Ctx', 'Label-Ctx', 'Data Size', 'Batch Size', 'Output Dim', 'Elapsed Time'), diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py index b82933126d67..1b17b839a298 100644 --- a/tests/python/unittest/test_module.py +++ b/tests/python/unittest/test_module.py @@ -275,7 +275,7 @@ def sym_gen(seq_len): mod2.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. 
kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -711,7 +711,7 @@ def fm(factor_size, feature_dim, init): expected_accuracy = 0.02 # use accuracy as the metric - metric = mx.metric.create('MSE') + metric = mx.gluon.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() From 2ff2e38a969277a06aa97b599fb228d3ebf1bdce Mon Sep 17 00:00:00 2001 From: acphile Date: Mon, 20 Apr 2020 04:10:35 +0000 Subject: [PATCH 05/24] remove global support --- python/mxnet/gluon/metric.py | 325 ++++------------------------------- 1 file changed, 33 insertions(+), 292 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 1c9073096adb..953a57894827 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -89,7 +89,6 @@ def __init__(self, name, output_names=None, self.name = str(name) self.output_names = output_names self.label_names = label_names - self._has_global_stats = kwargs.pop("has_global_stats", False) self._kwargs = kwargs self.reset() @@ -148,13 +147,6 @@ def reset(self): """Resets the internal evaluation result to initial state.""" self.num_inst = 0 self.sum_metric = 0.0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 - - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" - self.num_inst = 0 - self.sum_metric = 0.0 def get(self): """Gets the current evaluation result. @@ -171,24 +163,6 @@ def get(self): else: return (self.name, self.sum_metric / self.num_inst) - def get_global(self): - """Gets the current global evaluation result. - - Returns - ------- - names : list of str - Name of the metrics. - values : list of float - Value of the evaluations. - """ - if self._has_global_stats: - if self.global_num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, self.global_sum_metric / self.global_num_inst) - else: - return self.get() - def get_name_value(self): """Returns zipped name and value pairs. @@ -204,24 +178,6 @@ def get_name_value(self): value = [value] return list(zip(name, value)) - def get_global_name_value(self): - """Returns zipped name and value pairs for global results. - - Returns - ------- - list of tuples - A (name, value) tuple list. - """ - if self._has_global_stats: - name, value = self.get_global() - if not isinstance(name, list): - name = [name] - if not isinstance(value, list): - value = [value] - return list(zip(name, value)) - else: - return self.get_name_value() - # pylint: disable=invalid-name register = registry.get_register_func(EvalMetric, 'metric') alias = registry.get_alias_func(EvalMetric, 'metric') @@ -306,8 +262,7 @@ class CompositeEvalMetric(EvalMetric): def __init__(self, metrics=None, name='composite', output_names=None, label_names=None): super(CompositeEvalMetric, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) if metrics is None: metrics = [] self.metrics = [create(i) for i in metrics] @@ -369,14 +324,6 @@ def reset(self): except AttributeError: pass - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" - try: - for metric in self.metrics: - metric.reset_local() - except AttributeError: - pass - def get(self): """Returns the current evaluation result. 
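Editor's note: with the separate global counters and their accessors (`has_global_stats`, `reset_local`, `get_global`) removed above, epoch-level numbers come from simply not resetting a metric between batches. A minimal usage sketch, assuming an MXNet build that includes this patch series (where the API lives under `mx.gluon.metric`):

```python
import mxnet as mx

# Keep one metric running for the whole epoch and a second one that is reset
# after every batch; together they replace the old local/global split.
epoch_acc = mx.gluon.metric.Accuracy()
batch_acc = mx.gluon.metric.Accuracy()
batches = [(mx.nd.array([0, 1, 1]), mx.nd.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])),
           (mx.nd.array([1, 0]),    mx.nd.array([[0.1, 0.9], [0.7, 0.3]]))]
for label, pred in batches:
    epoch_acc.update([label], [pred])
    batch_acc.update([label], [pred])
    print('batch :', batch_acc.get())
    batch_acc.reset()
print('epoch :', epoch_acc.get())
```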
@@ -399,28 +346,6 @@ def get(self): values.extend(value) return (names, values) - def get_global(self): - """Returns the current evaluation result. - - Returns - ------- - names : list of str - Name of the metrics. - values : list of float - Value of the evaluations. - """ - names = [] - values = [] - for metric in self.metrics: - name, value = metric.get_global() - if isinstance(name, string_types): - name = [name] - if isinstance(value, numeric_types): - value = [value] - names.extend(name) - values.extend(value) - return (names, values) - def get_config(self): config = super(CompositeEvalMetric, self).get_config() config.update({'metrics': [i.get_config() for i in self.metrics]}) @@ -469,8 +394,7 @@ def __init__(self, axis=1, name='accuracy', output_names=None, label_names=None): super(Accuracy, self).__init__( name, axis=axis, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.axis = axis def update(self, labels, preds): @@ -500,9 +424,7 @@ def update(self, labels, preds): num_correct = (pred_label == label).sum() self.sum_metric += num_correct - self.global_sum_metric += num_correct self.num_inst += len(pred_label) - self.global_num_inst += len(pred_label) @register @@ -545,8 +467,7 @@ def __init__(self, top_k=1, name='top_k_accuracy', output_names=None, label_names=None): super(TopKAccuracy, self).__init__( name, top_k=top_k, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.top_k = top_k assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1' self.name += '_%d' % self.top_k @@ -578,16 +499,13 @@ def update(self, labels, preds): if num_dims == 1: num_correct = (pred_label.flat == label.flat).sum() self.sum_metric += num_correct - self.global_sum_metric += num_correct elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) for j in range(top_k): num_correct = (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() self.sum_metric += num_correct - self.global_sum_metric += num_correct self.num_inst += num_samples - self.global_num_inst += num_samples class _BinaryClassificationMetrics(object): @@ -613,10 +531,6 @@ def __init__(self, threshold=0.5, beta=1): self.false_negatives = 0 self.false_positives = 0 self.true_negatives = 0 - self.global_true_positives = 0 - self.global_false_negatives = 0 - self.global_false_positives = 0 - self.global_true_negatives = 0 def update_binary_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. @@ -657,13 +571,9 @@ def update_binary_stats(self, label, pred): false_neg = (pred_false * label_true).sum() true_neg = (pred_false * label_false).sum() self.true_positives += true_pos - self.global_true_positives += true_pos self.false_positives += false_pos - self.global_false_positives += false_pos self.false_negatives += false_neg - self.global_false_negatives += false_neg self.true_negatives += true_neg - self.global_true_negatives += true_neg @property def precision(self): @@ -672,13 +582,6 @@ def precision(self): else: return 0. - @property - def global_precision(self): - if self.global_true_positives + self.global_false_positives > 0: - return float(self.global_true_positives) / (self.global_true_positives + self.global_false_positives) - else: - return 0. 
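Editor's note: as a quick reference for the bookkeeping in this helper class, a self-contained sketch of what the `precision` property above and the `recall`/`fscore` properties that follow derive from the accumulated counts (the counts below are made up for illustration):

```python
# Illustrative only: statistics exposed by the helper class, computed from
# hypothetical accumulated counts.
tp, fp, fn = 8.0, 2.0, 4.0
precision = tp / (tp + fp) if tp + fp > 0 else 0.0       # 0.8
recall    = tp / (tp + fn) if tp + fn > 0 else 0.0       # 0.666...
f1        = (2 * precision * recall / (precision + recall)
             if precision + recall > 0 else 0.0)         # 0.727...
print(precision, recall, f1)
```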
- @property def recall(self): if self.true_positives + self.false_negatives > 0: @@ -686,13 +589,6 @@ def recall(self): else: return 0. - @property - def global_recall(self): - if self.global_true_positives + self.global_false_negatives > 0: - return float(self.global_true_positives) / (self.global_true_positives + self.global_false_negatives) - else: - return 0. - @property def fscore(self): if self.precision + self.recall > 0: @@ -700,31 +596,15 @@ def fscore(self): else: return 0. - @property - def global_fscore(self): - if self.global_precision + self.global_recall > 0: - return (1 + self.beta ** 2) * self.global_precision * self.global_recall / (self.beta ** 2 * self.global_precision + self.global_recall) - else: - return 0. - - def matthewscc(self, use_global=False): + def matthewscc(self): """Calculate the Matthew's Correlation Coefficent""" - if use_global: - if not self.global_total_examples: - return 0. - - true_pos = float(self.global_true_positives) - false_pos = float(self.global_false_positives) - false_neg = float(self.global_false_negatives) - true_neg = float(self.global_true_negatives) - else: - if not self.total_examples: - return 0. + if not self.total_examples: + return 0. - true_pos = float(self.true_positives) - false_pos = float(self.false_positives) - false_neg = float(self.false_negatives) - true_neg = float(self.true_negatives) + true_pos = float(self.true_positives) + false_pos = float(self.false_positives) + false_neg = float(self.false_negatives) + true_neg = float(self.true_negatives) terms = [(true_pos + false_pos), (true_pos + false_neg), @@ -740,11 +620,6 @@ def total_examples(self): return self.false_negatives + self.false_positives + \ self.true_negatives + self.true_positives - @property - def global_total_examples(self): - return self.global_false_negatives + self.global_false_positives + \ - self.global_true_negatives + self.global_true_positives - @property def accuracy(self): if self.total_examples > 0: @@ -752,28 +627,11 @@ def accuracy(self): else: return 0. - @property - def global_accuracy(self): - if self.global_total_examples > 0: - return float(self.global_true_positives + self.global_true_negatives) / self.global_total_examples - else: - return 0. - - def local_reset_stats(self): - self.false_positives = 0 - self.false_negatives = 0 - self.true_positives = 0 - self.true_negatives = 0 - def reset_stats(self): self.false_positives = 0 self.false_negatives = 0 self.true_positives = 0 self.true_negatives = 0 - self.global_false_positives = 0 - self.global_false_negatives = 0 - self.global_true_positives = 0 - self.global_true_negatives = 0 @register @@ -826,8 +684,7 @@ def __init__(self, name='f1', self.average = average self.metrics = _BinaryClassificationMetrics(threshold=threshold) EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. 
@@ -847,29 +704,18 @@ def update(self, labels, preds): if self.average == "macro": self.sum_metric += self.metrics.fscore - self.global_sum_metric += self.metrics.global_fscore self.num_inst += 1 - self.global_num_inst += 1 self.metrics.reset_stats() else: self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.global_sum_metric = self.metrics.global_fscore * self.metrics.global_total_examples self.num_inst = self.metrics.total_examples - self.global_num_inst = self.metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 self.metrics.reset_stats() - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0 - self.metrics.local_reset_stats() @register class Fbeta(EvalMetric): @@ -923,8 +769,7 @@ def __init__(self, name='fbeta', self.average = average self.metrics = _BinaryClassificationMetrics(threshold=threshold, beta=beta) EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -944,30 +789,18 @@ def update(self, labels, preds): if self.average == "macro": self.sum_metric += self.metrics.fscore - self.global_sum_metric += self.metrics.global_fscore self.num_inst += 1 - self.global_num_inst += 1 self.metrics.reset_stats() else: self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.global_sum_metric = self.metrics.global_fscore * self.metrics.global_total_examples self.num_inst = self.metrics.total_examples - self.global_num_inst = self.metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 self.metrics.reset_stats() - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0 - self.metrics.local_reset_stats() - @register class BinaryAccuracy(EvalMetric): @@ -1000,8 +833,7 @@ def __init__(self, name='binary_accuracy', output_names=None, label_names=None, threshold=0.5): self.metrics = _BinaryClassificationMetrics(threshold=threshold) EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1020,24 +852,14 @@ def update(self, labels, preds): self.metrics.update_binary_stats(label, pred) self.sum_metric = self.metrics.accuracy * self.metrics.total_examples - self.global_sum_metric = self.metrics.global_accuracy * self.metrics.global_total_examples self.num_inst = self.metrics.total_examples - self.global_num_inst = self.metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 self.metrics.reset_stats() - - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. 
- self.num_inst = 0 - self.metrics.local_reset_stats() - + @register class MCC(EvalMetric): @@ -1107,8 +929,7 @@ def __init__(self, name='mcc', self._average = average self._metrics = _BinaryClassificationMetrics() EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1128,31 +949,18 @@ def update(self, labels, preds): if self._average == "macro": self.sum_metric += self._metrics.matthewscc() - self.global_sum_metric += self._metrics.matthewscc(use_global=True) self.num_inst += 1 - self.global_num_inst += 1 self._metrics.reset_stats() else: self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples - self.global_sum_metric = self._metrics.matthewscc(use_global=True) * \ - self._metrics.global_total_examples self.num_inst = self._metrics.total_examples - self.global_num_inst = self._metrics.global_total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0. - self.global_sum_metric = 0. - self.global_num_inst = 0. self._metrics.reset_stats() - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0. - self._metrics.local_reset_stats() - @register class Perplexity(EvalMetric): @@ -1212,8 +1020,7 @@ def __init__(self, ignore_label, axis=-1, name='perplexity', output_names=None, label_names=None): super(Perplexity, self).__init__( name, ignore_label=ignore_label, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.ignore_label = ignore_label self.axis = axis @@ -1243,9 +1050,7 @@ def update(self, labels, preds): loss -= ndarray.sum(ndarray.log(ndarray.maximum(1e-10, pred))).asscalar() num += pred.size self.sum_metric += loss - self.global_sum_metric += loss self.num_inst += num - self.global_num_inst += num def get(self): """Returns the current evaluation result. @@ -1260,19 +1065,6 @@ def get(self): else: return (self.name, math.exp(self.sum_metric/self.num_inst)) - def get_global(self): - """Returns the current global evaluation result. - - Returns - ------- - Tuple of (str, float) - Representing name of the metric and evaluation result. 
- """ - if self.global_num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.exp(self.global_sum_metric/self.global_num_inst)) - #################### # REGRESSION METRICS #################### @@ -1314,8 +1106,7 @@ class MAE(EvalMetric): def __init__(self, name='mae', output_names=None, label_names=None, average='macro'): super(MAE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) self.average = average def update(self, labels, preds): @@ -1348,9 +1139,7 @@ def update(self, labels, preds): mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() self.sum_metric += mae - self.global_sum_metric += mae self.num_inst += num_inst - self.global_num_inst += num_inst @register @@ -1388,8 +1177,7 @@ class MSE(EvalMetric): def __init__(self, name='mse', output_names=None, label_names=None, average="macro"): super(MSE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) self.average = average def update(self, labels, preds): @@ -1421,9 +1209,7 @@ def update(self, labels, preds): num_inst = label.shape[0] mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() self.sum_metric += mse - self.global_sum_metric += mse self.num_inst += num_inst - self.global_num_inst += num_inst @register @@ -1461,8 +1247,7 @@ class RMSE(EvalMetric): def __init__(self, name='rmse', output_names=None, label_names=None, average="macro"): super(RMSE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) self.average = average def update(self, labels, preds): @@ -1494,9 +1279,7 @@ def update(self, labels, preds): num_inst = label.shape[0] rmse = numpy.sqrt(((label - pred)**2.0).reshape(num_inst, -1).mean(axis=1)).sum() self.sum_metric += rmse - self.global_sum_metric += rmse self.num_inst += num_inst - self.global_num_inst += num_inst @register @@ -1536,8 +1319,7 @@ class MeanPairwiseDistance(EvalMetric): def __init__(self, name='mpd', output_names=None, label_names=None, p=2, average="micro"): super(MeanPairwiseDistance, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) self.average = average self.p = p @@ -1561,18 +1343,16 @@ def update(self, labels, preds): label = label.reshape(label.shape[0], -1) pred = pred.reshape(pred.shape[0], -1) - pd = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) + dis = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) if self.average == "macro": - pd = pd.mean() + dis = dis.mean() num_inst = 1 else: - pd = pd.sum() + dis = dis.sum() num_inst = label.shape[0] - self.sum_metric += pd - self.global_sum_metric += pd + self.sum_metric += dis self.num_inst += num_inst - self.global_num_inst += num_inst @register @@ -1613,8 +1393,7 @@ class MeanCosineSimilarity(EvalMetric): def __init__(self, name='cos_sim', output_names=None, label_names=None, eps=1e-8, average="micro"): super(MeanCosineSimilarity, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) self.average = average self.eps = eps @@ -1651,9 +1430,7 @@ def update(self, labels, preds): sim = sim.sum() num_inst = 
numpy.prod(label.shape[:-1]) self.sum_metric += sim - self.global_sum_metric += sim self.num_inst += num_inst - self.global_num_inst += num_inst @register @@ -1697,8 +1474,7 @@ def __init__(self, eps=1e-12, name='cross-entropy', output_names=None, label_names=None): super(CrossEntropy, self).__init__( name, eps=eps, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.eps = eps def update(self, labels, preds): @@ -1724,9 +1500,7 @@ def update(self, labels, preds): prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] cross_entropy = (-numpy.log(prob + self.eps)).sum() self.sum_metric += cross_entropy - self.global_sum_metric += cross_entropy self.num_inst += label.shape[0] - self.global_num_inst += label.shape[0] @register @alias('nll_loss') @@ -1769,8 +1543,7 @@ def __init__(self, eps=1e-12, name='nll-loss', output_names=None, label_names=None): super(NegativeLogLikelihood, self).__init__( name, eps=eps, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.eps = eps def update(self, labels, preds): @@ -1796,9 +1569,8 @@ def update(self, labels, preds): prob = pred[numpy.arange(num_examples, dtype=numpy.int64), numpy.int64(label)] nll = (-numpy.log(prob + self.eps)).sum() self.sum_metric += nll - self.global_sum_metric += nll self.num_inst += num_examples - self.global_num_inst += num_examples + @register @alias('pearsonr') @@ -1838,8 +1610,7 @@ def __init__(self, name='pearsonr', output_names=None, label_names=None, average='macro'): self.average = average super(PearsonCorrelation, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) if self.average == 'micro': self.reset_micro() @@ -1855,8 +1626,6 @@ def reset_micro(self): def reset(self): self.num_inst = 0 self.sum_metric = 0.0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 if self.average == 'micro': self.reset_micro() @@ -1891,11 +1660,8 @@ def update(self, labels, preds): if self.average == 'macro': pearson_corr = numpy.corrcoef(pred, label)[0, 1] self.sum_metric += pearson_corr - self.global_sum_metric += pearson_corr self.num_inst += 1 - self.global_num_inst += 1 else: - self.global_num_inst += 1 self.num_inst += 1 self._label_nums, self._mean_l, self._sse_l = \ self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) @@ -1967,18 +1733,14 @@ class PCC(EvalMetric): ('pcc', 0.01917751877733392) """ def __init__(self, name='pcc', - output_names=None, label_names=None, - has_global_stats=True): + output_names=None, label_names=None): self.k = 2 super(PCC, self).__init__( - name=name, output_names=output_names, label_names=label_names, - has_global_stats=has_global_stats) + name=name, output_names=output_names, label_names=label_names) def _grow(self, inc): self.lcm = numpy.pad( self.lcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) - self.gcm = numpy.pad( - self.gcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) self.k += inc def _calc_mcc(self, cmat): @@ -2021,27 +1783,14 @@ def update(self, labels, preds): for i, j in zip(pred, label): bcm[i, j] += 1 self.lcm += bcm - self.gcm += bcm - self.num_inst += 1 - self.global_num_inst += 1 @property def sum_metric(self): return self._calc_mcc(self.lcm) * self.num_inst - @property - def global_sum_metric(self): - return self._calc_mcc(self.gcm) * self.global_num_inst 
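Editor's note: after this change `PCC` keeps only the local confusion matrix (`lcm`) and still reports the multiclass generalization of MCC via `_calc_mcc`. A standalone numpy sketch of that computation; the function name and the zero-denominator handling here are illustrative, not the library's exact code:

```python
import numpy as np

def mcc_from_confusion(cmat):
    # cmat[i, j]: number of samples predicted as class i whose true class is j.
    n = cmat.sum()
    x = cmat.sum(axis=1)                 # marginal totals over predicted classes
    y = cmat.sum(axis=0)                 # marginal totals over true classes
    cov_xy = np.sum(cmat.diagonal() * n - x * y)
    cov_xx = np.sum(x * (n - x))
    cov_yy = np.sum(y * (n - y))
    if cov_xx == 0 or cov_yy == 0:
        return 0.0                       # degenerate matrix; the metric may handle this differently
    return cov_xy / np.sqrt(cov_xx * cov_yy)

print(mcc_from_confusion(np.array([[5, 1], [2, 7]])))   # ~0.60 for this toy matrix
```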
- def reset(self): """Resets the internal evaluation result to initial state.""" - self.global_num_inst = 0. - self.gcm = numpy.zeros((self.k, self.k)) - self.reset_local() - - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" self.num_inst = 0. self.lcm = numpy.zeros((self.k, self.k)) @@ -2064,8 +1813,7 @@ class Loss(EvalMetric): def __init__(self, name='loss', output_names=None, label_names=None): super(Loss, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) def update(self, _, preds): @@ -2075,9 +1823,7 @@ def update(self, _, preds): for pred in preds: loss = ndarray.sum(pred).asscalar() self.sum_metric += loss - self.global_sum_metric += loss self.num_inst += pred.size - self.global_num_inst += pred.size @register @@ -2143,8 +1889,7 @@ def __init__(self, feval, name=None, allow_extra_outputs=False, super(CustomMetric, self).__init__( name, feval=feval, allow_extra_outputs=allow_extra_outputs, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self._feval = feval self._allow_extra_outputs = allow_extra_outputs @@ -2170,14 +1915,10 @@ def update(self, labels, preds): if isinstance(reval, tuple): (sum_metric, num_inst) = reval self.sum_metric += sum_metric - self.global_sum_metric += sum_metric self.num_inst += num_inst - self.global_num_inst += num_inst else: self.sum_metric += reval - self.global_sum_metric += reval self.num_inst += 1 - self.global_num_inst += 1 def get_config(self): raise NotImplementedError("CustomMetric cannot be serialized") From c06f3635e63a235f9c0e3e2cae55ebe5595fa792 Mon Sep 17 00:00:00 2001 From: acphile Date: Mon, 20 Apr 2020 04:12:21 +0000 Subject: [PATCH 06/24] remove macro support --- python/mxnet/gluon/metric.py | 232 ++++++++--------------------------- 1 file changed, 53 insertions(+), 179 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 953a57894827..07e759e4d1f0 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -718,7 +718,7 @@ def reset(self): @register -class Fbeta(EvalMetric): +class Fbeta(F1): """Computes the Fbeta score of a binary classification problem. The Fbeta score is equivalent to harmonic mean of the precision and recall, @@ -766,41 +766,11 @@ class Fbeta(EvalMetric): def __init__(self, name='fbeta', output_names=None, label_names=None, beta=1, threshold=0.5, average="macro"): - self.average = average + super(Fbeta, self).__init__(name=name, + output_names=output_names, label_names=label_names, + threshold=threshold, average=average) self.metrics = _BinaryClassificationMetrics(threshold=threshold, beta=beta) - EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names) - - def update(self, labels, preds): - """Updates the internal evaluation result. - - Parameters - ---------- - labels : list of `NDArray` - The labels of the data. - - preds : list of `NDArray` - Predicted values. 
- """ - labels, preds = check_label_shapes(labels, preds, True) - - for label, pred in zip(labels, preds): - self.metrics.update_binary_stats(label, pred) - - if self.average == "macro": - self.sum_metric += self.metrics.fscore - self.num_inst += 1 - self.metrics.reset_stats() - else: - self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.num_inst = self.metrics.total_examples - - def reset(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0 - self.metrics.reset_stats() - + @register class BinaryAccuracy(EvalMetric): @@ -892,10 +862,6 @@ class MCC(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average the MCC for each batch. - "micro": compute a single MCC across all batches. Examples -------- @@ -925,8 +891,7 @@ class MCC(EvalMetric): """ def __init__(self, name='mcc', - output_names=None, label_names=None, average="macro"): - self._average = average + output_names=None, label_names=None): self._metrics = _BinaryClassificationMetrics() EvalMetric.__init__(self, name=name, output_names=output_names, label_names=label_names) @@ -947,13 +912,8 @@ def update(self, labels, preds): for label, pred in zip(labels, preds): self._metrics.update_binary_stats(label, pred) - if self._average == "macro": - self.sum_metric += self._metrics.matthewscc() - self.num_inst += 1 - self._metrics.reset_stats() - else: - self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples - self.num_inst = self._metrics.total_examples + self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples + self.num_inst = self._metrics.total_examples def reset(self): """Resets the internal evaluation result to initial state.""" @@ -1089,14 +1049,11 @@ class MAE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average MAE results for each batch. - "micro": compute a single MAE result across all batches. + Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] >>> mean_absolute_error = mx.gluon.metric.MAE() >>> mean_absolute_error.update(labels = labels, preds = predicts) >>> print mean_absolute_error.get() @@ -1104,10 +1061,9 @@ class MAE(EvalMetric): """ def __init__(self, name='mae', - output_names=None, label_names=None, average='macro'): + output_names=None, label_names=None): super(MAE, self).__init__( name, output_names=output_names, label_names=label_names) - self.average = average def update(self, labels, preds): """Updates the internal evaluation result. 
@@ -1126,17 +1082,8 @@ def update(self, labels, preds): label = label.asnumpy() pred = pred.asnumpy() - if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) - if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) - - if self.average == "macro": - mae = numpy.abs(label - pred).mean() - num_inst = 1 - else: - num_inst = label.shape[0] - mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() + num_inst = label.shape[0] + mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() self.sum_metric += mae self.num_inst += num_inst @@ -1161,24 +1108,20 @@ class MSE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average MSE results for each batch. - "micro": compute a single MSE result across all batches. + Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] >>> mean_squared_error = mx.gluon.metric.MSE() >>> mean_squared_error.update(labels = labels, preds = predicts) >>> print mean_squared_error.get() ('mse', 0.375) """ def __init__(self, name='mse', - output_names=None, label_names=None, average="macro"): + output_names=None, label_names=None): super(MSE, self).__init__( name, output_names=output_names, label_names=label_names) - self.average = average def update(self, labels, preds): """Updates the internal evaluation result. @@ -1197,23 +1140,15 @@ def update(self, labels, preds): label = label.asnumpy() pred = pred.asnumpy() - if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) - if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) - - if self.average == "macro": - mse = ((label - pred)**2.0).mean() - num_inst = 1 - else: - num_inst = label.shape[0] - mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() + num_inst = label.shape[0] + mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() + self.sum_metric += mse self.num_inst += num_inst @register -class RMSE(EvalMetric): +class RMSE(MSE): """Computes Root Mean Squred Error (RMSE) loss. The root mean squared error is given by @@ -1231,55 +1166,26 @@ class RMSE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average RMSE results for each batch. - "micro": compute a single RSME result across all batches. 
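Editor's note: the regression metrics now aggregate only in "micro" fashion: each sample's error (averaged over that sample's elements) is summed, and the total is divided by the number of samples in `get()`. `RMSE` reuses `MSE`'s accumulator and applies the square root only at reporting time, as the following hunk shows. A small numpy check of that equivalence, reusing the values from the MAE/MSE docstring examples above split into two batches:

```python
import numpy as np

# Accumulate per-sample mean squared errors across two batches, then take the
# square root only when reporting, mirroring the refactored RMSE.
batches = [(np.array([2.5, 0.0]), np.array([3.0, -0.5])),
           (np.array([2.0, 8.0]), np.array([2.0, 7.0]))]
sum_metric, num_inst = 0.0, 0
for label, pred in batches:
    sum_metric += ((label - pred) ** 2).reshape(label.shape[0], -1).mean(axis=-1).sum()
    num_inst += label.shape[0]
print(np.sqrt(sum_metric / num_inst))   # ~0.6124, i.e. sqrt(0.375) from the MSE example
```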
+ Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] >>> root_mean_squared_error = mx.gluon.metric.RMSE() >>> root_mean_squared_error.update(labels = labels, preds = predicts) >>> print root_mean_squared_error.get() ('rmse', 0.612372457981) """ def __init__(self, name='rmse', - output_names=None, label_names=None, average="macro"): + output_names=None, label_names=None): super(RMSE, self).__init__( name, output_names=output_names, label_names=label_names) - self.average = average - def update(self, labels, preds): - """Updates the internal evaluation result. - - Parameters - ---------- - labels : list of `NDArray` - The labels of the data. - - preds : list of `NDArray` - Predicted values. - """ - labels, preds = check_label_shapes(labels, preds, True) - - for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() - - if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) - if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) - - if self.average == "macro": - rmse = numpy.sqrt(((label - pred)**2.0).mean()) - num_inst = 1 - else: - num_inst = label.shape[0] - rmse = numpy.sqrt(((label - pred)**2.0).reshape(num_inst, -1).mean(axis=1)).sum() - self.sum_metric += rmse - self.num_inst += num_inst + def get(self): + if self.num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.sqrt(self.sum_metric / self.num_inst)) @register @@ -1303,10 +1209,7 @@ class MeanPairwiseDistance(EvalMetric): By default include all labels. p : float, default 2 calculating distance using the p-norm - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average MPD results for each batch. - "micro": compute a single MPD result across all batches. + Examples -------- >>> predicts = [mx.nd.array([[1., 2.], [3., 4.]])] @@ -1317,10 +1220,9 @@ class MeanPairwiseDistance(EvalMetric): ('mpd', 2.1180338859558105) """ def __init__(self, name='mpd', - output_names=None, label_names=None, p=2, average="micro"): + output_names=None, label_names=None, p=2): super(MeanPairwiseDistance, self).__init__( name, output_names=output_names, label_names=label_names) - self.average = average self.p = p def update(self, labels, preds): @@ -1344,12 +1246,8 @@ def update(self, labels, preds): pred = pred.reshape(pred.shape[0], -1) dis = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) - if self.average == "macro": - dis = dis.mean() - num_inst = 1 - else: - dis = dis.sum() - num_inst = label.shape[0] + dis = dis.sum() + num_inst = label.shape[0] self.sum_metric += dis self.num_inst += num_inst @@ -1377,10 +1275,6 @@ class MeanCosineSimilarity(EvalMetric): By default include all labels. eps : float, default 1e-8 small vale to avoid division by zero. - average : str, default 'micro' - Strategy to be used for aggregating across mini-batches. - "macro": average RMSE results for each batch. - "micro": compute a single RSME result across all batches. 
Examples -------- >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] @@ -1391,10 +1285,9 @@ class MeanCosineSimilarity(EvalMetric): ('cos_sim', 0.8) """ def __init__(self, name='cos_sim', - output_names=None, label_names=None, eps=1e-8, average="micro"): + output_names=None, label_names=None, eps=1e-8): super(MeanCosineSimilarity, self).__init__( name, output_names=output_names, label_names=label_names) - self.average = average self.eps = eps def update(self, labels, preds): @@ -1423,12 +1316,8 @@ def update(self, labels, preds): n_p = numpy.linalg.norm(pred, axis=-1) n_l = numpy.linalg.norm(label, axis=-1) sim = sim / numpy.maximum(n_l * n_p, self.eps) - if self.average == "macro": - sim = sim.mean() - num_inst = 1 - else: - sim = sim.sum() - num_inst = numpy.prod(label.shape[:-1]) + sim = sim.sum() + num_inst = numpy.prod(label.shape[:-1]) self.sum_metric += sim self.num_inst += num_inst @@ -1592,10 +1481,6 @@ class PearsonCorrelation(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average the pearsonr scores for each batch. - "micro": compute a single pearsonr score across all batches. Examples -------- @@ -1607,14 +1492,12 @@ class PearsonCorrelation(EvalMetric): ('pearsonr', 0.42163704544016178) """ def __init__(self, name='pearsonr', - output_names=None, label_names=None, average='macro'): - self.average = average + output_names=None, label_names=None): super(PearsonCorrelation, self).__init__( name, output_names=output_names, label_names=label_names) - if self.average == 'micro': - self.reset_micro() + self.reset() - def reset_micro(self): + def reset(self): self._sse_p = 0 self._mean_p = 0 self._sse_l = 0 @@ -1622,12 +1505,9 @@ def reset_micro(self): self._pred_nums = 0 self._label_nums = 0 self._conv = 0 - - def reset(self): + self.num_inst = 0 self.sum_metric = 0.0 - if self.average == 'micro': - self.reset_micro() def update_variance(self, new_values, *aggregate): #Welford's online algorithm for variance update @@ -1657,27 +1537,21 @@ def update(self, labels, preds): check_label_shapes(label, pred, False, True) label = label.asnumpy().ravel().astype(numpy.float64) pred = pred.asnumpy().ravel().astype(numpy.float64) - if self.average == 'macro': - pearson_corr = numpy.corrcoef(pred, label)[0, 1] - self.sum_metric += pearson_corr - self.num_inst += 1 - else: - self.num_inst += 1 - self._label_nums, self._mean_l, self._sse_l = \ - self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) - self.update_cov(label, pred) - self._pred_nums, self._mean_p, self._sse_p = \ - self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) + + self.num_inst += 1 + self._label_nums, self._mean_l, self._sse_l = \ + self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) + self.update_cov(label, pred) + self._pred_nums, self._mean_p, self._sse_p = \ + self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) def get(self): if self.num_inst == 0: return (self.name, float('nan')) - if self.average == 'macro': - return (self.name, self.sum_metric / self.num_inst) - else: - n = self._label_nums - pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) - return (self.name, pearsonr) + + n = self._label_nums + pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / 
(n - 1))) + return (self.name, pearsonr) @register class PCC(EvalMetric): From 6beba21647e4a4a04a804e167504bbf66e40ff4d Mon Sep 17 00:00:00 2001 From: acphile Date: Mon, 20 Apr 2020 08:53:10 +0000 Subject: [PATCH 07/24] rewrite BinaryAccuracy --- python/mxnet/gluon/metric.py | 64 +++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 07e759e4d1f0..95d4340b69bf 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -508,6 +508,28 @@ def update(self, labels, preds): self.num_inst += num_samples +def predict_with_threshold(pred, threshold=0.5): + """Do thresholding of predictions in binaray and multilabel cases. + + Parameters + ---------- + preds : ndarray + predictions in shape of (batch_size, ...) or (batch_size, ..., num_categories) + + preds : float or ndarray + threshold(s) in shape of float or (num_categories) + """ + if isinstance(threshold, float): + return pred > threshold + elif isinstance(threshold, numpy.ndarray) or isinstance(threshold, ndarray.ndarray.NDArray): + num_classes = pred.shape[-1] + assert threshold.shape[-1] == num_classes, \ + "shape mismatch: %s vs. %s"%(pred.shape[-1], threshold.shape[-1]) + return pred > threshold + else: + raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) + + class _BinaryClassificationMetrics(object): """Private container class for classification metric statistics. @@ -620,13 +642,6 @@ def total_examples(self): return self.false_negatives + self.false_positives + \ self.true_negatives + self.true_positives - @property - def accuracy(self): - if self.total_examples > 0: - return float(self.true_positives + self.true_negatives) / self.total_examples - else: - return 0. - def reset_stats(self): self.false_positives = 0 self.false_negatives = 0 @@ -680,7 +695,7 @@ class F1(EvalMetric): """ def __init__(self, name='f1', - output_names=None, label_names=None, threshold=0.5, average="macro"): + output_names=None, label_names=None, threshold=0.5, average="micro"): self.average = average self.metrics = _BinaryClassificationMetrics(threshold=threshold) EvalMetric.__init__(self, name=name, @@ -765,7 +780,7 @@ class Fbeta(F1): """ def __init__(self, name='fbeta', - output_names=None, label_names=None, beta=1, threshold=0.5, average="macro"): + output_names=None, label_names=None, beta=1, threshold=0.5, average="micro"): super(Fbeta, self).__init__(name=name, output_names=output_names, label_names=label_names, threshold=threshold, average=average) @@ -774,7 +789,7 @@ def __init__(self, name='fbeta', @register class BinaryAccuracy(EvalMetric): - """Computes the accuracy of a binary classification problem. + """Computes the accuracy of a binary or multilabel classification problem. Parameters ---------- @@ -786,7 +801,7 @@ class BinaryAccuracy(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - threshold : float, default 0.5 + threshold : float or ndarray, default 0.5 threshold for deciding whether the predictions are positive or negative. 
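Editor's note: the next hunk rewrites `BinaryAccuracy` to threshold raw confidences directly through `predict_with_threshold`, which also accepts a per-class threshold vector for multilabel targets. A hedged usage sketch, assuming an MXNet build that includes this patch series; the per-class thresholds rely on broadcasting in the comparison, and the numbers are made up:

```python
import mxnet as mx

# Multilabel targets: one 0/1 entry per class, thresholded with per-class cut-offs.
metric = mx.gluon.metric.BinaryAccuracy(threshold=mx.nd.array([0.3, 0.5, 0.7]))
label = mx.nd.array([[1, 0, 1],
                     [0, 1, 0]])
pred  = mx.nd.array([[0.4, 0.6, 0.9],    # -> [1, 1, 1] after thresholding
                     [0.2, 0.4, 0.1]])   # -> [0, 0, 0]
metric.update([label], [pred])
print(metric.get())   # fraction of per-class decisions matching the labels (4/6 here)
```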
Examples @@ -801,7 +816,7 @@ class BinaryAccuracy(EvalMetric): def __init__(self, name='binary_accuracy', output_names=None, label_names=None, threshold=0.5): - self.metrics = _BinaryClassificationMetrics(threshold=threshold) + self.threshold = threshold EvalMetric.__init__(self, name=name, output_names=output_names, label_names=label_names) @@ -811,24 +826,27 @@ def update(self, labels, preds): Parameters ---------- labels : list of `NDArray` - The labels of the data. + Each label denotes positive/negative for each class. preds : list of `NDArray` - Predicted values. + Each prediction value is a confidence value of being positive for each class. """ labels, preds = check_label_shapes(labels, preds, True) - for label, pred in zip(labels, preds): - self.metrics.update_binary_stats(label, pred) + for label, pred_label in zip(labels, preds): + pred_label = predict_with_threshold(pred_label, self.threshold) + + pred_label = pred_label.asnumpy().astype('int32') + label = label.asnumpy().astype('int32') + # flatten before checking shapes to avoid shape miss match + label = label.flat + pred_label = pred_label.flat - self.sum_metric = self.metrics.accuracy * self.metrics.total_examples - self.num_inst = self.metrics.total_examples + check_label_shapes(label, pred_label) - def reset(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0 - self.metrics.reset_stats() + num_correct = (pred_label == label).sum() + self.sum_metric += num_correct + self.num_inst += len(pred_label) @register From b1fc42b9227826934545da0335a14422f3c5a230 Mon Sep 17 00:00:00 2001 From: acphile Date: Tue, 21 Apr 2020 05:40:38 +0000 Subject: [PATCH 08/24] extend F1 to multiclass/multilabel --- python/mxnet/gluon/metric.py | 189 +++++++++++++++++++++++------------ 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 95d4340b69bf..8214ad86ae67 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -509,7 +509,7 @@ def update(self, labels, preds): def predict_with_threshold(pred, threshold=0.5): - """Do thresholding of predictions in binaray and multilabel cases. + """Do thresholding of predictions in binary and multilabel cases. Parameters ---------- @@ -529,8 +529,12 @@ def predict_with_threshold(pred, threshold=0.5): else: raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) + +def one_hot(x, m): + return (numpy.arange(m)==x[:,None]).astype('int32') -class _BinaryClassificationMetrics(object): + +class _ClassificationMetrics(object): """Private container class for classification metric statistics. True/false positive and true/false negative counts are sufficient statistics for various classification metrics. @@ -539,6 +543,10 @@ class _BinaryClassificationMetrics(object): Parameters ---------- + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. beta : float, default 1 weight of precision in harmonic mean. 
threshold : float, default 0.5 @@ -546,15 +554,23 @@ class _BinaryClassificationMetrics(object): """ - def __init__(self, threshold=0.5, beta=1): + def __init__(self, class_type="binary", threshold=0.5, beta=1): + self.class_type = class_type self.threshold = threshold self.beta = beta - self.true_positives = 0 - self.false_negatives = 0 - self.false_positives = 0 - self.true_negatives = 0 - - def update_binary_stats(self, label, pred): + self.reset_stats() + + def _set(self, num): + if self.num_classes is None: + self.num_classes = num + self.true_positives = numpy.zeros(num) + self.false_negatives = numpy.zeros(num) + self.false_positives = numpy.zeros(num) + self.true_negatives = numpy.zeros(num) + else: + assert self.num_classes == num, "Input number of classes has changed from {} to {}".format(self.num_classes, num) + + def update_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. Parameters @@ -567,31 +583,46 @@ def update_binary_stats(self, label, pred): """ pred = pred.asnumpy() label = label.asnumpy().astype('int32') - if len(pred.shape) == 1: # assume each value refers to confidence(positive) - pass - elif pred.shape[-1] > 2: - raise ValueError("%s currently only supports binary classification." - % self.__class__.__name__) - elif pred.shape[-1] == 1: # classify positive when confidence(positive) > threshold - pred = pred.flat + if self.class_type == "binary": + self._set(1) + if len(numpy.unique(label)) > 2: + raise ValueError("Wrong label for binary classification.") + if pred.shape == label.shape: + pass + elif pred.shape[-1] > 2: + raise ValueError("The shape of prediction {} is wrong for binary classification.".format(pred.shape)) + elif pred.shape[-1] == 2: + pred = pred.reshape(-1, 2)[:, 1] + pred_label = predict_with_threshold(pred, self.threshold).flat + label = label.flat + + elif self.class_type == "multiclass": + num = pred.shape[-1] + self._set(num) + assert label.max() < num, "pred contains fewer classes than label!" + pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) + label = one_hot(label.reshape(-1), num) + + elif self.class_type == "multilabel": + num = pred.shape[-1] + self._set(num) + assert pred.shape == label.shape, "The shape of label should be same as that of prediction for multilabel classification." + pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) + label = label.reshape(-1, num) else: - pred = pred.reshape(-1, 2)[:, 1] - pred_label = pred > self.threshold - label = label.flat - + raise ValueError("Wrong class_type {}! Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) + check_label_shapes(label, pred_label) - if len(numpy.unique(label)) > 2: - raise ValueError("%s currently only supports binary classification." 
- % self.__class__.__name__) + pred_true = (pred_label == 1) pred_false = 1 - pred_true label_true = (label == 1) label_false = 1 - label_true - true_pos = (pred_true * label_true).sum() - false_pos = (pred_true * label_false).sum() - false_neg = (pred_false * label_true).sum() - true_neg = (pred_false * label_false).sum() + true_pos = (pred_true * label_true).sum(0) + false_pos = (pred_true * label_false).sum(0) + false_neg = (pred_false * label_true).sum(0) + true_neg = (pred_false * label_false).sum(0) self.true_positives += true_pos self.false_positives += false_pos self.false_negatives += false_neg @@ -599,25 +630,44 @@ def update_binary_stats(self, label, pred): @property def precision(self): - if self.true_positives + self.false_positives > 0: - return float(self.true_positives) / (self.true_positives + self.false_positives) + if self.num_classes is not None: + return self.true_positives / numpy.maximum(self.true_positives + self.false_positives, 1e-12) else: return 0. + @property + def global_precision(self): + if self.num_classes is not None: + return self.true_positives.sum() / numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) + else: + return 0. + @property def recall(self): - if self.true_positives + self.false_negatives > 0: - return float(self.true_positives) / (self.true_positives + self.false_negatives) + if self.num_classes is not None: + return self.true_positives / numpy.maximum(self.true_positives + self.false_negatives, 1e-12) else: return 0. @property - def fscore(self): - if self.precision + self.recall > 0: - return (1 + self.beta ** 2) * self.precision * self.recall / (self.beta ** 2 * self.precision + self.recall) + def global_recall(self): + if self.num_classes is not None: + return self.true_positives.sum() / numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) else: return 0. + + @property + def fscore(self): + return (1 + self.beta ** 2) * self.precision * self.recall / numpy.maximum(self.beta ** 2 * self.precision + self.recall, 1e-12) + @property + def global_fscore(self): + if self.global_precision + self.global_recall > 0: + return (1 + self.beta ** 2) * self.global_precision * self.global_recall / \ + (self.beta ** 2 * self.global_precision + self.global_recall) + else: + return 0. + def matthewscc(self): """Calculate the Matthew's Correlation Coefficent""" if not self.total_examples: @@ -639,14 +689,17 @@ def matthewscc(self): @property def total_examples(self): - return self.false_negatives + self.false_positives + \ - self.true_negatives + self.true_positives + if self.num_classes is None: + return 0 + return self.false_negatives[0] + self.false_positives[0] + \ + self.true_negatives[0] + self.true_positives[0] def reset_stats(self): - self.false_positives = 0 - self.false_negatives = 0 - self.true_positives = 0 - self.true_negatives = 0 + self.num_classes = None + self.true_positives = None + self.false_negatives = None + self.false_positives = None + self.true_negatives = None @register @@ -677,12 +730,17 @@ class F1(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. threshold : float, default 0.5 threshold for postive confidence value. 
- average : str, default 'macro' + average : str, default 'micro' Strategy to be used for aggregating across mini-batches. - "macro": average the F1 scores for each batch. - "micro": compute a single F1 score across all batches. + "macro": Calculate metrics for each label and return unweighted mean of f1. + "micro": Calculate metrics globally by counting the total true positives, false negatives and false positives. + None: Return f1 scores for each class (numpy.ndarray) . Examples -------- @@ -695,9 +753,9 @@ class F1(EvalMetric): """ def __init__(self, name='f1', - output_names=None, label_names=None, threshold=0.5, average="micro"): + output_names=None, label_names=None, class_type="binary", threshold=0.5, average="micro"): self.average = average - self.metrics = _BinaryClassificationMetrics(threshold=threshold) + self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold) EvalMetric.__init__(self, name=name, output_names=output_names, label_names=label_names) @@ -715,16 +773,16 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self.metrics.update_binary_stats(label, pred) + self.metrics.update_stats(label, pred) - if self.average == "macro": - self.sum_metric += self.metrics.fscore - self.num_inst += 1 - self.metrics.reset_stats() + if self.average == "micro": + self.sum_metric = self.metrics.global_fscore * self.metrics.total_examples + elif self.average == "macro": + self.sum_metric = self.metrics.fscore.mean() * self.metrics.total_examples else: - self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.num_inst = self.metrics.total_examples - + self.sum_metric = self.metrics.fscore * self.metrics.total_examples + self.num_inst = self.metrics.total_examples + def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. @@ -760,14 +818,19 @@ class Fbeta(F1): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. beta : float, default 1 - weight of precision in harmonic mean. + weight of precision in harmonic mean. threshold : float, default 0.5 - threshold for deciding whether the predictions are positive or negative. - average : str, default 'macro' + threshold for postive confidence value. + average : str, default 'micro' Strategy to be used for aggregating across mini-batches. - "macro": average the F1 scores for each batch. - "micro": compute a single F1 score across all batches. + "macro": Calculate metrics for each label and return unweighted mean of f1. + "micro": Calculate metrics globally by counting the total true positives, false negatives and false positives. + None: Return f1 scores for each class. 
Examples -------- @@ -780,11 +843,11 @@ class Fbeta(F1): """ def __init__(self, name='fbeta', - output_names=None, label_names=None, beta=1, threshold=0.5, average="micro"): + output_names=None, label_names=None, class_type="binary", beta=1, threshold=0.5, average="micro"): super(Fbeta, self).__init__(name=name, output_names=output_names, label_names=label_names, - threshold=threshold, average=average) - self.metrics = _BinaryClassificationMetrics(threshold=threshold, beta=beta) + class_type=class_type, threshold=threshold, average=average) + self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold, beta=beta) @register @@ -910,7 +973,7 @@ class MCC(EvalMetric): def __init__(self, name='mcc', output_names=None, label_names=None): - self._metrics = _BinaryClassificationMetrics() + self._metrics = _ClassificationMetrics() EvalMetric.__init__(self, name=name, output_names=output_names, label_names=label_names) @@ -928,7 +991,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self._metrics.update_binary_stats(label, pred) + self._metrics.update_stats(label, pred) self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples self.num_inst = self._metrics.total_examples From 4b091b088916f9bf74411d26384b36b18f790fd1 Mon Sep 17 00:00:00 2001 From: acphile Date: Tue, 21 Apr 2020 07:04:49 +0000 Subject: [PATCH 09/24] add tests for new F1, remove global tests --- tests/python/unittest/test_metric.py | 183 ++++++++++++--------------- 1 file changed, 79 insertions(+), 104 deletions(-) diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index 3408dd503d59..81f57f0eed6d 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -43,83 +43,6 @@ def test_metrics(): composite = mx.gluon.metric.create(['acc', 'f1']) check_metric(composite) -def _check_global_metric(metric, *args, **kwargs): - def _create_pred_label(): - if use_same_shape: - pred = mx.nd.random.uniform(0, 1, shape=shape) - label = mx.nd.random.uniform(0, 1, shape=shape) - else: - # Make a random prediction - idx = np.random.rand(*shape).argsort(1) - pred = mx.nd.array(1 - 0.1 * idx) - # Label is half 1 and half 0 - # Setting all 0s or all 1s would make either - # MCC or F1 metrics always produce 0 - label = mx.nd.ones(shape[0]) - label[:shape[0] // 2] = 0 - return pred, label - - def _compare_metric_result(m1, m2): - # Compare names - assert m1[0] == m2[0] - # Compare values - if isinstance(m1[1], (list, tuple)): - assert len(m1[1]) == len(m2[1]) - for r1, r2 in zip(m1[1], m2[1]): - assert r1 == r2 or \ - (math.isnan(r1) and - math.isnan(r2)) - else: - assert m1[1] == m2[1] or \ - (math.isnan(m1[1]) and - math.isnan(m2[1])) - - shape = kwargs.pop('shape', (10,10)) - use_same_shape = kwargs.pop('use_same_shape', False) - m1 = mx.gluon.metric.create(metric, *args, **kwargs) - m2 = deepcopy(m1) - # check that global stats are not reset when calling - # reset_local() - for i in range(10): - pred, label = _create_pred_label() - m1.update([label], [pred]) - m1.reset_local() - m2.update([label], [pred]) - assert m1.get_global() == m2.get() - - # check that reset_local() properly resets the local state - m1.reset_local() - m2.reset() - pred, label = _create_pred_label() - m1.update([label], [pred]) - m1.reset_local() - pred, label = _create_pred_label() - m1.update([label], [pred]) - m2.update([label], [pred]) - _compare_metric_result(m1.get(), m2.get()) - 
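For reference, a minimal numpy-only sketch (not part of the patch; the per-class counts below are invented) of how the "macro" and "micro" averaging strategies described in the F1/Fbeta docstrings above combine per-class statistics:

import numpy as np

def fbeta(precision, recall, beta=1.0):
    # F-beta from precision/recall, with the same small denominator floor
    # used by _ClassificationMetrics.fscore to avoid division by zero
    return (1 + beta ** 2) * precision * recall / np.maximum(
        beta ** 2 * precision + recall, 1e-12)

# made-up per-class sufficient statistics accumulated over mini-batches
tp = np.array([1., 1., 0.])
fp = np.array([0., 1., 2.])
fn = np.array([1., 0., 2.])

# "macro": compute F-beta for each class, then take the unweighted mean
precision = tp / np.maximum(tp + fp, 1e-12)
recall = tp / np.maximum(tp + fn, 1e-12)
macro = fbeta(precision, recall).mean()

# "micro": pool the counts over all classes first, then compute one F-beta
micro_p = tp.sum() / max(tp.sum() + fp.sum(), 1e-12)
micro_r = tp.sum() / max(tp.sum() + fn.sum(), 1e-12)
micro = fbeta(micro_p, micro_r)

print(macro, micro)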
-@with_seed() -def test_global_metric(): - _check_global_metric('acc') - _check_global_metric('TopKAccuracy', top_k=3) - _check_global_metric('f1', shape=(10,2)) - _check_global_metric('f1', shape=(10,2), average='micro') - _check_global_metric('mcc', shape=(10,2)) - _check_global_metric('mcc', shape=(10,2), average='micro') - _check_global_metric('perplexity', -1) - _check_global_metric('pearsonr', use_same_shape=True) - _check_global_metric('pcc', shape=(10,2)) - _check_global_metric('nll_loss') - _check_global_metric('loss') - _check_global_metric('ce') - _check_global_metric('mae', use_same_shape=True) - _check_global_metric('mse', use_same_shape=True) - _check_global_metric('rmse', use_same_shape=True) - def custom_metric(label, pred): - return np.mean(np.abs(label-pred)) - _check_global_metric(custom_metric, use_same_shape=True) - _check_global_metric(['acc', 'f1'], shape=(10,2)) - def test_nll_loss(): metric = mx.gluon.metric.create('nll_loss') pred = mx.nd.array([[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]]) @@ -159,7 +82,7 @@ def test_loss_update(): _, acc2 = metric2.get() assert acc1 == acc2 -def test_f1(): +def test_binary_f1(): microF1 = mx.gluon.metric.create("f1", average="micro") macroF1 = mx.gluon.metric.F1(average="macro") @@ -191,7 +114,7 @@ def test_f1(): microF1.update([label11, label12], [pred11, pred12]) macroF1.update([label11, label12], [pred11, pred12]) assert microF1.num_inst == 4 - assert macroF1.num_inst == 1 + assert macroF1.num_inst == 4 # f1 = 2 * tp / (2 * tp + fp + fn) fscore1 = 2. * (1) / (2 * 1 + 1 + 0) np.testing.assert_almost_equal(microF1.get()[1], fscore1) @@ -200,29 +123,96 @@ def test_f1(): microF1.update([label21, label22], [pred21, pred22]) macroF1.update([label21, label22], [pred21, pred22]) assert microF1.num_inst == 6 - assert macroF1.num_inst == 2 + assert macroF1.num_inst == 6 fscore2 = 2. * (1) / (2 * 1 + 0 + 0) fscore_total = 2. * (1 + 1) / (2 * (1 + 1) + (1 + 0) + (0 + 0)) np.testing.assert_almost_equal(microF1.get()[1], fscore_total) - np.testing.assert_almost_equal(macroF1.get()[1], (fscore1 + fscore2) / 2.) + np.testing.assert_almost_equal(macroF1.get()[1], fscore_total) + +def test_multiclass_f1(): + microF1 = mx.gluon.metric.create("f1", class_type="multiclass", average="micro") + macroF1 = mx.gluon.metric.F1(class_type="multiclass", average="macro") + + assert np.isnan(macroF1.get()[1]) + assert np.isnan(microF1.get()[1]) + + # check one class is zero + pred = mx.nd.array([[0.9, 0.1], + [0.8, 0.2]]) + label = mx.nd.array([0, 0]) + macroF1.update([label], [pred]) + microF1.update([label], [pred]) + assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. 
(divided by 0) + assert microF1.get()[1] == 1.0 # globally f1 is 1.0 + macroF1.reset() + microF1.reset() + + # test case from sklearn, here pred is probabilistic distributions instead of predicted labels + pred11 = mx.nd.array([[1, 0, 0], [0, 1, 0]]) + label11 = mx.nd.array([0, 2]) + pred12 = mx.nd.array([[0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]) + label12 = mx.nd.array([1, 0, 0, 1]) + + microF1.update([label11, label12], [pred11, pred12]) + macroF1.update([label11, label12], [pred11, pred12]) + assert microF1.num_inst == 6 + assert macroF1.num_inst == 6 + + from sklearn.metrics import f1_score + overall_pred = [0, 1, 2, 0, 1, 2] + overall_label = [0, 2, 1, 0, 0, 1] + fmacro = f1_score(overall_label, overall_pred, average="macro") + fmicro = f1_score(overall_label, overall_pred, average="micro") + np.testing.assert_almost_equal(microF1.get()[1], fmicro) + np.testing.assert_almost_equal(macroF1.get()[1], fmacro) + +def test_multilabel_f1(): + microF1 = mx.gluon.metric.create("f1", class_type="multilabel", average="micro") + macroF1 = mx.gluon.metric.F1(class_type="multilabel", average="macro") + + assert np.isnan(macroF1.get()[1]) + assert np.isnan(microF1.get()[1]) + + # check one class is zero + pred = mx.nd.array([[0.9, 0.1], + [0.8, 0.2]]) + label = mx.nd.array([[1, 1], [1, 1]]) + macroF1.update([label], [pred]) + microF1.update([label], [pred]) + assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. (divided by 0) + assert microF1.get()[1] == 2.0 / 3 + macroF1.reset() + microF1.reset() + pred11 = mx.nd.array([[0.9, 0.4, 0.3], [0.2, 0.7, 0.8]]) + label11 = mx.nd.array([[1, 0, 1], [0, 0, 1]]) + pred12 = mx.nd.array([[0.6, 0.6, 0.7]]) + label12 = mx.nd.array([[0, 1, 1]]) + + microF1.update([label11, label12], [pred11, pred12]) + macroF1.update([label11, label12], [pred11, pred12]) + assert microF1.num_inst == 3 + assert macroF1.num_inst == 3 + from sklearn.metrics import f1_score + overall_pred = [[1, 0, 0], [0, 1, 1], [1, 1, 1]] + overall_label = [[1, 0, 1], [0, 0, 1], [0, 1, 1]] + fmacro = f1_score(overall_label, overall_pred, average="macro") + fmicro = f1_score(overall_label, overall_pred, average="micro") + np.testing.assert_almost_equal(microF1.get()[1], fmicro) + np.testing.assert_almost_equal(macroF1.get()[1], fmacro) + def test_mcc(): - microMCC = mx.gluon.metric.create("mcc", average="micro") - macroMCC = mx.gluon.metric.MCC(average="macro") + microMCC = mx.gluon.metric.create("mcc") assert np.isnan(microMCC.get()[1]) - assert np.isnan(macroMCC.get()[1]) - + # check divide by zero pred = mx.nd.array([[0.9, 0.1], [0.8, 0.2]]) label = mx.nd.array([0, 0]) microMCC.update([label], [pred]) - macroMCC.update([label], [pred]) assert microMCC.get()[1] == 0.0 - assert macroMCC.get()[1] == 0.0 microMCC.reset() - macroMCC.reset() pred11 = mx.nd.array([[0.1, 0.9], [0.5, 0.5]]) @@ -235,24 +225,18 @@ def test_mcc(): pred22 = mx.nd.array([[0.2, 0.8]]) label22 = mx.nd.array([1]) microMCC.update([label11, label12], [pred11, pred12]) - macroMCC.update([label11, label12], [pred11, pred12]) assert microMCC.num_inst == 4 - assert macroMCC.num_inst == 1 tp1 = 1; fp1 = 0; fn1 = 1; tn1=2 mcc1 = (tp1*tn1 - fp1*fn1) / np.sqrt((tp1+fp1)*(tp1+fn1)*(tn1+fp1)*(tn1+fn1)) np.testing.assert_almost_equal(microMCC.get()[1], mcc1) - np.testing.assert_almost_equal(macroMCC.get()[1], mcc1) microMCC.update([label21, label22], [pred21, pred22]) - macroMCC.update([label21, label22], [pred21, pred22]) assert microMCC.num_inst == 6 - assert macroMCC.num_inst == 2 tp2 = 1; fp2 = 0; fn2 = 0; tn2=1 mcc2 = 
(tp2*tn2 - fp2*fn2) / np.sqrt((tp2+fp2)*(tp2+fn2)*(tn2+fp2)*(tn2+fn2)) tpT = tp1+tp2; fpT = fp1+fp2; fnT = fn1+fn2; tnT = tn1+tn2; mccT = (tpT*tnT - fpT*fnT) / np.sqrt((tpT+fpT)*(tpT+fnT)*(tnT+fpT)*(tnT+fnT)) np.testing.assert_almost_equal(microMCC.get()[1], mccT) - np.testing.assert_almost_equal(macroMCC.get()[1], .5*(mcc1+mcc2)) def test_perplexity(): pred = mx.nd.array([[0.8, 0.2], [0.2, 0.8], [0, 1.]]) @@ -269,17 +253,12 @@ def test_pearsonr(): label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) - macro_pr = mx.gluon.metric.create('pearsonr', average='macro') - micro_pr = mx.gluon.metric.create('pearsonr', average='micro') + micro_pr = mx.gluon.metric.create('pearsonr') - assert np.isnan(macro_pr.get()[1]) assert np.isnan(micro_pr.get()[1]) - macro_pr.update([label1], [pred1]) micro_pr.update([label1], [pred1]) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) @@ -292,11 +271,7 @@ def test_pearsonr(): pearsonr_expected_np = np.corrcoef(pred12.asnumpy().ravel(), label12.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred12.asnumpy().ravel(), label12.asnumpy().ravel()) - macro_pr.reset() micro_pr.update([label2], [pred2]) - macro_pr.update([label12], [pred12]) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) From 1dfe0e0376431bbced789b77652e37cf3bb2092e Mon Sep 17 00:00:00 2001 From: acphile Date: Wed, 22 Apr 2020 09:56:40 +0000 Subject: [PATCH 10/24] use mxnet.numpy instead of numpy --- python/mxnet/gluon/metric.py | 115 ++++++++++++++++----------- tests/python/unittest/test_metric.py | 3 +- 2 files changed, 69 insertions(+), 49 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 8214ad86ae67..83da4eb9e28f 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -22,7 +22,8 @@ import math from collections import OrderedDict -import numpy +from .. import numpy +from ..util import use_np from ..base import numeric_types, string_types from .. import ndarray @@ -359,6 +360,7 @@ def get_config(self): @register @alias('acc') +@use_np class Accuracy(EvalMetric): """Computes accuracy classification score. 
@@ -414,11 +416,11 @@ def update(self, labels, preds): for label, pred_label in zip(labels, preds): if pred_label.shape != label.shape: pred_label = ndarray.argmax(pred_label, axis=self.axis) - pred_label = pred_label.asnumpy().astype('int32') - label = label.asnumpy().astype('int32') + pred_label = pred_label.as_np_ndarray().astype('int32') + label = label.as_np_ndarray().astype('int32') # flatten before checking shapes to avoid shape miss match - label = label.flat - pred_label = pred_label.flat + label = label.reshape(-1) + pred_label = pred_label.reshape(-1) check_label_shapes(label, pred_label) @@ -429,6 +431,7 @@ def update(self, labels, preds): @register @alias('top_k_accuracy', 'top_k_acc') +@use_np class TopKAccuracy(EvalMetric): """Computes top k predictions accuracy. @@ -491,19 +494,19 @@ def update(self, labels, preds): # we do not care about the order of top k elements. It is # much faster, which is important since that computation is # single-threaded due to Python GIL. - pred_label = numpy.argpartition(pred_label.asnumpy().astype('float32'), -self.top_k) - label = label.asnumpy().astype('int32') + pred_label = numpy.argpartition(pred_label.as_np_ndarray().astype('float32'), -self.top_k) + label = label.as_np_ndarray().astype('int32') check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) if num_dims == 1: - num_correct = (pred_label.flat == label.flat).sum() + num_correct = (pred_label.reshape(-1) == label.reshape(-1)).sum() self.sum_metric += num_correct elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) for j in range(top_k): - num_correct = (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() + num_correct = (pred_label[:, num_classes - 1 - j].reshape(-1) == label.reshape(-1)).sum() self.sum_metric += num_correct self.num_inst += num_samples @@ -530,10 +533,11 @@ def predict_with_threshold(pred, threshold=0.5): raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) -def one_hot(x, m): - return (numpy.arange(m)==x[:,None]).astype('int32') +def one_hot(idx, num): + return (numpy.arange(num).astype(idx) == idx[:,None]).astype('int32') - + +@use_np class _ClassificationMetrics(object): """Private container class for classification metric statistics. @@ -581,8 +585,8 @@ def update_stats(self, label, pred): pred : `NDArray` Predicted values. """ - pred = pred.asnumpy() - label = label.asnumpy().astype('int32') + pred = pred.as_np_ndarray() + label = label.as_np_ndarray().astype('int32') if self.class_type == "binary": self._set(1) if len(numpy.unique(label)) > 2: @@ -593,8 +597,8 @@ def update_stats(self, label, pred): raise ValueError("The shape of prediction {} is wrong for binary classification.".format(pred.shape)) elif pred.shape[-1] == 2: pred = pred.reshape(-1, 2)[:, 1] - pred_label = predict_with_threshold(pred, self.threshold).flat - label = label.flat + pred_label = predict_with_threshold(pred, self.threshold).reshape(-1) + label = label.reshape(-1) elif self.class_type == "multiclass": num = pred.shape[-1] @@ -615,9 +619,9 @@ def update_stats(self, label, pred): check_label_shapes(label, pred_label) pred_true = (pred_label == 1) - pred_false = 1 - pred_true + pred_false = (pred_label == 0) label_true = (label == 1) - label_false = 1 - label_true + label_false = (label == 0) true_pos = (pred_true * label_true).sum(0) false_pos = (pred_true * label_false).sum(0) @@ -668,7 +672,7 @@ def global_fscore(self): else: return 0. 
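As an illustrative aside (plain numpy with invented values; not part of the patch), this is how the multiclass branch of update_stats() above reduces one-hot indicator matrices to per-class true/false positive and false negative counts:

import numpy as np

def one_hot(idx, num):
    # same idea as the one_hot helper added above
    return (np.arange(num) == idx[:, None]).astype('int32')

pred = np.array([[0.9, 0.1, 0.0],      # predicted class 0
                 [0.2, 0.7, 0.1],      # predicted class 1
                 [0.3, 0.3, 0.4]])     # predicted class 2
label = np.array([0, 2, 2])
num = pred.shape[-1]

pred_label = one_hot(pred.argmax(axis=-1), num)   # shape (3, 3)
label_oh = one_hot(label, num)                    # shape (3, 3)

# summing over the sample axis gives one count per class
true_pos = (pred_label * label_oh).sum(0)          # [1, 0, 1]
false_pos = (pred_label * (1 - label_oh)).sum(0)   # [0, 1, 0]
false_neg = ((1 - pred_label) * label_oh).sum(0)   # [0, 0, 1]
print(true_pos, false_pos, false_neg)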
- def matthewscc(self): + def binary_matthewscc(self): """Calculate the Matthew's Correlation Coefficent""" if not self.total_examples: return 0. @@ -703,6 +707,7 @@ def reset_stats(self): @register +@use_np class F1(EvalMetric): """Computes the F1 score of a binary classification problem. @@ -791,6 +796,7 @@ def reset(self): @register +@use_np class Fbeta(F1): """Computes the Fbeta score of a binary classification problem. @@ -851,6 +857,7 @@ def __init__(self, name='fbeta', @register +@use_np class BinaryAccuracy(EvalMetric): """Computes the accuracy of a binary or multilabel classification problem. @@ -899,11 +906,11 @@ def update(self, labels, preds): for label, pred_label in zip(labels, preds): pred_label = predict_with_threshold(pred_label, self.threshold) - pred_label = pred_label.asnumpy().astype('int32') - label = label.asnumpy().astype('int32') + pred_label = pred_label.as_np_ndarray().astype('int32') + label = label.as_np_ndarray().astype('int32') # flatten before checking shapes to avoid shape miss match - label = label.flat - pred_label = pred_label.flat + label = label.reshape(-1) + pred_label = pred_label.reshape(-1) check_label_shapes(label, pred_label) @@ -913,6 +920,7 @@ def update(self, labels, preds): @register +@use_np class MCC(EvalMetric): """Computes the Matthews Correlation Coefficient of a binary classification problem. @@ -993,7 +1001,7 @@ def update(self, labels, preds): for label, pred in zip(labels, preds): self._metrics.update_stats(label, pred) - self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples + self.sum_metric = self._metrics.binary_matthewscc() * self._metrics.total_examples self.num_inst = self._metrics.total_examples def reset(self): @@ -1112,6 +1120,7 @@ def get(self): @register +@use_np class MAE(EvalMetric): """Computes Mean Absolute Error (MAE) loss. @@ -1160,8 +1169,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() num_inst = label.shape[0] mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() @@ -1171,6 +1180,7 @@ def update(self, labels, preds): @register +@use_np class MSE(EvalMetric): """Computes Mean Squared Error (MSE) loss. @@ -1218,8 +1228,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() num_inst = label.shape[0] mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() @@ -1229,6 +1239,7 @@ def update(self, labels, preds): @register +@use_np class RMSE(MSE): """Computes Root Mean Squred Error (RMSE) loss. @@ -1270,6 +1281,7 @@ def get(self): @register +@use_np class MeanPairwiseDistance(EvalMetric): """Computes Mean Pairwise Distance. @@ -1320,8 +1332,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() label = label.reshape(label.shape[0], -1) pred = pred.reshape(pred.shape[0], -1) @@ -1335,6 +1347,7 @@ def update(self, labels, preds): @register +@use_np class MeanCosineSimilarity(EvalMetric): """Computes Mean Cosine Similarity. 
@@ -1385,8 +1398,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() if len(label.shape) == 1: label = label.reshape(1, label.shape[0]) @@ -1398,13 +1411,14 @@ def update(self, labels, preds): n_l = numpy.linalg.norm(label, axis=-1) sim = sim / numpy.maximum(n_l * n_p, self.eps) sim = sim.sum() - num_inst = numpy.prod(label.shape[:-1]) + num_inst = len(label.reshape(-1, label.shape[-1])) # numpy.prod(label.shape[:-1]) is not supported self.sum_metric += sim self.num_inst += num_inst @register @alias('ce') +@use_np class CrossEntropy(EvalMetric): """Computes Cross Entropy loss. @@ -1461,10 +1475,10 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() - label = label.ravel() + label = label.reshape(-1) assert label.shape[0] == pred.shape[0] prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] @@ -1474,6 +1488,7 @@ def update(self, labels, preds): @register @alias('nll_loss') +@use_np class NegativeLogLikelihood(EvalMetric): """Computes the negative log-likelihood loss. @@ -1530,10 +1545,10 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() - label = label.ravel() + label = label.reshape(-1) num_examples = pred.shape[0] assert label.shape[0] == num_examples, (label.shape[0], num_examples) prob = pred[numpy.arange(num_examples, dtype=numpy.int64), numpy.int64(label)] @@ -1544,6 +1559,7 @@ def update(self, labels, preds): @register @alias('pearsonr') +@use_np class PearsonCorrelation(EvalMetric): """Computes Pearson correlation. @@ -1616,8 +1632,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.asnumpy().ravel().astype(numpy.float64) - pred = pred.asnumpy().ravel().astype(numpy.float64) + label = label.as_np_ndarray().reshape(-1).astype(numpy.float64) + pred = pred.as_np_ndarray().reshape(-1).astype(numpy.float64) self.num_inst += 1 self._label_nums, self._mean_l, self._sse_l = \ @@ -1635,6 +1651,7 @@ def get(self): return (self.name, pearsonr) @register +@use_np class PCC(EvalMetric): """PCC is a multiclass equivalent for the Matthews correlation coefficient derived from a discrete solution to the Pearson correlation coefficient. @@ -1706,7 +1723,8 @@ def _calc_mcc(self, cmat): cov_yy = numpy.sum(y * (n - y)) if cov_xx == 0 or cov_yy == 0: return float('nan') - i = cmat.diagonal() + # i = cmat.diagonal() # mxnet.numpy.ndarray.diagonal() is currently not available. 
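# (Illustrative aside, not part of the patch: indexing with two aligned
# arange() index arrays selects cmat[0, 0], cmat[1, 1], ..., cmat[k-1, k-1],
# i.e. the confusion-matrix diagonal, so the fancy-indexing line below is
# equivalent to numpy's cmat.diagonal(); e.g. for cmat = [[5, 1], [2, 7]]
# it yields [5, 7].)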
+ i = cmat[numpy.arange(self.k), numpy.arange(self.k)] cov_xy = numpy.sum(i * n - x * y) return cov_xy / (cov_xx * cov_yy) ** 0.5 @@ -1725,13 +1743,13 @@ def update(self, labels, preds): # update the confusion matrix for label, pred in zip(labels, preds): - label = label.astype('int32', copy=False).asnumpy() - pred = pred.asnumpy() + label = label.astype('int32', copy=False).as_np_ndarray() + pred = pred.as_np_ndarray() if pred.shape != label.shape: - pred = pred.argmax(axis=1) + pred = pred.argmax(axis=1).astype(label, copy=False) else: pred = pred.astype('int32', copy=False) - n = max(pred.max(), label.max()) + n = int(max(pred.max(), label.max())) if n >= self.k: self._grow(n + 1 - self.k) bcm = numpy.zeros((self.k, self.k)) @@ -1800,6 +1818,7 @@ def __init__(self, name='caffe', @register +@use_np class CustomMetric(EvalMetric): """Computes a customized evaluation metric. @@ -1863,8 +1882,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for pred, label in zip(preds, labels): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() reval = self._feval(label, pred) if isinstance(reval, tuple): diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index 81f57f0eed6d..0a6d48f3f2b4 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -16,6 +16,7 @@ # under the License. import mxnet as mx +mx.npx.set_np() import numpy as np import scipy from scipy.stats import pearsonr @@ -352,7 +353,7 @@ def test_pcc(): # * order # * batch size # * update frequency - labels = [ [ i ] for i in labels[0] ] + labels = [ [ i.reshape(-1) ] for i in labels[0] ] labels.reverse() preds = [ [ i.reshape((1, -1)) ] for i in preds[0] ] preds.reverse() From 59d98b36307bab31a9ffbf616b2f88235d52ee94 Mon Sep 17 00:00:00 2001 From: acphile Date: Sat, 25 Apr 2020 03:24:33 +0000 Subject: [PATCH 11/24] fix sanity --- python/mxnet/gluon/metric.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 5b83feb1534a..1099cc901ef7 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -534,7 +534,7 @@ def predict_with_threshold(pred, threshold=0.5): def one_hot(idx, num): - return (numpy.arange(num).astype(idx) == idx[:,None]).astype('int32') + return (numpy.arange(num).astype(idx) == idx[:, None]).astype('int32') @use_np @@ -572,7 +572,8 @@ def _set(self, num): self.false_positives = numpy.zeros(num) self.true_negatives = numpy.zeros(num) else: - assert self.num_classes == num, "Input number of classes has changed from {} to {}".format(self.num_classes, num) + assert self.num_classes == num, \ + "Input number of classes has changed from {} to {}".format(self.num_classes, num) def update_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. @@ -610,11 +611,13 @@ def update_stats(self, label, pred): elif self.class_type == "multilabel": num = pred.shape[-1] self._set(num) - assert pred.shape == label.shape, "The shape of label should be same as that of prediction for multilabel classification." + assert pred.shape == label.shape, \ + "The shape of label should be same as that of prediction for multilabel classification." pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) label = label.reshape(-1, num) else: - raise ValueError("Wrong class_type {}! 
Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) + raise ValueError( + "Wrong class_type {}! Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) check_label_shapes(label, pred_label) @@ -642,7 +645,8 @@ def precision(self): @property def micro_precision(self): if self.num_classes is not None: - return self.true_positives.sum() / numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) + return self.true_positives.sum() / \ + numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) else: return 0. @@ -656,13 +660,15 @@ def recall(self): @property def micro_recall(self): if self.num_classes is not None: - return self.true_positives.sum() / numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) + return self.true_positives.sum() / \ + numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) else: return 0. @property def fscore(self): - return (1 + self.beta ** 2) * self.precision * self.recall / numpy.maximum(self.beta ** 2 * self.precision + self.recall, 1e-12) + return (1 + self.beta ** 2) * self.precision * self.recall / \ + numpy.maximum(self.beta ** 2 * self.precision + self.recall, 1e-12) @property def micro_fscore(self): @@ -744,7 +750,7 @@ class F1(EvalMetric): average : str, default 'micro' Strategy to be used for aggregating across mini-batches. "macro": Calculate metrics for each label and return unweighted mean of f1. - "micro": Calculate metrics globally by counting the total true positives, false negatives and false positives. + "micro": Calculate metrics globally by counting the total TP, FN and FP. None: Return f1 scores for each class (numpy.ndarray) . Examples @@ -835,7 +841,7 @@ class Fbeta(F1): average : str, default 'micro' Strategy to be used for aggregating across mini-batches. "macro": Calculate metrics for each label and return unweighted mean of f1. - "micro": Calculate metrics globally by counting the total true positives, false negatives and false positives. + "micro": Calculate metrics globally by counting the total TP, FN and FP. None: Return f1 scores for each class. 
Examples @@ -850,9 +856,9 @@ class Fbeta(F1): def __init__(self, name='fbeta', output_names=None, label_names=None, class_type="binary", beta=1, threshold=0.5, average="micro"): - super(Fbeta, self).__init__(name=name, - output_names=output_names, label_names=label_names, - class_type=class_type, threshold=threshold, average=average) + super(Fbeta, self).__init__( + name=name, output_names=output_names, label_names=label_names, + class_type=class_type, threshold=threshold, average=average) self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold, beta=beta) From 40e87e3139314ef9a5d9c26cc6b7e249f79e1551 Mon Sep 17 00:00:00 2001 From: acphile Date: Mon, 27 Apr 2020 05:53:49 +0000 Subject: [PATCH 12/24] rewrite ce and ppl, improve some details --- python/mxnet/gluon/metric.py | 221 +++++++++++++-------------- tests/python/unittest/test_metric.py | 16 +- 2 files changed, 112 insertions(+), 125 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 1099cc901ef7..f750d2087fa1 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -162,7 +162,15 @@ def get(self): if self.num_inst == 0: return (self.name, float('nan')) else: - return (self.name, self.sum_metric / self.num_inst) + res = self.sum_metric / self.num_inst + if isinstance(res, numpy.ndarray) and len(res.shape) == 0: + """ + currently calling ' c = mxnet.numpy.array([1,2,3]).sum() ' would get + ' array(6.) ', a ndarray with shape () + In this case, returning a 'float' in .get() is more explicit. + """ + res = res.item() + return (self.name, res) def get_name_value(self): """Returns zipped name and value pairs. @@ -590,7 +598,7 @@ def update_stats(self, label, pred): label = label.as_np_ndarray().astype('int32') if self.class_type == "binary": self._set(1) - if len(numpy.unique(label)) > 2: + if label.max() > 1: raise ValueError("Wrong label for binary classification.") if pred.shape == label.shape: pass @@ -1017,109 +1025,6 @@ def reset(self): self._metrics.reset_stats() -@register -class Perplexity(EvalMetric): - """Computes perplexity. - - Perplexity is a measurement of how well a probability distribution - or model predicts a sample. A low perplexity indicates the model - is good at predicting the sample. - - The perplexity of a model q is defined as - - .. math:: - b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} - = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) - - where we let `b = e`. - - :math:`q(x_i)` is the predicted value of its ground truth - label on sample :math:`x_i`. - - For example, we have three samples :math:`x_1, x_2, x_3` and their labels - are :math:`[0, 1, 1]`. - Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` - and :math:`q(x_2) = 1.0`, - :math:`q(x_3) = 0.6`. The perplexity of model q is - :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. - - Parameters - ---------- - ignore_label : int or None - Index of invalid label to ignore when - counting. By default, sets to -1. - If set to `None`, it will include all entries. - axis : int (default -1) - The axis from prediction that was used to - compute softmax. By default use the last - axis. - name : str - Name of this metric instance for display. - output_names : list of str, or None - Name of predictions that should be used when updating with update_dict. - By default include all predictions. - label_names : list of str, or None - Name of labels that should be used when updating with update_dict. 
- By default include all labels. - - Examples - -------- - >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] - >>> labels = [mx.nd.array([0, 1, 1])] - >>> perp = mx.gluon.metric.Perplexity(ignore_label=None) - >>> perp.update(labels, predicts) - >>> print perp.get() - ('Perplexity', 1.7710976285155853) - """ - def __init__(self, ignore_label, axis=-1, name='perplexity', - output_names=None, label_names=None): - super(Perplexity, self).__init__( - name, ignore_label=ignore_label, - output_names=output_names, label_names=label_names) - self.ignore_label = ignore_label - self.axis = axis - - def update(self, labels, preds): - """Updates the internal evaluation result. - - Parameters - ---------- - labels : list of `NDArray` - The labels of the data. - - preds : list of `NDArray` - Predicted values. - """ - assert len(labels) == len(preds) - loss = 0. - num = 0 - for label, pred in zip(labels, preds): - assert label.size == pred.size/pred.shape[-1], \ - "shape mismatch: %s vs. %s"%(label.shape, pred.shape) - label = label.as_in_context(pred.context).reshape((label.size,)) - pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) - if self.ignore_label is not None: - ignore = (label == self.ignore_label).astype(pred.dtype) - num -= ndarray.sum(ignore).asscalar() - pred = pred*(1-ignore) + ignore - loss -= ndarray.sum(ndarray.log(ndarray.maximum(1e-10, pred))).asscalar() - num += pred.size - self.sum_metric += loss - self.num_inst += num - - def get(self): - """Returns the current evaluation result. - - Returns - ------- - Tuple of (str, float) - Representing name of the metric and evaluation result. - """ - if self.num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.exp(self.sum_metric/self.num_inst)) - #################### # REGRESSION METRICS #################### @@ -1439,9 +1344,13 @@ class :math:`k`. Parameters ---------- - eps : float - Cross Entropy loss is undefined for predicted value is 0 or 1, - so predicted values are added with the small constant. + ignore_label : int or None, default None + Index of invalid label to ignore when + counting. By default, sets to -1. + If set to `None`, it will include all entries. + axis : int (default -1) + The axis from prediction that was used to + compute softmax. By default use the last axis. name : str Name of this metric instance for display. output_names : list of str, or None @@ -1460,12 +1369,12 @@ class :math:`k`. >>> print ce.get() ('cross-entropy', 0.57159948348999023) """ - def __init__(self, eps=1e-12, name='cross-entropy', + def __init__(self, ignore_label=None, axis=-1, name='cross-entropy', output_names=None, label_names=None): super(CrossEntropy, self).__init__( - name, eps=eps, - output_names=output_names, label_names=label_names) - self.eps = eps + name, output_names=output_names, label_names=label_names) + self.ignore_label = ignore_label + self.axis = axis def update(self, labels, preds): """Updates the internal evaluation result. @@ -1480,17 +1389,91 @@ def update(self, labels, preds): """ labels, preds = check_label_shapes(labels, preds, True) + loss = 0. + num = 0 for label, pred in zip(labels, preds): + assert label.size == pred.size/pred.shape[-1], \ + "shape mismatch: %s vs. 
%s"%(label.shape, pred.shape) + label = label.as_in_context(pred.context).reshape((label.size,)) + pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) label = label.as_np_ndarray() pred = pred.as_np_ndarray() + if self.ignore_label is not None: + ignore = (label == self.ignore_label).astype(pred.dtype) + num -= ignore.sum() + pred = pred * (1 - ignore) + ignore + loss -= numpy.log(numpy.maximum(1e-12, pred)).sum() + num += pred.size + self.sum_metric += loss + self.num_inst += num - label = label.reshape(-1) - assert label.shape[0] == pred.shape[0] - prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] - cross_entropy = (-numpy.log(prob + self.eps)).sum() - self.sum_metric += cross_entropy - self.num_inst += label.shape[0] +@register +@use_np +class Perplexity(CrossEntropy): + """Computes perplexity. + + Perplexity is a measurement of how well a probability distribution + or model predicts a sample. A low perplexity indicates the model + is good at predicting the sample. + + The perplexity of a model q is defined as + + .. math:: + b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} + = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) + + where we let `b = e`. + + :math:`q(x_i)` is the predicted value of its ground truth + label on sample :math:`x_i`. + + For example, we have three samples :math:`x_1, x_2, x_3` and their labels + are :math:`[0, 1, 1]`. + Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` + and :math:`q(x_2) = 1.0`, + :math:`q(x_3) = 0.6`. The perplexity of model q is + :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. + + Parameters + ---------- + ignore_label : int or None, default None + Index of invalid label to ignore when + counting. By default, sets to -1. + If set to `None`, it will include all entries. + axis : int (default -1) + The axis from prediction that was used to + compute softmax. By default use the last axis. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. 
+ + Examples + -------- + >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] + >>> labels = [mx.nd.array([0, 1, 1])] + >>> perp = mx.gluon.metric.Perplexity(ignore_label=None) + >>> perp.update(labels, predicts) + >>> print perp.get() + ('Perplexity', 1.7710976285155853) + """ + def __init__(self, ignore_label=None, axis=-1, name='perplexity', + output_names=None, label_names=None): + super(Perplexity, self).__init__( + name=name, ignore_label=ignore_label, axis=axis, + output_names=output_names, label_names=label_names) + + def get(self): + if self.num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.exp(self.sum_metric/self.num_inst)) + @register @alias('nll_loss') @@ -1654,7 +1637,7 @@ def get(self): n = self._label_nums pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) - return (self.name, pearsonr) + return (self.name, float(pearsonr)) @register @use_np diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index 665feb39ceee..a9f7e4c1b568 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -36,7 +36,7 @@ def test_metrics(): check_metric('acc', axis=0) check_metric('f1') check_metric('mcc') - check_metric('perplexity', -1) + check_metric('perplexity', axis=-1) check_metric('pearsonr') check_metric('pcc') check_metric('nll_loss') @@ -60,7 +60,7 @@ def test_acc(): metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1) == label).sum().asscalar() / label.size - assert acc == expected_acc + np.testing.assert_almost_equal(acc, expected_acc) def test_acc_2d_label(): # label maybe provided in 2d arrays in custom data iterator @@ -71,7 +71,7 @@ def test_acc_2d_label(): _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1).asnumpy() == label.asnumpy().ravel()).sum() / \ float(label.asnumpy().ravel().size) - assert acc == expected_acc + np.testing.assert_almost_equal(acc, expected_acc) def test_loss_update(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) @@ -181,7 +181,7 @@ def test_multilabel_f1(): macroF1.update([label], [pred]) microF1.update([label], [pred]) assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. 
(divided by 0) - assert microF1.get()[1] == 2.0 / 3 + np.testing.assert_almost_equal(microF1.get()[1], 2.0 / 3) macroF1.reset() microF1.reset() @@ -244,10 +244,10 @@ def test_perplexity(): label = mx.nd.array([0, 1, 1]) p = pred.asnumpy()[np.arange(label.size), label.asnumpy().astype('int32')] perplexity_expected = np.exp(-np.log(p).sum()/label.size) - metric = mx.gluon.metric.create('perplexity', -1) + metric = mx.gluon.metric.create('perplexity', axis=-1) metric.update([label], [pred]) _, perplexity = metric.get() - assert perplexity == perplexity_expected + np.testing.assert_almost_equal(perplexity, perplexity_expected) def test_pearsonr(): pred1 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) @@ -383,3 +383,7 @@ def test_single_array_input(): rmse.get() _, rmse_res = rmse.get() np.testing.assert_almost_equal(rmse_res, 0.1) + +if __name__ == '__main__': + import nose + nose.runmodule() From 5e153e12278c2d07096556f6e3131a8132f36968 Mon Sep 17 00:00:00 2001 From: acphile Date: Mon, 27 Apr 2020 11:09:20 +0000 Subject: [PATCH 13/24] use mxnet.numpy.float64 --- python/mxnet/gluon/metric.py | 127 +++++++++++++++++------------------ 1 file changed, 63 insertions(+), 64 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index f750d2087fa1..2679cda2cda2 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -164,11 +164,9 @@ def get(self): else: res = self.sum_metric / self.num_inst if isinstance(res, numpy.ndarray) and len(res.shape) == 0: - """ - currently calling ' c = mxnet.numpy.array([1,2,3]).sum() ' would get - ' array(6.) ', a ndarray with shape () - In this case, returning a 'float' in .get() is more explicit. - """ + # currently calling ' c = mxnet.numpy.array([1,2,3]).sum() ' would get + # ' array(6.) ', a ndarray with shape () + # In this case, returning a 'float' in .get() is more explicit. res = res.item() return (self.name, res) @@ -432,7 +430,7 @@ def update(self, labels, preds): check_label_shapes(label, pred_label) - num_correct = (pred_label == label).sum() + num_correct = (pred_label == label).sum().astype('float64') self.sum_metric += num_correct self.num_inst += len(pred_label) @@ -509,19 +507,19 @@ def update(self, labels, preds): num_dims = len(pred_label.shape) if num_dims == 1: num_correct = (pred_label.reshape(-1) == label.reshape(-1)).sum() - self.sum_metric += num_correct + self.sum_metric += num_correct.astype('float64') elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) for j in range(top_k): num_correct = (pred_label[:, num_classes - 1 - j].reshape(-1) == label.reshape(-1)).sum() - self.sum_metric += num_correct + self.sum_metric += num_correct.astype('float64') self.num_inst += num_samples def predict_with_threshold(pred, threshold=0.5): """Do thresholding of predictions in binary and multilabel cases. - + Parameters ---------- preds : ndarray @@ -532,38 +530,38 @@ def predict_with_threshold(pred, threshold=0.5): """ if isinstance(threshold, float): return pred > threshold - elif isinstance(threshold, numpy.ndarray) or isinstance(threshold, ndarray.ndarray.NDArray): + elif isinstance(threshold, (numpy.ndarray, ndarray.ndarray.NDArray)): num_classes = pred.shape[-1] assert threshold.shape[-1] == num_classes, \ "shape mismatch: %s vs. 
%s"%(pred.shape[-1], threshold.shape[-1]) - return pred > threshold + return pred > threshold else: raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) - + def one_hot(idx, num): return (numpy.arange(num).astype(idx) == idx[:, None]).astype('int32') - -@use_np + +@use_np class _ClassificationMetrics(object): """Private container class for classification metric statistics. True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. - + Parameters ---------- class_type : str, default "binary" "binary": f1 for binary classification. "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. + "multilabel": f1 for multilabel classification. beta : float, default 1 - weight of precision in harmonic mean. + weight of precision in harmonic mean. threshold : float, default 0.5 threshold for deciding whether the predictions are positive or negative. - + """ def __init__(self, class_type="binary", threshold=0.5, beta=1): @@ -575,14 +573,14 @@ def __init__(self, class_type="binary", threshold=0.5, beta=1): def _set(self, num): if self.num_classes is None: self.num_classes = num - self.true_positives = numpy.zeros(num) - self.false_negatives = numpy.zeros(num) - self.false_positives = numpy.zeros(num) - self.true_negatives = numpy.zeros(num) + self.true_positives = numpy.zeros(num, dtype='float64') + self.false_negatives = numpy.zeros(num, dtype='float64') + self.false_positives = numpy.zeros(num, dtype='float64') + self.true_negatives = numpy.zeros(num, dtype='float64') else: assert self.num_classes == num, \ "Input number of classes has changed from {} to {}".format(self.num_classes, num) - + def update_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. @@ -605,17 +603,17 @@ def update_stats(self, label, pred): elif pred.shape[-1] > 2: raise ValueError("The shape of prediction {} is wrong for binary classification.".format(pred.shape)) elif pred.shape[-1] == 2: - pred = pred.reshape(-1, 2)[:, 1] + pred = pred.reshape(-1, 2)[:, 1] pred_label = predict_with_threshold(pred, self.threshold).reshape(-1) label = label.reshape(-1) - + elif self.class_type == "multiclass": num = pred.shape[-1] self._set(num) assert label.max() < num, "pred contains fewer classes than label!" - pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) + pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) label = one_hot(label.reshape(-1), num) - + elif self.class_type == "multilabel": num = pred.shape[-1] self._set(num) @@ -626,9 +624,9 @@ def update_stats(self, label, pred): else: raise ValueError( "Wrong class_type {}! Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) - + check_label_shapes(label, pred_label) - + pred_true = (pred_label == 1) pred_false = (pred_label == 0) label_true = (label == 1) @@ -657,7 +655,7 @@ def micro_precision(self): numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) else: return 0. - + @property def recall(self): if self.num_classes is not None: @@ -672,7 +670,7 @@ def micro_recall(self): numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) else: return 0. 
- + @property def fscore(self): return (1 + self.beta ** 2) * self.precision * self.recall / \ @@ -685,7 +683,7 @@ def micro_fscore(self): (self.beta ** 2 * self.micro_precision + self.micro_recall) else: return 0. - + def binary_matthewscc(self): """Calculate the Matthew's Correlation Coefficent""" if not self.total_examples: @@ -752,14 +750,14 @@ class F1(EvalMetric): class_type : str, default "binary" "binary": f1 for binary classification. "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. + "multilabel": f1 for multilabel classification. threshold : float, default 0.5 threshold for postive confidence value. average : str, default 'micro' Strategy to be used for aggregating across mini-batches. "macro": Calculate metrics for each label and return unweighted mean of f1. "micro": Calculate metrics globally by counting the total TP, FN and FP. - None: Return f1 scores for each class (numpy.ndarray) . + None: Return f1 scores for each class (numpy.ndarray) . Examples -------- @@ -795,13 +793,13 @@ def update(self, labels, preds): self.metrics.update_stats(label, pred) if self.average == "micro": - self.sum_metric = self.metrics.micro_fscore * self.metrics.total_examples + self.sum_metric = self.metrics.micro_fscore * self.metrics.total_examples elif self.average == "macro": - self.sum_metric = self.metrics.fscore.mean() * self.metrics.total_examples + self.sum_metric = self.metrics.fscore.mean() * self.metrics.total_examples else: - self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.num_inst = self.metrics.total_examples - + self.sum_metric = self.metrics.fscore * self.metrics.total_examples + self.num_inst = self.metrics.total_examples + def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. @@ -841,16 +839,16 @@ class Fbeta(F1): class_type : str, default "binary" "binary": f1 for binary classification. "multiclass": f1 for multiclassification problem. - "multilabel": f1 for multilabel classification. + "multilabel": f1 for multilabel classification. beta : float, default 1 - weight of precision in harmonic mean. + weight of precision in harmonic mean. threshold : float, default 0.5 threshold for postive confidence value. average : str, default 'micro' Strategy to be used for aggregating across mini-batches. "macro": Calculate metrics for each label and return unweighted mean of f1. "micro": Calculate metrics globally by counting the total TP, FN and FP. - None: Return f1 scores for each class. + None: Return f1 scores for each class. 
Examples -------- @@ -865,10 +863,10 @@ class Fbeta(F1): def __init__(self, name='fbeta', output_names=None, label_names=None, class_type="binary", beta=1, threshold=0.5, average="micro"): super(Fbeta, self).__init__( - name=name, output_names=output_names, label_names=label_names, + name=name, output_names=output_names, label_names=label_names, class_type=class_type, threshold=threshold, average=average) self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold, beta=beta) - + @register @use_np @@ -919,7 +917,7 @@ def update(self, labels, preds): for label, pred_label in zip(labels, preds): pred_label = predict_with_threshold(pred_label, self.threshold) - + pred_label = pred_label.as_np_ndarray().astype('int32') label = label.as_np_ndarray().astype('int32') # flatten before checking shapes to avoid shape miss match @@ -928,11 +926,11 @@ def update(self, labels, preds): check_label_shapes(label, pred_label) - num_correct = (pred_label == label).sum() + num_correct = (pred_label == label).sum().astype('float64') self.sum_metric += num_correct self.num_inst += len(pred_label) - - + + @register @use_np class MCC(EvalMetric): @@ -1065,7 +1063,7 @@ def __init__(self, name='mae', output_names=None, label_names=None): super(MAE, self).__init__( name, output_names=output_names, label_names=label_names) - + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1085,7 +1083,7 @@ def update(self, labels, preds): num_inst = label.shape[0] mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() - + self.sum_metric += mae self.num_inst += num_inst @@ -1124,7 +1122,7 @@ def __init__(self, name='mse', output_names=None, label_names=None): super(MSE, self).__init__( name, output_names=output_names, label_names=label_names) - + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1144,7 +1142,7 @@ def update(self, labels, preds): num_inst = label.shape[0] mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() - + self.sum_metric += mse self.num_inst += num_inst @@ -1183,12 +1181,12 @@ def __init__(self, name='rmse', output_names=None, label_names=None): super(RMSE, self).__init__( name, output_names=output_names, label_names=label_names) - + def get(self): if self.num_inst == 0: return (self.name, float('nan')) else: - return (self.name, math.sqrt(self.sum_metric / self.num_inst)) + return (self.name, math.sqrt(self.sum_metric / self.num_inst)) @register @@ -1228,7 +1226,7 @@ def __init__(self, name='mpd', super(MeanPairwiseDistance, self).__init__( name, output_names=output_names, label_names=label_names) self.p = p - + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1255,7 +1253,7 @@ def update(self, labels, preds): self.sum_metric += dis self.num_inst += num_inst - + @register @use_np @@ -1265,9 +1263,9 @@ class MeanCosineSimilarity(EvalMetric): The mean cosine similarity is given by .. math:: - cos\_sim(label, pred) = \frac{{label}.{pred}}{max(||label||.||pred||, eps)} + cos_sim(label, pred) = \frac{{label}.{pred}}{max(||label||.||pred||, eps)} (calculating on the last dimension of label and pred.) - + Parameters ---------- name : str @@ -1280,6 +1278,7 @@ class MeanCosineSimilarity(EvalMetric): By default include all labels. eps : float, default 1e-8 small vale to avoid division by zero. 
+ Examples -------- >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] @@ -1294,7 +1293,7 @@ def __init__(self, name='cos_sim', super(MeanCosineSimilarity, self).__init__( name, output_names=output_names, label_names=label_names) self.eps = eps - + def update(self, labels, preds): """Updates the internal evaluation result. @@ -1326,7 +1325,7 @@ def update(self, labels, preds): self.sum_metric += sim self.num_inst += num_inst - + @register @alias('ce') @use_np @@ -1465,7 +1464,7 @@ class Perplexity(CrossEntropy): def __init__(self, ignore_label=None, axis=-1, name='perplexity', output_names=None, label_names=None): super(Perplexity, self).__init__( - name=name, ignore_label=ignore_label, axis=axis, + name=name, ignore_label=ignore_label, axis=axis, output_names=output_names, label_names=label_names) def get(self): @@ -1591,7 +1590,7 @@ def reset(self): self._pred_nums = 0 self._label_nums = 0 self._conv = 0 - + self.num_inst = 0 self.sum_metric = 0.0 @@ -1741,7 +1740,7 @@ def update(self, labels, preds): n = int(max(pred.max(), label.max())) if n >= self.k: self._grow(n + 1 - self.k) - bcm = numpy.zeros((self.k, self.k)) + bcm = numpy.zeros((self.k, self.k), dtype='float64') for i, j in zip(pred, label): bcm[i, j] += 1 self.lcm += bcm @@ -1754,7 +1753,7 @@ def sum_metric(self): def reset(self): """Resets the internal evaluation result to initial state.""" self.num_inst = 0. - self.lcm = numpy.zeros((self.k, self.k)) + self.lcm = numpy.zeros((self.k, self.k), dtype='float64') @register From bf68c6db891ad8cddb549a4eac8508342137afc0 Mon Sep 17 00:00:00 2001 From: acphile Date: Tue, 28 Apr 2020 14:32:30 +0000 Subject: [PATCH 14/24] remove sklearn --- python/mxnet/gluon/metric.py | 4 ++-- tests/python/unittest/test_metric.py | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 2679cda2cda2..9840f5569b93 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -707,8 +707,8 @@ def binary_matthewscc(self): def total_examples(self): if self.num_classes is None: return 0 - return self.false_negatives[0] + self.false_positives[0] + \ - self.true_negatives[0] + self.true_positives[0] + return int(self.false_negatives[0] + self.false_positives[0] + \ + self.true_negatives[0] + self.true_positives[0]) def reset_stats(self): self.num_classes = None diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index a9f7e4c1b568..af81251fa11b 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -16,7 +16,7 @@ # under the License. 
import mxnet as mx -mx.npx.set_np() +from mxnet.test_utils import use_np import numpy as np import scipy from scipy.stats import pearsonr @@ -159,11 +159,11 @@ def test_multiclass_f1(): assert microF1.num_inst == 6 assert macroF1.num_inst == 6 - from sklearn.metrics import f1_score - overall_pred = [0, 1, 2, 0, 1, 2] - overall_label = [0, 2, 1, 0, 0, 1] - fmacro = f1_score(overall_label, overall_pred, average="macro") - fmicro = f1_score(overall_label, overall_pred, average="micro") + # from sklearn.metrics import f1_score + # overall_pred = [0, 1, 2, 0, 1, 2] + # overall_label = [0, 2, 1, 0, 0, 1] + fmacro = 0.26666666666666666 #f1_score(overall_label, overall_pred, average="macro") + fmicro = 0.3333333333333333 #f1_score(overall_label, overall_pred, average="micro") np.testing.assert_almost_equal(microF1.get()[1], fmicro) np.testing.assert_almost_equal(macroF1.get()[1], fmacro) @@ -194,11 +194,11 @@ def test_multilabel_f1(): macroF1.update([label11, label12], [pred11, pred12]) assert microF1.num_inst == 3 assert macroF1.num_inst == 3 - from sklearn.metrics import f1_score - overall_pred = [[1, 0, 0], [0, 1, 1], [1, 1, 1]] - overall_label = [[1, 0, 1], [0, 0, 1], [0, 1, 1]] - fmacro = f1_score(overall_label, overall_pred, average="macro") - fmicro = f1_score(overall_label, overall_pred, average="micro") + #from sklearn.metrics import f1_score + #overall_pred = [[1, 0, 0], [0, 1, 1], [1, 1, 1]] + #overall_label = [[1, 0, 1], [0, 0, 1], [0, 1, 1]] + fmacro = 0.7111111111111111 #f1_score(overall_label, overall_pred, average="macro") + fmicro = 0.7272727272727272 #f1_score(overall_label, overall_pred, average="micro") np.testing.assert_almost_equal(microF1.get()[1], fmicro) np.testing.assert_almost_equal(macroF1.get()[1], fmacro) From 56b846e4d5f95ee77d0e6bf83bd55f629ee48593 Mon Sep 17 00:00:00 2001 From: acphile Date: Wed, 29 Apr 2020 03:57:31 +0000 Subject: [PATCH 15/24] remove reset_local() and get_global in other files --- example/ssd/train/metric.py | 11 ----------- python/mxnet/callback.py | 4 ++-- python/mxnet/module/base_module.py | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py index eeb9796bf4a8..731f8fcc19f4 100644 --- a/example/ssd/train/metric.py +++ b/example/ssd/train/metric.py @@ -39,17 +39,6 @@ def reset(self): self.num_inst = [0] * self.num self.sum_metric = [0.0] * self.num - def reset_local(self): - """ - override reset behavior - """ - if getattr(self, 'num', None) is None: - self.num_inst = 0 - self.sum_metric = 0.0 - else: - self.num_inst = [0] * self.num - self.sum_metric = [0.0] * self.num - def update(self, labels, preds): """ Implementation of updating metrics diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index 4be509270fd3..bd515707eace 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -112,7 +112,7 @@ def _callback(param): logging.info('Iter[%d] Batch[%d] Train-%s=%f', param.epoch, param.nbatch, name, value) if auto_reset: - param.eval_metric.reset_local() + param.eval_metric.reset() return _callback @@ -163,7 +163,7 @@ def __call__(self, param): if param.eval_metric is not None: name_value = param.eval_metric.get_name_value() if self.auto_reset: - param.eval_metric.reset_local() + param.eval_metric.reset() msg = 'Epoch[%d] Batch [%d-%d]\tSpeed: %.2f samples/sec' msg += '\t%s=%f'*len(name_value) logging.info(msg, param.epoch, count-self.frequent, count, speed, *sum(name_value, ())) diff --git a/python/mxnet/module/base_module.py 
b/python/mxnet/module/base_module.py index 9154aebb4b25..92fb7f188bfb 100644 --- a/python/mxnet/module/base_module.py +++ b/python/mxnet/module/base_module.py @@ -543,7 +543,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', monitor.toc_print() if end_of_batch: - eval_name_vals = eval_metric.get_global_name_value() + eval_name_vals = eval_metric.get_name_value() if batch_end_callback is not None: batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, From 8a437e967ee2203e7854d3b043d451ed09e6da79 Mon Sep 17 00:00:00 2001 From: acphile Date: Wed, 29 Apr 2020 06:10:45 +0000 Subject: [PATCH 16/24] fix test_mlp --- tests/python/train/test_mlp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 166fd8de28d8..5fc1277e95fe 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -35,7 +35,8 @@ def accuracy(label, pred): py = np.argmax(pred, axis=1) - return np.sum(py == label) / float(label.size) + return np.sum(py == label.astype(py)) / float(label.size) + # currently mxnet.numpy (which used in gluon.metric) did not support "==" between different types num_epoch = 4 prefix = './mlp' From b7c2b3bb67ba7ebeecaa58d36c14c87153725234 Mon Sep 17 00:00:00 2001 From: acphile Date: Wed, 29 Apr 2020 18:23:04 +0800 Subject: [PATCH 17/24] replace mx.metric with mx.gluon.metric in example --- benchmark/python/sparse/sparse_end2end.py | 2 +- example/adversary/adversary_generation.ipynb | 6 +++--- .../variational_autoencoder/VAE_example.ipynb | 2 +- example/caffe/caffe_net.py | 2 +- example/caffe/train_model.py | 2 +- example/capsnet/capsulenet.py | 2 +- example/ctc/lstm_ocr_train.py | 2 +- .../deep-embedded-clustering/autoencoder.py | 4 ++-- example/deep-embedded-clustering/dec.py | 2 +- .../gluon_mnist.py | 4 ++-- .../module_mnist.py | 2 +- .../resnet50_imagenet.py | 10 +++++----- example/distributed_training/cifar10_dist.py | 2 +- .../cifar10_kvstore_hvd.py | 4 ++-- example/fcn-xs/solver.py | 2 +- example/gluon/audio/urban_sounds/train.py | 2 +- example/gluon/dc_gan/dcgan.py | 2 +- example/gluon/image_classification.py | 2 +- example/gluon/mnist/mnist.py | 4 ++-- example/gluon/sn_gan/train.py | 2 +- .../gluon/super_resolution/super_resolution.py | 2 +- example/gluon/tree_lstm/main.py | 2 +- example/image-classification/common/fit.py | 4 ++-- example/image-classification/score.py | 4 ++-- example/image-classification/test_score.py | 4 ++-- example/kaggle-ndsb2/Train.py | 4 ++-- .../matrix_factorization/train.py | 2 +- example/module/mnist_mlp.py | 2 +- example/multi-task/multi-task-learning.ipynb | 8 ++++---- .../multivariate_time_series/src/metrics.py | 8 ++++---- .../named_entity_recognition/src/metrics.py | 10 +++++----- example/nce-loss/nce.py | 6 +++--- .../neural_collaborative_filtering/train.py | 2 +- example/quantization/imagenet_inference.py | 4 ++-- example/rcnn/symnet/metric.py | 12 ++++++------ example/rcnn/train.py | 2 +- example/rnn/bucketing/cudnn_rnn_bucketing.py | 6 +++--- example/rnn/bucketing/lstm_bucketing.py | 2 +- example/rnn/old/char-rnn.ipynb | 2 +- example/rnn/old/gru_bucketing.py | 2 +- example/rnn/old/lstm_bucketing.py | 2 +- example/rnn/old/rnn_cell_demo.py | 2 +- example/sparse/factorization_machine/metric.py | 18 +++++++++--------- example/sparse/factorization_machine/train.py | 2 +- example/sparse/linear_classification/train.py | 2 +- example/sparse/matrix_factorization/train.py | 2 +- example/sparse/wide_deep/inference.py | 2 +- 
example/sparse/wide_deep/train.py | 2 +- example/speech_recognition/stt_metric.py | 2 +- example/ssd/evaluate/eval_metric.py | 2 +- example/ssd/train/metric.py | 2 +- example/svm_mnist/svm_mnist.py | 4 ++-- .../api_usage_example/example_api_train.py | 2 +- .../api_usage_example/example_inference.py | 2 +- .../benchmarks/svrg_benchmark.ipynb | 4 ++-- .../svrg_module/linear_regression/common.py | 2 +- example/vae-gan/vaegan_mxnet.py | 8 ++++---- tests/nightly/estimator/test_estimator_cnn.py | 4 ++-- tests/nightly/estimator/test_sentiment_rnn.py | 10 +++++----- tests/nightly/test_optimizer.py | 2 +- tests/nightly/test_tlocal_racecondition.py | 2 +- tools/caffe_converter/test_converter.py | 2 +- 62 files changed, 114 insertions(+), 114 deletions(-) diff --git a/benchmark/python/sparse/sparse_end2end.py b/benchmark/python/sparse/sparse_end2end.py index d032f9d6c38e..fc949b649767 100644 --- a/benchmark/python/sparse/sparse_end2end.py +++ b/benchmark/python/sparse/sparse_end2end.py @@ -225,7 +225,7 @@ def row_sparse_pull(kv, key, data, slices, weight_array, priority): learning_rate=0.1, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=sgd, kvstore=kv) # use accuracy as the metric - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') index = mod._exec_group.param_names.index('w') # weight_array bound to executors of the contexts diff --git a/example/adversary/adversary_generation.ipynb b/example/adversary/adversary_generation.ipynb index 76c5f4cff569..0dda371a8f41 100644 --- a/example/adversary/adversary_generation.ipynb +++ b/example/adversary/adversary_generation.ipynb @@ -168,7 +168,7 @@ "epoch = 3\n", "for e in range(epoch):\n", " train_loss = 0.\n", - " acc = mx.metric.Accuracy()\n", + " acc = mx.gluon.metric.Accuracy()\n", " for i, (data, label) in enumerate(train_data):\n", " data = data.as_in_context(ctx)\n", " label = label.as_in_context(ctx)\n", @@ -223,7 +223,7 @@ " l = loss(output, label)\n", "l.backward()\n", "\n", - "acc = mx.metric.Accuracy()\n", + "acc = mx.gluon.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy {}\".format(acc.get()[1]))" @@ -256,7 +256,7 @@ "\n", "output = net(data_perturbated) \n", "\n", - "acc = mx.metric.Accuracy()\n", + "acc = mx.gluon.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy after perturbation {}\".format(acc.get()[1]))" diff --git a/example/autoencoder/variational_autoencoder/VAE_example.ipynb b/example/autoencoder/variational_autoencoder/VAE_example.ipynb index 964e13725c69..7de336611b38 100755 --- a/example/autoencoder/variational_autoencoder/VAE_example.ipynb +++ b/example/autoencoder/variational_autoencoder/VAE_example.ipynb @@ -610,7 +610,7 @@ ], "source": [ "# calculate the ELBO which is minus the loss for test set\n", - "metric = mx.metric.Loss()\n", + "metric = mx.gluon.metric.Loss()\n", "model.score(nd_iter_test, metric)" ] }, diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py index 803efda9b68e..d748298a2965 100644 --- a/example/caffe/caffe_net.py +++ b/example/caffe/caffe_net.py @@ -140,6 +140,6 @@ def parse_args(): # train if use_caffe_loss: - train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe()) + train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.gluon.metric.Caffe()) else: train_model.fit(args, net, get_iterator(data_shape, use_caffe_data)) diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py index 
d7dfd5d7a31e..96e81e06add4 100644 --- a/example/caffe/train_model.py +++ b/example/caffe/train_model.py @@ -93,7 +93,7 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None): eval_metrics = ['accuracy'] # TopKAccuracy only allows top_k > 1 for top_k in [5, 10, 20]: - eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k)) + eval_metrics.append(mx.gluon.metric.create('top_k_accuracy', top_k=top_k)) if batch_end_callback is not None: if not isinstance(batch_end_callback, list): diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py index 4d455dbc504c..2e38d85fbdea 100644 --- a/example/capsnet/capsulenet.py +++ b/example/capsnet/capsulenet.py @@ -122,7 +122,7 @@ def to4d(img): return img.reshape(img.shape[0], 1, 28, 28).astype(np.float32)/255 -class LossMetric(mx.metric.EvalMetric): +class LossMetric(mx.gluon.metric.EvalMetric): """Evaluate the loss function""" def __init__(self, batch_size, num_gpus): super(LossMetric, self).__init__('LossMetric') diff --git a/example/ctc/lstm_ocr_train.py b/example/ctc/lstm_ocr_train.py index 49d9531920ae..e774ff73ab08 100644 --- a/example/ctc/lstm_ocr_train.py +++ b/example/ctc/lstm_ocr_train.py @@ -103,7 +103,7 @@ def main(): module.fit(train_data=data_train, eval_data=data_val, # use metrics.accuracy or metrics.accuracy_lcs - eval_metric=mx.metric.np(metrics.accuracy, allow_extra_outputs=True), + eval_metric=mx.gluon.metric.np(metrics.accuracy, allow_extra_outputs=True), optimizer='sgd', optimizer_params={'learning_rate': hp.learning_rate, 'momentum': hp.momentum, diff --git a/example/deep-embedded-clustering/autoencoder.py b/example/deep-embedded-clustering/autoencoder.py index c75634475e3a..d6c15ae19df1 100644 --- a/example/deep-embedded-clustering/autoencoder.py +++ b/example/deep-embedded-clustering/autoencoder.py @@ -165,7 +165,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') @@ -188,7 +188,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') diff --git a/example/deep-embedded-clustering/dec.py b/example/deep-embedded-clustering/dec.py index 8fb3891e3e99..f67792f0fe37 100644 --- a/example/deep-embedded-clustering/dec.py +++ b/example/deep-embedded-clustering/dec.py @@ -122,7 +122,7 @@ def cluster(self, X, y=None, update_interval=None): def ce(label, pred): return np.sum(label*np.log(label/(pred+0.000001)))/label.shape[0] - solver.set_metric(mx.metric.CustomMetric(ce)) + solver.set_metric(mx.gluon.metric.CustomMetric(ce)) label_buff = np.zeros((X.shape[0], self.num_centers)) train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=batch_size, diff --git a/example/distributed_training-horovod/gluon_mnist.py b/example/distributed_training-horovod/gluon_mnist.py index 7b39f5776a42..c2e6f0bdc533 100644 --- 
a/example/distributed_training-horovod/gluon_mnist.py +++ b/example/distributed_training-horovod/gluon_mnist.py @@ -104,7 +104,7 @@ def conv_nets(): # Function to evaluate accuracy for a model def evaluate(model, data_iter, context): data_iter.reset() - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() for _, batch in enumerate(data_iter): data = batch.data[0].as_in_context(context) label = batch.label[0].as_in_context(context) @@ -149,7 +149,7 @@ def evaluate(model, data_iter, context): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() -metric = mx.metric.Accuracy() +metric = mx.gluon.metric.Accuracy() # Train model for epoch in range(args.epochs): diff --git a/example/distributed_training-horovod/module_mnist.py b/example/distributed_training-horovod/module_mnist.py index 4fcb02a46996..74f6bc9daf21 100644 --- a/example/distributed_training-horovod/module_mnist.py +++ b/example/distributed_training-horovod/module_mnist.py @@ -157,7 +157,7 @@ def conv_net(): num_epoch=args.epochs) # train for at most 10 dataset passes # Step 7: evaluate model accuracy -acc = mx.metric.Accuracy() +acc = mx.gluon.metric.Accuracy() model.score(val_iter, acc) if hvd.rank() == 0: diff --git a/example/distributed_training-horovod/resnet50_imagenet.py b/example/distributed_training-horovod/resnet50_imagenet.py index 5e5169e98ece..ae8a56100929 100644 --- a/example/distributed_training-horovod/resnet50_imagenet.py +++ b/example/distributed_training-horovod/resnet50_imagenet.py @@ -286,8 +286,8 @@ def evaluate(epoch): return val_data.reset() - acc_top1 = mx.metric.Accuracy() - acc_top5 = mx.metric.TopKAccuracy(5) + acc_top1 = mx.gluon.metric.Accuracy() + acc_top5 = mx.gluon.metric.TopKAccuracy(5) for _, batch in enumerate(val_data): data, label = batch_fn(batch, context) output = net(data.astype(args.dtype, copy=False)) @@ -321,7 +321,7 @@ def evaluate(epoch): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() # Train model for epoch in range(args.num_epochs): @@ -450,8 +450,8 @@ def train_module(): # Evaluate performance if not using synthetic data if args.use_rec: - acc_top1 = mx.metric.Accuracy() - acc_top5 = mx.metric.TopKAccuracy(5) + acc_top1 = mx.gluon.metric.Accuracy() + acc_top5 = mx.gluon.metric.TopKAccuracy(5) res = mod.score(val_data, [acc_top1, acc_top5]) for name, val in res: logging.info('Epoch[%d] Rank[%d] Validation-%s=%f', diff --git a/example/distributed_training/cifar10_dist.py b/example/distributed_training/cifar10_dist.py index d3ba515776f6..8c5fb3639ef9 100644 --- a/example/distributed_training/cifar10_dist.py +++ b/example/distributed_training/cifar10_dist.py @@ -121,7 +121,7 @@ def evaluate_accuracy(data_iterator, network): ---------- tuple of array element """ - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): diff --git a/example/distributed_training/cifar10_kvstore_hvd.py b/example/distributed_training/cifar10_kvstore_hvd.py index e6780e5db85e..ff679864f7c3 100644 --- a/example/distributed_training/cifar10_kvstore_hvd.py +++ b/example/distributed_training/cifar10_kvstore_hvd.py @@ -123,7 +123,7 @@ def evaluate(data_iterator, network, context): ---------- tuple of array element """ - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): @@ -208,7 
+208,7 @@ def __len__(self): optimizer_params={'learning_rate': args.lr}, kvstore=store) -train_metric = mx.metric.Accuracy() +train_metric = mx.gluon.metric.Accuracy() # Run as many epochs as required for epoch in range(args.epochs): diff --git a/example/fcn-xs/solver.py b/example/fcn-xs/solver.py index e99b31a13055..ab8964f80898 100644 --- a/example/fcn-xs/solver.py +++ b/example/fcn-xs/solver.py @@ -23,7 +23,7 @@ from collections import namedtuple from mxnet import optimizer as opt from mxnet.optimizer import get_updater -from mxnet import metric +from mxnet.gluon import metric # Parameter to pass to batch_end_callback BatchEndParam = namedtuple('BatchEndParams', ['epoch', 'nbatch', 'eval_metric']) diff --git a/example/gluon/audio/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py index c88f9fb55187..8a55c5b5bc67 100644 --- a/example/gluon/audio/urban_sounds/train.py +++ b/example/gluon/audio/urban_sounds/train.py @@ -28,7 +28,7 @@ def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() for data, label in data_iterator: output = net(data) predictions = nd.argmax(output, axis=1) diff --git a/example/gluon/dc_gan/dcgan.py b/example/gluon/dc_gan/dcgan.py index 93af13ababf3..1b1fa75c1c2a 100644 --- a/example/gluon/dc_gan/dcgan.py +++ b/example/gluon/dc_gan/dcgan.py @@ -259,7 +259,7 @@ def main(): real_label = mx.nd.ones((opt.batch_size,), ctx=ctx) fake_label = mx.nd.zeros((opt.batch_size,), ctx=ctx) - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() print('Training... ') stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/example/gluon/image_classification.py b/example/gluon/image_classification.py index 44a2afea3681..de31b06655eb 100644 --- a/example/gluon/image_classification.py +++ b/example/gluon/image_classification.py @@ -27,7 +27,7 @@ from mxnet.gluon.model_zoo import vision as models from mxnet import autograd as ag from mxnet.test_utils import get_mnist_iterator -from mxnet.metric import Accuracy, TopKAccuracy, CompositeEvalMetric +from mxnet.gluon.metric import Accuracy, TopKAccuracy, CompositeEvalMetric import numpy as np from data import (get_cifar10_iterator, get_imagenet_iterator, diff --git a/example/gluon/mnist/mnist.py b/example/gluon/mnist/mnist.py index 6aea3abc5041..4c1cc16bb7df 100644 --- a/example/gluon/mnist/mnist.py +++ b/example/gluon/mnist/mnist.py @@ -70,7 +70,7 @@ def transformer(data, label): # train def test(ctx): - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() for data, label in val_data: data = data.as_in_context(ctx) label = label.as_in_context(ctx) @@ -86,7 +86,7 @@ def train(epochs, ctx): # Trainer is for updating parameters with gradient. 
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': opt.lr, 'momentum': opt.momentum}) - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for epoch in range(epochs): diff --git a/example/gluon/sn_gan/train.py b/example/gluon/sn_gan/train.py index 46e44791cebd..fc4e87d632fe 100644 --- a/example/gluon/sn_gan/train.py +++ b/example/gluon/sn_gan/train.py @@ -102,7 +102,7 @@ def facc(label, pred): g_net.collect_params().zero_grad() d_net.collect_params().zero_grad() # define evaluation metric -metric = mx.metric.CustomMetric(facc) +metric = mx.gluon.metric.CustomMetric(facc) # initialize labels real_label = nd.ones(BATCH_SIZE, CTX) fake_label = nd.zeros(BATCH_SIZE, CTX) diff --git a/example/gluon/super_resolution/super_resolution.py b/example/gluon/super_resolution/super_resolution.py index 4a3e8d92aa39..52bfc2241f82 100644 --- a/example/gluon/super_resolution/super_resolution.py +++ b/example/gluon/super_resolution/super_resolution.py @@ -156,7 +156,7 @@ def hybrid_forward(self, F, x): return x net = SuperResolutionNet(upscale_factor) -metric = mx.metric.MSE() +metric = mx.gluon.metric.MSE() def test(ctx): val_data.reset() diff --git a/example/gluon/tree_lstm/main.py b/example/gluon/tree_lstm/main.py index 53af3fa019e9..41e4f4f13ed8 100644 --- a/example/gluon/tree_lstm/main.py +++ b/example/gluon/tree_lstm/main.py @@ -96,7 +96,7 @@ net = SimilarityTreeLSTM(sim_hidden_size, rnn_hidden_size, vocab.size, vocab.embed.shape[1], num_classes) # use pearson correlation and mean-square error for evaluation -metric = mx.metric.create(['pearsonr', 'mse']) +metric = mx.gluon.metric.create(['pearsonr', 'mse']) def to_target(x): target = np.zeros((1, num_classes)) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 38ca296cf986..8662db3baba4 100644 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -290,7 +290,7 @@ def fit(args, network, data_loader, **kwargs): # evaluation metrices eval_metrics = ['accuracy'] if args.top_k > 0: - eval_metrics.append(mx.metric.create( + eval_metrics.append(mx.gluon.metric.create( 'top_k_accuracy', top_k=args.top_k)) supported_loss = ['ce', 'nll_loss'] @@ -306,7 +306,7 @@ def fit(args, network, data_loader, **kwargs): logging.warning(loss_type + ' is not an valid loss type, only cross-entropy or ' \ 'negative likelihood loss is supported!') else: - eval_metrics.append(mx.metric.create(loss_type)) + eval_metrics.append(mx.gluon.metric.create(loss_type)) else: logging.warning("The output is not softmax_output, loss argument will be skipped!") diff --git a/example/image-classification/score.py b/example/image-classification/score.py index f40e649f1f42..dbad44ef6981 100644 --- a/example/image-classification/score.py +++ b/example/image-classification/score.py @@ -97,8 +97,8 @@ def score(model, data_val, metrics, gpus, batch_size, rgb_mean=None, mean_img=No logger = logging.getLogger() logger.setLevel(logging.DEBUG) - metrics = [mx.metric.create('acc'), - mx.metric.create('top_k_accuracy', top_k = 5)] + metrics = [mx.gluon.metric.create('acc'), + mx.gluon.metric.create('top_k_accuracy', top_k = 5)] (speed,) = score(metrics = metrics, **vars(args)) logging.info('Finished with %f images per second', speed) diff --git a/example/image-classification/test_score.py b/example/image-classification/test_score.py index 58c5c66a7f1f..1a82bcff5ba3 100644 --- a/example/image-classification/test_score.py +++ 
b/example/image-classification/test_score.py @@ -43,7 +43,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): models = ['imagenet1k-resnet-50', 'imagenet1k-resnet-152'] accs = [.77, .78] for (m, g) in zip(models, accs): - acc = mx.metric.create('acc') + acc = mx.gluon.metric.create('acc') (speed,) = score(model=m, data_val=imagenet_val_5k, rgb_mean='0,0,0', metrics=acc, **kwargs) r = acc.get()[1] @@ -52,7 +52,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): def test_imagenet1k_inception_bn(imagenet_val_5k_settings): imagenet_val_5k, kwargs = imagenet_val_5k_settings - acc = mx.metric.create('acc') + acc = mx.gluon.metric.create('acc') m = 'imagenet1k-inception-bn' g = 0.75 (speed,) = score(model=m, diff --git a/example/kaggle-ndsb2/Train.py b/example/kaggle-ndsb2/Train.py index 51e308a2e21c..c3ab165d11da 100644 --- a/example/kaggle-ndsb2/Train.py +++ b/example/kaggle-ndsb2/Train.py @@ -111,7 +111,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -systole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) +systole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) # # Predict systole @@ -139,7 +139,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -diastole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) +diastole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) # # Predict diastole diff --git a/example/model-parallel/matrix_factorization/train.py b/example/model-parallel/matrix_factorization/train.py index 591dab3a6534..fea2c153f853 100644 --- a/example/model-parallel/matrix_factorization/train.py +++ b/example/model-parallel/matrix_factorization/train.py @@ -94,7 +94,7 @@ 'rescale_grad': 1.0/batch_size} # use MSE as the metric - metric = mx.metric.create(['MSE']) + metric = mx.gluon.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, print_every) diff --git a/example/module/mnist_mlp.py b/example/module/mnist_mlp.py index 7d63a584aec9..f6d5bf306bd8 100644 --- a/example/module/mnist_mlp.py +++ b/example/module/mnist_mlp.py @@ -55,7 +55,7 @@ mod.init_params() mod.init_optimizer(optimizer_params={'learning_rate':0.01, 'momentum': 0.9}) -metric = mx.metric.create('acc') +metric = mx.gluon.metric.create('acc') for i_epoch in range(n_epoch): for i_iter, batch in enumerate(train_dataiter): diff --git a/example/multi-task/multi-task-learning.ipynb b/example/multi-task/multi-task-learning.ipynb index 048d6d9862b8..e615559441f6 100644 --- a/example/multi-task/multi-task-learning.ipynb +++ b/example/multi-task/multi-task-learning.ipynb @@ -267,8 +267,8 @@ "outputs": [], "source": [ "def evaluate_accuracy(net, data_iterator):\n", - " acc_digits = mx.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", " \n", " for i, (data, label_digit, label_odd_even) in enumerate(data_iterator):\n", " data = data.as_in_context(ctx)\n", @@ -335,8 +335,8 @@ "source": [ "for e in range(epochs):\n", " # Accuracies for each task\n", - " acc_digits = mx.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", " # Accumulative losses\n", " l_digits_ = 0.\n", " l_odd_even_ = 0. 
\n", diff --git a/example/multivariate_time_series/src/metrics.py b/example/multivariate_time_series/src/metrics.py index 4818591068f8..6dd8e765f0ed 100644 --- a/example/multivariate_time_series/src/metrics.py +++ b/example/multivariate_time_series/src/metrics.py @@ -46,10 +46,10 @@ def get_custom_metrics(): """ :return: mxnet metric object """ - _rse = mx.metric.create(rse) - _rae = mx.metric.create(rae) - _corr = mx.metric.create(corr) - return mx.metric.create([_rae, _rse, _corr]) + _rse = mx.gluon.metric.create(rse) + _rae = mx.gluon.metric.create(rae) + _corr = mx.gluon.metric.create(corr) + return mx.gluon.metric.create([_rae, _rse, _corr]) def evaluate(pred, label): return {"RAE":rae(label, pred), "RSE":rse(label,pred),"CORR": corr(label,pred)} \ No newline at end of file diff --git a/example/named_entity_recognition/src/metrics.py b/example/named_entity_recognition/src/metrics.py index a1d270af6863..d04904c7763e 100644 --- a/example/named_entity_recognition/src/metrics.py +++ b/example/named_entity_recognition/src/metrics.py @@ -79,9 +79,9 @@ def entity_f1(label, pred): return classifer_metrics(label, pred)[2] def composite_classifier_metrics(): - metric1 = mx.metric.CustomMetric(feval=entity_precision, name='entity precision') - metric2 = mx.metric.CustomMetric(feval=entity_recall, name='entity recall') - metric3 = mx.metric.CustomMetric(feval=entity_f1, name='entity f1 score') - metric4 = mx.metric.Accuracy() + metric1 = mx.gluon.metric.CustomMetric(feval=entity_precision, name='entity precision') + metric2 = mx.gluon.metric.CustomMetric(feval=entity_recall, name='entity recall') + metric3 = mx.gluon.metric.CustomMetric(feval=entity_f1, name='entity f1 score') + metric4 = mx.gluon.metric.Accuracy() - return mx.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) + return mx.gluon.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) diff --git a/example/nce-loss/nce.py b/example/nce-loss/nce.py index e59220a026a8..6764e9c20852 100644 --- a/example/nce-loss/nce.py +++ b/example/nce-loss/nce.py @@ -62,7 +62,7 @@ def nce_loss_subwords( label=label_weight) -class NceAccuracy(mx.metric.EvalMetric): +class NceAccuracy(mx.gluon.metric.EvalMetric): def __init__(self): super(NceAccuracy, self).__init__('nce-accuracy') @@ -75,7 +75,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceAuc(mx.metric.EvalMetric): +class NceAuc(mx.gluon.metric.EvalMetric): def __init__(self): super(NceAuc, self).__init__('nce-auc') @@ -105,7 +105,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceLSTMAuc(mx.metric.EvalMetric): +class NceLSTMAuc(mx.gluon.metric.EvalMetric): def __init__(self): super(NceLSTMAuc, self).__init__('nce-lstm-auc') diff --git a/example/neural_collaborative_filtering/train.py b/example/neural_collaborative_filtering/train.py index c68f271a6f0d..f99b16fd5b0e 100644 --- a/example/neural_collaborative_filtering/train.py +++ b/example/neural_collaborative_filtering/train.py @@ -124,7 +124,7 @@ def cross_entropy(label, pred, eps=1e-12): mod.init_params() mod.init_optimizer(optimizer='adam', optimizer_params=[('learning_rate', learning_rate), ('beta1',beta1), ('beta2',beta2), ('epsilon',eps)]) - metric = mx.metric.create(cross_entropy) + metric = mx.gluon.metric.create(cross_entropy) speedometer = mx.callback.Speedometer(batch_size, log_interval) best_hr, best_ndcg, best_iter = -1, -1, -1 logging.info('Training started ...') diff --git a/example/quantization/imagenet_inference.py b/example/quantization/imagenet_inference.py index 
4d690d37d00c..2f41fec2a9a3 100644 --- a/example/quantization/imagenet_inference.py +++ b/example/quantization/imagenet_inference.py @@ -70,8 +70,8 @@ def advance_data_iter(data_iter, n): def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples, logger=None): - metrics = [mx.metric.create('acc'), - mx.metric.create('top_k_accuracy', top_k=5)] + metrics = [mx.gluon.metric.create('acc'), + mx.gluon.metric.create('top_k_accuracy', top_k=5)] if not isinstance(metrics, list): metrics = [metrics, ] mod = mx.mod.Module(symbol=sym, context=devs, label_names=[label_name, ]) diff --git a/example/rcnn/symnet/metric.py b/example/rcnn/symnet/metric.py index fa8d7919e919..6509ba436d75 100644 --- a/example/rcnn/symnet/metric.py +++ b/example/rcnn/symnet/metric.py @@ -25,7 +25,7 @@ def get_names(): return pred, label -class RPNAccMetric(mx.metric.EvalMetric): +class RPNAccMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNAccMetric, self).__init__('RPNAcc') self.pred, self.label = get_names() @@ -49,7 +49,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RCNNAccMetric(mx.metric.EvalMetric): +class RCNNAccMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNAccMetric, self).__init__('RCNNAcc') self.pred, self.label = get_names() @@ -66,7 +66,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RPNLogLossMetric(mx.metric.EvalMetric): +class RPNLogLossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNLogLossMetric, self).__init__('RPNLogLoss') self.pred, self.label = get_names() @@ -93,7 +93,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RCNNLogLossMetric(mx.metric.EvalMetric): +class RCNNLogLossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNLogLossMetric, self).__init__('RCNNLogLoss') self.pred, self.label = get_names() @@ -114,7 +114,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RPNL1LossMetric(mx.metric.EvalMetric): +class RPNL1LossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNL1LossMetric, self).__init__('RPNL1Loss') self.pred, self.label = get_names() @@ -130,7 +130,7 @@ def update(self, labels, preds): self.num_inst += num_inst -class RCNNL1LossMetric(mx.metric.EvalMetric): +class RCNNL1LossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNL1LossMetric, self).__init__('RCNNL1Loss') self.pred, self.label = get_names() diff --git a/example/rcnn/train.py b/example/rcnn/train.py index 7b1f2f7f31a5..4d89ac6e2cdd 100644 --- a/example/rcnn/train.py +++ b/example/rcnn/train.py @@ -85,7 +85,7 @@ def train_net(sym, roidb, args): eval_metric = RCNNAccMetric() cls_metric = RCNNLogLossMetric() bbox_metric = RCNNL1LossMetric() - eval_metrics = mx.metric.CompositeEvalMetric() + eval_metrics = mx.gluon.metric.CompositeEvalMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) diff --git a/example/rnn/bucketing/cudnn_rnn_bucketing.py b/example/rnn/bucketing/cudnn_rnn_bucketing.py index 38275ae3dfb8..8f77172087ef 100644 --- a/example/rnn/bucketing/cudnn_rnn_bucketing.py +++ b/example/rnn/bucketing/cudnn_rnn_bucketing.py @@ -156,7 +156,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.metric.Perplexity(invalid_label), + eval_metric = mx.gluon.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, 
optimizer_params = opt_params, @@ -244,14 +244,14 @@ def sym_gen(seq_len): if args.dtype == "float32": model.set_params(arg_params, aux_params) - model.score(data_val, mx.metric.Perplexity(invalid_label), + model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) else: assert args.dtype == "float16", "Only float32 and float16 are supported currently" model = amp.convert_bucketing_module(model, target_dtype="float16") model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - model.score(data_val, mx.metric.Perplexity(invalid_label), + model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) if __name__ == '__main__': diff --git a/example/rnn/bucketing/lstm_bucketing.py b/example/rnn/bucketing/lstm_bucketing.py index 7f150104f458..281aa8988ab0 100644 --- a/example/rnn/bucketing/lstm_bucketing.py +++ b/example/rnn/bucketing/lstm_bucketing.py @@ -115,7 +115,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.metric.Perplexity(invalid_label), + eval_metric = mx.gluon.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, optimizer_params = { 'learning_rate': args.lr, diff --git a/example/rnn/old/char-rnn.ipynb b/example/rnn/old/char-rnn.ipynb index 1ec56cd9aa8c..4fd32d932512 100644 --- a/example/rnn/old/char-rnn.ipynb +++ b/example/rnn/old/char-rnn.ipynb @@ -347,7 +347,7 @@ "source": [ "# Fit it\n", "model.fit(X=data_train,\n", - " eval_metric = mx.metric.np(Perplexity),\n", + " eval_metric = mx.gluon.metric.np(Perplexity),\n", " batch_end_callback=mx.callback.Speedometer(batch_size, 50),\n", " epoch_end_callback=mx.callback.do_checkpoint(\"obama\"))" ] diff --git a/example/rnn/old/gru_bucketing.py b/example/rnn/old/gru_bucketing.py index b9f651a90dc0..47c13ec0db43 100644 --- a/example/rnn/old/gru_bucketing.py +++ b/example/rnn/old/gru_bucketing.py @@ -88,6 +88,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, - eval_metric = mx.metric.np(Perplexity), + eval_metric = mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/lstm_bucketing.py b/example/rnn/old/lstm_bucketing.py index 0fe4116250a2..2bea6cc3898f 100644 --- a/example/rnn/old/lstm_bucketing.py +++ b/example/rnn/old/lstm_bucketing.py @@ -90,6 +90,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, kvstore='device', - eval_metric = mx.metric.np(Perplexity), + eval_metric = mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/rnn_cell_demo.py b/example/rnn/old/rnn_cell_demo.py index c5772fa3a5b7..64a8ee0fe72b 100644 --- a/example/rnn/old/rnn_cell_demo.py +++ b/example/rnn/old/rnn_cell_demo.py @@ -144,7 +144,7 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) mod.fit(data_train, eval_data=data_val, num_epoch=num_epoch, - eval_metric=mx.metric.np(Perplexity), + eval_metric=mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50), initializer=mx.init.Xavier(factor_type="in", magnitude=2.34), optimizer='sgd', diff --git a/example/sparse/factorization_machine/metric.py b/example/sparse/factorization_machine/metric.py index a8c52c781c0f..8c80f0092203 100644 
--- a/example/sparse/factorization_machine/metric.py +++ b/example/sparse/factorization_machine/metric.py @@ -19,9 +19,9 @@ import numpy as np from operator import itemgetter -@mx.metric.register -@mx.metric.alias('log_loss') -class LogLossMetric(mx.metric.EvalMetric): +@mx.gluon.metric.register +@mx.gluon.metric.alias('log_loss') +class LogLossMetric(mx.gluon.metric.EvalMetric): """Computes the negative log-likelihood loss. The negative log-likelihoodd loss over a batch of sample size :math:`N` is given by @@ -51,7 +51,7 @@ class LogLossMetric(mx.metric.EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3], [0], [0.4]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> log_loss= mx.metric.NegativeLogLikelihood() + >>> log_loss= mx.gluon.metric.NegativeLogLikelihood() >>> log_loss.update(labels, predicts) >>> print(log_loss.get()) ('log-loss', 0.57159948348999023) @@ -74,7 +74,7 @@ def update(self, labels, preds): preds : list of `NDArray` Predicted values. """ - mx.metric.check_label_shapes(labels, preds) + mx.gluon.metric.check_label_shapes(labels, preds) for label, pred in zip(labels, preds): label = label.asnumpy() @@ -88,16 +88,16 @@ def update(self, labels, preds): self.sum_metric += (-np.log(prob + self.eps)).sum() self.num_inst += num_examples -@mx.metric.register -@mx.metric.alias('auc') -class AUCMetric(mx.metric.EvalMetric): +@mx.gluon.metric.register +@mx.gluon.metric.alias('auc') +class AUCMetric(mx.gluon.metric.EvalMetric): def __init__(self, eps=1e-12): super(AUCMetric, self).__init__( 'auc') self.eps = eps def update(self, labels, preds): - mx.metric.check_label_shapes(labels, preds) + mx.gluon.metric.check_label_shapes(labels, preds) label_weight = labels[0].asnumpy() preds = preds[0].asnumpy() tmp = [] diff --git a/example/sparse/factorization_machine/train.py b/example/sparse/factorization_machine/train.py index b30f9cc81acf..1e2ab0e2f0ff 100644 --- a/example/sparse/factorization_machine/train.py +++ b/example/sparse/factorization_machine/train.py @@ -110,7 +110,7 @@ def all_row_ids(data_batch): mod.init_optimizer(optimizer='adam', kvstore=kv, optimizer_params=optimizer_params) # metrics - metric = mx.metric.create(['log_loss', 'auc']) + metric = mx.gluon.metric.create(['log_loss', 'auc']) speedometer = mx.callback.Speedometer(batch_size, log_interval) logging.info('Training started ...') diff --git a/example/sparse/linear_classification/train.py b/example/sparse/linear_classification/train.py index 0a8acfd87bef..77eb2c09de28 100644 --- a/example/sparse/linear_classification/train.py +++ b/example/sparse/linear_classification/train.py @@ -100,7 +100,7 @@ def all_row_ids(data_batch): optim = mx.optimizer.create(optimizer, learning_rate=0.01, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=optim, kvstore=kv) # use accuracy as the metric - metric = mx.metric.create(['nll_loss']) + metric = mx.gluon.metric.create(['nll_loss']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, 100) diff --git a/example/sparse/matrix_factorization/train.py b/example/sparse/matrix_factorization/train.py index 44bab2c416ba..d9dccce89459 100644 --- a/example/sparse/matrix_factorization/train.py +++ b/example/sparse/matrix_factorization/train.py @@ -101,7 +101,7 @@ def all_row_ids(data_batch): rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim, kvstore='device') # use MSE as the metric - metric = mx.metric.create(['MSE']) + metric = mx.gluon.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, log_interval) 
logging.info('Training started ...') for epoch in range(num_epoch): diff --git a/example/sparse/wide_deep/inference.py b/example/sparse/wide_deep/inference.py index e14396e50c15..c615020200e2 100644 --- a/example/sparse/wide_deep/inference.py +++ b/example/sparse/wide_deep/inference.py @@ -93,7 +93,7 @@ else: logging.info('Inference started ...') # use accuracy as the metric - metric = mx.metric.create(['acc']) + metric = mx.gluon.metric.create(['acc']) accuracy_avg = 0.0 for batch in data_iter: nbatch += 1 diff --git a/example/sparse/wide_deep/train.py b/example/sparse/wide_deep/train.py index eea70301660d..c8c2b157865a 100644 --- a/example/sparse/wide_deep/train.py +++ b/example/sparse/wide_deep/train.py @@ -83,7 +83,7 @@ optim = mx.optimizer.create(optimizer, learning_rate=lr, rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim) # use accuracy as the metric - metric = mx.metric.create(['acc']) + metric = mx.gluon.metric.create(['acc']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, log_interval) diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py index 26609627ea58..1eb77aa301cb 100644 --- a/example/speech_recognition/stt_metric.py +++ b/example/speech_recognition/stt_metric.py @@ -35,7 +35,7 @@ def check_label_shapes(labels, preds, shape=0): "predictions {}".format(label_shape, pred_shape)) -class STTMetric(mx.metric.EvalMetric): +class STTMetric(mx.gluon.metric.EvalMetric): def __init__(self, batch_size, num_gpu, is_epoch_end=False, is_logging=True): super(STTMetric, self).__init__('STTMetric') diff --git a/example/ssd/evaluate/eval_metric.py b/example/ssd/evaluate/eval_metric.py index 1deb381fb859..b038d3afb376 100644 --- a/example/ssd/evaluate/eval_metric.py +++ b/example/ssd/evaluate/eval_metric.py @@ -18,7 +18,7 @@ import mxnet as mx import numpy as np -class MApMetric(mx.metric.EvalMetric): +class MApMetric(mx.gluon.metric.EvalMetric): """ Calculate mean AP for object detection task diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py index 731f8fcc19f4..a99c8762de16 100644 --- a/example/ssd/train/metric.py +++ b/example/ssd/train/metric.py @@ -19,7 +19,7 @@ import numpy as np -class MultiBoxMetric(mx.metric.EvalMetric): +class MultiBoxMetric(mx.gluon.metric.EvalMetric): """Calculate metrics for Multibox training """ def __init__(self, eps=1e-8): super(MultiBoxMetric, self).__init__('MultiBox') diff --git a/example/svm_mnist/svm_mnist.py b/example/svm_mnist/svm_mnist.py index e166cb6ac707..9ceae6d4588b 100644 --- a/example/svm_mnist/svm_mnist.py +++ b/example/svm_mnist/svm_mnist.py @@ -113,8 +113,8 @@ 'momentum': 0.9, # Momentum for SGD with momentum 'wd': 0.00001, # Weight decay for regularization }) - results[output.name] = mod.score(test_iter, mx.metric.Accuracy())[0][1]*100 - print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.metric.Accuracy())[0][1]*100, '%\n') + results[output.name] = mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100 + print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100, '%\n') for key, value in results.items(): print(key, value, "%s") diff --git a/example/svrg_module/api_usage_example/example_api_train.py b/example/svrg_module/api_usage_example/example_api_train.py index f6cd1b2e592c..cc9987fe3edb 100644 --- a/example/svrg_module/api_usage_example/example_api_train.py +++ b/example/svrg_module/api_usage_example/example_api_train.py @@ -40,7 +40,7 @@ def 
test_svrg_intermediate_level_api(args): mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) kv = mx.kv.create("local") mod.init_optimizer(kvstore=kv, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") for e in range(num_epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/api_usage_example/example_inference.py b/example/svrg_module/api_usage_example/example_inference.py index 312f9796074d..7e5b7a40abe2 100644 --- a/example/svrg_module/api_usage_example/example_inference.py +++ b/example/svrg_module/api_usage_example/example_inference.py @@ -42,7 +42,7 @@ def get_validation_score(args): mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") for e in range(epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/benchmarks/svrg_benchmark.ipynb b/example/svrg_module/benchmarks/svrg_benchmark.ipynb index 54ae81281db3..66f52d70be5f 100644 --- a/example/svrg_module/benchmarks/svrg_benchmark.ipynb +++ b/example/svrg_module/benchmarks/svrg_benchmark.ipynb @@ -127,7 +127,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.metric.create(\"mse\")\n", + " metrics = mx.gluon.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", @@ -170,7 +170,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.metric.create(\"mse\")\n", + " metrics = mx.gluon.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", diff --git a/example/svrg_module/linear_regression/common.py b/example/svrg_module/linear_regression/common.py index 14a144f40ce2..edf4f729f3e6 100644 --- a/example/svrg_module/linear_regression/common.py +++ b/example/svrg_module/linear_regression/common.py @@ -39,7 +39,7 @@ def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size def create_metrics(metrics): - metric = mx.metric.create(metrics) + metric = mx.gluon.metric.create(metrics) return metric diff --git a/example/vae-gan/vaegan_mxnet.py b/example/vae-gan/vaegan_mxnet.py index 38e7e2ecc92f..1881f383c18b 100644 --- a/example/vae-gan/vaegan_mxnet.py +++ b/example/vae-gan/vaegan_mxnet.py @@ -424,10 +424,10 @@ def kldivergence(label, pred): KLLoss = KLLoss / nElements return KLLoss - mG = mx.metric.CustomMetric(fentropy) - mD = mx.metric.CustomMetric(fentropy) - mE = mx.metric.CustomMetric(kldivergence) - mACC = mx.metric.CustomMetric(facc) + mG = mx.gluon.metric.CustomMetric(fentropy) + mD = mx.gluon.metric.CustomMetric(fentropy) + mE = mx.gluon.metric.CustomMetric(kldivergence) + mACC = mx.gluon.metric.CustomMetric(facc) print('Training...') stamp = 
datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/tests/nightly/estimator/test_estimator_cnn.py b/tests/nightly/estimator/test_estimator_cnn.py index 0d113cdf4984..466c01019575 100644 --- a/tests/nightly/estimator/test_estimator_cnn.py +++ b/tests/nightly/estimator/test_estimator_cnn.py @@ -116,7 +116,7 @@ def test_estimator_cpu(): # Define estimator est = estimator.Estimator(net=net, loss=loss, - train_metrics=mx.metric.Accuracy(), + train_metrics=mx.gluon.metric.Accuracy(), trainer=trainer, context=context) # Call fit() @@ -140,7 +140,7 @@ def test_estimator_gpu(): train_data, test_data = load_data_mnist(batch_size, resize=224) loss = gluon.loss.SoftmaxCrossEntropyLoss() net.hybridize() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # Define estimator est = estimator.Estimator(net=net, diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py index 367c69b88a0b..7d3561db3789 100644 --- a/tests/nightly/estimator/test_sentiment_rnn.py +++ b/tests/nightly/estimator/test_sentiment_rnn.py @@ -190,11 +190,11 @@ def run(net, train_dataloader, test_dataloader, num_epochs, ctx, lr): trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr}) # Define loss and evaluation metrics loss = gluon.loss.SoftmaxCrossEntropyLoss() - metrics = mx.metric.CompositeEvalMetric() - acc = mx.metric.Accuracy() - nested_metrics = mx.metric.CompositeEvalMetric() - metrics.add([acc, mx.metric.Loss()]) - nested_metrics.add([metrics, mx.metric.Accuracy()]) + metrics = mx.gluon.metric.CompositeEvalMetric() + acc = mx.gluon.metric.Accuracy() + nested_metrics = mx.gluon.metric.CompositeEvalMetric() + metrics.add([acc, mx.gluon.metric.Loss()]) + nested_metrics.add([metrics, mx.gluon.metric.Accuracy()]) # Define estimator est = estimator.Estimator(net=net, loss=loss, train_metrics=nested_metrics, diff --git a/tests/nightly/test_optimizer.py b/tests/nightly/test_optimizer.py index 0a87368d991e..9c2fcb8a62cf 100644 --- a/tests/nightly/test_optimizer.py +++ b/tests/nightly/test_optimizer.py @@ -83,7 +83,7 @@ def test_lars(): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.98, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/nightly/test_tlocal_racecondition.py b/tests/nightly/test_tlocal_racecondition.py index d43c45937c05..986e1f464bfb 100644 --- a/tests/nightly/test_tlocal_racecondition.py +++ b/tests/nightly/test_tlocal_racecondition.py @@ -91,7 +91,7 @@ def infer_type(self, in_type): def create_operator(self, ctx, shapes, dtypes): return MyCustom() -class MyMetric(mx.metric.EvalMetric): +class MyMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(MyMetric, self).__init__("MyMetric") self.name = ['empty'] diff --git a/tools/caffe_converter/test_converter.py b/tools/caffe_converter/test_converter.py index 49f8bdb167c2..880de1be449f 100644 --- a/tools/caffe_converter/test_converter.py +++ b/tools/caffe_converter/test_converter.py @@ -40,7 +40,7 @@ def test_imagenet_model_performance(model_name, val_data, gpus, batch_size): meta_info = get_model_meta_info(model_name) [model_name, mean] = convert_caffe_model(model_name, meta_info) sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) - acc = [mx.metric.create('acc'), mx.metric.create('top_k_accuracy', top_k=5)] + acc = 
[mx.gluon.metric.create('acc'), mx.gluon.metric.create('top_k_accuracy', top_k=5)] if isinstance(mean, str): mean_args = {'mean_img':mean} else: From ec615a5463d4d3320a099b35e2e4ee8472bc5eea Mon Sep 17 00:00:00 2001 From: acphile Date: Wed, 29 Apr 2020 13:46:05 +0000 Subject: [PATCH 18/24] fix context difference --- python/mxnet/gluon/metric.py | 48 +++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 9840f5569b93..dc9d5c957444 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -420,10 +420,12 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred_label in zip(labels, preds): + pred_label = pred_label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred_label.ctx) if pred_label.shape != label.shape: - pred_label = ndarray.argmax(pred_label, axis=self.axis) - pred_label = pred_label.as_np_ndarray().astype('int32') - label = label.as_np_ndarray().astype('int32') + pred_label = pred_label.argmax(axis=self.axis) + pred_label = pred_label.astype('int32') + label = label.astype('int32') # flatten before checking shapes to avoid shape miss match label = label.reshape(-1) pred_label = pred_label.reshape(-1) @@ -501,7 +503,7 @@ def update(self, labels, preds): # much faster, which is important since that computation is # single-threaded due to Python GIL. pred_label = numpy.argpartition(pred_label.as_np_ndarray().astype('float32'), -self.top_k) - label = label.as_np_ndarray().astype('int32') + label = label.as_np_ndarray().astype('int32').as_in_ctx(pred_label.ctx) check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) @@ -570,13 +572,13 @@ def __init__(self, class_type="binary", threshold=0.5, beta=1): self.beta = beta self.reset_stats() - def _set(self, num): + def _set(self, num, ctx): if self.num_classes is None: self.num_classes = num - self.true_positives = numpy.zeros(num, dtype='float64') - self.false_negatives = numpy.zeros(num, dtype='float64') - self.false_positives = numpy.zeros(num, dtype='float64') - self.true_negatives = numpy.zeros(num, dtype='float64') + self.true_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.false_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.false_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.true_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) else: assert self.num_classes == num, \ "Input number of classes has changed from {} to {}".format(self.num_classes, num) @@ -593,9 +595,9 @@ def update_stats(self, label, pred): Predicted values. """ pred = pred.as_np_ndarray() - label = label.as_np_ndarray().astype('int32') + label = label.as_np_ndarray().astype('int32').as_in_ctx(pred.ctx) if self.class_type == "binary": - self._set(1) + self._set(1, pred.ctx) if label.max() > 1: raise ValueError("Wrong label for binary classification.") if pred.shape == label.shape: @@ -609,14 +611,14 @@ def update_stats(self, label, pred): elif self.class_type == "multiclass": num = pred.shape[-1] - self._set(num) + self._set(num, pred.ctx) assert label.max() < num, "pred contains fewer classes than label!" 
pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) label = one_hot(label.reshape(-1), num) elif self.class_type == "multilabel": num = pred.shape[-1] - self._set(num) + self._set(num, pred.ctx) assert pred.shape == label.shape, \ "The shape of label should be same as that of prediction for multilabel classification." pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) @@ -919,7 +921,7 @@ def update(self, labels, preds): pred_label = predict_with_threshold(pred_label, self.threshold) pred_label = pred_label.as_np_ndarray().astype('int32') - label = label.as_np_ndarray().astype('int32') + label = label.as_np_ndarray().astype('int32').as_in_ctx(pred_label.ctx) # flatten before checking shapes to avoid shape miss match label = label.reshape(-1) pred_label = pred_label.reshape(-1) @@ -1078,7 +1080,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() num_inst = label.shape[0] @@ -1137,7 +1139,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() num_inst = label.shape[0] @@ -1241,7 +1243,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() label = label.reshape(label.shape[0], -1) @@ -1308,7 +1310,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() if len(label.shape) == 1: @@ -1393,7 +1395,7 @@ def update(self, labels, preds): for label, pred in zip(labels, preds): assert label.size == pred.size/pred.shape[-1], \ "shape mismatch: %s vs. 
%s"%(label.shape, pred.shape) - label = label.as_in_context(pred.context).reshape((label.size,)) + label = label.as_in_context(pred.ctx).reshape((label.size,)) pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) label = label.as_np_ndarray() pred = pred.as_np_ndarray() @@ -1533,7 +1535,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() label = label.reshape(-1) @@ -1620,7 +1622,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.as_np_ndarray().reshape(-1).astype(numpy.float64) + label = label.as_np_ndarray().as_in_ctx(pred.ctx).reshape(-1).astype(numpy.float64) pred = pred.as_np_ndarray().reshape(-1).astype(numpy.float64) self.num_inst += 1 @@ -1731,7 +1733,7 @@ def update(self, labels, preds): # update the confusion matrix for label, pred in zip(labels, preds): - label = label.astype('int32', copy=False).as_np_ndarray() + label = label.astype('int32', copy=False).as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() if pred.shape != label.shape: pred = pred.argmax(axis=1).astype(label, copy=False) @@ -1870,7 +1872,7 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for pred, label in zip(preds, labels): - label = label.as_np_ndarray() + label = label.as_np_ndarray().as_in_ctx(pred.ctx) pred = pred.as_np_ndarray() reval = self._feval(label, pred) From c4a3b67635a1fe3ea90569a7c70a95958b82f22e Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Thu, 30 Apr 2020 05:48:17 +0000 Subject: [PATCH 19/24] Disable -DUSE_TVM_OP on GPU builds --- ci/docker/runtime_functions.sh | 49 ++------------------------------- ci/jenkins/Jenkins_steps.groovy | 27 ------------------ ci/jenkins/Jenkinsfile_unix_gpu | 2 -- 3 files changed, 2 insertions(+), 76 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 12b50133f22b..3808ba0da76f 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -723,7 +723,6 @@ build_ubuntu_gpu_mkldnn() { CC=gcc-7 CXX=g++-7 cmake \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_TVM_OP=ON \ -DUSE_CUDA=ON \ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DUSE_CPP_PACKAGE=ON \ @@ -737,7 +736,6 @@ build_ubuntu_gpu_mkldnn_nocudnn() { CC=gcc-7 CXX=g++-7 cmake \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_TVM_OP=ON \ -DUSE_CUDA=ON \ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DUSE_CUDNN=OFF \ @@ -752,7 +750,6 @@ build_ubuntu_gpu_cuda101_cudnn7() { CC=gcc-7 CXX=g++-7 cmake \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_TVM_OP=ON \ -DUSE_CUDA=ON \ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DUSE_CUDNN=ON \ @@ -775,7 +772,6 @@ build_ubuntu_gpu_cuda101_cudnn7_make() { USE_CUDA=1 \ USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=1 \ - USE_TVM_OP=1 \ USE_CPP_PACKAGE=1 \ USE_DIST_KVSTORE=1 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ @@ -795,7 +791,6 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() { USE_CUDA=1 \ USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=1 \ - USE_TVM_OP=0 \ USE_CPP_PACKAGE=1 \ USE_DIST_KVSTORE=1 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ @@ -805,23 +800,6 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() { make cython 
PYTHON=python3 } -build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() { - set -ex - cd /work/build - CC=gcc-7 CXX=g++-7 cmake \ - -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ - -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_TVM_OP=OFF \ - -DUSE_CUDA=ON \ - -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ - -DUSE_CUDNN=ON \ - -DUSE_MKLDNN=OFF \ - -DBUILD_CYTHON_MODULES=ON \ - -DUSE_DIST_KVSTORE=ON \ - -G Ninja /work/mxnet - ninja -} - build_ubuntu_amalgamation() { set -ex # Amalgamation can not be run with -j nproc @@ -852,7 +830,6 @@ build_ubuntu_gpu_cmake() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=ON \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ -DUSE_MKLDNN=OFF \ @@ -873,7 +850,6 @@ build_ubuntu_gpu_cmake_no_rtc() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=ON \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ -DUSE_MKLDNN=ON \ @@ -888,27 +864,6 @@ build_ubuntu_gpu_cmake_no_rtc() { ninja } -build_ubuntu_gpu_cmake_no_tvm_op() { - set -ex - cd /work/build - CC=gcc-7 CXX=g++-7 cmake \ - -DUSE_SIGNAL_HANDLER=ON \ - -DUSE_CUDA=ON \ - -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=OFF \ - -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_MKLML_MKL=OFF \ - -DUSE_MKLDNN=OFF \ - -DUSE_DIST_KVSTORE=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ - -DBUILD_CYTHON_MODULES=1 \ - -G Ninja \ - /work/mxnet - - ninja -} - build_ubuntu_cpu_large_tensor() { set -ex cd /work/build @@ -931,7 +886,6 @@ build_ubuntu_gpu_large_tensor() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=ON \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ -DUSE_MKLDNN=OFF \ @@ -989,7 +943,8 @@ cd_unittest_ubuntu() { # Adding these here as CI doesn't test all CUDA environments pytest example/image-classification/test_score.py - integrationtest_ubuntu_gpu_dist_kvstore + # TODO(szha): fix and reenable the hanging issue. 
tracked in #18098 + # integrationtest_ubuntu_gpu_dist_kvstore fi if [[ ${mxnet_variant} = *mkl ]]; then diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index f129fe1299ab..747ddcf27ce0 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -289,20 +289,6 @@ def compile_unix_full_gpu_mkldnn_cpp_test() { }] } -def compile_unix_full_gpu_no_tvm_op() { - return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': { - node(NODE_LINUX_CPU) { - ws('workspace/build-gpu-no-tvm-op') { - timeout(time: max_time, unit: 'MINUTES') { - utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false) - utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) - } - } - } - }] -} - def compile_unix_cmake_gpu() { return ['GPU: CMake': { node(NODE_LINUX_CPU) { @@ -317,19 +303,6 @@ def compile_unix_cmake_gpu() { }] } -def compile_unix_cmake_gpu_no_tvm_op() { - return ['GPU: CMake TVM_OP OFF': { - node(NODE_LINUX_CPU) { - ws('workspace/build-cmake-gpu-no-tvm-op') { - timeout(time: max_time, unit: 'MINUTES') { - utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false) - } - } - } - }] -} - def compile_unix_cmake_gpu_no_rtc() { return ['GPU: CMake CUDA RTC OFF': { node(NODE_LINUX_CPU) { diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index 7742a654faa1..384f9f2908cd 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -41,8 +41,6 @@ core_logic: { custom_steps.compile_unix_cmake_gpu(), custom_steps.compile_unix_tensorrt_gpu(), custom_steps.compile_unix_int64_gpu(), - custom_steps.compile_unix_full_gpu_no_tvm_op(), - custom_steps.compile_unix_cmake_gpu_no_tvm_op(), custom_steps.compile_unix_cmake_gpu_no_rtc(), custom_steps.compile_unix_full_gpu_mkldnn_cpp_test() ]) From 0456416a05a0f9bf8e65cf64c6315f75ee503522 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Thu, 30 Apr 2020 05:52:10 +0000 Subject: [PATCH 20/24] Fix disable tvm op for gpu runs --- ci/docker/runtime_functions.sh | 3 +-- ci/jenkins/Jenkins_steps.groovy | 16 ---------------- ci/jenkins/Jenkinsfile_unix_gpu | 1 - 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 3808ba0da76f..9856346dc460 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -943,8 +943,7 @@ cd_unittest_ubuntu() { # Adding these here as CI doesn't test all CUDA environments pytest example/image-classification/test_score.py - # TODO(szha): fix and reenable the hanging issue. 
tracked in #18098 - # integrationtest_ubuntu_gpu_dist_kvstore + integrationtest_ubuntu_gpu_dist_kvstore fi if [[ ${mxnet_variant} = *mkl ]]; then diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 747ddcf27ce0..59ad73d58f0f 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -817,22 +817,6 @@ def test_unix_python3_gpu() { }] } -def test_unix_python3_gpu_no_tvm_op() { - return ['Python3: GPU TVM_OP OFF': { - node(NODE_LINUX_GPU) { - ws('workspace/ut-python3-gpu-no-tvm-op') { - try { - utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) - python3_gpu_ut_cython('ubuntu_gpu_cu101') - utils.publish_test_coverage() - } finally { - utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml') - } - } - } - }] -} - def test_unix_python3_quantize_gpu() { return ['Python3: Quantize GPU': { node(NODE_LINUX_GPU_P3) { diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index 384f9f2908cd..0e2310fc9220 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -59,7 +59,6 @@ core_logic: { custom_steps.test_unix_scala_gpu(), // TODO(szha): fix and reenable the hanging issue. tracked in #18098 // custom_steps.test_unix_distributed_kvstore_gpu(), - custom_steps.test_unix_python3_gpu_no_tvm_op(), custom_steps.test_unix_capi_cpp_package(), ]) } From 8163fbbe8881c0584c8f4f3aeba8b87e3e3006be Mon Sep 17 00:00:00 2001 From: acphile Date: Thu, 7 May 2020 10:37:03 +0000 Subject: [PATCH 21/24] use label.ctx in metric.py; remove gluoncv dependency in test_cvnets --- python/mxnet/gluon/metric.py | 71 +++++++++++++++------------- tests/python/tensorrt/test_cvnets.py | 13 ++--- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index dc9d5c957444..8503d80e92ad 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -420,8 +420,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred_label in zip(labels, preds): - pred_label = pred_label.as_np_ndarray() - label = label.as_np_ndarray().as_in_ctx(pred_label.ctx) + pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx) + label = label.as_np_ndarray() if pred_label.shape != label.shape: pred_label = pred_label.argmax(axis=self.axis) pred_label = pred_label.astype('int32') @@ -502,8 +502,8 @@ def update(self, labels, preds): # we do not care about the order of top k elements. It is # much faster, which is important since that computation is # single-threaded due to Python GIL. - pred_label = numpy.argpartition(pred_label.as_np_ndarray().astype('float32'), -self.top_k) - label = label.as_np_ndarray().astype('int32').as_in_ctx(pred_label.ctx) + pred_label = numpy.argpartition(pred_label.as_np_ndarray().astype('float32'), -self.top_k).as_in_ctx(label.ctx) + label = label.as_np_ndarray().astype('int32') check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) @@ -594,10 +594,10 @@ def update_stats(self, label, pred): pred : `NDArray` Predicted values. 
""" - pred = pred.as_np_ndarray() - label = label.as_np_ndarray().astype('int32').as_in_ctx(pred.ctx) + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.as_np_ndarray().astype('int32') if self.class_type == "binary": - self._set(1, pred.ctx) + self._set(1, label.ctx) if label.max() > 1: raise ValueError("Wrong label for binary classification.") if pred.shape == label.shape: @@ -611,14 +611,14 @@ def update_stats(self, label, pred): elif self.class_type == "multiclass": num = pred.shape[-1] - self._set(num, pred.ctx) + self._set(num, label.ctx) assert label.max() < num, "pred contains fewer classes than label!" pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) label = one_hot(label.reshape(-1), num) elif self.class_type == "multilabel": num = pred.shape[-1] - self._set(num, pred.ctx) + self._set(num, label.ctx) assert pred.shape == label.shape, \ "The shape of label should be same as that of prediction for multilabel classification." pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) @@ -920,8 +920,8 @@ def update(self, labels, preds): for label, pred_label in zip(labels, preds): pred_label = predict_with_threshold(pred_label, self.threshold) - pred_label = pred_label.as_np_ndarray().astype('int32') - label = label.as_np_ndarray().astype('int32').as_in_ctx(pred_label.ctx) + pred_label = pred_label.as_np_ndarray().astype('int32').as_in_ctx(label.ctx) + label = label.as_np_ndarray().astype('int32') # flatten before checking shapes to avoid shape miss match label = label.reshape(-1) pred_label = pred_label.reshape(-1) @@ -1080,8 +1080,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) num_inst = label.shape[0] mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() @@ -1139,8 +1139,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) num_inst = label.shape[0] mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() @@ -1243,8 +1243,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) label = label.reshape(label.shape[0], -1) pred = pred.reshape(pred.shape[0], -1) @@ -1310,8 +1310,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) if len(label.shape) == 1: label = label.reshape(1, label.shape[0]) @@ -1345,6 +1345,8 @@ class :math:`k`. Parameters ---------- + eps : float, default 1e-12 + Use small constant for the case that predicted value is 0. ignore_label : int or None, default None Index of invalid label to ignore when counting. By default, sets to -1. @@ -1370,12 +1372,13 @@ class :math:`k`. 
>>> print ce.get() ('cross-entropy', 0.57159948348999023) """ - def __init__(self, ignore_label=None, axis=-1, name='cross-entropy', + def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='cross-entropy', output_names=None, label_names=None): super(CrossEntropy, self).__init__( name, output_names=output_names, label_names=label_names) self.ignore_label = ignore_label self.axis = axis + self.eps = eps def update(self, labels, preds): """Updates the internal evaluation result. @@ -1395,15 +1398,15 @@ def update(self, labels, preds): for label, pred in zip(labels, preds): assert label.size == pred.size/pred.shape[-1], \ "shape mismatch: %s vs. %s"%(label.shape, pred.shape) - label = label.as_in_context(pred.ctx).reshape((label.size,)) - pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) + label = label.reshape((label.size,)) + pred = ndarray.pick(pred.as_in_context(label.ctx), label.astype(dtype='int32'), axis=self.axis) label = label.as_np_ndarray() pred = pred.as_np_ndarray() if self.ignore_label is not None: ignore = (label == self.ignore_label).astype(pred.dtype) num -= ignore.sum() pred = pred * (1 - ignore) + ignore - loss -= numpy.log(numpy.maximum(1e-12, pred)).sum() + loss -= numpy.log(numpy.maximum(self.eps, pred)).sum() num += pred.size self.sum_metric += loss self.num_inst += num @@ -1438,6 +1441,8 @@ class Perplexity(CrossEntropy): Parameters ---------- + eps : float, default 1e-12 + Use small constant for the case that predicted value is 0. ignore_label : int or None, default None Index of invalid label to ignore when counting. By default, sets to -1. @@ -1463,10 +1468,10 @@ class Perplexity(CrossEntropy): >>> print perp.get() ('Perplexity', 1.7710976285155853) """ - def __init__(self, ignore_label=None, axis=-1, name='perplexity', + def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='perplexity', output_names=None, label_names=None): super(Perplexity, self).__init__( - name=name, ignore_label=ignore_label, axis=axis, + name=name, eps=eps, ignore_label=ignore_label, axis=axis, output_names=output_names, label_names=label_names) def get(self): @@ -1535,8 +1540,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) label = label.reshape(-1) num_examples = pred.shape[0] @@ -1622,8 +1627,8 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.as_np_ndarray().as_in_ctx(pred.ctx).reshape(-1).astype(numpy.float64) - pred = pred.as_np_ndarray().reshape(-1).astype(numpy.float64) + label = label.as_np_ndarray().reshape(-1).astype(numpy.float64) + pred = pred.as_np_ndarray().as_in_ctx(label.ctx).reshape(-1).astype(numpy.float64) self.num_inst += 1 self._label_nums, self._mean_l, self._sse_l = \ @@ -1733,8 +1738,8 @@ def update(self, labels, preds): # update the confusion matrix for label, pred in zip(labels, preds): - label = label.astype('int32', copy=False).as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.astype('int32', copy=False).as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) if pred.shape != label.shape: pred = pred.argmax(axis=1).astype(label, copy=False) else: @@ -1872,8 +1877,8 @@ def update(self, labels, preds): 
labels, preds = check_label_shapes(labels, preds, True) for pred, label in zip(preds, labels): - label = label.as_np_ndarray().as_in_ctx(pred.ctx) - pred = pred.as_np_ndarray() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) reval = self._feval(label, pred) if isinstance(reval, tuple): diff --git a/tests/python/tensorrt/test_cvnets.py b/tests/python/tensorrt/test_cvnets.py index 99312d76dc7a..56cda90a80b1 100644 --- a/tests/python/tensorrt/test_cvnets.py +++ b/tests/python/tensorrt/test_cvnets.py @@ -16,7 +16,6 @@ # under the License. import gc -import gluoncv import mxnet as mx import numpy as np @@ -29,7 +28,12 @@ def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128): mx.contrib.tensorrt.set_use_fp16(False) h, w = 32, 32 - net = gluoncv.model_zoo.get_model(model_name, pretrained=True) + model_url = "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/models/" + param_file = "{}-0000.params".format(model_name) + symbol_file = "{}-symbol.json".format(model_name) + mx.test_utils.download("{}/{}".format(model_url, param_file), fname=param_file, overwrite=True) + mx.test_utils.download("{}/{}".format(model_url, symbol_file), fname=symbol_file, overwrite=True) + net = gluon.SymbolBlock.imports(symbol_file, ['data'], param_file) net.hybridize() net.forward(mx.nd.zeros((batch_size, 3, h, w))) net.export(model_name) @@ -130,10 +134,7 @@ def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1): 'cifar_resnet20_v2', 'cifar_resnet56_v2', 'cifar_resnet110_v2', - 'cifar_wideresnet16_10', - 'cifar_wideresnet28_10', - 'cifar_wideresnet40_8', - 'cifar_resnext29_16x64d' + 'cifar_wideresnet16_10' ] num_models = len(models) From d53e6ef32217c1bb798882db9134e39d7318ba5e Mon Sep 17 00:00:00 2001 From: acphile Date: Thu, 7 May 2020 12:25:04 +0000 Subject: [PATCH 22/24] fix sanity --- python/mxnet/gluon/metric.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py index 8503d80e92ad..5b081ceac4d8 100644 --- a/python/mxnet/gluon/metric.py +++ b/python/mxnet/gluon/metric.py @@ -502,7 +502,8 @@ def update(self, labels, preds): # we do not care about the order of top k elements. It is # much faster, which is important since that computation is # single-threaded due to Python GIL. - pred_label = numpy.argpartition(pred_label.as_np_ndarray().astype('float32'), -self.top_k).as_in_ctx(label.ctx) + pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx).astype('float32') + pred_label = numpy.argpartition(pred_label, -self.top_k) label = label.as_np_ndarray().astype('int32') check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] From a2b0ffe13f8021b5c958206018db270aaf3e528e Mon Sep 17 00:00:00 2001 From: acphile Date: Fri, 8 May 2020 03:18:47 +0000 Subject: [PATCH 23/24] fix importError --- python/mxnet/gluon/contrib/data/vision/dataloader.py | 4 ++-- .../mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py | 2 +- python/mxnet/gluon/data/dataloader.py | 2 +- tests/python/tensorrt/test_cvnets.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/mxnet/gluon/contrib/data/vision/dataloader.py b/python/mxnet/gluon/contrib/data/vision/dataloader.py index 0c71d90453d8..3213398b2214 100644 --- a/python/mxnet/gluon/contrib/data/vision/dataloader.py +++ b/python/mxnet/gluon/contrib/data/vision/dataloader.py @@ -21,9 +21,9 @@ import logging import numpy as np -from ..... import nd +from ..... 
import ndarray as nd from .....util import is_np_array -from ..... import np as _mx_np # pylint: disable=reimported +from ..... import numpy as _mx_np # pylint: disable=reimported from ....nn import HybridSequential, Sequential, HybridBlock, Block from ....data.vision import transforms from ....data import DataLoader diff --git a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py index 1629c212957f..65a18aaf80cd 100644 --- a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py +++ b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py @@ -23,7 +23,7 @@ from .......base import numeric_types from ......block import Block from .......util import is_np_array -from ....... import nd, npx, np +from ....... import ndarray as nd, numpy_extension as npx, numpy as np from .utils import _check_bbox_shape, bbox_crop, bbox_translate from .utils import bbox_resize, bbox_random_crop_with_constraints diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py index d991bc769ac9..c51981678367 100644 --- a/python/mxnet/gluon/data/dataloader.py +++ b/python/mxnet/gluon/data/dataloader.py @@ -39,7 +39,7 @@ from . import sampler as _sampler from . import batchify as _batchify -from ... import nd, context +from ... import ndarray as nd, context from ...util import is_np_shape, is_np_array, set_np from ... import numpy as _mx_np # pylint: disable=reimported diff --git a/tests/python/tensorrt/test_cvnets.py b/tests/python/tensorrt/test_cvnets.py index 56cda90a80b1..cd090c5e2f5c 100644 --- a/tests/python/tensorrt/test_cvnets.py +++ b/tests/python/tensorrt/test_cvnets.py @@ -28,7 +28,7 @@ def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128): mx.contrib.tensorrt.set_use_fp16(False) h, w = 32, 32 - model_url = "https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/models/" + model_url = "https://raw.githubusercontent.com/dmlc/web-data/221ce5b7c6d5b0777a1e3471f7f03ff98da90a0a/gluoncv/models" param_file = "{}-0000.params".format(model_name) symbol_file = "{}-symbol.json".format(model_name) mx.test_utils.download("{}/{}".format(model_url, param_file), fname=param_file, overwrite=True) From ef3058adba8353fd8fa81b39264b543d64e0dac7 Mon Sep 17 00:00:00 2001 From: acphile Date: Sat, 9 May 2020 14:59:04 +0800 Subject: [PATCH 24/24] remove nose --- tests/python/unittest/test_metric.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index af81251fa11b..c2e4783de411 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -384,6 +384,3 @@ def test_single_array_input(): _, rmse_res = rmse.get() np.testing.assert_almost_equal(rmse_res, 0.1) -if __name__ == '__main__': - import nose - nose.runmodule()
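
For reviewers trying the series end to end, here is a minimal usage sketch of the relocated metric API. It assumes a build that already includes the patches above (metrics moved from mx.metric to mx.gluon.metric); the input arrays and the printed result are illustrative only and are not taken from the test suite.

# Minimal sketch, assuming an MXNet build with the mx.gluon.metric patches applied.
# Factory strings and classes below are the ones exercised in the diffs above.
import mxnet as mx

labels = [mx.nd.array([0, 1, 1])]            # ground-truth class ids (made-up data)
preds  = [mx.nd.array([[0.9, 0.1],           # per-class prediction scores
                       [0.2, 0.8],
                       [0.4, 0.6]])]

acc  = mx.gluon.metric.create('acc')                         # same factory names as before the move
top2 = mx.gluon.metric.create('top_k_accuracy', top_k=2)

composite = mx.gluon.metric.CompositeEvalMetric()
composite.add([acc, top2])
composite.update(labels, preds)
print(composite.get())                        # (metric names, metric values)

Because of the context handling added in patches 18 and 21, update() also accepts labels and predictions that live on different devices, for example labels kept on CPU while predictions come back from a GPU forward pass.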