diff --git a/mmcls/core/evaluation/eval_metrics.py b/mmcls/core/evaluation/eval_metrics.py
index bc90fa85349..7b2bd9c5ee8 100644
--- a/mmcls/core/evaluation/eval_metrics.py
+++ b/mmcls/core/evaluation/eval_metrics.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import torch
+from torch.nn.functional import one_hot
 
 
 def calculate_confusion_matrix(pred, target):
@@ -27,16 +28,17 @@ def calculate_confusion_matrix(pred, target):
         (f'pred and target should be torch.Tensor or np.ndarray, '
          f'but got {type(pred)} and {type(target)}.')
 
+    # Modified from PyTorch-Ignite
     num_classes = pred.size(1)
-    _, pred_label = pred.topk(1, dim=1)
-    pred_label = pred_label.view(-1)
-    target_label = target.view(-1)
+    pred_label = torch.argmax(pred, dim=1).flatten()
+    target_label = target.flatten()
     assert len(pred_label) == len(target_label)
-    confusion_matrix = torch.zeros(num_classes, num_classes)
+
     with torch.no_grad():
-        for t, p in zip(target_label, pred_label):
-            confusion_matrix[t.long(), p.long()] += 1
-    return confusion_matrix
+        indices = num_classes * target_label + pred_label
+        matrix = torch.bincount(indices, minlength=num_classes**2)
+        matrix = matrix.reshape(num_classes, num_classes)
+    return matrix
 
 
 def precision_recall_f1(pred, target, average_mode='macro', thrs=0.):
@@ -73,13 +75,15 @@ class are returned. If 'macro', calculate metrics for each class,
     if average_mode not in allowed_average_mode:
         raise ValueError(f'Unsupport type of averaging {average_mode}.')
 
-    if isinstance(pred, torch.Tensor):
-        pred = pred.numpy()
-    if isinstance(target, torch.Tensor):
-        target = target.numpy()
-    assert (isinstance(pred, np.ndarray) and isinstance(target, np.ndarray)),\
-        (f'pred and target should be torch.Tensor or np.ndarray, '
-         f'but got {type(pred)} and {type(target)}.')
+    if isinstance(pred, np.ndarray):
+        pred = torch.from_numpy(pred)
+    assert isinstance(pred, torch.Tensor), \
+        (f'pred should be torch.Tensor or np.ndarray, but got {type(pred)}.')
+    if isinstance(target, np.ndarray):
+        target = torch.from_numpy(target)
+    assert isinstance(target, torch.Tensor), \
+        f'target should be torch.Tensor or np.ndarray, ' \
+        f'but got {type(target)}.'
 
     if isinstance(thrs, Number):
         thrs = (thrs, )
@@ -90,30 +94,37 @@ class are returned. If 'macro', calculate metrics for each class,
         raise TypeError(
             f'thrs should be a number or tuple, but got {type(thrs)}.')
 
-    label = np.indices(pred.shape)[1]
-    pred_label = np.argsort(pred, axis=1)[:, -1]
-    pred_score = np.sort(pred, axis=1)[:, -1]
+    num_classes = pred.size(1)
+    pred_score, pred_label = torch.topk(pred, k=1)
+    pred_score = pred_score.flatten()
+    pred_label = pred_label.flatten()
+
+    gt_positive = one_hot(target.flatten(), num_classes)
 
     precisions = []
     recalls = []
     f1_scores = []
    for thr in thrs:
         # Only prediction values larger than thr are counted as positive
-        _pred_label = pred_label.copy()
+        pred_positive = one_hot(pred_label, num_classes)
         if thr is not None:
-            _pred_label[pred_score <= thr] = -1
-        pred_positive = label == _pred_label.reshape(-1, 1)
-        gt_positive = label == target.reshape(-1, 1)
-        precision = (pred_positive & gt_positive).sum(0) / np.maximum(
-            pred_positive.sum(0), 1) * 100
-        recall = (pred_positive & gt_positive).sum(0) / np.maximum(
-            gt_positive.sum(0), 1) * 100
-        f1_score = 2 * precision * recall / np.maximum(precision + recall,
-                                                       1e-20)
+            pred_positive[pred_score <= thr] = 0
+        class_correct = (pred_positive & gt_positive).sum(0)
+        precision = class_correct / np.maximum(pred_positive.sum(0), 1.) * 100
+        recall = class_correct / np.maximum(gt_positive.sum(0), 1.) * 100
+        f1_score = 2 * precision * recall / np.maximum(
+            precision + recall,
+            torch.finfo(torch.float32).eps)
         if average_mode == 'macro':
             precision = float(precision.mean())
             recall = float(recall.mean())
             f1_score = float(f1_score.mean())
+        elif average_mode == 'none':
+            precision = precision.detach().cpu().numpy()
+            recall = recall.detach().cpu().numpy()
+            f1_score = f1_score.detach().cpu().numpy()
+        else:
+            raise ValueError(f'Unsupport type of averaging {average_mode}.')
         precisions.append(precision)
         recalls.append(recall)
         f1_scores.append(f1_score)
diff --git a/mmcls/datasets/base_dataset.py b/mmcls/datasets/base_dataset.py
index 3c9edf15b2e..739aa23e040 100644
--- a/mmcls/datasets/base_dataset.py
+++ b/mmcls/datasets/base_dataset.py
@@ -172,13 +172,11 @@ def evaluate(self,
             if isinstance(thrs, tuple):
                 for key, values in eval_results_.items():
                     eval_results.update({
-                        f'{key}_thr_{thr:.2f}': value.item()
+                        f'{key}_thr_{thr:.2f}': value
                         for thr, value in zip(thrs, values)
                     })
             else:
-                eval_results.update(
-                    {k: v.item()
-                     for k, v in eval_results_.items()})
+                eval_results.update(eval_results_)
 
         if 'support' in metrics:
             support_value = support(
diff --git a/mmcls/models/losses/accuracy.py b/mmcls/models/losses/accuracy.py
index 873e579b823..8ccbcd1e8db 100644
--- a/mmcls/models/losses/accuracy.py
+++ b/mmcls/models/losses/accuracy.py
@@ -35,7 +35,7 @@ def accuracy_numpy(pred, target, topk=(1, ), thrs=0.):
             # Only prediction values larger than thr are counted as correct
             _correct_k = correct_k & (pred_score[:, :k] > thr)
             _correct_k = np.logical_or.reduce(_correct_k, axis=1)
-            res_thr.append(_correct_k.sum() * 100. / num)
+            res_thr.append((_correct_k.sum() * 100. / num).item())
         if res_single:
             res.append(res_thr[0])
         else:
@@ -65,7 +65,7 @@ def accuracy_torch(pred, target, topk=(1, ), thrs=0.):
             # Only prediction values larger than thr are counted as correct
             _correct = correct & (pred_score.t() > thr)
             correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
-            res_thr.append(correct_k.mul_(100. / num))
+            res_thr.append((correct_k.mul_(100. / num)).item())
         if res_single:
             res.append(res_thr[0])
         else:
@@ -99,14 +99,20 @@ def accuracy(pred, target, topk=1, thrs=0.):
     else:
         return_single = False
 
-    if isinstance(pred, torch.Tensor) and isinstance(target, torch.Tensor):
-        res = accuracy_torch(pred, target, topk, thrs)
-    elif isinstance(pred, np.ndarray) and isinstance(target, np.ndarray):
-        res = accuracy_numpy(pred, target, topk, thrs)
-    else:
-        raise TypeError(
-            f'pred and target should both be torch.Tensor or np.ndarray, '
-            f'but got {type(pred)} and {type(target)}.')
+    assert isinstance(pred, (torch.Tensor, np.ndarray)), \
+        f'The pred should be torch.Tensor or np.ndarray ' \
+        f'instead of {type(pred)}.'
+    assert isinstance(target, (torch.Tensor, np.ndarray)), \
+        f'The target should be torch.Tensor or np.ndarray ' \
+        f'instead of {type(target)}.'
+
+    # torch version is faster in most situations.
+    to_tensor = (lambda x: torch.from_numpy(x)
+                 if isinstance(x, np.ndarray) else x)
+    pred = to_tensor(pred)
+    target = to_tensor(target)
+
+    res = accuracy_torch(pred, target, topk, thrs)
 
     return res[0] if return_single else res
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index df06e25962f..67acb09599f 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -1,9 +1,11 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from functools import partial
+
 import pytest
 import torch
 
 from mmcls.core import average_performance, mAP
-from mmcls.models.losses.accuracy import Accuracy
+from mmcls.models.losses.accuracy import Accuracy, accuracy_numpy
 
 
 def test_mAP():
@@ -77,10 +79,15 @@ def test_accuracy():
     assert compute_acc(pred_array, target_array)[0] == acc_top1
 
     compute_acc = Accuracy(topk=(1, 2))
-    assert compute_acc(pred_tensor, target_tensor)[0] == acc_top1
+    assert compute_acc(pred_tensor, target_array)[0] == acc_top1
     assert compute_acc(pred_tensor, target_tensor)[1] == acc_top2
     assert compute_acc(pred_array, target_array)[0] == acc_top1
     assert compute_acc(pred_array, target_array)[1] == acc_top2
-    with pytest.raises(TypeError):
-        compute_acc(pred_tensor, target_array)
+    with pytest.raises(AssertionError):
+        compute_acc(pred_tensor, 'other_type')
+
+    # test accuracy_numpy
+    compute_acc = partial(accuracy_numpy, topk=(1, 2))
+    assert compute_acc(pred_array, target_array)[0] == acc_top1
+    assert compute_acc(pred_array, target_array)[1] == acc_top2
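
Note: the calculate_confusion_matrix rewrite above replaces the per-sample Python loop with a single torch.bincount over encoded (target, prediction) pairs. Below is a minimal standalone sketch of that technique; the example tensors are illustrative only and not part of the patch.

import torch

# Each (target, prediction) pair is encoded as one integer in [0, C*C),
# counted with bincount, then reshaped into a C x C matrix where
# matrix[i, j] counts samples with ground truth i predicted as class j.
pred = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])  # (N, C) scores
target = torch.tensor([0, 1, 0])                           # (N,) int labels

num_classes = pred.size(1)
pred_label = torch.argmax(pred, dim=1).flatten()
indices = num_classes * target.flatten() + pred_label
matrix = torch.bincount(indices, minlength=num_classes**2)
matrix = matrix.reshape(num_classes, num_classes)
# -> tensor([[1, 1],
#            [0, 1]])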