diff --git a/mmcls/core/evaluation/eval_metrics.py b/mmcls/core/evaluation/eval_metrics.py
index bc90fa85349..7b2bd9c5ee8 100644
--- a/mmcls/core/evaluation/eval_metrics.py
+++ b/mmcls/core/evaluation/eval_metrics.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import torch
+from torch.nn.functional import one_hot
 
 
 def calculate_confusion_matrix(pred, target):
@@ -27,16 +28,17 @@ def calculate_confusion_matrix(pred, target):
         (f'pred and target should be torch.Tensor or np.ndarray, '
          f'but got {type(pred)} and {type(target)}.')
 
+    # Modified from PyTorch-Ignite
     num_classes = pred.size(1)
-    _, pred_label = pred.topk(1, dim=1)
-    pred_label = pred_label.view(-1)
-    target_label = target.view(-1)
+    pred_label = torch.argmax(pred, dim=1).flatten()
+    target_label = target.flatten()
     assert len(pred_label) == len(target_label)
-    confusion_matrix = torch.zeros(num_classes, num_classes)
+
     with torch.no_grad():
-        for t, p in zip(target_label, pred_label):
-            confusion_matrix[t.long(), p.long()] += 1
-    return confusion_matrix
+        indices = num_classes * target_label + pred_label
+        matrix = torch.bincount(indices, minlength=num_classes**2)
+        matrix = matrix.reshape(num_classes, num_classes)
+    return matrix
 
 
 def precision_recall_f1(pred, target, average_mode='macro', thrs=0.):
@@ -73,13 +75,15 @@ class are returned. If 'macro', calculate metrics for each class,
     if average_mode not in allowed_average_mode:
         raise ValueError(f'Unsupport type of averaging {average_mode}.')
 
-    if isinstance(pred, torch.Tensor):
-        pred = pred.numpy()
-    if isinstance(target, torch.Tensor):
-        target = target.numpy()
-    assert (isinstance(pred, np.ndarray) and isinstance(target, np.ndarray)),\
-        (f'pred and target should be torch.Tensor or np.ndarray, '
-         f'but got {type(pred)} and {type(target)}.')
+    if isinstance(pred, np.ndarray):
+        pred = torch.from_numpy(pred)
+    assert isinstance(pred, torch.Tensor), \
+        (f'pred should be torch.Tensor or np.ndarray, but got {type(pred)}.')
+    if isinstance(target, np.ndarray):
+        target = torch.from_numpy(target)
+    assert isinstance(target, torch.Tensor), \
+        f'target should be torch.Tensor or np.ndarray, ' \
+        f'but got {type(target)}.'
 
     if isinstance(thrs, Number):
         thrs = (thrs, )
@@ -90,30 +94,37 @@ class are returned. If 'macro', calculate metrics for each class,
         raise TypeError(
             f'thrs should be a number or tuple, but got {type(thrs)}.')
 
-    label = np.indices(pred.shape)[1]
-    pred_label = np.argsort(pred, axis=1)[:, -1]
-    pred_score = np.sort(pred, axis=1)[:, -1]
+    num_classes = pred.size(1)
+    pred_score, pred_label = torch.topk(pred, k=1)
+    pred_score = pred_score.flatten()
+    pred_label = pred_label.flatten()
+
+    gt_positive = one_hot(target.flatten(), num_classes)
 
     precisions = []
     recalls = []
     f1_scores = []
    for thr in thrs:
         # Only prediction values larger than thr are counted as positive
-        _pred_label = pred_label.copy()
+        pred_positive = one_hot(pred_label, num_classes)
         if thr is not None:
-            _pred_label[pred_score <= thr] = -1
-        pred_positive = label == _pred_label.reshape(-1, 1)
-        gt_positive = label == target.reshape(-1, 1)
-        precision = (pred_positive & gt_positive).sum(0) / np.maximum(
-            pred_positive.sum(0), 1) * 100
-        recall = (pred_positive & gt_positive).sum(0) / np.maximum(
-            gt_positive.sum(0), 1) * 100
-        f1_score = 2 * precision * recall / np.maximum(precision + recall,
-                                                       1e-20)
+            pred_positive[pred_score <= thr] = 0
+        class_correct = (pred_positive & gt_positive).sum(0)
+        precision = class_correct / np.maximum(pred_positive.sum(0), 1.) * 100
+        recall = class_correct / np.maximum(gt_positive.sum(0), 1.) * 100
+        f1_score = 2 * precision * recall / np.maximum(
+            precision + recall,
+            torch.finfo(torch.float32).eps)
         if average_mode == 'macro':
             precision = float(precision.mean())
             recall = float(recall.mean())
             f1_score = float(f1_score.mean())
+        elif average_mode == 'none':
+            precision = precision.detach().cpu().numpy()
+            recall = recall.detach().cpu().numpy()
+            f1_score = f1_score.detach().cpu().numpy()
+        else:
+            raise ValueError(f'Unsupport type of averaging {average_mode}.')
         precisions.append(precision)
         recalls.append(recall)
         f1_scores.append(f1_score)
diff --git a/mmcls/datasets/base_dataset.py b/mmcls/datasets/base_dataset.py
index 3c9edf15b2e..739aa23e040 100644
--- a/mmcls/datasets/base_dataset.py
+++ b/mmcls/datasets/base_dataset.py
@@ -172,13 +172,11 @@ def evaluate(self,
             if isinstance(thrs, tuple):
                 for key, values in eval_results_.items():
                     eval_results.update({
-                        f'{key}_thr_{thr:.2f}': value.item()
+                        f'{key}_thr_{thr:.2f}': value
                         for thr, value in zip(thrs, values)
                     })
             else:
-                eval_results.update(
-                    {k: v.item()
-                     for k, v in eval_results_.items()})
+                eval_results.update(eval_results_)
 
         if 'support' in metrics:
             support_value = support(
diff --git a/mmcls/models/losses/accuracy.py b/mmcls/models/losses/accuracy.py
index 873e579b823..8ccbcd1e8db 100644
--- a/mmcls/models/losses/accuracy.py
+++ b/mmcls/models/losses/accuracy.py
@@ -35,7 +35,7 @@ def accuracy_numpy(pred, target, topk=(1, ), thrs=0.):
             # Only prediction values larger than thr are counted as correct
             _correct_k = correct_k & (pred_score[:, :k] > thr)
             _correct_k = np.logical_or.reduce(_correct_k, axis=1)
-            res_thr.append(_correct_k.sum() * 100. / num)
+            res_thr.append((_correct_k.sum() * 100. / num).item())
         if res_single:
             res.append(res_thr[0])
         else:
@@ -65,7 +65,7 @@ def accuracy_torch(pred, target, topk=(1, ), thrs=0.):
             # Only prediction values larger than thr are counted as correct
             _correct = correct & (pred_score.t() > thr)
             correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
-            res_thr.append(correct_k.mul_(100. / num))
+            res_thr.append((correct_k.mul_(100. / num)).item())
         if res_single:
             res.append(res_thr[0])
         else:
@@ -99,14 +99,20 @@ def accuracy(pred, target, topk=1, thrs=0.):
     else:
         return_single = False
 
-    if isinstance(pred, torch.Tensor) and isinstance(target, torch.Tensor):
-        res = accuracy_torch(pred, target, topk, thrs)
-    elif isinstance(pred, np.ndarray) and isinstance(target, np.ndarray):
-        res = accuracy_numpy(pred, target, topk, thrs)
-    else:
-        raise TypeError(
-            f'pred and target should both be torch.Tensor or np.ndarray, '
-            f'but got {type(pred)} and {type(target)}.')
+    assert isinstance(pred, (torch.Tensor, np.ndarray)), \
+        f'The pred should be torch.Tensor or np.ndarray ' \
+        f'instead of {type(pred)}.'
+    assert isinstance(target, (torch.Tensor, np.ndarray)), \
+        f'The target should be torch.Tensor or np.ndarray ' \
+        f'instead of {type(target)}.'
+
+    # torch version is faster in most situations.
+    to_tensor = (lambda x: torch.from_numpy(x)
+                 if isinstance(x, np.ndarray) else x)
+    pred = to_tensor(pred)
+    target = to_tensor(target)
+
+    res = accuracy_torch(pred, target, topk, thrs)
 
     return res[0] if return_single else res
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index df06e25962f..67acb09599f 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -1,9 +1,11 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from functools import partial
+
 import pytest
 import torch
 
 from mmcls.core import average_performance, mAP
-from mmcls.models.losses.accuracy import Accuracy
+from mmcls.models.losses.accuracy import Accuracy, accuracy_numpy
 
 
 def test_mAP():
@@ -77,10 +79,15 @@ def test_accuracy():
     assert compute_acc(pred_array, target_array)[0] == acc_top1
 
     compute_acc = Accuracy(topk=(1, 2))
-    assert compute_acc(pred_tensor, target_tensor)[0] == acc_top1
+    assert compute_acc(pred_tensor, target_array)[0] == acc_top1
     assert compute_acc(pred_tensor, target_tensor)[1] == acc_top2
     assert compute_acc(pred_array, target_array)[0] == acc_top1
     assert compute_acc(pred_array, target_array)[1] == acc_top2
-    with pytest.raises(TypeError):
-        compute_acc(pred_tensor, target_array)
+    with pytest.raises(AssertionError):
+        compute_acc(pred_tensor, 'other_type')
+
+    # test accuracy_numpy
+    compute_acc = partial(accuracy_numpy, topk=(1, 2))
+    assert compute_acc(pred_array, target_array)[0] == acc_top1
+    assert compute_acc(pred_array, target_array)[1] == acc_top2
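
Note: the calculate_confusion_matrix rewrite above replaces the per-sample Python loop with a single torch.bincount over encoded (target, prediction) pairs. Below is a minimal standalone sketch of that technique; the example tensors are illustrative only and not part of the patch.

import torch

# Each (target, prediction) pair is encoded as one integer in [0, C*C),
# counted with bincount, then reshaped into a C x C matrix where
# matrix[i, j] counts samples with ground truth i predicted as class j.
pred = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])  # (N, C) scores
target = torch.tensor([0, 1, 0])                           # (N,) int labels

num_classes = pred.size(1)
pred_label = torch.argmax(pred, dim=1).flatten()
indices = num_classes * target.flatten() + pred_label
matrix = torch.bincount(indices, minlength=num_classes**2)
matrix = matrix.reshape(num_classes, num_classes)
# -> tensor([[1, 1],
#            [0, 1]])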