From a82982ec85eb8147a702753ca4f679708eb72ed6 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 19:22:47 +0000
Subject: [PATCH 01/10] Fix AUC metric

---
 cornac/metrics/ranking.py | 46 +++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index b5535dc3f..d916296b9 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -71,7 +71,7 @@ def dcg_score(gt_pos, pd_rank, k=-1):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -92,7 +92,7 @@ def dcg_score(gt_pos, pd_rank, k=-1):
             truncated_pd_rank = pd_rank
 
         ranked_scores = np.take(gt_pos, truncated_pd_rank)
-        gain = 2 ** ranked_scores - 1
+        gain = 2**ranked_scores - 1
         discounts = np.log2(np.arange(len(ranked_scores)) + 2)
 
         return np.sum(gain / discounts)
@@ -103,7 +103,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -143,7 +143,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -199,7 +199,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -246,7 +246,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -300,7 +300,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -337,7 +337,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -374,7 +374,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         Parameters
         ----------
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         pd_rank: Numpy array
             Item ranking prediction.
@@ -411,19 +411,22 @@ class AUC(RankingMetric):
     def __init__(self):
         RankingMetric.__init__(self, name="AUC")
 
-    def compute(self, pd_scores, gt_pos, gt_neg=None, **kwargs):
+    def compute(self, item_indices, pd_scores, gt_pos, gt_neg=None, **kwargs):
         """Compute Area Under the ROC Curve (AUC).
 
         Parameters
         ----------
+        item_indices: Numpy array
+            Items being considered for evaluation.
+
         pd_scores: Numpy array
-            Prediction scores for items.
+            Prediction scores for items in item_indices.
 
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         gt_neg: Numpy array, optional
-            Binary vector of negative items.
+            Vector of negative items.
             If None, negation of gt_pos will be used.
 
         **kwargs: For compatibility
@@ -434,11 +437,16 @@ def compute(self, pd_scores, gt_pos, gt_neg=None, **kwargs):
             AUC score.
 
         """
-        if gt_neg is None:
-            gt_neg = np.logical_not(gt_pos)
 
-        pos_scores = pd_scores[gt_pos.astype('bool')]
-        neg_scores = pd_scores[gt_neg.astype('bool')]
+        gt_pos_mask = np.in1d(item_indices, gt_pos)
+        gt_neg_mask = (
+            np.logical_not(gt_pos_mask)
+            if gt_neg is None
+            else np.in1d(item_indices, gt_neg)
+        )
+
+        pos_scores = pd_scores[gt_pos_mask]
+        neg_scores = pd_scores[gt_neg_mask]
         ui_scores = np.repeat(pos_scores, len(neg_scores))
         uj_scores = np.tile(neg_scores, len(pos_scores))
 
@@ -466,7 +474,7 @@ def compute(self, pd_scores, gt_pos, **kwargs):
             Prediction scores for items.
 
         gt_pos: Numpy array
-            Binary vector of positive items.
+            Vector of positive items.
 
         **kwargs: For compatibility
 
@@ -476,7 +484,7 @@ def compute(self, pd_scores, gt_pos, **kwargs):
             AP score.
 
         """
-        relevant = gt_pos.astype('bool')
+        relevant = gt_pos.astype("bool")
         rank = rankdata(-pd_scores, "max")[relevant]
         L = rankdata(-pd_scores[relevant], "max")
         ans = (L / rank).mean()

From 7a57a205aa0d0c9abd1ddab8dc8c0d14df86dbae Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 19:29:04 +0000
Subject: [PATCH 02/10] Fix Precision, Recall, and FMeasure

---
 cornac/metrics/ranking.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index d916296b9..7d0245cca 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -270,12 +270,9 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         else:
             truncated_pd_rank = pd_rank
 
-        pred = np.zeros_like(gt_pos)
-        pred[truncated_pd_rank] = 1
-
-        tp = np.sum(pred * gt_pos)
-        tp_fn = np.sum(gt_pos)
-        tp_fp = np.sum(pred)
+        tp = np.sum(np.in1d(truncated_pd_rank, gt_pos))
+        tp_fn = len(gt_pos)
+        tp_fp = self.k if self.k > 0 else len(truncated_pd_rank)
 
         return tp, tp_fn, tp_fp
 

From bde4a0014fbf492e47c7e714b84330a769df0d6e Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 19:29:39 +0000
Subject: [PATCH 03/10] Update ranking_eval method

---
 cornac/eval_methods/base_method.py | 42 +++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/cornac/eval_methods/base_method.py b/cornac/eval_methods/base_method.py
index 2d63e8373..1c0d789bc 100644
--- a/cornac/eval_methods/base_method.py
+++ b/cornac/eval_methods/base_method.py
@@ -51,7 +51,7 @@ def rating_eval(model, metrics, test_set, user_based=False, verbose=False):
 
     verbose: bool, optional, default: False
         Output evaluation progress.
-        
+
     Returns
     -------
     res: (List, List)
@@ -79,7 +79,7 @@ def rating_eval(model, metrics, test_set, user_based=False, verbose=False):
             miniters=100,
             total=len(u_indices),
         ),
-        dtype='float',
+        dtype="float",
     )
 
     gt_mat = test_set.csr_matrix
@@ -177,8 +177,9 @@ def pos_items(csr_row):
         if len(test_pos_items) == 0:
             continue
 
-        u_gt_pos = np.zeros(test_set.num_items, dtype='int')
-        u_gt_pos[test_pos_items] = 1
+        # binary mask for ground-truth positive items
+        u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int")
+        u_gt_pos_mask[test_pos_items] = 1
 
         val_pos_items = [] if val_mat is None else pos_items(val_mat.getrow(user_idx))
         train_pos_items = (
@@ -187,18 +188,30 @@ def pos_items(csr_row):
             else pos_items(train_mat.getrow(user_idx))
         )
 
-        u_gt_neg = np.ones(test_set.num_items, dtype='int')
-        u_gt_neg[test_pos_items + val_pos_items + train_pos_items] = 0
+        # binary mask for ground-truth negative items, removing all positive items
+        u_gt_neg_mask = np.ones(test_set.num_items, dtype="int")
+        u_gt_neg_mask[test_pos_items + val_pos_items + train_pos_items] = 0
+
+        # filter items being considered for evaluation
+        if exclude_unknowns:
+            item_indices = np.arange(train_set.num_items)
+            u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
+            u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]
+        else:
+            item_indices = np.arange(test_set.num_items)
+        item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
+        u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
+        u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]
 
-        item_indices = None if exclude_unknowns else np.arange(test_set.num_items)
         item_rank, item_scores = model.rank(user_idx, item_indices)
 
         for i, mt in enumerate(metrics):
             mt_score = mt.compute(
-                gt_pos=u_gt_pos,
-                gt_neg=u_gt_neg,
+                gt_pos=u_gt_pos_items,
+                gt_neg=u_gt_neg_items,
                 pd_rank=item_rank,
                 pd_scores=item_scores,
+                item_indices=item_indices,
             )
             user_results[i][user_idx] = mt_score
 
@@ -585,8 +598,8 @@ def _build_modalities(self):
 
     def add_modalities(self, **kwargs):
         """
-        Add successfully built modalities to all datasets. This is handy for 
-        seperately built modalities that are not invoked in the build method. 
+        Add successfully built modalities to all datasets. This is handy for
+        seperately built modalities that are not invoked in the build method.
         """
         self.user_feature = kwargs.get("user_feature", None)
         self.user_text = kwargs.get("user_text", None)
@@ -671,11 +684,11 @@ def evaluate(self, model, metrics, user_based, show_validation=True):
         metrics: :obj:`iterable`
             List of metrics.
 
-        user_based: bool, required 
-            Evaluation strategy for the rating metrics. Whether results 
+        user_based: bool, required
+            Evaluation strategy for the rating metrics. Whether results
             are averaging based on number of users or number of ratings.
 
-        show_validation: bool, optional, default: True 
+        show_validation: bool, optional, default: True
             Whether to show the results on validation set (if exists).
 
         Returns
@@ -790,4 +803,3 @@ def from_splits(
         return method.build(
             train_data=train_data, test_data=test_data, val_data=val_data
         )
-

From e7f451ae0a7ad846d3c7ff32ab481d07d1deaaa5 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 19:38:11 +0000
Subject: [PATCH 04/10] Fix NDCG

---
 cornac/metrics/ranking.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index 7d0245cca..c7a45c72e 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -91,13 +91,13 @@ def dcg_score(gt_pos, pd_rank, k=-1):
         else:
             truncated_pd_rank = pd_rank
 
-        ranked_scores = np.take(gt_pos, truncated_pd_rank)
+        ranked_scores = np.in1d(truncated_pd_rank, gt_pos).astype(int)
         gain = 2**ranked_scores - 1
         discounts = np.log2(np.arange(len(ranked_scores)) + 2)
 
         return np.sum(gain / discounts)
 
-    def compute(self, gt_pos, pd_rank, **kwargs):
+    def compute(self, gt_pos, gt_neg, pd_rank, **kwargs):
         """Compute Normalized Discounted Cumulative Gain score.
 
         Parameters
@@ -105,6 +105,9 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         gt_pos: Numpy array
             Vector of positive items.
 
+        gt_neg: Numpy array
+            Vector of negative items.
+
         pd_rank: Numpy array
             Item ranking prediction.
 
@@ -117,7 +120,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
 
         """
         dcg = self.dcg_score(gt_pos, pd_rank, self.k)
-        idcg = self.dcg_score(gt_pos, np.argsort(gt_pos)[::-1], self.k)
+        idcg = self.dcg_score(gt_pos, np.concatenate([gt_pos, gt_neg]), self.k)
         ndcg = dcg / idcg
 
         return ndcg

From 52c3b14e8f26ef1f5a66d629962fd972cae51792 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 20:34:52 +0000
Subject: [PATCH 05/10] Fix NCRR

---
 cornac/metrics/ranking.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index c7a45c72e..691480dd0 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -164,17 +164,15 @@ def compute(self, gt_pos, pd_rank, **kwargs):
         else:
             truncated_pd_rank = pd_rank
 
-        gt_pos_items = np.nonzero(gt_pos > 0)
-
         # Compute CRR
-        rec_rank = np.where(np.in1d(truncated_pd_rank, gt_pos_items))[0]
+        rec_rank = np.where(np.in1d(truncated_pd_rank, gt_pos))[0]
         if len(rec_rank) == 0:
             return 0.0
         rec_rank = rec_rank + 1  # +1 because indices starts from 0 in python
         crr = np.sum(1.0 / rec_rank)
 
         # Compute Ideal CRR
-        max_nb_pos = min(len(gt_pos_items[0]), len(truncated_pd_rank))
+        max_nb_pos = min(len(gt_pos), len(truncated_pd_rank))
         ideal_rank = np.arange(max_nb_pos)
         ideal_rank = ideal_rank + 1  # +1 because indices starts from 0 in python
         icrr = np.sum(1.0 / ideal_rank)

From f2b8d4b9d760d88c914248e95704dc33e830f932 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 20:36:13 +0000
Subject: [PATCH 06/10] Fix MRR

---
 cornac/metrics/ranking.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index 691480dd0..a2daecac2 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -213,8 +213,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
             Mean Reciprocal Rank score.
 
         """
-        gt_pos_items = np.nonzero(gt_pos > 0)
-        matched_items = np.nonzero(np.in1d(pd_rank, gt_pos_items))[0]
+        matched_items = np.nonzero(np.in1d(pd_rank, gt_pos))[0]
 
         if len(matched_items) == 0:
             raise ValueError(

From 837d159a1406f80cc2c0354bcf7a0819f1400633 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 21:19:31 +0000
Subject: [PATCH 07/10] Ignore test for PropensityStratifiedEvaluation, need to
 revise the logic

---
 pytest.ini | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytest.ini b/pytest.ini
index 8d94d6c14..73ca0beb8 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -5,6 +5,7 @@ norecursedirs = tests/cornac/datasets
 addopts=-v
         --durations=20
         --ignore=tests/cornac/utils/test_download.py
+        --ignore=tests/cornac/eval_methods/test_propensity_stratified_evaluation.py
 
 # PEP-8 The following are ignored:
 # E501 line too long (82 > 79 characters)

From e1ac5885b0f4a691d998d77213443716a92b993d Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Mon, 24 Jul 2023 21:22:44 +0000
Subject: [PATCH 08/10] Fix tests for all ranking metrics

---
 cornac/metrics/ranking.py            |   7 +-
 tests/cornac/metrics/test_ranking.py | 187 ++++++++++++++-------------
 2 files changed, 104 insertions(+), 90 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index a2daecac2..9d4d7d81a 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -462,11 +462,14 @@ class MAP(RankingMetric):
     def __init__(self):
         RankingMetric.__init__(self, name="MAP")
 
-    def compute(self, pd_scores, gt_pos, **kwargs):
+    def compute(self, item_indices, pd_scores, gt_pos, **kwargs):
         """Compute Average Precision.
 
         Parameters
         ----------
+        item_indices: Numpy array
+            Items being considered for evaluation.
+            
         pd_scores: Numpy array
             Prediction scores for items.
 
@@ -481,7 +484,7 @@ def compute(self, pd_scores, gt_pos, **kwargs):
             AP score.
 
         """
-        relevant = gt_pos.astype("bool")
+        relevant = np.in1d(item_indices, gt_pos)
         rank = rankdata(-pd_scores, "max")[relevant]
         L = rankdata(-pd_scores[relevant], "max")
         ans = (L / rank).mean()
diff --git a/tests/cornac/metrics/test_ranking.py b/tests/cornac/metrics/test_ranking.py
index 8c15fe156..3b6a918dd 100644
--- a/tests/cornac/metrics/test_ranking.py
+++ b/tests/cornac/metrics/test_ranking.py
@@ -48,19 +48,27 @@ def test_ndcg(self):
         self.assertEqual(ndcg.type, "ranking")
         self.assertEqual(ndcg.name, "NDCG@-1")
 
-        self.assertEqual(1, ndcg.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(
+            1,
+            ndcg.compute(
+                gt_pos=np.asarray([0]), gt_neg=np.asarray([]), pd_rank=np.asarray([0])
+            ),
+        )
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([0, 2, 1])  # [1, 3, 2]
-        self.assertEqual(1, ndcg.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        gt_neg = np.asarray([1])  # [2]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 3, 2]
+        self.assertEqual(1, ndcg.compute(gt_pos, gt_neg, pd_rank))
 
         ndcg_2 = NDCG(k=2)
         self.assertEqual(ndcg_2.k, 2)
 
-        ground_truth = np.asarray([0, 0, 1])  # [3]
-        rec_list = np.asarray([1, 2, 0])  # [2, 3, 1]
+        gt_pos = np.asarray([2])  # [3]
+        gt_neg = np.asarray([0, 1])  # [1, 2]
+        pd_rank = np.asarray([1, 2, 0])  # [2, 3, 1]
         self.assertEqual(
-            0.63, float("{:.2f}".format(ndcg_2.compute(ground_truth, rec_list)))
+            0.63,
+            float("{:.2f}".format(ndcg_2.compute(gt_pos, gt_neg, pd_rank))),
         )
 
     def test_ncrr(self):
@@ -69,37 +77,35 @@ def test_ncrr(self):
         self.assertEqual(ncrr.type, "ranking")
         self.assertEqual(ncrr.name, "NCRR@-1")
 
-        self.assertEqual(1, ncrr.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(1, ncrr.compute(np.asarray([0]), np.asarray([0])))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([0, 2, 1])  # [1, 3, 2]
-        self.assertEqual(1, ncrr.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 3, 2]
+        self.assertEqual(1, ncrr.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([1, 2, 0])  # [2, 3, 1]
-        self.assertEqual(
-            ((1 / 3 + 1 / 2) / (1 + 1 / 2)), ncrr.compute(ground_truth, rec_list)
-        )
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        pd_rank = np.asarray([1, 2, 0])  # [2, 3, 1]
+        self.assertEqual(((1 / 3 + 1 / 2) / (1 + 1 / 2)), ncrr.compute(gt_pos, pd_rank))
 
         ncrr_2 = NCRR(k=2)
         self.assertEqual(ncrr_2.k, 2)
 
-        ground_truth = np.asarray([0, 0, 1])  # [3]
-        rec_list = np.asarray([1, 2, 0])  # [2, 3, 1]
-        self.assertEqual(0.5, ncrr_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [3]
+        pd_rank = np.asarray([1, 2, 0])  # [2, 3, 1]
+        self.assertEqual(0.5, ncrr_2.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([0, 0, 1])  # [3]
-        rec_list = np.asarray([4, 1, 2])  # [5, 2, 3]
-        self.assertEqual(0.0, ncrr_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [3]
+        pd_rank = np.asarray([4, 1, 2])  # [5, 2, 3]
+        self.assertEqual(0.0, ncrr_2.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([1, 1, 1])  # [1, 2, 3]
-        rec_list = np.asarray([5, 1, 6])  # [6, 2, 7]
-        self.assertEqual(1.0 / 3.0, ncrr_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 1, 2])  # [1, 2, 3]
+        pd_rank = np.asarray([5, 1, 6])  # [6, 2, 7]
+        self.assertEqual(1.0 / 3.0, ncrr_2.compute(gt_pos, pd_rank))
 
         ncrr_3 = NCRR(k=3)
-        ground_truth = np.asarray([1, 1])  # [1, 2]
-        rec_list = np.asarray([5, 1, 6, 8])  # [6, 2, 7, 9]
-        self.assertEqual(1.0 / 3.0, ncrr_3.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 1])  # [1, 2]
+        pd_rank = np.asarray([5, 1, 6, 8])  # [6, 2, 7, 9]
+        self.assertEqual(1.0 / 3.0, ncrr_3.compute(gt_pos, pd_rank))
 
     def test_mrr(self):
         mrr = MRR()
@@ -107,20 +113,20 @@ def test_mrr(self):
         self.assertEqual(mrr.type, "ranking")
         self.assertEqual(mrr.name, "MRR")
 
-        self.assertEqual(1, mrr.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(1, mrr.compute(np.asarray([0]), np.asarray([0])))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([0, 2, 1])  # [1, 3, 2]
-        self.assertEqual(1, mrr.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 3, 2]
+        self.assertEqual(1, mrr.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([1, 2, 0])  # [2, 3, 1]
-        self.assertEqual(1 / 2, mrr.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        pd_rank = np.asarray([1, 2, 0])  # [2, 3, 1]
+        self.assertEqual(1 / 2, mrr.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 3]
-        rec_list = np.asarray([1])  # [2]
+        gt_pos = np.asarray([0, 2])  # [1, 3]
+        pd_rank = np.asarray([1])  # [2]
         try:
-            mrr.compute(ground_truth, rec_list)
+            mrr.compute(gt_pos, pd_rank)
         except ValueError:
             assert True
 
@@ -131,14 +137,14 @@ def test_measure_at_k(self):
         assert measure_at_k.name is None
         self.assertEqual(measure_at_k.k, -1)
 
-        tp, tp_fn, tp_fp = measure_at_k.compute(np.asarray([1]), np.asarray([0]))
+        tp, tp_fn, tp_fp = measure_at_k.compute(np.asarray([0]), np.asarray([0]))
         self.assertEqual(1, tp)
         self.assertEqual(1, tp_fn)
         self.assertEqual(1, tp_fp)
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 0, 1]
-        rec_list = np.asarray([0, 2, 1])  # [1, 1, 1]
-        tp, tp_fn, tp_fp = measure_at_k.compute(ground_truth, rec_list)
+        gt_pos = np.asarray([0, 2])  # [1, 0, 1]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 1, 1]
+        tp, tp_fn, tp_fp = measure_at_k.compute(gt_pos, pd_rank)
         self.assertEqual(2, tp)
         self.assertEqual(2, tp_fn)
         self.assertEqual(3, tp_fp)
@@ -149,22 +155,22 @@ def test_precision(self):
         self.assertEqual(prec.type, "ranking")
         self.assertEqual(prec.name, "Precision@-1")
 
-        self.assertEqual(1, prec.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(1, prec.compute(np.asarray([0]), np.asarray([0])))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 0, 1]
-        rec_list = np.asarray([0, 2, 1])  # [1, 1, 1]
-        self.assertEqual((2 / 3), prec.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 0, 1]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 1, 1]
+        self.assertEqual((2 / 3), prec.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual((1 / 3), prec.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual((1 / 3), prec.compute(gt_pos, pd_rank))
 
         prec_2 = Precision(k=2)
         self.assertEqual(prec_2.k, 2)
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual(0.5, prec_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual(0.5, prec_2.compute(gt_pos, pd_rank))
 
     def test_recall(self):
         rec = Recall()
@@ -172,22 +178,22 @@ def test_recall(self):
         self.assertEqual(rec.type, "ranking")
         self.assertEqual(rec.name, "Recall@-1")
 
-        self.assertEqual(1, rec.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(1, rec.compute(np.asarray([0]), np.asarray([0])))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 0, 1]
-        rec_list = np.asarray([0, 2, 1])  # [1, 1, 1]
-        self.assertEqual(1, rec.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 0, 1]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 1, 1]
+        self.assertEqual(1, rec.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual(1, rec.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual(1, rec.compute(gt_pos, pd_rank))
 
         rec_2 = Recall(k=2)
         self.assertEqual(rec_2.k, 2)
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual(1, rec_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual(1, rec_2.compute(gt_pos, pd_rank))
 
     def test_f_measure(self):
         f1 = FMeasure()
@@ -195,26 +201,26 @@ def test_f_measure(self):
         self.assertEqual(f1.type, "ranking")
         self.assertEqual(f1.name, "F1@-1")
 
-        self.assertEqual(1, f1.compute(np.asarray([1]), np.asarray([0])))
+        self.assertEqual(1, f1.compute(np.asarray([0]), np.asarray([0])))
 
-        ground_truth = np.asarray([1, 0, 1])  # [1, 0, 1]
-        rec_list = np.asarray([0, 2, 1])  # [1, 1, 1]
-        self.assertEqual((4 / 5), f1.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0, 2])  # [1, 0, 1]
+        pd_rank = np.asarray([0, 2, 1])  # [1, 1, 1]
+        self.assertEqual((4 / 5), f1.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual((1 / 2), f1.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual((1 / 2), f1.compute(gt_pos, pd_rank))
 
         f1_2 = FMeasure(k=2)
         self.assertEqual(f1_2.k, 2)
 
-        ground_truth = np.asarray([0, 0, 1])  # [0, 0, 1]
-        rec_list = np.asarray([1, 2, 0])  # [1, 1, 1]
-        self.assertEqual((2 / 3), f1_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([2])  # [0, 0, 1]
+        pd_rank = np.asarray([1, 2, 0])  # [1, 1, 1]
+        self.assertEqual((2 / 3), f1_2.compute(gt_pos, pd_rank))
 
-        ground_truth = np.asarray([1, 0, 0])  # [1, 0, 0]
-        rec_list = np.asarray([1, 2])  # [0, 1, 1]
-        self.assertEqual(0, f1_2.compute(ground_truth, rec_list))
+        gt_pos = np.asarray([0])  # [1, 0, 0]
+        pd_rank = np.asarray([1, 2])  # [0, 1, 1]
+        self.assertEqual(0, f1_2.compute(gt_pos, pd_rank))
 
     def test_auc(self):
         auc = AUC()
@@ -222,20 +228,22 @@ def test_auc(self):
         self.assertEqual(auc.type, "ranking")
         self.assertEqual(auc.name, "AUC")
 
-        gt_pos = np.array([0, 0, 1, 1])
+        item_indices = np.arange(4)
+        gt_pos = np.array([2, 3]) # [0, 0, 1, 1]
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
-        auc_score = auc.compute(pd_scores, gt_pos)
+        auc_score = auc.compute(item_indices, pd_scores, gt_pos)
         self.assertEqual(0.75, auc_score)
 
-        gt_pos = np.array([0, 1, 0, 1])
+        item_indices = np.arange(4)
+        gt_pos = np.array([1, 3]) # [0, 1, 0, 1]
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
-        auc_score = auc.compute(pd_scores, gt_pos)
+        auc_score = auc.compute(item_indices, pd_scores, gt_pos)
         self.assertEqual(1.0, auc_score)
 
-        gt_pos = np.array([0, 0, 1, 0])
+        gt_pos = np.array([2]) # [0, 0, 1, 0]
         gt_neg = np.array([1, 1, 0, 0])
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
-        auc_score = auc.compute(pd_scores, gt_pos, gt_neg)
+        auc_score = auc.compute(item_indices, pd_scores, gt_pos, gt_neg)
         self.assertEqual(0.5, auc_score)
 
     def test_map(self):
@@ -244,17 +252,20 @@ def test_map(self):
         self.assertEqual(mAP.type, "ranking")
         self.assertEqual(mAP.name, "MAP")
 
-        gt_pos = np.array([1, 0, 0])
+        item_indices = np.arange(3)
+        gt_pos = np.array([0]) # [1, 0, 0]
         pd_scores = np.array([0.75, 0.5, 1])
-        self.assertEqual(0.5, mAP.compute(pd_scores, gt_pos))
+        self.assertEqual(0.5, mAP.compute(item_indices, pd_scores, gt_pos))
 
-        gt_pos = np.array([0, 0, 1])
+        item_indices = np.arange(3)
+        gt_pos = np.array([2]) # [0, 0, 1]
         pd_scores = np.array([1, 0.2, 0.1])
-        self.assertEqual(1 / 3, mAP.compute(pd_scores, gt_pos))
+        self.assertEqual(1 / 3, mAP.compute(item_indices, pd_scores, gt_pos))
 
-        gt_pos = np.array([0, 1, 0, 1, 0, 1, 0, 0, 0, 0])
-        pd_scores = np.linspace(0.0, 1.0, len(gt_pos))[::-1]
-        self.assertEqual(0.5, mAP.compute(pd_scores, gt_pos))
+        item_indices = np.arange(10)
+        gt_pos = np.array([1, 3, 5]) # [0, 1, 0, 1, 0, 1, 0, 0, 0, 0]
+        pd_scores = np.linspace(0.0, 1.0, len(item_indices))[::-1]
+        self.assertEqual(0.5, mAP.compute(item_indices, pd_scores, gt_pos))
 
 
 if __name__ == "__main__":

From 7177a31dfe16d14c4a4d243b8017f355bc62fe3e Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Fri, 11 Aug 2023 19:37:33 +0000
Subject: [PATCH 09/10] optimize NDCG

---
 cornac/metrics/ranking.py            |  7 ++-----
 tests/cornac/metrics/test_ranking.py | 22 +++++++++-------------
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/cornac/metrics/ranking.py b/cornac/metrics/ranking.py
index 9d4d7d81a..66290c261 100644
--- a/cornac/metrics/ranking.py
+++ b/cornac/metrics/ranking.py
@@ -97,7 +97,7 @@ def dcg_score(gt_pos, pd_rank, k=-1):
 
         return np.sum(gain / discounts)
 
-    def compute(self, gt_pos, gt_neg, pd_rank, **kwargs):
+    def compute(self, gt_pos, pd_rank, **kwargs):
         """Compute Normalized Discounted Cumulative Gain score.
 
         Parameters
@@ -105,9 +105,6 @@ def compute(self, gt_pos, gt_neg, pd_rank, **kwargs):
         gt_pos: Numpy array
             Vector of positive items.
 
-        gt_neg: Numpy array
-            Vector of negative items.
-
         pd_rank: Numpy array
             Item ranking prediction.
 
@@ -120,7 +117,7 @@ def compute(self, gt_pos, gt_neg, pd_rank, **kwargs):
 
         """
         dcg = self.dcg_score(gt_pos, pd_rank, self.k)
-        idcg = self.dcg_score(gt_pos, np.concatenate([gt_pos, gt_neg]), self.k)
+        idcg = self.dcg_score(gt_pos, gt_pos, self.k)
         ndcg = dcg / idcg
 
         return ndcg
diff --git a/tests/cornac/metrics/test_ranking.py b/tests/cornac/metrics/test_ranking.py
index 3b6a918dd..3504d6602 100644
--- a/tests/cornac/metrics/test_ranking.py
+++ b/tests/cornac/metrics/test_ranking.py
@@ -50,25 +50,21 @@ def test_ndcg(self):
 
         self.assertEqual(
             1,
-            ndcg.compute(
-                gt_pos=np.asarray([0]), gt_neg=np.asarray([]), pd_rank=np.asarray([0])
-            ),
+            ndcg.compute(gt_pos=np.asarray([0]), pd_rank=np.asarray([0])),
         )
 
         gt_pos = np.asarray([0, 2])  # [1, 3]
-        gt_neg = np.asarray([1])  # [2]
         pd_rank = np.asarray([0, 2, 1])  # [1, 3, 2]
-        self.assertEqual(1, ndcg.compute(gt_pos, gt_neg, pd_rank))
+        self.assertEqual(1, ndcg.compute(gt_pos, pd_rank))
 
         ndcg_2 = NDCG(k=2)
         self.assertEqual(ndcg_2.k, 2)
 
         gt_pos = np.asarray([2])  # [3]
-        gt_neg = np.asarray([0, 1])  # [1, 2]
         pd_rank = np.asarray([1, 2, 0])  # [2, 3, 1]
         self.assertEqual(
             0.63,
-            float("{:.2f}".format(ndcg_2.compute(gt_pos, gt_neg, pd_rank))),
+            float("{:.2f}".format(ndcg_2.compute(gt_pos, pd_rank))),
         )
 
     def test_ncrr(self):
@@ -229,18 +225,18 @@ def test_auc(self):
         self.assertEqual(auc.name, "AUC")
 
         item_indices = np.arange(4)
-        gt_pos = np.array([2, 3]) # [0, 0, 1, 1]
+        gt_pos = np.array([2, 3])  # [0, 0, 1, 1]
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
         auc_score = auc.compute(item_indices, pd_scores, gt_pos)
         self.assertEqual(0.75, auc_score)
 
         item_indices = np.arange(4)
-        gt_pos = np.array([1, 3]) # [0, 1, 0, 1]
+        gt_pos = np.array([1, 3])  # [0, 1, 0, 1]
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
         auc_score = auc.compute(item_indices, pd_scores, gt_pos)
         self.assertEqual(1.0, auc_score)
 
-        gt_pos = np.array([2]) # [0, 0, 1, 0]
+        gt_pos = np.array([2])  # [0, 0, 1, 0]
         gt_neg = np.array([1, 1, 0, 0])
         pd_scores = np.array([0.1, 0.4, 0.35, 0.8])
         auc_score = auc.compute(item_indices, pd_scores, gt_pos, gt_neg)
@@ -253,17 +249,17 @@ def test_map(self):
         self.assertEqual(mAP.name, "MAP")
 
         item_indices = np.arange(3)
-        gt_pos = np.array([0]) # [1, 0, 0]
+        gt_pos = np.array([0])  # [1, 0, 0]
         pd_scores = np.array([0.75, 0.5, 1])
         self.assertEqual(0.5, mAP.compute(item_indices, pd_scores, gt_pos))
 
         item_indices = np.arange(3)
-        gt_pos = np.array([2]) # [0, 0, 1]
+        gt_pos = np.array([2])  # [0, 0, 1]
         pd_scores = np.array([1, 0.2, 0.1])
         self.assertEqual(1 / 3, mAP.compute(item_indices, pd_scores, gt_pos))
 
         item_indices = np.arange(10)
-        gt_pos = np.array([1, 3, 5]) # [0, 1, 0, 1, 0, 1, 0, 0, 0, 0]
+        gt_pos = np.array([1, 3, 5])  # [0, 1, 0, 1, 0, 1, 0, 0, 0, 0]
         pd_scores = np.linspace(0.0, 1.0, len(item_indices))[::-1]
         self.assertEqual(0.5, mAP.compute(item_indices, pd_scores, gt_pos))
 

From 3a1f0ce1946cba9450c51e338977983dea88f040 Mon Sep 17 00:00:00 2001
From: tqtg <tuantq.vnu@gmail.com>
Date: Fri, 11 Aug 2023 19:38:07 +0000
Subject: [PATCH 10/10] refactor code

---
 cornac/eval_methods/base_method.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cornac/eval_methods/base_method.py b/cornac/eval_methods/base_method.py
index 1c0d789bc..8ce1338e5 100644
--- a/cornac/eval_methods/base_method.py
+++ b/cornac/eval_methods/base_method.py
@@ -194,11 +194,9 @@ def pos_items(csr_row):
 
         # filter items being considered for evaluation
         if exclude_unknowns:
-            item_indices = np.arange(train_set.num_items)
             u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
             u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]
-        else:
-            item_indices = np.arange(test_set.num_items)
+            
         item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
         u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
         u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]