Filter positive items for ranking evaluation #523

Merged
merged 11 commits on Aug 13, 2023
25 changes: 18 additions & 7 deletions cornac/eval_methods/base_method.py
@@ -177,8 +177,9 @@ def pos_items(csr_row):
if len(test_pos_items) == 0:
continue

u_gt_pos = np.zeros(test_set.num_items, dtype="int")
u_gt_pos[test_pos_items] = 1
# binary mask for ground-truth positive items
u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int")
u_gt_pos_mask[test_pos_items] = 1

val_pos_items = [] if val_mat is None else pos_items(val_mat.getrow(user_idx))
train_pos_items = (
@@ -187,18 +188,28 @@ def pos_items(csr_row):
else pos_items(train_mat.getrow(user_idx))
)

u_gt_neg = np.ones(test_set.num_items, dtype="int")
u_gt_neg[test_pos_items + val_pos_items + train_pos_items] = 0
# binary mask for ground-truth negative items, removing all positive items
u_gt_neg_mask = np.ones(test_set.num_items, dtype="int")
u_gt_neg_mask[test_pos_items + val_pos_items + train_pos_items] = 0

# filter items being considered for evaluation
if exclude_unknowns:
u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]

item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]

item_indices = None if exclude_unknowns else np.arange(test_set.num_items)
item_rank, item_scores = model.rank(user_idx, item_indices)

for i, mt in enumerate(metrics):
mt_score = mt.compute(
gt_pos=u_gt_pos,
gt_neg=u_gt_neg,
gt_pos=u_gt_pos_items,
gt_neg=u_gt_neg_items,
pd_rank=item_rank,
pd_scores=item_scores,
item_indices=item_indices,
)
user_results[i][user_idx] = mt_score
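
To make the new flow concrete, here is a small sketch (toy item counts and ids, not taken from the PR) of what the masks and derived index arrays look like for one user with exclude_unknowns enabled; the metrics then receive u_gt_pos_items / u_gt_neg_items as index arrays rather than dense binary vectors:

```python
import numpy as np

# hypothetical toy setup: 6 items in the test set, the first 5 known at train time
num_test_items, num_train_items = 6, 5
test_pos_items = [1, 3]              # positives observed in the test set
val_pos_items, train_pos_items = [], [0]

# binary masks over all test items
u_gt_pos_mask = np.zeros(num_test_items, dtype="int")
u_gt_pos_mask[test_pos_items] = 1
u_gt_neg_mask = np.ones(num_test_items, dtype="int")
u_gt_neg_mask[test_pos_items + val_pos_items + train_pos_items] = 0

# exclude_unknowns=True drops items unseen during training
u_gt_pos_mask = u_gt_pos_mask[:num_train_items]
u_gt_neg_mask = u_gt_neg_mask[:num_train_items]

item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]  # [1 2 3 4], train positive 0 filtered out
u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]                # [1 3]
u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]                # [2 4]
```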

73 changes: 39 additions & 34 deletions cornac/metrics/ranking.py
@@ -71,7 +71,7 @@ def dcg_score(gt_pos, pd_rank, k=-1):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -91,8 +91,8 @@ def dcg_score(gt_pos, pd_rank, k=-1):
else:
truncated_pd_rank = pd_rank

ranked_scores = np.take(gt_pos, truncated_pd_rank)
gain = 2 ** ranked_scores - 1
ranked_scores = np.in1d(truncated_pd_rank, gt_pos).astype(int)
gain = 2**ranked_scores - 1
discounts = np.log2(np.arange(len(ranked_scores)) + 2)

return np.sum(gain / discounts)
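
Because gt_pos now holds positive item indices instead of a dense 0/1 vector, relevance along the ranking is recovered with np.in1d. A minimal sketch with assumed toy ids:

```python
import numpy as np

pd_rank = np.array([7, 2, 5, 9])   # assumed ranking, best item first
gt_pos = np.array([2, 9])          # assumed positive item ids

ranked_scores = np.in1d(pd_rank, gt_pos).astype(int)    # [0 1 0 1]
gain = 2 ** ranked_scores - 1                           # [0 1 0 1]
discounts = np.log2(np.arange(len(ranked_scores)) + 2)  # log2([2 3 4 5])
dcg = np.sum(gain / discounts)                          # 1/log2(3) + 1/log2(5) ≈ 1.06
```
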
@@ -103,7 +103,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -117,7 +117,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):

"""
dcg = self.dcg_score(gt_pos, pd_rank, self.k)
idcg = self.dcg_score(gt_pos, np.argsort(gt_pos)[::-1], self.k)
idcg = self.dcg_score(gt_pos, gt_pos, self.k)
ndcg = dcg / idcg

return ndcg
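
Since gt_pos is an index array, placing the positives themselves at the top is already the ideal ranking, which is why idcg can reuse dcg_score(gt_pos, gt_pos, k). A hedged usage sketch with the same assumed ids:

```python
import numpy as np
from cornac.metrics import NDCG

ndcg_at_3 = NDCG(k=3)
score = ndcg_at_3.compute(gt_pos=np.array([2, 9]), pd_rank=np.array([7, 2, 5, 9]))
# dcg  = 1/log2(3)              -> only item 2 lands in the top-3
# idcg = 1/log2(2) + 1/log2(3)  -> gt_pos itself is the ideal top of the ranking
# score ≈ 0.631 / 1.631 ≈ 0.39
```
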
@@ -143,7 +143,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -161,17 +161,15 @@ def compute(self, gt_pos, pd_rank, **kwargs):
else:
truncated_pd_rank = pd_rank

gt_pos_items = np.nonzero(gt_pos > 0)

# Compute CRR
rec_rank = np.where(np.in1d(truncated_pd_rank, gt_pos_items))[0]
rec_rank = np.where(np.in1d(truncated_pd_rank, gt_pos))[0]
if len(rec_rank) == 0:
return 0.0
rec_rank = rec_rank + 1 # +1 because indices starts from 0 in python
crr = np.sum(1.0 / rec_rank)

# Compute Ideal CRR
max_nb_pos = min(len(gt_pos_items[0]), len(truncated_pd_rank))
max_nb_pos = min(len(gt_pos), len(truncated_pd_rank))
ideal_rank = np.arange(max_nb_pos)
ideal_rank = ideal_rank + 1 # +1 because indices starts from 0 in python
icrr = np.sum(1.0 / ideal_rank)
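
NCRR follows the same index-based convention; a toy walk-through (ids assumed, and the final crr / icrr ratio is presumed to be what the truncated tail of the method returns):

```python
import numpy as np

pd_rank = np.array([7, 2, 5, 9])   # assumed ranking, best item first
gt_pos = np.array([2, 9])          # assumed positive item ids

rec_rank = np.where(np.in1d(pd_rank, gt_pos))[0] + 1   # positives found at ranks 2 and 4
crr = np.sum(1.0 / rec_rank)                           # 1/2 + 1/4 = 0.75

max_nb_pos = min(len(gt_pos), len(pd_rank))            # 2
icrr = np.sum(1.0 / (np.arange(max_nb_pos) + 1))       # 1/1 + 1/2 = 1.5

ncrr = crr / icrr                                      # 0.5, presumably the returned value
```
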
@@ -199,7 +197,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -212,8 +210,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Mean Reciprocal Rank score.

"""
gt_pos_items = np.nonzero(gt_pos > 0)
matched_items = np.nonzero(np.in1d(pd_rank, gt_pos_items))[0]
matched_items = np.nonzero(np.in1d(pd_rank, gt_pos))[0]

if len(matched_items) == 0:
raise ValueError(
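
The MRR lookup likewise matches positive indices directly against the ranking; the reciprocal-rank step below is an assumption, since the rest of the method sits outside this hunk:

```python
import numpy as np

pd_rank = np.array([7, 2, 5, 9])   # assumed ranking
gt_pos = np.array([2, 9])          # assumed positive item ids

matched_items = np.nonzero(np.in1d(pd_rank, gt_pos))[0]  # [1 3]
mrr = 1.0 / (matched_items[0] + 1)                        # first hit at rank 2 -> 0.5 (assumed final step)
```
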
@@ -246,7 +243,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -270,12 +267,9 @@ def compute(self, gt_pos, pd_rank, **kwargs):
else:
truncated_pd_rank = pd_rank

pred = np.zeros_like(gt_pos)
pred[truncated_pd_rank] = 1

tp = np.sum(pred * gt_pos)
tp_fn = np.sum(gt_pos)
tp_fp = np.sum(pred)
tp = np.sum(np.in1d(truncated_pd_rank, gt_pos))
tp_fn = len(gt_pos)
tp_fp = self.k if self.k > 0 else len(truncated_pd_rank)

return tp, tp_fn, tp_fp
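
The hit counting used by the precision/recall style metrics now compares the truncated ranking against the positive index array; a toy sketch (ids assumed):

```python
import numpy as np

k = 3
pd_rank = np.array([7, 2, 5, 9])   # assumed ranking
gt_pos = np.array([2, 9])          # assumed positive item ids

truncated_pd_rank = pd_rank[:k]                   # [7 2 5]
tp = np.sum(np.in1d(truncated_pd_rank, gt_pos))   # 1 hit (item 2)
tp_fn = len(gt_pos)                               # 2 relevant items overall
tp_fp = k                                         # 3 recommended items

precision_at_k = tp / tp_fp                       # 1/3
recall_at_k = tp / tp_fn                          # 1/2
```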

@@ -300,7 +294,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -337,7 +331,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -374,7 +368,7 @@ def compute(self, gt_pos, pd_rank, **kwargs):
Parameters
----------
gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

pd_rank: Numpy array
Item ranking prediction.
@@ -411,19 +405,22 @@ class AUC(RankingMetric):
def __init__(self):
RankingMetric.__init__(self, name="AUC")

def compute(self, pd_scores, gt_pos, gt_neg=None, **kwargs):
def compute(self, item_indices, pd_scores, gt_pos, gt_neg=None, **kwargs):
"""Compute Area Under the ROC Curve (AUC).

Parameters
----------
item_indices: Numpy array
Items being considered for evaluation.

pd_scores: Numpy array
Prediction scores for items.
Prediction scores for items in item_indices.

gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

gt_neg: Numpy array, optional
Binary vector of negative items.
Vector of negative items.
If None, negation of gt_pos will be used.

**kwargs: For compatibility
@@ -434,11 +431,16 @@ def compute(self, pd_scores, gt_pos, gt_neg=None, **kwargs):
AUC score.

"""
if gt_neg is None:
gt_neg = np.logical_not(gt_pos)

pos_scores = pd_scores[gt_pos.astype('bool')]
neg_scores = pd_scores[gt_neg.astype('bool')]
gt_pos_mask = np.in1d(item_indices, gt_pos)
gt_neg_mask = (
np.logical_not(gt_pos_mask)
if gt_neg is None
else np.in1d(item_indices, gt_neg)
)

pos_scores = pd_scores[gt_pos_mask]
neg_scores = pd_scores[gt_neg_mask]
ui_scores = np.repeat(pos_scores, len(neg_scores))
uj_scores = np.tile(neg_scores, len(pos_scores))
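
AUC rebuilds its positive/negative masks from item_indices with np.in1d before the pairwise comparison; in the sketch below the ids and scores are assumed, and the final (ui_scores > uj_scores).mean() step is presumed since it falls outside the hunk:

```python
import numpy as np

item_indices = np.array([1, 2, 3, 4])        # items scored for this user
pd_scores = np.array([0.9, 0.2, 0.7, 0.4])   # assumed scores aligned with item_indices
gt_pos = np.array([1, 3])                    # assumed positive item ids
gt_neg = np.array([2, 4])                    # assumed negative item ids

gt_pos_mask = np.in1d(item_indices, gt_pos)  # [ True False  True False]
gt_neg_mask = np.in1d(item_indices, gt_neg)  # [False  True False  True]

pos_scores = pd_scores[gt_pos_mask]          # [0.9 0.7]
neg_scores = pd_scores[gt_neg_mask]          # [0.2 0.4]

ui_scores = np.repeat(pos_scores, len(neg_scores))  # each positive vs. every negative
uj_scores = np.tile(neg_scores, len(pos_scores))
auc = (ui_scores > uj_scores).mean()                # 1.0: every positive outscores every negative
```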

@@ -457,16 +459,19 @@ class MAP(RankingMetric):
def __init__(self):
RankingMetric.__init__(self, name="MAP")

def compute(self, pd_scores, gt_pos, **kwargs):
def compute(self, item_indices, pd_scores, gt_pos, **kwargs):
"""Compute Average Precision.

Parameters
----------
item_indices: Numpy array
Items being considered for evaluation.

pd_scores: Numpy array
Prediction scores for items.

gt_pos: Numpy array
Binary vector of positive items.
Vector of positive items.

**kwargs: For compatibility

@@ -476,7 +481,7 @@ def compute(self, pd_scores, gt_pos, **kwargs):
AP score.

"""
relevant = gt_pos.astype('bool')
relevant = np.in1d(item_indices, gt_pos)
rank = rankdata(-pd_scores, "max")[relevant]
L = rankdata(-pd_scores[relevant], "max")
ans = (L / rank).mean()
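
MAP turns the gt_pos indices into a relevance mask over item_indices in the same way; a toy sketch with assumed scores:

```python
import numpy as np
from scipy.stats import rankdata

item_indices = np.array([1, 2, 3, 4])
pd_scores = np.array([0.9, 0.2, 0.7, 0.4])   # assumed scores aligned with item_indices
gt_pos = np.array([1, 3])                    # assumed positive item ids

relevant = np.in1d(item_indices, gt_pos)     # [ True False  True False]
rank = rankdata(-pd_scores, "max")[relevant] # global ranks of the relevant items: [1 2]
L = rankdata(-pd_scores[relevant], "max")    # ranks among relevant items only:   [1 2]
ap = (L / rank).mean()                       # (1/1 + 2/2) / 2 = 1.0
```
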
1 change: 1 addition & 0 deletions pytest.ini
@@ -5,6 +5,7 @@ norecursedirs = tests/cornac/datasets
addopts=-v
--durations=20
--ignore=tests/cornac/utils/test_download.py
--ignore=tests/cornac/eval_methods/test_propensity_stratified_evaluation.py

# PEP-8 The following are ignored:
# E501 line too long (82 > 79 characters)