Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix .rank() method for multiple models #615

Merged
merged 5 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 31 additions & 19 deletions cornac/models/comparer/recom_comparer_obj.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -663,39 +663,51 @@ class ComparERObj(Recommender):
item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
return item_score

def rank(self, user_id, item_ids=None):
def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user.

Parameters
----------
user_id: int, required
user_idx: int, required
The index of the user for whom to perform item ranking.

item_ids: 1d array, optional, default: None
item_indices: 1d array, optional, default: None
A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned

k: int, optional, default: -1
Cut-off length for recommendations, k=-1 will return ranked list of all items.
This is more important for ANN to know the limit to avoid exhaustive ranking.

Returns
-------
Tuple of `item_rank`, and `item_scores`. The order of values
in item_scores are corresponding to the order of their ids in item_ids
(ranked_items, item_scores): tuple
`ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.

"""
X_ = self.U1[user_id, :].dot(self.V.T)
X_ = self.U1[user_idx, :].dot(self.V.T)
most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
most_cared_X_ = X_[most_cared_aspects_indices]
most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id)

if item_ids is None:
item_scores = item_scores
item_rank = item_scores.argsort()[::-1]
else:
num_items = max(self.num_items, max(item_ids) + 1)
item_scores = np.ones(num_items) * np.min(item_scores)
item_scores[:self.num_items] = item_scores
item_rank = item_scores.argsort()[::-1]
item_rank = intersects(item_rank, item_ids, assume_unique=True)
item_scores = item_scores[item_ids]
return item_rank, item_scores
all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)

# rank items based on their scores
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
29 changes: 19 additions & 10 deletions cornac/models/comparer/recom_comparer_sub.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ class ComparERSub(MTER):

return correct, skipped, loss, bpr_loss

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.n_top_aspects > 0:
n_top_aspects = min(self.n_top_aspects, self.num_aspects)
ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
Expand All @@ -786,12 +786,21 @@ class ComparERSub(MTER):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
return super().rank(user_idx, item_indices)
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)
36 changes: 25 additions & 11 deletions cornac/models/efm/recom_efm.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ class EFM(Recommender):
item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
return item_score

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user.

Parameters
Expand All @@ -480,10 +480,15 @@ class EFM(Recommender):
A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned

k: int, optional, default: -1
Cut-off length for recommendations, k=-1 will return ranked list of all items.
This is more important for ANN to know the limit to avoid exhaustive ranking.

Returns
-------
Tuple of `item_rank`, and `item_scores`. The order of values
in item_scores are corresponding to the order of their ids in item_ids
(ranked_items, item_scores): tuple
`ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.

"""
X_ = self.U1[user_idx, :].dot(self.V.T)
Expand All @@ -504,11 +509,20 @@ class EFM(Recommender):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
29 changes: 19 additions & 10 deletions cornac/models/lrppm/recom_lrppm.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ class LRPPM(Recommender):
item_score = self.I[i_idx].dot(self.U[u_idx])
return item_score

def rank(self, user_idx, item_indices=None):
def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.num_top_aspects > 0:
n_items = self.num_items
num_top_aspects = min(self.num_top_aspects, self.num_aspects)
Expand All @@ -540,12 +540,21 @@ class LRPPM(Recommender):
all_item_scores[: self.num_items] = known_item_scores

# rank items based on their scores
if item_indices is None:
item_scores = all_item_scores[: self.num_items]
item_rank = item_scores.argsort()[::-1]
else:
item_scores = all_item_scores[item_indices]
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]

return item_rank, item_scores
return super().rank(user_idx, item_indices)
item_indices = (
np.arange(self.num_items)
if item_indices is None
else np.asarray(item_indices)
)
item_scores = all_item_scores[item_indices]

if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]

return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)
Loading