Skip to content

Commit

Permalink
Remove usage of old Dataset from IBPR model (#227)
Browse files Browse the repository at this point in the history
  • Loading branch information
tqtg authored Sep 10, 2019
1 parent c7765af commit 4c6061c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 67 deletions.
77 changes: 29 additions & 48 deletions cornac/models/ibpr/ibpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,44 +13,15 @@
# limitations under the License.
# ============================================================================

import random

import numpy as np
import torch
from tqdm import tqdm

from ...utils.data_utils import Dataset

"""Firstly, we define a helper function to generate\sample training ordinal triplets:
Step 1:
Given a rated item i, randomly choose an item j and check whether the rating of j is lower than that of i;
if not, randomly sample another item.
Each row of the sampled data has the following form:
[userId itemId_i itemId_j]
meaning that user u prefers item i over item j.
"""


def sample_data(X, data):
    """Draw one comparison item ``j`` for every observed (user, item, rating) row.

    Parameters
    ----------
    X : 2-D indexable
        Rating matrix read as ``X[u, j]``; ``X.shape[1]`` is used as the
        number of candidate items.
    data : numpy.ndarray
        One row per observation; columns 0, 1 and 2 are read as the user
        index, the item index and the rating. Values are cast to ``int``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(data.shape[0], 5)`` whose rows are
        ``[u, i, j, rating_i, X[u, j]]``.
    """
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so use the builtin directly.
    sampled_data = np.zeros((data.shape[0], 5), dtype=int)
    data = data.astype(int)
    last_item = X.shape[1] - 1  # loop-invariant upper bound for randint (inclusive)

    for k, row in enumerate(data):
        u, i, rating_i = row[0], row[1], row[2]

        # Rejection sampling: redraw while the candidate is rated strictly
        # higher than item i. A candidate with an equal rating is accepted.
        j = random.randint(0, last_item)
        while X[u, j] > rating_i:
            j = random.randint(0, last_item)

        sampled_data[k, :] = [u, i, j, rating_i, X[u, j]]

    return sampled_data


def ibpr(X, data, k, lamda=0.001, n_epochs=150, learning_rate=0.05, batch_size=100, init_params=None):
# X = sp.csr_matrix(X)
Data = Dataset(data)
def ibpr(train_set, k, lamda=0.001, n_epochs=150, learning_rate=0.05, batch_size=100,
init_params=None, verbose=False):
X = train_set.csr_matrix

# Initial user factors
if init_params['U'] is None:
Expand All @@ -67,19 +38,21 @@ def ibpr(X, data, k, lamda=0.001, n_epochs=150, learning_rate=0.05, batch_size=1
V = torch.from_numpy(V)

optimizer = torch.optim.Adam([U, V], lr=learning_rate)
for epoch in range(n_epochs):
num_steps = int(Data.data.shape[0] / batch_size)
for i in range(1, num_steps + 1):
batch_c, _ = Data.next_batch(batch_size)
# print(batch_c, idx)
sampled_batch = sample_data(X, batch_c)

regU = U[sampled_batch[:, 0], :]
regI = V[sampled_batch[:, 1], :]
regJ = V[sampled_batch[:, 2], :]
for epoch in range(1, n_epochs + 1):
sum_loss = 0.
count = 0
progress_bar = tqdm(total=train_set.num_batches(batch_size),
desc='Epoch {}/{}'.format(epoch, n_epochs),
disable=not verbose)

for batch_u, batch_i, batch_j in train_set.uij_iter(batch_size, shuffle=True):
regU = U[batch_u, :]
regI = V[batch_i, :]
regJ = V[batch_j, :]

regU_unq = U[np.unique(sampled_batch[:, 0]), :]
regI_unq = V[np.unique(sampled_batch[:, 1:]), :]
regU_unq = U[np.unique(batch_u), :]
regI_unq = V[np.union1d(batch_i, batch_j), :]

regU_norm = regU / regU.norm(dim=1)[:, None]
regI_norm = regI / regI.norm(dim=1)[:, None]
Expand All @@ -88,14 +61,22 @@ def ibpr(X, data, k, lamda=0.001, n_epochs=150, learning_rate=0.05, batch_size=1
Scorei = torch.acos(torch.clamp(torch.sum(regU_norm * regI_norm, dim=1), -1 + 1e-7, 1 - 1e-7))
Scorej = torch.acos(torch.clamp(torch.sum(regU_norm * regJ_norm, dim=1), -1 + 1e-7, 1 - 1e-7))

loss = lamda * (regU_unq.norm().pow(2) + regI_unq.norm().pow(2)) - torch.log(
torch.sigmoid(Scorej - Scorei)).sum()
loss = lamda * (regU_unq.norm().pow(2) + regI_unq.norm().pow(2)) \
- torch.log(torch.sigmoid(Scorej - Scorei)).sum()
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('epoch:', epoch, 'loss:', loss)

# since the user's preference is defined by the angular distance, we can normalize the user/item vectors without changing the ranking
sum_loss += loss.data.item()
count += len(batch_u)
if count % (batch_size * 10) == 0:
progress_bar.set_postfix(loss=(sum_loss / count))
progress_bar.update(1)

progress_bar.close()

# since the user's preference is defined by the angular distance,
# we can normalize the user/item vectors without changing the ranking
U = torch.nn.functional.normalize(U, p=2, dim=1)
V = torch.nn.functional.normalize(V, p=2, dim=1)
U = U.data.cpu().numpy()
Expand Down
20 changes: 3 additions & 17 deletions cornac/models/ibpr/recom_ibpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class IBPR(Recommender):
In Proceedings of the 2017 ACM on Conference on Information and Knowledge Management (pp. 1389-1398). ACM.
"""

def __init__(self, k=20, max_iter=100, learning_rate=0.05, lamda=0.001, batch_size=100, name="ibpr", trainable=True,
def __init__(self, k=20, max_iter=100, learning_rate=0.05, lamda=0.001, batch_size=100, name="IBPR", trainable=True,
verbose=False, init_params=None):
Recommender.__init__(self, name=name, trainable=trainable, verbose=verbose)
self.k = k
Expand Down Expand Up @@ -98,25 +98,11 @@ def fit(self, train_set, val_set=None):

from .ibpr import ibpr

X = self.train_set.matrix

# change the data to original user Id item Id and rating format
cooX = X.tocoo()
data = np.ndarray(shape=(len(cooX.data), 3), dtype=float)
data[:, 0] = cooX.row
data[:, 1] = cooX.col
data[:, 2] = cooX.data

if self.verbose:
print('Learning...')
res = ibpr(X, data, k=self.k, n_epochs=self.max_iter, lamda=self.lamda, learning_rate=self.learning_rate,
batch_size=self.batch_size, init_params=self.init_params)
res = ibpr(self.train_set, k=self.k, n_epochs=self.max_iter, lamda=self.lamda, learning_rate=self.learning_rate,
batch_size=self.batch_size, init_params=self.init_params, verbose=self.verbose)
self.U = np.asarray(res['U'])
self.V = np.asarray(res['V'])

if self.verbose:
print('Learning completed')

return self

def score(self, user_idx, item_idx=None):
Expand Down
5 changes: 3 additions & 2 deletions examples/ibpr_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
ml_1m = movielens.load_1m()

# Instantiate an evaluation method.
ratio_split = RatioSplit(data=ml_1m, test_size=0.2, rating_threshold=1.0, exclude_unknowns=True)
ratio_split = RatioSplit(data=ml_1m, test_size=0.2, rating_threshold=1.0,
exclude_unknowns=True, verbose=True)

# Instantiate a IBPR recommender model.
ibpr = IBPR(k=10, init_params={'U': None, 'V': None})
ibpr = IBPR(k=10, init_params={'U': None, 'V': None}, verbose=True)

# Instantiate evaluation metrics.
rec_20 = cornac.metrics.Recall(k=20)
Expand Down

0 comments on commit 4c6061c

Please sign in to comment.