-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluate_item.m
162 lines (151 loc) · 4.82 KB
/
evaluate_item.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
function evalout = evaluate_item(train, test, P, Q, topk, cutoff)
% user_count column vector storing how many entries in the test
if nnz(test)>0
[mat_rank, user_count, cand_count] = Predict(test, train, P, Q, topk);
evalout = compute_item_metric(mat_rank, user_count, cand_count, topk, cutoff);
else
if topk<=0
error('Please give positive topk value')
end
[~, ~, ~, evalout] = Predict(test, train, P, Q, topk);
end
end
function [ mat, user_count, cand_count, topkmat ] = Predict(test, train, U, V, topk )
%Predict: Based user and item latent vector, predict items' rank for each
%user
% R: test rating matrix, where each entry represents wheather the user
% has action on corresponding item; E: training rating matrix, sharing
% similar meaning to R.
% users(i) has one item with ranks(i)
if size(test, 2)==3 && sum(test(:,3)==0) > sum(test(:,3)>0)
[ mat, user_count, cand_count ] = Predict_Tuple(test, train, U, V);
else
Et = train.';
Rt = (test>0).';
Ut = U.';
user_count = sum(Rt~=0 & xor(Et~=0, Rt~=0));
if nnz(test)>0
Ind = (user_count > 0.0001);
Rt = Rt(:, Ind);
Et = Et(:, Ind);
Ut = Ut(:, Ind);
user_count = user_count(Ind);
end
[N, M] = size(Rt);
cand_count = N - sum(Et > 0);
step = 1000;
num_step = floor((M + step-1)/step);
user_cell = cell(num_step, 1);
rank_cell = cell(num_step, 1);
item_cell = cell(num_step, 1);
if nargout == 4
topk_cell = cell(num_step, 1);
end
[Utc, Vc] = split_latent_matrix(Ut,V);
for i=1:num_step
start_u = (i-1)*step +1;
end_u = min(i * step, M);
%subU = Ut(:, start_u:end_u); %%
subUc = get_subUc(Utc, start_u, end_u);
subR = Rt(:, start_u:end_u);
subE = Et(:, start_u:end_u);
%subR_E1 = full(V * subU); %%
subR_E = multiply_cell(subUc, Vc);
subR_E(subE > 0) = -inf;
if topk>0
[~, Index] = maxk(subR_E, topk);
[val, I, J] = find(Index);
rank = sparse(J, I, val, size(subR_E,1), size(subR_E,2));
if nargout == 4
topk_cell{i} = [I + (i-1)*step, J, val];
end
else
[~, Index] = sort(subR_E, 'descend');
[~, rank] = sort(Index);
end
sub_rank = subR .* rank;
[J, I, val] = find(sub_rank);
if ~isempty(I)
user_cell{i} = I + (i-1)*step;
rank_cell{i} = val;
item_cell{i} = J;
end
end
mat = sparse(cell2mat(user_cell), cell2mat(item_cell), cell2mat(rank_cell), M, N);
if nargout == 4
topkmat = cell2mat(topk_cell);
end
cand_count = cand_count.';
user_count = user_count.';
end
end
function [Utc, Vc] = split_latent_matrix(Ut,V)
M = size(Ut,2);
N = size(V,1);
urows = sum(Ut~=0, 2);
dense_u_cols = urows ./M > 0.5;
vrows = sum(V~=0);
dense_v_cols = vrows ./N > 0.5;
cols = dense_u_cols | dense_v_cols.';
Utc = {full(Ut(cols,:)),Ut(~cols,:)};
Vc = {full(V(:,cols)),V(:,~cols)};
end
function subUtc = get_subUc(Utc, starti, endi)
subUtc = cell(length(Utc),1);
for i=1:length(Utc)
subUtc{i} = Utc{i}(:, starti:endi);
end
end
function mat = multiply_cell(Utc, Vc)
for i=1:length(Utc)
if i ==1
mat = Vc{i} * Utc{i};
else
mat = mat + Vc{i} * Utc{i};
end
end
end
function [ mat, user_count, cand_count ] = Predict_Tuple(R, E, U, V )
%Predict: Based user and item latent vector, predict items' rank for each
%user, but candicate items are given in R
% R: test data with three columns, user column, item column and relevance
% column; E: training rating matrix
% users(i) has one item with ranks(i)
Vt = V.';
Et = E.';
I = R(:,1);
J = R(:,2);
Val = R(:,3);
cand_count = tabulate(I); cand_count = cand_count(:,2);
user_count = tabulate(I(Val>0)); user_count = user_count(:,2);
user_ind = cand_count>0;
cand_count = cand_count(user_ind);
user_count = user_count(user_ind);
U = U(user_ind,:);
Et = Et(:, user_ind);
cum_cand_count = cumsum(cand_count);
cum_cand_count = [0;cum_cand_count];
user_cell = cell(length(cand_count), 1);
rank_cell = cell(length(cand_count), 1);
item_cell = cell(length(cand_count), 1);
M = length(cand_count);
N = max(J);
for u=1:length(cand_count)
eu = Et(:,u);
u_start = cum_cand_count(u)+1;
u_end = cum_cand_count(u+1);
assert(std(I(u_start:u_end))==0)
cand_items = J(u_start:u_end);
pred = U(u,:) * Vt(:,cand_items);
cols = [pred.', Val(u_start:u_end),cand_items];
e_ind = eu(cand_items)==0;
cand_count(u) = nnz(e_ind);
cols = cols(e_ind, :);
cols_sorted = sortrows(cols, -1);
rel_rank = find(cols_sorted(:,2));
user_cell{u} = u * ones(length(rel_rank),1);
rank_cell{u} = rel_rank;
item_cell{u} = cols_sorted(rel_rank,3);
end
mat = sparse(cell2mat(user_cell), cell2mat(item_cell), cell2mat(rank_cell), M, N);
end