-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsymNMF.m
194 lines (179 loc) · 6.12 KB
/
symNMF.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
% Exact coordinate method for symmetric NMF (symNMF): given an n-by-n
% symmetric nonnegative matrix A and a factorization rank r, it computes a
% n-by-r nonnegative matrix H, solving the problem
%
% min_{H >= 0} 1/2 * ||A - HH^T||_F^2
%
% using an exact coordinate descent method.
%
% See A. Vandaele, N. Gillis, Q. Lei, K. Zhong, I. Dhillon, Coordinate
% Descent Methods for Symmetric Nonnegative Matrix Factorization, arXiv,
% 2015, for all the details.
%
% If you use the code, please cite the paper.
% The code is available from https://sites.google.com/site/nicolasgillis/
%
% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
% To use symNMF, you need to compile the file symNMFmex.cpp using the
% command:
% "mex -largeArrayDims symNMFmex.cpp"
%
% For this, you need to install a compiler.
% See, e.g., http://nl.mathworks.com/support/compilers/R2012a/win64.html
% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
%
% [H,e,t] = symNMF(A,Hr,options)
%
% Input.
% A : (n x n) symmetric matrix to factorize
% Hr : inner rank of factorization OR initial matrix of size (n x r)
% options : optional field with parameters (see loadoptions.m file)
% Output.
% H : (n x r) nonnegative matrix s.t. HH^T approximates A
% (e,t) : error 1/2 * ||A - HH^T||_F^2, and CPU time
function [H,e,t]=symNMF(A,Hr,options)
% Driver for the coordinate descent solver: validates the options, builds
% or checks the initial matrix H, rescales it, calls symNMFmex and turns the
% per-iteration decreases it returns into the error history e.
%
% If any input check fails, a message is displayed and the function returns
% without running the solver: e and t are empty, and H is either empty (no
% initial matrix could be built) or the matrix supplied in Hr.

% Every output gets a value up front so that an early exit never leaves an
% output argument unassigned.
H=[]; e=[]; t=[];

% A must be a symmetric matrix
n = size(A,1);

% Loading options (defaults, overridden by the user-supplied fields)
if nargin<3
    options=update_options();
else
    options=update_options(options);
end

run_possible=true;

% Input argument #2 can be 'r' or the initial matrix 'H'
if size(Hr,1)==1 && size(Hr,2)==1
    r = Hr;
    switch options.initmatrix
        case 'zeros'
            H = zeros(n,r);
        case 'dense01'
            % A seed of -1 means "do not touch the random generator"
            if options.seed ~= -1
                rng(options.seed);
            end
            H = rand(size(A,1),r);
        otherwise
            display(sprintf('Error - options.initmatrix is not correct (zeros or dense01).'));
            run_possible = false;
    end
else
    H = Hr;
    r = size(H,2);
    if size(H,1)~=n
        display('Error - The size of A and the size of H must me the same');
        run_possible=false;
    end
end

%%%%%%%%%%%% PARAMETERS %%%%%%%%%%%%%%
% Maximum number of iterations: must be a positive integer scalar.
% floor(x)~=x is the integrality test (floor(x)-x is never positive, so
% comparing that difference with ">0" would accept every non-integer).
if ~isnumeric(options.maxiter) || ~isscalar(options.maxiter) || ...
        options.maxiter<=0 || floor(options.maxiter)~=options.maxiter
    display(sprintf('Error - options.maxiter is not correct (positive integer).'));
    run_possible = false;
else
    if options.maxiter>1e6
        display(sprintf('\nWarning: maxiter is > 1e6, we change it to 1e6.'));
        maxiter = 1e6;
    else
        maxiter = options.maxiter;
    end
end

% Maximum time of execution: must be a positive scalar
if ~isnumeric(options.timelimit) || ~isscalar(options.timelimit) || options.timelimit<=0
    display(sprintf('Error - options.timelimit is not correct (positive number).'));
    run_possible = false;
else
    timelimit = options.timelimit;
end

% Shuffling the columns
switch options.shuffle_columns
    case 0
        sc = 0;
    case 1
        sc = 1;
    otherwise
        display(sprintf('Error - options.shuffle_columns is not correct (0 or 1).'));
        run_possible = false;
end

if(run_possible)
    %%%%%%%%%%%% Scaling %%%%%%%%%%%%%%
    % Multiply H by sqrt(alpha) with alpha = <A,HH^T>/||H^TH||_F^2.
    % Skipped when H sums to zero (e.g. the 'zeros' initialization), where
    % the ratio would be 0/0.
    if sum(sum(H))>0
        nHtH = norm(H'*H,'fro')^2;
        HtAHt = sum(sum((H'*A).*(H')));
        scaling = HtAHt/nHtH;
        H = sqrt(scaling)*H;
    end

    %%%%%%%%%%%% Initial objective function %%%%%%%%%%%%%%
    % 1/2*||A-HH^T||_F^2 expanded as 1/2*(||A||^2 - 2<A,HH^T> + ||H^TH||^2)
    nA = norm(A,'fro')^2;
    nHtH = norm(H'*H,'fro')^2;
    HtAHt = sum(sum((H'*A).*(H')));
    e0 = 0.5*(nA-2*HtAHt+nHtH);
    if strcmp(options.display,'on')==1
        display(logdisplay_parameters(n,r,e0,options));
    end

    % Algorithm
    [H,et,t] = symNMFmex(A,H,maxiter,timelimit,sc);

    % Objective function
    % The length of 'et' and 't' (coming from 'symNMFmex')
    % is 'maxiter'. If the timelimit was the first stopping criterion,
    % the unused components are filled with '-1'.
    % Each component et(i) is the decrease of the objective function
    % from et(i-1).
    e = e0;
    for i=1:length(et)
        if et(i)==-1
            break;
        end
        e(i+1) = e(i)-0.5*et(i);
    end
    % Time 0 pairs with the initial error e0; keep only the used entries of t
    t=[0;t(1:length(e)-1)];
    e=e';

    %%%%%%%%%%%% Final objective function %%%%%%%%%%%%%%
    if strcmp(options.display,'on')==1
        nHtH = norm(H'*H,'fro')^2;
        HtAHt = sum(sum((H'*A).*(H')));
        ef = 0.5*(nA-2*HtAHt+nHtH);
        display(sprintf('Final objective function=%1.5g',ef));
    end
end
end
function options=update_options(optnew)
% Return the options structure used by symNMF.
% The loadoptions script is run first and is expected to define the
% variable 'options' in this workspace. When optnew is given, each
% recognised field present in optnew replaces the corresponding value;
% any other field of optnew is ignored.
loadoptions;
if nargin>0
    overridable = {'maxiter','timelimit','display', ...
                   'shuffle_columns','initmatrix','seed'};
    for k=1:numel(overridable)
        fname = overridable{k};
        if isfield(optnew,fname)
            options.(fname) = optnew.(fname);
        end
    end
end
end
function s=logdisplay_parameters(n,r,e0,options)
% Compose the multi-line summary shown before the solver starts:
% problem size and stopping parameters, the kind of initial matrix
% (only for the 'zeros' and 'dense01' settings), whether the columns are
% shuffled, and the initial objective value e0.
nl = sprintf('\n');
s = sprintf('Factorizing a %dx%d matrix using r=%d (maxiter=%d, timelimit=%f)', ...
            n,n,r,options.maxiter,options.timelimit);
switch options.initmatrix
    case 'zeros'
        s = [s, nl, sprintf('Initial matrix: zeros(%d,%d)',n,r)];
    case 'dense01'
        s = [s, nl, sprintf('Initial matrix: rand(%d,%d) with seed=%f',n,r,options.seed)];
end
if options.shuffle_columns
    s = [s, nl, 'The columns are shuffled'];
end
s = [s, nl, sprintf('Initial objective function=%1.5g',e0)];
end