forked from bmcfee/hypergraph_playlist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
buildCFmatrix.py
executable file
·83 lines (66 loc) · 1.98 KB
/
buildCFmatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
'''
CREATED:2012-03-20 20:26:12 by Brian McFee <bmcfee@cs.ucsd.edu>
Load an MML latent-factor model and save as a pickle
Usage:
./buildCFmatrix.py cfdata.pickle item_mapping.txt cfmodel.mml playlist.pickle
'''
import sys
import numpy
import cPickle as pickle
def loadPlaylistSongs(infile):
    '''Load the pickled playlist data and return its song collection.

    Arguments:
        infile -- path to a pickle file whose top-level object can be
                  indexed with the key 'songs'

    Returns:
        Whatever object was stored under 'songs'.  Its concrete type is not
        visible here; elsewhere in this script it is only used for
        membership (`in`) tests.

    Raises:
        IOError/OSError if the file cannot be opened;
        KeyError if the unpickled object has no 'songs' entry.

    NOTE: unpickling can execute arbitrary code embedded in the file, so
    only feed this playlist pickles from a trusted source.
    '''
    # A pickle is a byte stream: binary mode is required for the binary
    # pickle protocols and on Python 3, and it prevents newline translation
    # on platforms that distinguish text from binary files.
    with open(infile, 'rb') as f:
        X = pickle.load(f)
    return X['songs']
def loadItemMap(infile, songs):
    '''Read the internal-to-external item id mapping, restricted to `songs`.

    Each non-blank line of `infile` must hold exactly two tab-separated
    fields: an integer internal id followed by an external id string.

    Arguments:
        infile -- path to the tab-separated mapping file
        songs  -- container of external ids to keep (only `in` is used)

    Returns:
        dict mapping int(internal id) -> external id, containing only the
        entries whose external id is in `songs`.

    Raises:
        ValueError if a non-blank line does not have exactly two
        tab-separated fields, or if the internal id is not an integer.
    '''
    itemmap = {}
    with open(infile, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line) rather
                # than failing on the tuple unpack below.
                continue
            # The unpack enforces the two-field format: a line with fewer or
            # more fields raises ValueError.
            (internal, external) = line.split('\t', 2)
            if external in songs:
                itemmap[int(internal)] = external
    return itemmap
def _take_lines(handle, count):
    '''Yield the next `count` lines of `handle`, one readline() call each.

    A plain counter is used instead of xrange()/range() so the same code
    runs on Python 2 and Python 3 without building a large index list.
    At end of file readline() returns '' and that value is yielded as-is.
    '''
    remaining = count
    while remaining > 0:
        yield handle.readline()
        remaining -= 1


def loadCFdata(itemmap, infile):
    '''Extract the item vectors named in `itemmap` from a saved model file.

    The file is consumed strictly line by line in this order (the layout is
    assumed, not validated -- presumably it is the text format written by
    the MML tool mentioned in the module header; confirm against a sample):

        1. two lines that are skipped
        2. one line "<nUsers> <nDim>"
        3. nUsers * nDim + 1 lines that are skipped
        4. one line "<nItems>"
        5. nItems + 1 lines that are skipped
        6. nItems * nDim lines; the last space-separated token of each is
           parsed as a float.  Consecutive runs of nDim lines form the
           vector of item 0, 1, 2, ...

    Arguments:
        itemmap -- dict mapping 0-based item index (int) -> external id
        infile  -- path to the model file

    Returns:
        dict mapping external id -> numpy float array of length nDim, with
        one entry per item index present in `itemmap`.

    Raises:
        ValueError if a header line or a needed value cannot be parsed
        (this includes hitting end of file early).
    '''
    X = {}
    with open(infile, 'r') as f:
        # skip the first two lines
        for _ in _take_lines(f, 2):
            pass

        (nUsers, nDim) = map(int, f.readline().strip().split(' ', 2))

        # chew up the nUsers * nDim + 1 lines that precede the item count
        for _ in _take_lines(f, nUsers * nDim + 1):
            pass

        nItems = int(f.readline().strip())

        # skip the nItems + 1 lines that sit between the item count and the
        # vector data
        for _ in _take_lines(f, nItems + 1):
            pass

        # now to load the vector data
        item = 0
        while item < nItems:
            rows = _take_lines(f, nDim)
            if item in itemmap:
                X[itemmap[item]] = numpy.array(
                    [float(row.strip().split(' ', 3)[-1]) for row in rows],
                    dtype=float)
            else:
                # Unwanted item: its rows still have to be consumed to stay
                # aligned, but there is no need to parse or store them.
                for _ in rows:
                    pass
            item += 1
    return X
def saveData(outfile, cfdata):
    '''Pickle the collaborative-filter data to `outfile`.

    Arguments:
        outfile -- path of the pickle file to create (overwritten if it
                   already exists)
        cfdata  -- object to store; it is written wrapped in a dict under
                   the single key 'X'

    Raises:
        IOError/OSError if the file cannot be opened for writing.
    '''
    # Pickle output is a byte stream: binary mode is required on Python 3
    # and for the binary protocols, and avoids newline translation on
    # platforms that distinguish text from binary files.
    with open(outfile, 'wb') as f:
        pickle.dump({'X': cfdata}, f)
# ./buildCFmatrix.py cfdata.pickle item_mapping.txt cfmodel.mml playlist.pickle
if __name__ == '__main__':
    # Four positional arguments are required.  Report the expected usage
    # instead of dying with an IndexError when some are missing; any extra
    # arguments are ignored.
    if len(sys.argv) < 5:
        sys.stderr.write(
            'Usage: %s cfdata.pickle item_mapping.txt cfmodel.mml '
            'playlist.pickle\n' % sys.argv[0])
        sys.exit(1)

    # The playlist's songs decide which rows of the item mapping are kept,
    # and that mapping in turn decides which model vectors are loaded.
    songset = loadPlaylistSongs(sys.argv[4])
    itemMapping = loadItemMap(sys.argv[2], songset)
    cfdata = loadCFdata(itemMapping, sys.argv[3])

    # The output path is the first argument.
    saveData(sys.argv[1], cfdata)