-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
94 lines (69 loc) · 2.54 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from struct import pack
def write_fvecs(filename, vecs):
    """Write float vectors to ``filename`` in the ``.fvecs`` binary layout.

    Every vector becomes one record: a little-endian int32 holding the
    number of components, followed by that many little-endian float32
    values.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten (the file is opened with mode ``"wb"``).
        vecs: Iterable of sized sequences of numbers. The length is taken
            per vector, so rows of different lengths are accepted.
    """
    with open(filename, "wb") as f:
        for vec in vecs:
            dim = len(vec)
            # The explicit '<' keeps the payload in the same little-endian
            # byte order as the length header regardless of the host CPU,
            # and packing header + payload together gives one write per
            # record.
            f.write(pack('<i%df' % dim, dim, *vec))
def write_ivecs(filename, vecs):
    """Write integer vectors to ``filename`` in the ``.ivecs`` binary layout.

    Every vector becomes one record: a little-endian int32 holding the
    number of components, followed by that many little-endian int32 values.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten (the file is opened with mode ``"wb"``).
        vecs: Iterable of sized sequences of integers. The length is taken
            per vector, so rows of different lengths are accepted.
    """
    with open(filename, "wb") as f:
        for vec in vecs:
            dim = len(vec)
            # The explicit '<' keeps the payload in the same little-endian
            # byte order as the length header regardless of the host CPU,
            # and packing header + payload together gives one write per
            # record.
            f.write(pack('<i%di' % dim, dim, *vec))
def write_edges_dict(filename, edges):
    """Write the adjacency lists of a graph given as a mapping.

    For each entry of ``edges`` one record is written: a little-endian
    int32 with the number of neighbours, followed by the neighbour ids as
    little-endian int32 values. Records appear in the mapping's iteration
    order.

    Only the values of the mapping are stored; the source vertex ids (the
    keys) are not written to the file.
    NOTE(review): a reader can therefore only recover the source vertex from
    the record position -- confirm that callers pass keys in the intended
    order.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten (the file is opened with mode ``"wb"``).
        edges: Mapping whose values are sized sequences of integer
            vertex ids.
    """
    with open(filename, "wb") as f:
        # The keys are never serialised, so iterate over the values only.
        for to_vertex_ids in edges.values():
            dim = len(to_vertex_ids)
            # The explicit '<' keeps the payload in the same little-endian
            # byte order as the length header regardless of the host CPU.
            f.write(pack('<i%di' % dim, dim, *to_vertex_ids))
def write_edges_list(filename, edges):
    """Write the adjacency lists of a graph given as a sequence.

    For each element of ``edges`` one record is written: a little-endian
    int32 with the number of neighbours, followed by the neighbour ids as
    little-endian int32 values. Records appear in iteration order.

    Args:
        filename: Path of the file to create; an existing file is
            overwritten (the file is opened with mode ``"wb"``).
        edges: Iterable of sized sequences of integer vertex ids.
    """
    with open(filename, "wb") as f:
        for to_vertex_ids in edges:
            dim = len(to_vertex_ids)
            # The explicit '<' keeps the payload in the same little-endian
            # byte order as the length header regardless of the host CPU.
            f.write(pack('<i%di' % dim, dim, *to_vertex_ids))
def ivecs_read(fname):
    """Load a whole ``.ivecs`` file into memory as a 2-D int32 array.

    The file is read as a flat run of little-endian int32 values. The first
    value is taken as the vector length ``d``; the data is then viewed as
    rows of ``d + 1`` values and the leading length column is dropped.

    Because a single ``d`` is used for the reshape, all records in the file
    must have the same length as the first one.

    Args:
        fname: Path of the file to read.

    Returns:
        A freshly allocated array of shape ``(n_vectors, d)``.

    Raises:
        IndexError: if the file contains no data.
        ValueError: if the file size is not a multiple of ``d + 1`` values.
    """
    # '<i4' rather than native 'int32': the on-disk format is little-endian.
    flat = np.fromfile(fname, dtype='<i4')
    dim = int(flat[0])
    # copy() yields a contiguous array that no longer references the raw
    # buffer which still contains the per-record length column.
    return flat.reshape(-1, dim + 1)[:, 1:].copy()
def mmap_fvecs(fname):
    """Memory-map a ``.fvecs`` file and expose its vectors as float32 rows.

    The file is mapped read-only as little-endian 32-bit words. The first
    word is taken as the vector length ``d``; the words are then
    reinterpreted as float32, viewed as rows of ``d + 1`` values, and the
    leading length column is sliced away.

    Because a single ``d`` is used for the reshape, all records in the file
    must have the same length as the first one.

    Args:
        fname: Path of the file to map.

    Returns:
        A read-only, memmap-backed view of shape ``(n_vectors, d)``. No copy
        is made here, so the result is not C-contiguous.
    """
    # '<i4' / '<f4' rather than native dtypes: the on-disk format is
    # little-endian.
    words = np.memmap(fname, dtype='<i4', mode='r')
    dim = int(words[0])
    return words.view('<f4').reshape(-1, dim + 1)[:, 1:]
def mmap_bvecs(fname):
    """Memory-map a ``.bvecs`` file and expose its vectors as uint8 rows.

    The file is mapped read-only as raw bytes. The first four bytes are
    decoded as a little-endian int32 giving the vector length ``d``; the
    bytes are then viewed as rows of ``d + 4`` bytes and the 4-byte length
    header of every row is sliced away.

    Because a single ``d`` is used for the reshape, all records in the file
    must have the same length as the first one.

    Args:
        fname: Path of the file to map.

    Returns:
        A read-only, memmap-backed view of shape ``(n_vectors, d)``.
    """
    raw = np.memmap(fname, dtype='uint8', mode='r')
    # '<i4' rather than native 'int32': the length header is little-endian.
    dim = int(raw[:4].view('<i4')[0])
    return raw.reshape(-1, dim + 4)[:, 4:]
def getBasedir(s, mnt=False):
    """Return the path prefix for the files of dataset ``s``.

    Args:
        s: Dataset key; one of ``"sift"``, ``"gist"``, ``"glove"``,
            ``"deep"`` or ``"uniform_low"``.
        mnt: When truthy the prefix is rooted at ``/mnt/data/shekhale/``,
            otherwise at ``/home/shekhale/``.

    Returns:
        The root joined with the dataset-specific suffix. For every key
        except ``"uniform_low"`` the suffix ends in a file-name stem; for
        ``"uniform_low"`` it ends in a directory separator.

    Raises:
        KeyError: if ``s`` is not one of the keys listed above.
    """
    root = "/mnt/data/shekhale/" if mnt else "/home/shekhale/"
    suffix_by_dataset = {
        "sift": "data/sift/sift",
        "gist": "data/gist/gist",
        "glove": "data/glove/glove",
        "deep": "data/deep/deep",
        "uniform_low": "data/synthetic/",
    }
    return root + suffix_by_dataset[s]
def load_simple(device, database, calc_gt=False, mnt=False):
    """Load the base vectors, query vectors and ground truth of a dataset.

    The files ``<prefix>_base.fvecs``, ``<prefix>_query.fvecs`` and
    ``<prefix>_groundtruth.ivecs`` are read, where ``<prefix>`` comes from
    ``getBasedir(database, mnt)``.

    Args:
        device: Accepted but not used by this function.
        database: Dataset key forwarded to ``getBasedir``.
        calc_gt: Accepted but not used by this function.
        mnt: Forwarded to ``getBasedir`` to select the storage root.

    Returns:
        A 4-tuple ``(base, base, queries, ground_truth)``. The base array
        is deliberately placed in both of the first two slots.
        NOTE(review): presumably the first slot stands in for a separate
        training set -- confirm with callers.
    """
    prefix = getBasedir(database, mnt)
    base_vectors = mmap_fvecs(prefix + '_base.fvecs')
    query_vectors = mmap_fvecs(prefix + '_query.fvecs')
    ground_truth = ivecs_read(prefix + '_groundtruth.ivecs')
    # The memory-mapped views are strided slices; convert them into
    # C-contiguous arrays before handing them out.
    base_vectors = np.ascontiguousarray(base_vectors)
    query_vectors = np.ascontiguousarray(query_vectors)
    return base_vectors, base_vectors, query_vectors, ground_truth
def load_dataset(name, device, calc_gt=False, mnt=True):
    """Load a dataset by name via ``load_simple``.

    Recognised names are ``"sift"``, ``"gist"``, ``"deep"``, ``"glove"``
    and ``"boltalka_dssm"``; each is forwarded unchanged to ``load_simple``
    together with ``device``, ``calc_gt`` and ``mnt``.

    Note that ``mnt`` defaults to ``True`` here, unlike in ``load_simple``.

    Args:
        name: Dataset name.
        device: Forwarded to ``load_simple``.
        calc_gt: Forwarded to ``load_simple``.
        mnt: Forwarded to ``load_simple``.

    Returns:
        Whatever ``load_simple`` returns for a recognised name, or ``None``
        when ``name`` is not recognised.
    """
    recognised = ("sift", "gist", "deep", "glove", "boltalka_dssm")
    for dataset in recognised:
        if name == dataset:
            return load_simple(device, dataset, calc_gt, mnt)
    # Unrecognised names fall through without raising.
    return None