Skip to content

Commit

Permalink
Fix and improve Vamana and IVF PQ parameters (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
jparismorgan authored Aug 14, 2024
1 parent 905bfdf commit b76d670
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 19 deletions.
6 changes: 6 additions & 0 deletions ann_benchmarks/algorithms/tiledb/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
FROM ann-benchmarks

# Option 1: Install pre-built library.
RUN pip install tiledb tiledb-vector-search

# Option 2: Build the library ourselves.
# RUN git clone https://github.com/TileDB-Inc/TileDB-Vector-Search.git
# RUN cd TileDB-Vector-Search && pip install .

RUN python3 -c 'import tiledb.vector_search'
16 changes: 10 additions & 6 deletions ann_benchmarks/algorithms/tiledb/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ float:
name: tiledb-vamana
run_groups:
VAMANA:
# l_build & r_max_degree:
args: [[20, 40, 60]]
# opt_l:
# r_max_degree:
args: [[10, 15, 20, 25, 30, 35, 40]]
# l_search:
query_args: [[1, 5, 10, 30, 50, 70, 90, 110, 130]]

- base_args: ['@metric']
constructor: TileDBIVFPQ
disabled: false
Expand All @@ -45,7 +45,11 @@ float:
name: tiledb-ivf-pq
run_groups:
IVFPQ:
# n_list:
args: [[512, 1024, 2048, 4096, 8192]]
args: [
# n_list:
[512, 1024, 2048, 4096, 8192],
# num_subspaces_divisor (num_subspaces = dimensions / num_subspaces_divisor):
[1, 2, 4, 8]
]
# n_probe:
query_args: [[1, 5, 10, 50, 100, 200]]
29 changes: 16 additions & 13 deletions ann_benchmarks/algorithms/tiledb/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
MAX_UINT64 = np.iinfo(np.dtype("uint64")).max

class TileDB(BaseANN):
def __init__(self, metric, index_type, n_list = -1, l_build = -1, r_max_degree = -1):
def __init__(self, metric, index_type, n_list = -1, l_build = -1, r_max_degree = -1, num_subspaces_divisor = -1):
self._index_type = index_type
self._metric = metric
self._n_list = n_list
self._l_build = l_build
self._r_max_degree = r_max_degree
self._num_subspaces_divisor = num_subspaces_divisor
self._n_probe = -1
self._opt_l = -1
self._l_search = -1

def query(self, v, n):
if self._metric == 'angular':
Expand All @@ -35,7 +36,7 @@ def query(self, v, n):
k=n,
nthreads=multiprocessing.cpu_count(),
nprobe=min(self._n_probe, self._n_list),
opt_l=self._opt_l
l_search=self._l_search
)[1][0]
# Replace MAX_UINT64 ids with 0 to avoid 'OverflowError: Python int too large to convert to C long'.
ids[ids == MAX_UINT64] = 0
Expand All @@ -50,6 +51,7 @@ def batch_query(self, X, n):
k=n,
nthreads=multiprocessing.cpu_count(),
nprobe=min(self._n_probe, self._n_list),
l_search=self._l_search
)[1]
# Replace MAX_UINT64 entries with 0 to avoid 'OverflowError: Python int too large to convert to C long'.
self.res[self.res == MAX_UINT64] = 0
Expand All @@ -70,7 +72,7 @@ def fit(self, X):
partitions=self._n_list,
l_build=self._l_build,
r_max_degree=self._r_max_degree,
num_subspaces=dimensions/2
num_subspaces=dimensions/self._num_subspaces_divisor
)
if self._index_type == "IVF_FLAT":
self.index = IVFFlatIndex(uri=array_uri)
Expand Down Expand Up @@ -111,30 +113,31 @@ def __str__(self):
return 'TileDBFlat()'

class TileDBVamana(TileDB):
def __init__(self, metric, l_build_and_r_max_degree):
def __init__(self, metric, r_max_degree):
super().__init__(
index_type="VAMANA",
metric=metric,
l_build=l_build_and_r_max_degree,
r_max_degree=l_build_and_r_max_degree
l_build=60,
r_max_degree=r_max_degree
)

def set_query_arguments(self, opt_l):
self._opt_l = opt_l
def set_query_arguments(self, l_search):
self._l_search = l_search

def __str__(self):
return 'TileDBVamana(l_build=%d, r_max_degree=%d, opt_l=%d)' % (self._l_build, self._r_max_degree, self._opt_l)
return 'TileDBVamana(l_build=%d, r_max_degree=%d, l_search=%d)' % (self._l_build, self._r_max_degree, self._l_search)

class TileDBIVFPQ(TileDB):
def __init__(self, metric, n_list):
def __init__(self, metric, n_list, num_subspaces_divisor):
super().__init__(
index_type="IVF_PQ",
metric=metric,
n_list=n_list
n_list=n_list,
num_subspaces_divisor=num_subspaces_divisor
)

def set_query_arguments(self, n_probe):
self._n_probe = n_probe

def __str__(self):
return 'TileDBIVFPQ(n_list=%d, n_probe=%d)' % (self._n_list, self._n_probe)
return 'TileDBIVFPQ(n_list=%d, n_probe=%d, num_subspaces_divisor=%d)' % (self._n_list, self._n_probe, self._num_subspaces_divisor)

0 comments on commit b76d670

Please sign in to comment.