Skip to content

Commit

Permalink
add dataLevel0BlocksMemory test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Axlgrep committed Jul 27, 2024
1 parent 51450b0 commit 58b7344
Show file tree
Hide file tree
Showing 7 changed files with 259 additions and 16 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ if(HNSWLIB_EXAMPLES)
add_executable(multiThread_replace_test tests/cpp/multiThread_replace_test.cpp)
target_link_libraries(multiThread_replace_test hnswlib)

add_executable(dataLevel0BlocksMemory_test tests/cpp/dataLevel0BlocksMemory_test.cpp)
target_link_libraries(dataLevel0BlocksMemory_test hnswlib)

add_executable(main tests/cpp/main.cpp tests/cpp/sift_1b.cpp)
target_link_libraries(main hnswlib)
endif()
175 changes: 175 additions & 0 deletions tests/cpp/dataLevel0BlocksMemory_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#include "../../hnswlib/hnswlib.h"

#include <assert.h>

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>

namespace {

// ---- Parameters forwarded to the HierarchicalNSW constructor ----
constexpr size_t M{32};
constexpr size_t ef_construction{500};
constexpr size_t random_seed{100};
constexpr bool allow_replace_deleted{false};

// ---- Workload shape ----
constexpr size_t dimension{1024};            // floats per vector
constexpr size_t total_items{100 * 10000};   // vectors inserted into the index
constexpr size_t num_query{500 * 10000};     // query vectors searched
size_t topk{10};                             // neighbours requested per search
constexpr size_t max_thread_num{48};         // upper bound on worker threads
const std::string index_path{"./hnsw.index"};  // scratch file used for save/load

// Flat storage for the generated vectors: vector i occupies the element range
// [i * dimension, (i + 1) * dimension). `data` holds the vectors inserted into
// the index and `query` holds the vectors used for searching. Both are sized
// here and filled with random values in main().
std::vector<float> data(total_items * dimension);
std::vector<float> query(num_query * dimension);


// For every query vector, runs both searchKnn() and searchKnnCloserFirst() with
// the same `topk` and asserts that the two results hold the same entries, with
// the queue's top matching the last element of the closer-first vector, the
// next top matching the second-to-last, and so on.
//
// alg_hnsw: index to query; it is only searched, never modified here.
void check_knn_closer(hnswlib::AlgorithmInterface<float>* alg_hnsw) {
    for (size_t q = 0; q < num_query; ++q) {
        const void* query_vec = query.data() + q * dimension;

        auto heap_result = alg_hnsw->searchKnn(query_vec, topk);
        const auto ordered_result = alg_hnsw->searchKnnCloserFirst(query_vec, topk);
        assert(heap_result.size() == ordered_result.size());

        // Drain the queue while walking the ordered vector from back to front.
        for (size_t remaining = heap_result.size(); !heap_result.empty(); heap_result.pop()) {
            --remaining;
            assert(heap_result.top() == ordered_result[remaining]);
        }
    }
    std::cout << "test hnsw search knn closer first success..." << std::endl;
}

void test_compatibility(bool hnsw_first_use_blocks_memory,
bool hnsw_second_use_blocks_memory) {

std::cout << "================== test compatibility ==================" << std::endl;
hnswlib::L2Space space(dimension);
hnswlib::AlgorithmInterface<float>* alg_hnsw_first = new hnswlib::HierarchicalNSW<float>(&space, 2 * total_items,
M, ef_construction, random_seed, allow_replace_deleted, hnsw_first_use_blocks_memory);

for (size_t i = 0; i < total_items; ++i) {
alg_hnsw_first->addPoint(data.data() + dimension * i, i);
}
check_knn_closer(alg_hnsw_first);

// save hnsw index
std::remove(index_path.data());
alg_hnsw_first->saveIndex(index_path);
std::cout << "save hnsw(use_small_blocks_memory = " << hnsw_first_use_blocks_memory << ") index success" << std::endl;
delete alg_hnsw_first;

// load hnsw index
hnswlib::AlgorithmInterface<float>* alg_hnsw_second = new hnswlib::HierarchicalNSW<float>(&space, false,
0, allow_replace_deleted, hnsw_second_use_blocks_memory);
std::cout << "load hnsw(use_small_blocks_memory = " << hnsw_second_use_blocks_memory << ") index success" << std::endl;
std::remove(index_path.data());
check_knn_closer(alg_hnsw_second);

delete alg_hnsw_second;
}

// Outcome of one timed, multi-threaded pass over an index range.
struct BatchRunStats {
    size_t threads_started;  // worker threads actually launched
    double seconds;          // time from launching the first worker to joining the last
};

// Calls fn(i) for every i in [0, count), split into contiguous batches with one
// thread per batch and at most max_thread_num threads. Returns how many threads
// were started and how long the whole pass took.
template <typename Fn>
BatchRunStats run_in_batches(size_t count, Fn fn) {
    BatchRunStats stats{0, 0.0};
    if (count == 0) {
        return stats;
    }

    const size_t wanted_threads = std::min(count, max_thread_num);
    // Ceiling division: never produces more than wanted_threads batches.
    const size_t batch_size = (count + wanted_threads - 1) / wanted_threads;

    std::vector<std::thread> workers;
    workers.reserve(wanted_threads);

    // steady_clock is monotonic, so the interval is immune to wall-clock adjustments.
    const auto started_at = std::chrono::steady_clock::now();
    for (size_t first = 0; first < count; first += batch_size) {
        const size_t last = std::min(first + batch_size, count);
        workers.emplace_back([fn, first, last] {
            for (size_t i = first; i < last; ++i) {
                fn(i);
            }
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    const auto finished_at = std::chrono::steady_clock::now();

    stats.threads_started = workers.size();
    stats.seconds = std::chrono::duration<double>(finished_at - started_at).count();
    return stats;
}

// Prints the "cost ... qps ... latency" tail shared by both benchmark phases.
// `latency` is total elapsed milliseconds divided by the number of operations.
void print_throughput(const BatchRunStats& stats, size_t count) {
    const size_t qps = (stats.seconds == 0 ? count : static_cast<size_t>(count / stats.seconds));
    const double latency = (count == 0 ? 0 : stats.seconds * 1000.0 / count);
    std::cout << "cost " << stats.seconds << " seconds, qps: " << qps
              << ", latency: " << latency << "ms" << std::endl;
}

// Benchmarks multi-threaded insertion of `data` followed by multi-threaded
// searchKnn over `query`, printing timing for each phase.
//
// use_small_blocks_memory: allocator flag forwarded to the HierarchicalNSW constructor.
void test_performace(bool use_small_blocks_memory) {
    if (total_items == 0) {
        return;
    }

    std::cout << "================== test performance("
              << "dimension: " << dimension
              << ", M: " << M
              << ", ef_construction: " << ef_construction
              << ", topk: " << topk
              << ", use_small_blocks_memory: " << (use_small_blocks_memory ? "true" : "false")
              << ") ==================" << std::endl;

    hnswlib::L2Space space(dimension);
    hnswlib::HierarchicalNSW<float> index(&space, 2 * total_items,
        M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);
    hnswlib::HierarchicalNSW<float>* alg_hnsw = &index;

    // Phase 1: build the index.
    const BatchRunStats build_stats = run_in_batches(total_items, [alg_hnsw](size_t i) {
        alg_hnsw->addPoint(data.data() + i * dimension, i);
    });
    std::cout << "Start " << build_stats.threads_started << " thread to add " << total_items
              << " items to hnsw index, ";
    print_throughput(build_stats, total_items);

    // Phase 2: query the index; results are discarded, only the time matters.
    const BatchRunStats search_stats = run_in_batches(num_query, [alg_hnsw](size_t i) {
        const void* p = query.data() + i * dimension;
        alg_hnsw->searchKnn(p, topk);
    });
    std::cout << "Start " << search_stats.threads_started << " thread to exec " << num_query
              << " searchKnn, ";
    print_throughput(search_stats, num_query);
}

} // namespace

// Fills the global `data` and `query` buffers with random values, then runs the
// save/load compatibility test for every combination of allocator modes and
// the performance test for each mode.
int main() {
    // Fixed seed: the same generator stream feeds `data` first, then `query`.
    std::mt19937 rng(47);
    std::uniform_real_distribution<> distrib;

    for (float& value : data) {
        value = distrib(rng);
    }
    for (float& value : query) {
        value = distrib(rng);
    }

    // {mode used when saving, mode used when loading}
    const bool mode_pairs[][2] = {
        {true, true},
        {false, false},
        {true, false},
        {false, true},
    };
    for (const auto& modes : mode_pairs) {
        test_compatibility(modes[0], modes[1]);
    }

    test_performace(true);
    test_performace(false);

    return 0;
}
18 changes: 15 additions & 3 deletions tests/cpp/epsilon_search_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
typedef unsigned int docidtype;
typedef float dist_t;

int main() {
void test(bool use_small_blocks_memory) {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = false;

int num_queries = 100;
float epsilon2 = 1.0; // Squared distance to query
Expand All @@ -20,7 +22,8 @@ int main() {
// Initing index
hnswlib::L2Space space(dim);
hnswlib::BruteforceSearch<dist_t>* alg_brute = new hnswlib::BruteforceSearch<dist_t>(&space, max_elements);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

// Generate random data
std::mt19937 rng;
Expand Down Expand Up @@ -110,5 +113,14 @@ int main() {
delete[] data;
delete alg_brute;
delete alg_hnsw;
return 0;
}

// Runs the test once per allocator mode: default first, then block memory.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_small_blocks_memory : allocator_modes) {
        const char* banner = use_small_blocks_memory
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_small_blocks_memory);
        std::cout << "Test ok" << std::endl;
    }
}
21 changes: 18 additions & 3 deletions tests/cpp/multiThreadLoad_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
#include <chrono>


int main() {
void test(bool use_small_blocks_memory) {
std::cout << "Running multithread load test" << std::endl;
size_t M = 16;
size_t ef_construction = 200;
size_t random_seed = 100;
bool allow_replace_deleted = false;

int d = 16;
int max_elements = 1000;

Expand All @@ -13,7 +18,8 @@ int main() {
std::uniform_real_distribution<> distrib_real;

hnswlib::L2Space space(d);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * max_elements);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

std::cout << "Building index" << std::endl;
int num_threads = 40;
Expand Down Expand Up @@ -136,5 +142,14 @@ int main() {
}

std::cout << "Finish" << std::endl;
return 0;
}

// Runs the multithread load test once per allocator mode: default first, then
// block memory.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_small_blocks_memory : allocator_modes) {
        const char* banner = use_small_blocks_memory
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_small_blocks_memory);
        std::cout << "Test ok" << std::endl;
    }
}
22 changes: 19 additions & 3 deletions tests/cpp/multiThread_replace_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn
}


int main() {
void test(bool use_small_blocks_memory) {
std::cout << "Running multithread load test" << std::endl;
int d = 16;
int num_elements = 1000;
int max_elements = 2 * num_elements;
int num_threads = 50;

int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = true;

std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
Expand All @@ -90,7 +96,7 @@ int main() {

int iter = 0;
while (iter < 200) {
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, 16, 200, 123, true);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 123, true, use_small_blocks_memory);

// add batch1 data
ParallelFor(0, max_elements, num_threads, [&](size_t row, size_t threadId) {
Expand All @@ -117,5 +123,15 @@ int main() {

delete[] batch1;
delete[] batch2;
return 0;
}


// Runs the multithread replace test once per allocator mode: default first,
// then block memory.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_small_blocks_memory : allocator_modes) {
        const char* banner = use_small_blocks_memory
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_small_blocks_memory);
        std::cout << "Test ok" << std::endl;
    }
}
18 changes: 15 additions & 3 deletions tests/cpp/multivector_search_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
typedef unsigned int docidtype;
typedef float dist_t;

int main() {
void test(bool use_small_blocks_memory) {
int dim = 16; // Dimension of the elements
int max_elements = 1000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
size_t random_seed = 100;
bool allow_replace_deleted = false;

int num_queries = 100;
int num_docs = 10; // Number of documents to search
Expand All @@ -21,7 +23,8 @@ int main() {
// Initing index
hnswlib::MultiVectorL2Space<docidtype> space(dim);
hnswlib::BruteforceSearch<dist_t>* alg_brute = new hnswlib::BruteforceSearch<dist_t>(&space, max_elements);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

// Generate random data
std::mt19937 rng;
Expand Down Expand Up @@ -122,5 +125,14 @@ int main() {
delete[] data;
delete alg_brute;
delete alg_hnsw;
return 0;
}

// Runs the multivector search test once per allocator mode: default first,
// then block memory.
int main() {
    const bool allocator_modes[] = {false, true};
    for (const bool use_small_blocks_memory : allocator_modes) {
        const char* banner = use_small_blocks_memory
            ? "Testing with use block memory allocator..."
            : "Testing with use default memory allocator...";
        std::cout << banner << std::endl;
        test(use_small_blocks_memory);
        std::cout << "Test ok" << std::endl;
    }
}
18 changes: 14 additions & 4 deletions tests/cpp/searchKnnCloserFirst_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ namespace {

using idx_t = hnswlib::labeltype;

void test() {
void test(bool use_small_blocks_memory) {
size_t M = 16;
size_t ef_construction = 200;
size_t random_seed = 100;
bool allow_replace_deleted = false;

int d = 4;
idx_t n = 100;
idx_t nq = 10;
Expand All @@ -36,7 +41,8 @@ void test() {

hnswlib::L2Space space(d);
hnswlib::AlgorithmInterface<float>* alg_brute = new hnswlib::BruteforceSearch<float>(&space, 2 * n);
hnswlib::AlgorithmInterface<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * n);
hnswlib::AlgorithmInterface<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * n,
M, ef_construction, random_seed, allow_replace_deleted, use_small_blocks_memory);

for (size_t i = 0; i < n; ++i) {
alg_brute->addPoint(data.data() + d * i, i);
Expand Down Expand Up @@ -74,8 +80,12 @@ void test() {
} // namespace

int main() {
std::cout << "Testing ..." << std::endl;
test();
std::cout << "Testing with use default memory allocator..." << std::endl;
test(false);
std::cout << "Test ok" << std::endl;

std::cout << "Testing with use block memory allocator..." << std::endl;
test(true);
std::cout << "Test ok" << std::endl;

return 0;
Expand Down

0 comments on commit 58b7344

Please sign in to comment.