Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Memory Evaluation For different algorithm #1139

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion jni/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ if ("${WIN32}" STREQUAL "")
tests/faiss_wrapper_test.cpp
tests/nmslib_wrapper_test.cpp
tests/test_util.cpp)
# Separate executable for the JNI-layer memory evaluation tests.
# NOTE(review): the source listed here is tests/faiss_memory_test.cpp, while this
# change adds jni/tests/memory_test.cpp - confirm the file name matches.
add_executable(
jni_memory_test
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved
tests/faiss_memory_test.cpp
jmazanec15 marked this conversation as resolved.
Show resolved Hide resolved
tests/test_util.cpp)

target_link_libraries(
jni_test
Expand All @@ -199,6 +203,17 @@ if ("${WIN32}" STREQUAL "")
${TARGET_LIB_NMSLIB}
${TARGET_LIB_COMMON}
)
# Link jni_memory_test against GoogleTest/GoogleMock, faiss, nmslib (NonMetricSpaceLib),
# OpenMP and the libraries named by the TARGET_LIB_* variables (defined outside this hunk).
target_link_libraries(
jni_memory_test
gtest_main
gmock_main
faiss
NonMetricSpaceLib
OpenMP::OpenMP_CXX
${TARGET_LIB_FAISS}
${TARGET_LIB_NMSLIB}
${TARGET_LIB_COMMON}
)

target_include_directories(jni_test PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/tests
Expand All @@ -209,9 +224,18 @@ if ("${WIN32}" STREQUAL "")
${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include
${gtest_SOURCE_DIR}/include
${gmock_SOURCE_DIR}/include)

# Header search paths for jni_memory_test: the test helpers, the project's own headers,
# the JNI headers under JAVA_HOME, the bundled faiss and nmslib sources, and
# GoogleTest/GoogleMock.
target_include_directories(jni_memory_test PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/tests
${CMAKE_CURRENT_SOURCE_DIR}/include
$ENV{JAVA_HOME}/include
$ENV{JAVA_HOME}/include/${JVM_OS_TYPE}
${CMAKE_CURRENT_SOURCE_DIR}/external/faiss
${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include
${gtest_SOURCE_DIR}/include
${gmock_SOURCE_DIR}/include)

set_target_properties(jni_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
# Emit the jni_memory_test binary into the bin/ directory next to this CMakeLists.txt.
set_target_properties(jni_memory_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
endif ()
endif()

Expand Down
257 changes: 257 additions & 0 deletions jni/tests/memory_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

#include "faiss_wrapper.h"
#include "nmslib_wrapper.h"

#include <vector>
#include <malloc.h>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "jni_util.h"
#include "test_util.h"
#include "faiss/utils/utils.h"

using ::testing::NiceMock;
using ::testing::Return;
#define GTEST_COUT std::cerr << "[ ] [ INFO ]"

// Memory evaluation (not a pass/fail unit test): builds a Faiss "HNSW32,Flat"
// index over the vectors in dataset/sift/sift_base.fvecs through the JNI wrapper,
// writes it to tmp/FaissHNSWIndexMemoryTest.faiss and prints the memory usage
// reported by faiss::get_mem_usage_kb(). The test body contains no assertions on
// the measured value.
TEST(FaissHNSWIndexMemoryTest, BasicAssertions) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add on all the tests what we are testing and what is the expectation as comments on top of all test function.

Also, I want to understand little bit here in terms of what are the failure scenario for these tests. may be you explained it in older comments, if yes can you point me there.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@navneet1v

These test just evaluate the memory only with EngineWrapper, not unit test.

When we want to introduce a new algorithm or engine, we prefer to evaluate the performance, memory, disk size. in benchmark tests, we can evaluate the performance as a single node.

But we can not evaluate a engine/algorithm takes how much memory, because in benchmark jvm make it hard to evaluate the memory only in jni layer.

so i added these code and want to evaluate different algorithm/engine in different param, at index time, query time memory usage, and time usage.


// Load the whole dataset into one flat buffer; load_data fills in the point
// count and the dimension.
char dataset[] = "dataset/sift/sift_base.fvecs";
float* data_load = NULL;
unsigned points_num, dim;
test_util::load_data(dataset, data_load, points_num, dim);

GTEST_COUT << "points_num:"<< points_num << " data dimension:" << dim << std::endl;
float* dataptr = data_load;

// Copy the flat buffer into per-point vectors with ids 0..points_num-1, then
// release the flat buffer so it is not part of the later measurement.
std::vector<std::vector<float>> vectors;
std::vector<int> ids(points_num);
test_util::set_vectors(vectors, ids, points_num, dim, dataptr);
free(data_load);

std::string indexPath = "tmp/FaissHNSWIndexMemoryTest.faiss";
std::string spaceType = knn_jni::L2;
std::string index_description = "HNSW32,Flat";
int thread_num = 7;

// Index parameters are handed over as addresses of local objects cast to jobject.
// NOTE(review): presumably MockJNIUtil casts them back to the concrete types - confirm.
std::unordered_map<std::string, jobject> parametersMap;
parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType;
parametersMap[knn_jni::INDEX_DESCRIPTION] = (jobject)&index_description;
parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = (jobject)&thread_num;

// Set up jni
JNIEnv *jniEnv = nullptr;
NiceMock<test_util::MockJNIUtil> mockJNIUtil;

// The mock reports the sizes of the local containers as the Java array lengths.
EXPECT_CALL(mockJNIUtil,
GetJavaObjectArrayLength(
jniEnv, reinterpret_cast<jobjectArray>(&vectors)))
.WillRepeatedly(Return(vectors.size()));

EXPECT_CALL(mockJNIUtil,
GetJavaIntArrayLength(jniEnv, reinterpret_cast<jintArray>(&ids)))
.WillRepeatedly(Return(ids.size()));

// Create the index
knn_jni::faiss_wrapper::CreateIndex(
&mockJNIUtil, jniEnv, reinterpret_cast<jintArray>(&ids),
reinterpret_cast<jobjectArray>(&vectors), (jstring)&indexPath,
(jobject)&parametersMap);

// Clean up: drop the input copies before sampling memory.
ids.clear();
ids.shrink_to_fit();
vectors.clear();
vectors.shrink_to_fit();

// Divide the KB figure by 1024 to print megabytes.
// NOTE(review): NmslibHNSWIndexMemoryTest and FaissNSGIndexMemoryTest call
// malloc_trim(0) before sampling; this test does not - confirm that is intended.
size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10);

GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl;
}

// Memory evaluation (not a pass/fail unit test): builds an nmslib HNSW index
// over the vectors in dataset/sift/sift_base.fvecs through the JNI wrapper,
// writes it to tmp/NmslibHNSWIndexMemoryTest.hnsw and prints the memory usage
// reported by faiss::get_mem_usage_kb() after trimming the allocator.
TEST(NmslibHNSWIndexMemoryTest, BasicAssertions) {
    similarity::initLibrary();

    // Read the dataset into one flat buffer.
    char siftBasePath[] = "dataset/sift/sift_base.fvecs";
    float* flatData = nullptr;
    unsigned pointCount;
    unsigned dimension;
    test_util::load_data(siftBasePath, flatData, pointCount, dimension);

    GTEST_COUT << "points_num:" << pointCount << " data dimension:" << dimension << std::endl;

    // Split the flat buffer into per-point vectors with sequential ids, then
    // release the flat buffer so it does not count towards the measurement.
    std::vector<std::vector<float>> vectors;
    std::vector<int> ids(pointCount);
    test_util::set_vectors(vectors, ids, pointCount, dimension, flatData);
    free(flatData);

    // Index settings; each one is handed to the wrapper as an address cast to jobject.
    std::string indexPath = "tmp/NmslibHNSWIndexMemoryTest.hnsw";
    std::string spaceType = knn_jni::L2;
    int threadCount = 7;
    int efConstruction = 512;
    int efSearch = 512;
    int m = 32;

    std::unordered_map<std::string, jobject> parametersMap;
    parametersMap[knn_jni::SPACE_TYPE] = reinterpret_cast<jobject>(&spaceType);
    parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = reinterpret_cast<jobject>(&threadCount);
    parametersMap[knn_jni::EF_CONSTRUCTION] = reinterpret_cast<jobject>(&efConstruction);
    parametersMap[knn_jni::EF_SEARCH] = reinterpret_cast<jobject>(&efSearch);
    parametersMap[knn_jni::M] = reinterpret_cast<jobject>(&m);

    // Mocked JNI layer: the local containers stand in for the Java arrays.
    JNIEnv* jniEnv = nullptr;
    NiceMock<test_util::MockJNIUtil> mockJNIUtil;
    jobjectArray vectorsHandle = reinterpret_cast<jobjectArray>(&vectors);
    jintArray idsHandle = reinterpret_cast<jintArray>(&ids);

    EXPECT_CALL(mockJNIUtil, GetJavaObjectArrayLength(jniEnv, vectorsHandle))
        .WillRepeatedly(Return(vectors.size()));
    EXPECT_CALL(mockJNIUtil, GetJavaIntArrayLength(jniEnv, idsHandle))
        .WillRepeatedly(Return(ids.size()));

    // Build the index and write it to indexPath.
    knn_jni::nmslib_wrapper::CreateIndex(
        &mockJNIUtil, jniEnv, idsHandle, vectorsHandle,
        reinterpret_cast<jstring>(&indexPath),
        reinterpret_cast<jobject>(&parametersMap));

    // Drop the input copies before sampling memory.
    ids.clear();
    ids.shrink_to_fit();
    vectors.clear();
    vectors.shrink_to_fit();

    // Ask the allocator to return free memory to the OS, then report in megabytes.
    malloc_trim(0);
    size_t usedMb = faiss::get_mem_usage_kb() / (1 << 10);

    GTEST_COUT << "======Memory Usage:[" << usedMb << "mb]======" << std::endl;
}

// Memory evaluation (not a pass/fail unit test): builds a Faiss "NSG64,Flat"
// index over the vectors in dataset/sift/sift_base.fvecs through the JNI wrapper,
// writes it to tmp/FaissNSGIndexMemoryTest.faiss and prints the memory usage
// reported by faiss::get_mem_usage_kb() after trimming the allocator.
TEST(FaissNSGIndexMemoryTest, BasicAssertions) {
    // Read the dataset into one flat buffer.
    char siftBasePath[] = "dataset/sift/sift_base.fvecs";
    float* flatData = nullptr;
    unsigned pointCount;
    unsigned dimension;
    test_util::load_data(siftBasePath, flatData, pointCount, dimension);

    GTEST_COUT << "points_num:" << pointCount << " data dimension:" << dimension << std::endl;

    // Split the flat buffer into per-point vectors with sequential ids, then
    // release the flat buffer so it does not count towards the measurement.
    std::vector<std::vector<float>> vectors;
    std::vector<int> ids(pointCount);
    test_util::set_vectors(vectors, ids, pointCount, dimension, flatData);
    free(flatData);

    // Index settings; each one is handed to the wrapper as an address cast to jobject.
    std::string indexPath = "tmp/FaissNSGIndexMemoryTest.faiss";
    std::string spaceType = knn_jni::L2;
    std::string indexDescription = "NSG64,Flat";
    int threadCount = 7;

    std::unordered_map<std::string, jobject> parametersMap;
    parametersMap[knn_jni::SPACE_TYPE] = reinterpret_cast<jobject>(&spaceType);
    parametersMap[knn_jni::INDEX_DESCRIPTION] = reinterpret_cast<jobject>(&indexDescription);
    parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = reinterpret_cast<jobject>(&threadCount);

    // Mocked JNI layer: the local containers stand in for the Java arrays.
    JNIEnv* jniEnv = nullptr;
    NiceMock<test_util::MockJNIUtil> mockJNIUtil;
    jobjectArray vectorsHandle = reinterpret_cast<jobjectArray>(&vectors);
    jintArray idsHandle = reinterpret_cast<jintArray>(&ids);

    EXPECT_CALL(mockJNIUtil, GetJavaObjectArrayLength(jniEnv, vectorsHandle))
        .WillRepeatedly(Return(vectors.size()));
    EXPECT_CALL(mockJNIUtil, GetJavaIntArrayLength(jniEnv, idsHandle))
        .WillRepeatedly(Return(ids.size()));

    // Build the index and write it to indexPath. The index is not loaded back
    // here; FaissNSGQueryMemoryTest reads the same path.
    knn_jni::faiss_wrapper::CreateIndex(
        &mockJNIUtil, jniEnv, idsHandle, vectorsHandle,
        reinterpret_cast<jstring>(&indexPath),
        reinterpret_cast<jobject>(&parametersMap));

    // Drop the input copies before sampling memory.
    ids.clear();
    ids.shrink_to_fit();
    vectors.clear();
    vectors.shrink_to_fit();

    // Ask the allocator to return free memory to the OS, then report in megabytes.
    malloc_trim(0);
    size_t usedMb = faiss::get_mem_usage_kb() / (1 << 10);
    GTEST_COUT << "======Memory Usage:[" << usedMb << "mb]======" << std::endl;
}

// Memory evaluation (not a pass/fail unit test): loads the index file that
// FaissNSGIndexMemoryTest writes, runs a single query with an all-zero
// 128-float vector and 10-slot result buffers, and prints the memory usage
// reported by faiss::get_mem_usage_kb().
TEST(FaissNSGQueryMemoryTest, BasicAssertions) {
    std::string nsgIndexFile = "tmp/FaissNSGIndexMemoryTest.faiss";
    std::unique_ptr<faiss::Index> nsgIndex(test_util::FaissLoadIndex(nsgIndexFile));

    // Zero-initialised query; the result buffers are filled by the query call.
    float zeroQuery[128] = {};
    float resultDistances[10];
    faiss::idx_t resultIds[10];
    test_util::FaissQueryIndex(nsgIndex.get(), zeroQuery, 10, resultDistances, resultIds);

    // Sampled while the index is still loaded; KB divided by 1024 gives megabytes.
    size_t usedMb = faiss::get_mem_usage_kb() / (1 << 10);
    GTEST_COUT << "======Memory Usage:[" << usedMb << "mb]======" << std::endl;
}

// Memory evaluation (not a pass/fail unit test): loads the index file that
// FaissHNSWIndexMemoryTest writes, runs a single query with an all-zero
// 128-float vector and 10-slot result buffers, and prints the memory usage
// reported by faiss::get_mem_usage_kb().
TEST(FaissHNSWQueryMemoryTest, BasicAssertions) {
    std::string hnswIndexFile = "tmp/FaissHNSWIndexMemoryTest.faiss";
    std::unique_ptr<faiss::Index> hnswIndex(test_util::FaissLoadIndex(hnswIndexFile));

    // Zero-initialised query; the result buffers are filled by the query call.
    float zeroQuery[128] = {};
    float resultDistances[10];
    faiss::idx_t resultIds[10];
    test_util::FaissQueryIndex(hnswIndex.get(), zeroQuery, 10, resultDistances, resultIds);

    // Sampled while the index is still loaded; KB divided by 1024 gives megabytes.
    size_t usedMb = faiss::get_mem_usage_kb() / (1 << 10);
    GTEST_COUT << "======Memory Usage:[" << usedMb << "mb]======" << std::endl;
}

// Memory evaluation (not a pass/fail unit test): loads an nmslib HNSW index
// from disk through the JNI wrapper and prints the memory usage reported by
// faiss::get_mem_usage_kb() while the index is still loaded.
//
// NOTE(review): despite the test name, no query is issued against the loaded
// index; only the load-time footprint is measured. The query buffers that were
// declared but never used have been removed.
TEST(LIBHNSWQueryMemoryTest, BasicAssertions) {
    similarity::initLibrary();

    // Load the file that NmslibHNSWIndexMemoryTest writes. The path used before
    // ("tmp/LibHNSWIndexMemoryTest2.faiss") is not produced by any test in this file.
    std::string indexPath = "tmp/NmslibHNSWIndexMemoryTest.hnsw";
    std::string spaceType = knn_jni::L2;
    int thread_num = 7;
    int efConstruction = 512;
    int efSearch = 512;
    int m = 32;

    // Parameters are handed to the wrapper as addresses of locals cast to jobject.
    std::unordered_map<std::string, jobject> parametersMap;
    parametersMap[knn_jni::SPACE_TYPE] = reinterpret_cast<jobject>(&spaceType);
    parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = reinterpret_cast<jobject>(&thread_num);
    parametersMap[knn_jni::EF_CONSTRUCTION] = reinterpret_cast<jobject>(&efConstruction);
    parametersMap[knn_jni::EF_SEARCH] = reinterpret_cast<jobject>(&efSearch);
    parametersMap[knn_jni::M] = reinterpret_cast<jobject>(&m);

    JNIEnv *jniEnv = nullptr;
    NiceMock<test_util::MockJNIUtil> mockJNIUtil;

    // The unique_ptr keeps the index alive until the end of the test, so the
    // sample below is taken with the index loaded.
    std::unique_ptr<knn_jni::nmslib_wrapper::IndexWrapper> loadedIndex(
        reinterpret_cast<knn_jni::nmslib_wrapper::IndexWrapper *>(
            knn_jni::nmslib_wrapper::LoadIndex(&mockJNIUtil, jniEnv,
                                               reinterpret_cast<jstring>(&indexPath),
                                               reinterpret_cast<jobject>(&parametersMap))));

    // KB divided by 1024 gives megabytes.
    size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10);
    GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl;
}
39 changes: 39 additions & 0 deletions jni/tests/test_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <jni.h>

#include <random>
#include <stdio.h>
#include <utility>

#include "faiss/Index.h"
Expand Down Expand Up @@ -347,3 +348,41 @@ float test_util::RandomFloat(float min, float max) {
std::uniform_real_distribution<float> distribution(min, max);
return distribution(e1);
}

// Reads a binary vector file into one contiguous buffer of floats.
//
// The file is interpreted as a sequence of fixed-size records, each made of a
// 4-byte dimension field followed by `dim` 4-byte float values. Only the first
// record's dimension field is read; every record is assumed to have the same
// dimension.
//
// Parameters:
//   filename - path of the file to read.
//   data     - out: buffer holding num * dim floats. It is allocated with
//              malloc() because the memory tests release it with free().
//   num      - out: number of complete records, derived from the file size.
//   dim      - out: vector dimension taken from the first 4 bytes of the file.
//
// On failure (file cannot be opened, invalid header, allocation failure or a
// short read) a message is printed and the process exits with status -1.
void test_util::load_data(char* filename, float*& data, unsigned& num, unsigned& dim) {
    static_assert(sizeof(float) == 4, "the file format stores 4-byte floats");

    std::ifstream in(filename, std::ios::binary);
    if (!in.is_open()) {
        std::cout << "open file error" << std::endl;
        exit(-1);
    }

    // Shared exit path for the failures that were previously unchecked.
    const auto fail = [](const char* message) {
        std::cout << message << std::endl;
        exit(-1);
    };

    // The first 4 bytes hold the dimension shared by all records.
    dim = 0;
    in.read(reinterpret_cast<char*>(&dim), 4);
    if (!in || dim == 0) {
        fail("read file error: invalid dimension header");
    }

    // Derive the record count from the total file size.
    in.seekg(0, std::ios::end);
    const std::streamoff file_bytes = in.tellg();
    if (file_bytes < 0) {
        fail("read file error: cannot determine file size");
    }
    const size_t record_bytes = (static_cast<size_t>(dim) + 1) * 4;
    num = static_cast<unsigned>(static_cast<size_t>(file_bytes) / record_bytes);

    // Allocate with malloc so the callers' free() is the matching deallocator.
    // At least one element is requested so an empty data set still yields a
    // non-null buffer.
    const size_t value_count = static_cast<size_t>(num) * static_cast<size_t>(dim);
    data = static_cast<float*>(malloc((value_count != 0 ? value_count : 1) * sizeof(float)));
    if (data == nullptr) {
        fail("read file error: out of memory");
    }

    // Copy the payload of each record, skipping its 4-byte dimension field.
    in.seekg(0, std::ios::beg);
    for (size_t i = 0; i < num; ++i) {
        in.seekg(4, std::ios::cur);
        in.read(reinterpret_cast<char*>(data + i * dim),
                static_cast<std::streamsize>(dim) * 4);
    }
    if (!in) {
        free(data);
        data = nullptr;
        fail("read file error: unexpected end of file");
    }
    in.close();
}

// Splits a flat buffer of points_num * dim floats into one vector per point and
// assigns the ids 0 .. points_num - 1.
//
// Parameters:
//   vectors    - receives one std::vector<float> of `dim` values per point; the
//                new rows are appended to whatever the container already holds.
//   ids        - resized to points_num and filled with 0, 1, 2, ...
//   points_num - number of points stored in the buffer.
//   dim        - number of floats per point; a non-positive value yields empty rows.
//   dataptr    - start of the flat buffer, laid out point after point.
void test_util::set_vectors(std::vector<std::vector<float>>& vectors,
                            std::vector<int>& ids,
                            int points_num,
                            int dim,
                            float* dataptr) {
    ids.resize(points_num);

    // Row width in elements; clamped so a non-positive dim produces empty rows,
    // as the element-by-element loop did.
    const size_t width = dim > 0 ? static_cast<size_t>(dim) : 0;

    // Reserve once and build each row in place, avoiding repeated reallocation
    // and the extra copy of every row.
    if (points_num > 0) {
        vectors.reserve(vectors.size() + static_cast<size_t>(points_num));
    }
    for (int i = 0; i < points_num; ++i) {
        ids[i] = i;
        vectors.emplace_back(dataptr, dataptr + width);
        dataptr += width;
    }
}
10 changes: 10 additions & 0 deletions jni/tests/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,16 @@ namespace test_util {

float RandomFloat(float min, float max);

// Read vector file formats
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comment about the format the data is expected to be in

// Loads a binary vector file into a newly allocated, contiguous float buffer.
// Expected layout, as read by the implementation: a sequence of records, each a
// 4-byte dimension field followed by `dim` 4-byte float values. The dimension is
// taken from the first record and assumed to be the same for every record.
//   filename - path of the file to read
//   data     - receives the buffer holding num * dim floats; the caller owns it
//   num      - receives the number of vectors, derived from the file size
//   dim      - receives the dimension read from the first 4 bytes of the file
void load_data(char* filename, float*& data, unsigned& num, unsigned& dim);

// assign data into vector
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: asign -> assign. Also, can we add more detail about how this function should be used in the comment?

// Converts a flat buffer (for example the one produced by load_data) into one
// std::vector<float> per point and generates matching ids.
//   vectors    - receives points_num rows of dim floats each, appended in order
//   ids        - resized to points_num and filled with 0 .. points_num - 1
//   points_num - number of points stored in the buffer
//   dim        - number of floats per point
//   dataptr    - start of the flat buffer holding points_num * dim floats
void set_vectors(std::vector<std::vector<float>>& vectors,
std::vector<int>& ids,
int points_num,
int dim,
float* dataptr);

// -------------------------------------------------------------------------------
} // namespace test_util

Expand Down