diff --git a/jni/CMakeLists.txt b/jni/CMakeLists.txt index 29a844ee0..cbfeb4b96 100644 --- a/jni/CMakeLists.txt +++ b/jni/CMakeLists.txt @@ -187,6 +187,10 @@ if ("${WIN32}" STREQUAL "") tests/faiss_wrapper_test.cpp tests/nmslib_wrapper_test.cpp tests/test_util.cpp) + add_executable( + jni_memory_test + tests/memory_test.cpp + tests/test_util.cpp) target_link_libraries( jni_test @@ -199,6 +203,17 @@ if ("${WIN32}" STREQUAL "") ${TARGET_LIB_NMSLIB} ${TARGET_LIB_COMMON} ) + target_link_libraries( + jni_memory_test + gtest_main + gmock_main + faiss + NonMetricSpaceLib + OpenMP::OpenMP_CXX + ${TARGET_LIB_FAISS} + ${TARGET_LIB_NMSLIB} + ${TARGET_LIB_COMMON} + ) target_include_directories(jni_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests @@ -209,9 +224,18 @@ if ("${WIN32}" STREQUAL "") ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include ${gtest_SOURCE_DIR}/include ${gmock_SOURCE_DIR}/include) - + target_include_directories(jni_memory_test PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/tests + ${CMAKE_CURRENT_SOURCE_DIR}/include + $ENV{JAVA_HOME}/include + $ENV{JAVA_HOME}/include/${JVM_OS_TYPE} + ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss + ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include + ${gtest_SOURCE_DIR}/include + ${gmock_SOURCE_DIR}/include) set_target_properties(jni_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin) + set_target_properties(jni_memory_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin) endif () endif() diff --git a/jni/tests/memory_test.cpp b/jni/tests/memory_test.cpp new file mode 100644 index 000000000..0bf817dc3 --- /dev/null +++ b/jni/tests/memory_test.cpp @@ -0,0 +1,255 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +#include "faiss_wrapper.h" +#include "nmslib_wrapper.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "jni_util.h" +#include "test_util.h" +#include "faiss/utils/utils.h" + +using ::testing::NiceMock; +using ::testing::Return; +#define GTEST_COUT std::cerr << "[ ] [ INFO ]" + +TEST(FaissHNSWIndexMemoryTest, BasicAssertions) { + + char dataset[] = "dataset/sift/sift_base.fvecs"; + float* data_load = NULL; + unsigned points_num, dim; + test_util::load_data(dataset, data_load, points_num, dim); + + GTEST_COUT << "points_num:"<< points_num << " data dimension:" << dim << std::endl; + float* dataptr = data_load; + + std::vector> vectors; + std::vector ids(points_num); + test_util::set_vectors(vectors, ids, points_num, dim, dataptr); + free(data_load); + + std::string indexPath = "tmp/FaissHNSWIndexMemoryTest.faiss"; + std::string spaceType = knn_jni::L2; + std::string index_description = "HNSW32,Flat"; + int thread_num = 7; + + std::unordered_map parametersMap; + parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType; + parametersMap[knn_jni::INDEX_DESCRIPTION] = (jobject)&index_description; + parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = (jobject)&thread_num; + + // Set up jni + JNIEnv *jniEnv = nullptr; + NiceMock mockJNIUtil; + + EXPECT_CALL(mockJNIUtil, + GetJavaObjectArrayLength( + jniEnv, reinterpret_cast(&vectors))) + .WillRepeatedly(Return(vectors.size())); + + EXPECT_CALL(mockJNIUtil, + GetJavaIntArrayLength(jniEnv, reinterpret_cast(&ids))) + .WillRepeatedly(Return(ids.size())); + + // Create the index + knn_jni::faiss_wrapper::CreateIndex( + &mockJNIUtil, jniEnv, reinterpret_cast(&ids), + reinterpret_cast(&vectors), (jstring)&indexPath, + (jobject)¶metersMap); + + // Clean up + ids.clear(); + ids.shrink_to_fit(); + vectors.clear(); + vectors.shrink_to_fit(); + + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} + +TEST(NmslibHNSWIndexMemoryTest, BasicAssertions) { + + similarity::initLibrary(); + char dataset[] = "dataset/sift/sift_base.fvecs"; + float* data_load = NULL; + unsigned points_num, dim; + test_util::load_data(dataset, data_load, points_num, dim); + + GTEST_COUT << "points_num:"<< points_num << " data dimension:" << dim << std::endl; + float* dataptr = data_load; + + std::vector> vectors; + std::vector ids(points_num); + test_util::set_vectors(vectors, ids, points_num, dim, dataptr); + free(data_load); + + std::string indexPath = "tmp/NmslibHNSWIndexMemoryTest.hnsw"; + std::string spaceType = knn_jni::L2; + int thread_num = 7; + int efConstruction = 512; + int efSearch = 512; + int m = 32; + + std::unordered_map parametersMap; + parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType; + parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = (jobject)&thread_num; + parametersMap[knn_jni::EF_CONSTRUCTION] = (jobject)&efConstruction; + parametersMap[knn_jni::EF_SEARCH] = (jobject)&efSearch; + parametersMap[knn_jni::M] = (jobject)&m; + + // Set up jni + JNIEnv *jniEnv = nullptr; + NiceMock mockJNIUtil; + + EXPECT_CALL(mockJNIUtil, + GetJavaObjectArrayLength( + jniEnv, reinterpret_cast(&vectors))) + .WillRepeatedly(Return(vectors.size())); + + EXPECT_CALL(mockJNIUtil, + GetJavaIntArrayLength(jniEnv, reinterpret_cast(&ids))) + .WillRepeatedly(Return(ids.size())); + // Create the index + knn_jni::nmslib_wrapper::CreateIndex( + &mockJNIUtil, jniEnv, reinterpret_cast(&ids), + reinterpret_cast(&vectors), (jstring)&indexPath, + (jobject)¶metersMap); + + // Clean up + ids.clear(); + ids.shrink_to_fit(); + vectors.clear(); + vectors.shrink_to_fit(); + + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} + +TEST(FaissNSGIndexMemoryTest, BasicAssertions) { + + char dataset[] = "dataset/sift/sift_base.fvecs"; + float* data_load = NULL; + unsigned points_num, dim; + test_util::load_data(dataset, data_load, points_num, dim); + + GTEST_COUT << "points_num:"<< points_num << " data dimension:" << dim << std::endl; + float* dataptr = data_load; + + std::vector> vectors; + std::vector ids(points_num); + test_util::set_vectors(vectors, ids, points_num, dim, dataptr); + free(data_load); + + std::string indexPath = "tmp/FaissNSGIndexMemoryTest.faiss"; + std::string spaceType = knn_jni::L2; + std::string index_description = "NSG64,Flat"; + int thread_num = 7; + std::unordered_map parametersMap; + parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType; + parametersMap[knn_jni::INDEX_DESCRIPTION] = (jobject)&index_description; + parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = (jobject)&thread_num; + + // Set up jni + JNIEnv *jniEnv = nullptr; + NiceMock mockJNIUtil; + + EXPECT_CALL(mockJNIUtil, + GetJavaObjectArrayLength( + jniEnv, reinterpret_cast(&vectors))) + .WillRepeatedly(Return(vectors.size())); + + EXPECT_CALL(mockJNIUtil, + GetJavaIntArrayLength(jniEnv, reinterpret_cast(&ids))) + .WillRepeatedly(Return(ids.size())); + + // Create the index + knn_jni::faiss_wrapper::CreateIndex( + &mockJNIUtil, jniEnv, reinterpret_cast(&ids), + reinterpret_cast(&vectors), (jstring)&indexPath, + (jobject)¶metersMap); + + // Make sure index can be loaded + // std::unique_ptr index(test_util::FaissLoadIndex(indexPath)); + + // Clean up + ids.clear(); + ids.shrink_to_fit(); + vectors.clear(); + vectors.shrink_to_fit(); + + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} + +TEST(FaissNSGQueryMemoryTest, BasicAssertions) { + + std::string indexPath = "tmp/FaissNSGIndexMemoryTest.faiss"; + std::unique_ptr index(test_util::FaissLoadIndex(indexPath)); + float queryVector[128]; + float distance[10]; + faiss::idx_t ids[10]; + memset(queryVector, 0, sizeof(queryVector)); + test_util::FaissQueryIndex(index.get(), queryVector, 10, distance, ids ); + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} + +TEST(FaissHNSWQueryMemoryTest, BasicAssertions) { + + std::string indexPath = "tmp/FaissHNSWIndexMemoryTest.faiss"; + std::unique_ptr index(test_util::FaissLoadIndex(indexPath)); + float queryVector[128]; + float distance[10]; + faiss::idx_t ids[10]; + memset(queryVector, 0, sizeof(queryVector)); + test_util::FaissQueryIndex(index.get(), queryVector, 10, distance, ids ); + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} + +TEST(LIBHNSWQueryMemoryTest, BasicAssertions) { + + similarity::initLibrary(); + std::string indexPath = "tmp/LibHNSWIndexMemoryTest2.faiss"; + std::string spaceType = knn_jni::L2; + int thread_num = 7; + int efConstruction = 512; + int efSearch = 512; + int m = 32; + + std::unordered_map parametersMap; + parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType; + parametersMap[knn_jni::INDEX_THREAD_QUANTITY] = (jobject)&thread_num; + parametersMap[knn_jni::EF_CONSTRUCTION] = (jobject)&efConstruction; + parametersMap[knn_jni::EF_SEARCH] = (jobject)&efSearch; + parametersMap[knn_jni::M] = (jobject)&m; + + JNIEnv *jniEnv = nullptr; + NiceMock mockJNIUtil; + std::unique_ptr loadedIndex( + reinterpret_cast( + knn_jni::nmslib_wrapper::LoadIndex(&mockJNIUtil, jniEnv, + (jstring)&indexPath, + (jobject)¶metersMap))); + + float queryVector[128]; + float distance[10]; + faiss::idx_t ids[10]; + memset(queryVector, 0, sizeof(queryVector)); + size_t mem_usage = faiss::get_mem_usage_kb() / (1 << 10); + GTEST_COUT<<"======Memory Usage:[" << mem_usage << "mb]======" << std::endl; +} diff --git a/jni/tests/test_util.cpp b/jni/tests/test_util.cpp index a75886c51..13117227e 100644 --- a/jni/tests/test_util.cpp +++ b/jni/tests/test_util.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include "faiss/Index.h" @@ -347,3 +348,41 @@ float test_util::RandomFloat(float min, float max) { std::uniform_real_distribution distribution(min, max); return distribution(e1); } + +void test_util::load_data(char* filename, float*& data, unsigned& num, unsigned& dim) { + std::ifstream in(filename, std::ios::binary); + if (!in.is_open()) { + std::cout << "open file error" << std::endl; + exit(-1); + } + in.read((char*)&dim, 4); + in.seekg(0, std::ios::end); + std::ios::pos_type ss = in.tellg(); + size_t fsize = (size_t)ss; + num = (unsigned)(fsize / (dim + 1) / 4); + data = new float[(size_t)num * (size_t)dim]; + + in.seekg(0, std::ios::beg); + for (size_t i = 0; i < num; i++) { + in.seekg(4, std::ios::cur); + in.read((char*)(data + i * dim), dim * 4); + } + in.close(); +} + +void test_util::set_vectors(std::vector>& vectors, + std::vector& ids, + int points_num, + int dim, + float* dataptr) { + ids.resize(points_num); + for (int i = 0; i < points_num; ++i) { + ids[i] = i; + std::vector vect; + for (int j = 0; j < dim; ++j) { + vect.push_back(*dataptr); + dataptr++; + } + vectors.push_back(vect); + } +} diff --git a/jni/tests/test_util.h b/jni/tests/test_util.h index 6eac70fcf..1c1bd763c 100644 --- a/jni/tests/test_util.h +++ b/jni/tests/test_util.h @@ -150,6 +150,16 @@ namespace test_util { float RandomFloat(float min, float max); + // Read vector file formats + void load_data(char* filename, float*& data, unsigned& num, unsigned& dim); + + // asign data into vector + void set_vectors(std::vector>& vectors, + std::vector& ids, + int points_num, + int dim, + float* dataptr); + // ------------------------------------------------------------------------------- } // namespace test_util