diff --git a/include/knowhere/comp/index_param.h b/include/knowhere/comp/index_param.h index f7781932e..99471b931 100644 --- a/include/knowhere/comp/index_param.h +++ b/include/knowhere/comp/index_param.h @@ -31,6 +31,7 @@ constexpr const char* INDEX_FAISS_IVFFLAT_CC = "IVF_FLAT_CC"; constexpr const char* INDEX_FAISS_IVFPQ = "IVF_PQ"; constexpr const char* INDEX_FAISS_SCANN = "SCANN"; constexpr const char* INDEX_FAISS_IVFSQ8 = "IVF_SQ8"; +constexpr const char* INDEX_FAISS_IVFSQ_CC = "IVF_SQ_CC"; constexpr const char* INDEX_FAISS_GPU_IDMAP = "GPU_FAISS_FLAT"; constexpr const char* INDEX_FAISS_GPU_IVFFLAT = "GPU_FAISS_IVF_FLAT"; @@ -94,6 +95,8 @@ constexpr const char* SSIZE = "ssize"; constexpr const char* REORDER_K = "reorder_k"; constexpr const char* WITH_RAW_DATA = "with_raw_data"; constexpr const char* ENSURE_TOPK_FULL = "ensure_topk_full"; +constexpr const char* CODE_SIZE = "code_size"; +constexpr const char* RAW_DATA_STORE_PREFIX = "raw_data_store_prefix"; // RAFT Params constexpr const char* REFINE_RATIO = "refine_ratio"; constexpr const char* CACHE_DATASET_ON_DEVICE = "cache_dataset_on_device"; diff --git a/src/index/ivf/ivf.cc b/src/index/ivf/ivf.cc index e63d49c68..0640b2f17 100644 --- a/src/index/ivf/ivf.cc +++ b/src/index/ivf/ivf.cc @@ -18,6 +18,7 @@ #include "faiss/IndexIVFFlat.h" #include "faiss/IndexIVFPQ.h" #include "faiss/IndexIVFPQFastScan.h" +#include "faiss/IndexIVFScalarQuantizerCC.h" #include "faiss/IndexScaNN.h" #include "faiss/IndexScalarQuantizer.h" #include "faiss/index_io.h" @@ -56,7 +57,8 @@ class IvfIndexNode : public IndexNode { std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value, + std::is_same::value || + std::is_same::value, "not support"); static_assert(std::is_same_v || std::is_same_v, "IvfIndexNode only support float/bianry"); @@ -98,6 +100,9 @@ class IvfIndexNode : public IndexNode { if constexpr (std::is_same::value) { return true; } + if constexpr (std::is_same::value) { + return index_->with_raw_data(); + } } expected GetIndexMeta(const Config& cfg) const override { @@ -131,6 +136,9 @@ class IvfIndexNode : public IndexNode { if constexpr (std::is_same::value) { return std::make_unique(); } + if constexpr (std::is_same::value) { + return std::make_unique(); + } }; int64_t Dim() const override { @@ -183,6 +191,12 @@ class IvfIndexNode : public IndexNode { auto code_size = index_->code_size; return (nb * code_size + nb * sizeof(int64_t) + nlist * code_size); } + if constexpr (std::is_same::value) { + auto nb = index_->invlists->compute_ntotal(); + auto code_size = index_->code_size; + auto nlist = index_->nlist; + return (nb * code_size + nb * sizeof(int64_t) + 2 * code_size + nlist * sizeof(float)); + } }; int64_t Count() const override { @@ -211,6 +225,9 @@ class IvfIndexNode : public IndexNode { if constexpr (std::is_same::value) { return knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT; } + if constexpr (std::is_same::value) { + return knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC; + } }; private: @@ -345,6 +362,22 @@ to_index_flat(std::unique_ptr&& index) { return std::make_unique(std::move(*index)); } +expected +get_ivf_sq_quantizer_type(int code_size) { + switch (code_size) { + case 4: + return faiss::ScalarQuantizer::QuantizerType::QT_4bit; + case 6: + return faiss::ScalarQuantizer::QuantizerType::QT_6bit; + case 8: + return faiss::ScalarQuantizer::QuantizerType::QT_8bit; + case 16: + return faiss::ScalarQuantizer::QuantizerType::QT_fp16; + default: + return expected::Err( + Status::invalid_args, fmt::format("current code size {} not in (4, 6, 8, 16)", code_size)); + } +} } // namespace template @@ -535,6 +568,35 @@ IvfIndexNode::TrainInternal(const DataSet& dataset, const C qzr.release(); index->own_fields = true; } + if constexpr (std::is_same::value) { + const IvfSqCcConfig& ivf_sq_cc_cfg = static_cast(cfg); + auto nlist = MatchNlist(rows, ivf_sq_cc_cfg.nlist.value()); + auto ssize = ivf_sq_cc_cfg.ssize.value(); + + const bool use_elkan = ivf_sq_cc_cfg.use_elkan.value_or(true); + + // create quantizer for the training + std::unique_ptr qzr = + std::make_unique(dim, metric.value(), false, use_elkan); + // create index. Index does not own qzr + auto qzr_type = get_ivf_sq_quantizer_type(ivf_sq_cc_cfg.code_size.value()); + if (!qzr_type.has_value()) { + LOG_KNOWHERE_ERROR_ << "fail to get ivf sq quantizer type, " << qzr_type.what(); + return qzr_type.error(); + } + index = std::make_unique(qzr.get(), dim, nlist, ssize, qzr_type.value(), + metric.value(), is_cosine, false, + ivf_sq_cc_cfg.raw_data_store_prefix); + // train + index->train(rows, (const float*)data); + // replace quantizer with a regular IndexFlat + qzr = to_index_flat(std::move(qzr)); + index->quantizer = qzr.get(); + // transfer ownership of qzr to index + qzr.release(); + index->own_fields = true; + index->make_direct_map(true, faiss::DirectMap::ConcurrentArray); + } index_ = std::move(index); return Status::success; @@ -624,7 +686,8 @@ IvfIndexNode::Search(const DataSet& dataset, const Config& distances[i + offset] = static_cast(i_distances[i + offset]); } } - } else if constexpr (std::is_same::value) { + } else if constexpr (std::is_same::value || + std::is_same::value) { auto cur_query = (const float*)data + index * dim; if (is_cosine) { copied_query = CopyAndNormalizeVecs(cur_query, 1, dim); @@ -924,7 +987,8 @@ IvfIndexNode::GetVectorByIds(const DataSet& dataset) const LOG_KNOWHERE_WARNING_ << "faiss inner error: " << e.what(); return expected::Err(Status::faiss_inner_error, e.what()); } - } else if constexpr (std::is_same::value) { + } else if constexpr (std::is_same::value || + std::is_same::value) { // we should never go here since we should call HasRawData() first if (!index_->with_raw_data()) { return expected::Err(Status::not_implemented, "GetVectorByIds not implemented"); @@ -1083,7 +1147,8 @@ IvfIndexNode::Deserialize(const BinarySet& binset, const Co } else { index_.reset(static_cast(faiss::read_index(&reader))); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v && + !std::is_same_v) { const BaseConfig& base_cfg = static_cast(config); if (HasRawData(base_cfg.metric_type.value())) { index_->make_direct_map(true); @@ -1136,6 +1201,7 @@ KNOWHERE_SIMPLE_REGISTER_GLOBAL(IVFPQ, IvfIndexNode, fp32, faiss::IndexIVFPQ); KNOWHERE_SIMPLE_REGISTER_GLOBAL(IVF_PQ, IvfIndexNode, fp32, faiss::IndexIVFPQ); KNOWHERE_SIMPLE_REGISTER_GLOBAL(IVFSQ, IvfIndexNode, fp32, faiss::IndexIVFScalarQuantizer); KNOWHERE_SIMPLE_REGISTER_GLOBAL(IVF_SQ8, IvfIndexNode, fp32, faiss::IndexIVFScalarQuantizer); +KNOWHERE_SIMPLE_REGISTER_GLOBAL(IVF_SQ_CC, IvfIndexNode, fp32, faiss::IndexIVFScalarQuantizerCC); // fp16 KNOWHERE_MOCK_REGISTER_GLOBAL(IVFFLAT, IvfIndexNode, fp16, faiss::IndexIVFFlat); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_FLAT, IvfIndexNode, fp16, faiss::IndexIVFFlat); @@ -1146,6 +1212,7 @@ KNOWHERE_MOCK_REGISTER_GLOBAL(IVFPQ, IvfIndexNode, fp16, faiss::IndexIVFPQ); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_PQ, IvfIndexNode, fp16, faiss::IndexIVFPQ); KNOWHERE_MOCK_REGISTER_GLOBAL(IVFSQ, IvfIndexNode, fp16, faiss::IndexIVFScalarQuantizer); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_SQ8, IvfIndexNode, fp16, faiss::IndexIVFScalarQuantizer); +KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_SQ_CC, IvfIndexNode, fp16, faiss::IndexIVFScalarQuantizerCC); // bf16 KNOWHERE_MOCK_REGISTER_GLOBAL(IVFFLAT, IvfIndexNode, bf16, faiss::IndexIVFFlat); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_FLAT, IvfIndexNode, bf16, faiss::IndexIVFFlat); @@ -1156,4 +1223,5 @@ KNOWHERE_MOCK_REGISTER_GLOBAL(IVFPQ, IvfIndexNode, bf16, faiss::IndexIVFPQ); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_PQ, IvfIndexNode, bf16, faiss::IndexIVFPQ); KNOWHERE_MOCK_REGISTER_GLOBAL(IVFSQ, IvfIndexNode, bf16, faiss::IndexIVFScalarQuantizer); KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_SQ8, IvfIndexNode, bf16, faiss::IndexIVFScalarQuantizer); +KNOWHERE_MOCK_REGISTER_GLOBAL(IVF_SQ_CC, IvfIndexNode, bf16, faiss::IndexIVFScalarQuantizerCC); } // namespace knowhere diff --git a/src/index/ivf/ivf_config.h b/src/index/ivf/ivf_config.h index 550bd0772..228b6f8c2 100644 --- a/src/index/ivf/ivf_config.h +++ b/src/index/ivf/ivf_config.h @@ -130,6 +130,41 @@ class IvfSqConfig : public IvfConfig {}; class IvfBinConfig : public IvfConfig {}; +class IvfSqCcConfig : public IvfFlatCcConfig { + public: + // user can use code size to control ivf_sq_cc quntizer type + CFG_INT code_size; + // IVF_SQ_CC holds all vectors in file when raw_data_store_prefix has value; + // cc index is a just-in-time index, raw data is avaliable after training if raw_data_store_prefix has value. + // ivf sq cc index will not keep raw data after using binaryset to create a new ivf sq cc index. + CFG_STRING raw_data_store_prefix; + KNOHWERE_DECLARE_CONFIG(IvfSqCcConfig) { + KNOWHERE_CONFIG_DECLARE_FIELD(code_size) + .set_default(8) + .description("code size, range in [4, 6, 8 and 16]") + .for_train(); + KNOWHERE_CONFIG_DECLARE_FIELD(raw_data_store_prefix) + .description("Raw data will be set in this prefix path") + .for_train() + .allow_empty_without_default(); + }; + Status + CheckAndAdjust(PARAM_TYPE param_type, std::string* err_msg) override { + if (param_type == PARAM_TYPE::TRAIN) { + auto code_size_v = code_size.value(); + auto legal_code_size_list = std::vector{4, 6, 8, 16}; + if (std::find(legal_code_size_list.begin(), legal_code_size_list.end(), code_size_v) == + legal_code_size_list.end()) { + *err_msg = + "compress a vector into (code_size * dim)/8 bytes, code size value should be in 4, 6, 8 and 16"; + LOG_KNOWHERE_ERROR_ << *err_msg; + return Status::invalid_value_in_json; + } + } + return Status::success; + } +}; + } // namespace knowhere #endif /* IVF_CONFIG_H */ diff --git a/tests/ut/test_ivfflat_cc.cc b/tests/ut/test_ivfflat_cc.cc index d04346d0e..f670a4997 100644 --- a/tests/ut/test_ivfflat_cc.cc +++ b/tests/ut/test_ivfflat_cc.cc @@ -9,6 +9,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License. +#include #include #include "catch2/catch_approx.hpp" @@ -44,7 +45,7 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { return json; }; - auto ivfflat_gen = [&base_gen]() { + auto ivf_gen = [&base_gen]() { knowhere::Json json = base_gen(); json[knowhere::indexparam::NLIST] = 128; json[knowhere::indexparam::NPROBE] = 16; @@ -52,13 +53,19 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { return json; }; - auto ivfflatcc_gen = [&ivfflat_gen]() { - knowhere::Json json = ivfflat_gen(); + auto ivf_cc_gen = [&ivf_gen]() { + knowhere::Json json = ivf_gen(); json[knowhere::meta::NUM_BUILD_THREAD] = 1; json[knowhere::indexparam::SSIZE] = 48; return json; }; + auto ivf_sq_8_cc_gen = [&ivf_cc_gen]() { + knowhere::Json json = ivf_cc_gen(); + json[knowhere::indexparam::CODE_SIZE] = 8; + return json; + }; + SECTION("Test Concurrent Invlists ") { size_t nlist = 128; size_t code_size = 512; @@ -140,7 +147,8 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { SECTION("Test Add & Search & RangeSearch Serialized ") { using std::make_tuple; auto [name, gen] = GENERATE_REF(table>({ - make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivf_cc_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivf_sq_8_cc_gen), })); auto idx = knowhere::IndexFactory::Instance().Create(name, version); auto cfg_json = gen().dump(); @@ -183,29 +191,29 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { SECTION("Test Build & Search Correctness") { using std::make_tuple; + auto [index_name, cc_index_name] = GENERATE_REF(table({ + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC), + })); + auto ivf = knowhere::IndexFactory::Instance().Create(index_name, version); + auto ivf_cc = knowhere::IndexFactory::Instance().Create(cc_index_name, version); - auto ivf_flat = knowhere::IndexFactory::Instance().Create( - knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, version); - auto ivf_flat_cc = knowhere::IndexFactory::Instance().Create( - knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, version); - - knowhere::Json ivf_flat_json = knowhere::Json::parse(ivfflat_gen().dump()); - knowhere::Json ivf_flat_cc_json = knowhere::Json::parse(ivfflatcc_gen().dump()); + knowhere::Json ivf_json = knowhere::Json::parse(ivf_gen().dump()); + knowhere::Json ivf_cc_json = knowhere::Json::parse(ivf_cc_gen().dump()); auto train_ds = GenDataSet(nb, dim, seed); auto query_ds = GenDataSet(nq, dim, seed); - auto flat_res = ivf_flat.Build(*train_ds, ivf_flat_json); + auto flat_res = ivf.Build(*train_ds, ivf_json); REQUIRE(flat_res == knowhere::Status::success); - auto cc_res = ivf_flat_cc.Build(*train_ds, ivf_flat_json); + auto cc_res = ivf_cc.Build(*train_ds, ivf_json); REQUIRE(cc_res == knowhere::Status::success); // test search { - auto flat_results = ivf_flat.Search(*query_ds, ivf_flat_json, nullptr); + auto flat_results = ivf.Search(*query_ds, ivf_json, nullptr); REQUIRE(flat_results.has_value()); - auto cc_results = ivf_flat_cc.Search(*query_ds, ivf_flat_json, nullptr); + auto cc_results = ivf_cc.Search(*query_ds, ivf_json, nullptr); REQUIRE(cc_results.has_value()); auto flat_ids = flat_results.value()->GetIds(); @@ -219,10 +227,10 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { } // test range_search { - auto flat_results = ivf_flat.RangeSearch(*query_ds, ivf_flat_json, nullptr); + auto flat_results = ivf.RangeSearch(*query_ds, ivf_json, nullptr); REQUIRE(flat_results.has_value()); - auto cc_results = ivf_flat_cc.RangeSearch(*query_ds, ivf_flat_json, nullptr); + auto cc_results = ivf_cc.RangeSearch(*query_ds, ivf_json, nullptr); REQUIRE(cc_results.has_value()); auto flat_ids = flat_results.value()->GetIds(); @@ -242,12 +250,14 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { SECTION("Test Add & Search & RangeSearch ConCurrent") { using std::make_tuple; auto [name, gen] = GENERATE_REF(table>({ - make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivf_cc_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivf_sq_8_cc_gen), })); auto idx = knowhere::IndexFactory::Instance().Create(name, version); auto cfg_json = gen().dump(); CAPTURE(name, cfg_json); knowhere::Json json = knowhere::Json::parse(cfg_json); + json[knowhere::indexparam::RAW_DATA_STORE_PREFIX] = std::filesystem::current_path().string() + "/"; auto train_ds = GenDataSet(nb, dim, seed); auto res = idx.Build(*train_ds, json); REQUIRE(res == knowhere::Status::success); @@ -290,6 +300,7 @@ TEST_CASE("Test Build Search Concurrency", "[Concurrency]") { retrieve_task_list.push_back(std::async( std::launch::async, [&idx, &retrieve_ids_set] { return idx.GetVectorByIds(*retrieve_ids_set); })); } + for (auto& task : add_task_list) { REQUIRE(task.get() == knowhere::Status::success); } diff --git a/tests/ut/test_search.cc b/tests/ut/test_search.cc index 136285f45..ff45ec857 100644 --- a/tests/ut/test_search.cc +++ b/tests/ut/test_search.cc @@ -60,6 +60,34 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { return json; }; + auto ivfsqcc_code_size_4_gen = [ivfflatcc_gen]() { + knowhere::Json json = ivfflatcc_gen(); + json[knowhere::indexparam::SSIZE] = 48; + json[knowhere::indexparam::CODE_SIZE] = 4; + return json; + }; + + auto ivfsqcc_code_size_6_gen = [ivfflatcc_gen]() { + knowhere::Json json = ivfflatcc_gen(); + json[knowhere::indexparam::SSIZE] = 48; + json[knowhere::indexparam::CODE_SIZE] = 6; + return json; + }; + + auto ivfsqcc_code_size_8_gen = [ivfflatcc_gen]() { + knowhere::Json json = ivfflatcc_gen(); + json[knowhere::indexparam::SSIZE] = 48; + json[knowhere::indexparam::CODE_SIZE] = 8; + return json; + }; + + auto ivfsqcc_code_size_16_gen = [ivfflatcc_gen]() { + knowhere::Json json = ivfflatcc_gen(); + json[knowhere::indexparam::SSIZE] = 48; + json[knowhere::indexparam::CODE_SIZE] = 16; + return json; + }; + auto ivfsq_gen = ivfflat_gen; auto flat_gen = base_gen; @@ -110,6 +138,10 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsqcc_code_size_4_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsqcc_code_size_6_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsqcc_code_size_8_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsqcc_code_size_16_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen2), make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen), @@ -141,7 +173,7 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { if (metric == knowhere::metric::COSINE) { if (name != knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 && name != knowhere::IndexEnum::INDEX_FAISS_IVFPQ && name != knowhere::IndexEnum::INDEX_HNSW_SQ8 && name != knowhere::IndexEnum::INDEX_HNSW_SQ8_REFINE && - !scann_without_raw_data) { + name != knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC && !scann_without_raw_data) { REQUIRE(CheckDistanceInScope(*results.value(), topk, -1.00001, 1.00001)); } } @@ -155,6 +187,7 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen2), make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen), @@ -178,7 +211,8 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { auto lims = results.value()->GetLims(); bool scann_without_raw_data = (name == knowhere::IndexEnum::INDEX_FAISS_SCANN && scann_gen2().dump() == cfg_json); - if (name != knowhere::IndexEnum::INDEX_FAISS_IVFPQ && name != knowhere::IndexEnum::INDEX_FAISS_SCANN) { + if (name != knowhere::IndexEnum::INDEX_FAISS_IVFPQ && name != knowhere::IndexEnum::INDEX_FAISS_SCANN && + name != knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC) { for (int i = 0; i < nq; ++i) { CHECK(ids[lims[i]] == i); } @@ -187,7 +221,7 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { if (metric == knowhere::metric::COSINE) { if (name != knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 && name != knowhere::IndexEnum::INDEX_FAISS_IVFPQ && name != knowhere::IndexEnum::INDEX_HNSW_SQ8 && name != knowhere::IndexEnum::INDEX_HNSW_SQ8_REFINE && - !scann_without_raw_data) { + name != knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC && !scann_without_raw_data) { REQUIRE(CheckDistanceInScope(*results.value(), -1.00001, 1.00001)); } } @@ -234,9 +268,12 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { json[knowhere::indexparam::ENSURE_TOPK_FULL] = false; return json; }; - auto [name, gen] = GENERATE_REF(table>( - {make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen_), - make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen_no_ensure_topk_)})); + auto [name, gen] = GENERATE_REF(table>({ + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen_), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfflatcc_gen_), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen_no_ensure_topk_), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfflatcc_gen_no_ensure_topk_), + })); auto idx = knowhere::IndexFactory::Instance().Create(name, version); auto cfg_json = gen().dump(); CAPTURE(name, cfg_json); @@ -312,6 +349,7 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, ivfflatcc_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen), make_tuple(knowhere::IndexEnum::INDEX_FAISS_SCANN, scann_gen2), make_tuple(knowhere::IndexEnum::INDEX_HNSW, hnsw_gen), @@ -352,6 +390,24 @@ TEST_CASE("Test Mem Index With Float Vector", "[float metrics]") { auto res = idx.Build(*train_ds, ivf_pq_gen()); REQUIRE(res == knowhere::Status::faiss_inner_error); } + + SECTION("Test IVFPQ with invalid params") { + auto idx = knowhere::IndexFactory::Instance().Create(knowhere::IndexEnum::INDEX_FAISS_IVFSQ_CC, + version); + uint32_t nb = 1000; + uint32_t dim = 128; + auto ivf_pq_gen = [&]() { + knowhere::Json json; + json[knowhere::meta::DIM] = dim; + json[knowhere::meta::METRIC_TYPE] = knowhere::metric::L2; + json[knowhere::meta::TOPK] = 10; + json[knowhere::indexparam::CODE_SIZE] = 7; + return json; + }; + auto train_ds = GenDataSet(nb, dim); + auto res = idx.Build(*train_ds, ivf_pq_gen()); + REQUIRE(res == knowhere::Status::invalid_value_in_json); + } } TEST_CASE("Test Mem Index With Binary Vector", "[float metrics]") { diff --git a/thirdparty/faiss/faiss/CMakeLists.txt b/thirdparty/faiss/faiss/CMakeLists.txt index 0cc526b88..55e44c0c0 100644 --- a/thirdparty/faiss/faiss/CMakeLists.txt +++ b/thirdparty/faiss/faiss/CMakeLists.txt @@ -60,6 +60,7 @@ set(FAISS_SRC VectorTransform.cpp clone_index.cpp index_factory.cpp + IndexIVFScalarQuantizerCC.cpp impl/AuxIndexStructures.cpp impl/FaissException.cpp impl/HNSW.cpp @@ -99,6 +100,7 @@ set(FAISS_SRC utils/quantize_lut.cpp utils/random.cpp utils/utils.cpp + utils/data_backup_file.cpp ) if("${CMAKE_SYSTEM}" MATCHES "Linux") @@ -146,6 +148,7 @@ set(FAISS_HEADERS IndexAdditiveQuantizer.h IndexSQHybrid.h IndexScalarQuantizer.h + IndexIVFScalarQuantizerCC.h IndexScaNN.h IndexShards.h MatrixStats.h @@ -206,6 +209,7 @@ set(FAISS_HEADERS utils/simdlib_neon.h utils/structure-inl.h utils/utils.h + utils/data_backup_file.h ) # OK for MSYS diff --git a/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.cpp b/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.cpp new file mode 100644 index 000000000..988875a75 --- /dev/null +++ b/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.cpp @@ -0,0 +1,135 @@ +#include +#include + +namespace faiss { +IndexIVFScalarQuantizerCC::IndexIVFScalarQuantizerCC( + Index* quantizer, + size_t d, + size_t nlist, + size_t ssize, + ScalarQuantizer::QuantizerType qtype, + MetricType metric, + bool is_cosine, + bool by_residual, + std::optional raw_data_prefix_path) + : IndexIVFScalarQuantizer( + quantizer, + d, + nlist, + qtype, + metric, + by_residual) { + if (raw_data_prefix_path.has_value()) { + raw_data_backup_ = std::make_unique( + raw_data_prefix_path.value(), d * sizeof(float)); + } + this->is_cosine = is_cosine; + // add code into ConcurrentArrayInvertedLists, no need to normalize ig + // metric == cosine + replace_invlists( + new ConcurrentArrayInvertedLists(nlist, code_size, ssize, false), + true); +} + +IndexIVFScalarQuantizerCC::IndexIVFScalarQuantizerCC() { + this->by_residual = false; +} + +void IndexIVFScalarQuantizerCC::train(idx_t n, const float* x) { + if (is_cosine) { + auto x_normalized = knowhere::CopyAndNormalizeVecs(x, n, d); + IndexIVF::train(n, x_normalized.get()); + } else { + IndexIVF::train(n, x); + } +} + +void IndexIVFScalarQuantizerCC::add_core( + idx_t n, + const float* x, + const float* x_norms, + const idx_t* xids, + const idx_t* coarse_idx) { + FAISS_THROW_IF_NOT(is_trained); + std::unique_ptr x_normalized = nullptr; + const float* base_x = x; + if (is_cosine) { + x_normalized = knowhere::CopyAndNormalizeVecs(x, n, d); + base_x = x_normalized.get(); + } + + size_t nadd = 0; + std::unique_ptr squant(sq.select_quantizer()); + + DirectMapAdd dm_add(direct_map, n, xids); + +#pragma omp parallel reduction(+ : nadd) + { + std::vector residual(d); + std::vector one_code(code_size); + int nt = omp_get_num_threads(); + int rank = omp_get_thread_num(); + + // each thread takes care of a subset of lists + for (size_t i = 0; i < n; i++) { + int64_t list_no = coarse_idx[i]; + if (list_no >= 0 && list_no % nt == rank) { + int64_t id = xids ? xids[i] : ntotal + i; + + const float* xi = base_x + i * d; + if (by_residual) { + quantizer->compute_residual(xi, residual.data(), list_no); + xi = residual.data(); + } + + memset(one_code.data(), 0, code_size); + squant->encode_vector(xi, one_code.data()); + + size_t ofs = invlists->add_entry(list_no, id, one_code.data()); + + dm_add.add(i, list_no, ofs); + if (raw_data_backup_ != nullptr) { + raw_data_backup_->AppendDataBlock((char*)(x + i * d)); + } + nadd++; + + } else if (rank == 0 && list_no == -1) { + dm_add.add(i, -1, 0); + } + } + } + ntotal += n; +} + +void IndexIVFScalarQuantizerCC::add_with_ids( + idx_t n, + const float* x, + const idx_t* xids) { + if (is_cosine) { + std::unique_ptr coarse_idx(new idx_t[n]); + { + auto x_normalized = knowhere::CopyAndNormalizeVecs(x, n, d); + quantizer->assign(n, x_normalized.get(), coarse_idx.get()); + } + add_core(n, x, nullptr, xids, coarse_idx.get()); + } else { + IndexIVFScalarQuantizer::add_with_ids(n, x, xids); + } +} + +void IndexIVFScalarQuantizerCC::reconstruct(idx_t key, float* recons) const { + FAISS_THROW_IF_NOT_MSG( + raw_data_backup_ != nullptr, + "IndexIVFScalarQuantizerCC can't get raw data if raw_data_backup_ not set."); + raw_data_backup_->ReadDataBlock((char*)recons, key); +} + +bool IndexIVFScalarQuantizerCC::with_raw_data() { + return (raw_data_backup_ != nullptr); +} + +void IndexIVFScalarQuantizerCC::reconstruct_n(idx_t i0, idx_t ni, float* recons) + const { + FAISS_THROW_MSG("IndexIVFScalarQuantizerCC not support reconstruct_n"); +} +} // namespace faiss \ No newline at end of file diff --git a/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.h b/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.h new file mode 100644 index 000000000..b90836be5 --- /dev/null +++ b/thirdparty/faiss/faiss/IndexIVFScalarQuantizerCC.h @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include +#include +#include "knowhere/utils.h" +#include "utils/data_backup_file.h" +namespace faiss { + +/*************************************************** + *IndexIVFScalarQuantizerCC + ***************************************************/ +struct IndexIVFScalarQuantizerCC : IndexIVFScalarQuantizer { + std::unique_ptr raw_data_backup_ = nullptr; + + IndexIVFScalarQuantizerCC( + Index* quantizer, + size_t d, + size_t nlist, + size_t ssize, + ScalarQuantizer::QuantizerType qtype, + MetricType metric = METRIC_L2, + bool is_cosine = false, + bool by_residual = false, + std::optional raw_data_prefix_path = std::nullopt); + + IndexIVFScalarQuantizerCC(); + + void train(idx_t n, const float* x) override; + + void add_core( + idx_t n, + const float* x, + const float* x_norms, + const idx_t* xids, + const idx_t* coarse_idx) override; + + void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; + + void reconstruct(idx_t key, float* recons) const override; + + bool with_raw_data(); + + void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override; +}; +} // namespace faiss \ No newline at end of file diff --git a/thirdparty/faiss/faiss/utils/data_backup_file.cpp b/thirdparty/faiss/faiss/utils/data_backup_file.cpp new file mode 100644 index 000000000..95abf801c --- /dev/null +++ b/thirdparty/faiss/faiss/utils/data_backup_file.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include +namespace faiss { + +namespace { +constexpr const char* kRawDataFileName = "/ivf_sq_cc_raw_data.bin"; +constexpr const size_t kDataBackFileHandlerDefaulBufferSize = 8192; +constexpr const size_t kDataBackFileHandlerMinFlushNum = 4; +} // namespace + +DataBackFileHandler::DataBackFileHandler( + std::string prefix, + size_t block_size) { + FAISS_THROW_IF_NOT(block_size != 0); + std::unique_lock lock(file_mtx_); + raw_data_file_name_ = prefix + kRawDataFileName; + std::fstream file; + file.open(raw_data_file_name_.c_str(), std::fstream::out | std::fstream::trunc); + FAISS_THROW_IF_NOT(file.is_open()); + file.close(); + this->block_size_ = block_size; + this->file_block_num_ = 0; + this->buffer_size_ = std::min( + kDataBackFileHandlerDefaulBufferSize, + kDataBackFileHandlerMinFlushNum * block_size); + this->buffer_ = std::make_unique(this->buffer_size_); + this->buffer_res_size_ = this->buffer_size_; + this->buffer_block_num_ = 0; + this->buffer_max_block_num_ = + std::floor((float)buffer_size_ / (float)block_size); + memset(this->buffer_.get(), 0, this->buffer_size_); +} + +DataBackFileHandler::~DataBackFileHandler() { + if (FileExist()) { + std::remove(raw_data_file_name_.c_str()); + } +} + +bool DataBackFileHandler::FileExist() { + std::ifstream file(raw_data_file_name_.c_str()); + return file.good(); +} + +void DataBackFileHandler::ReadDataBlock( + char* data, + size_t blk_id) { + std::shared_lock lock(file_mtx_); + FAISS_THROW_IF_NOT(blk_id < this->buffer_block_num_ + this->file_block_num_); + if (blk_id >= this->file_block_num_) { + auto buffer_blk_id = blk_id - this->file_block_num_; + std::memcpy( + data, buffer_.get() + buffer_blk_id * block_size_, block_size_); + } else { + std::ifstream reader(raw_data_file_name_.c_str(), std::ios::binary); + reader.seekg(blk_id * block_size_); + reader.read(data, block_size_); + } +} + +void DataBackFileHandler::AppendDataBlock( + const char* data) { + std::unique_lock lock(file_mtx_); + std::memcpy(buffer_.get() + buffer_block_num_ * block_size_, data, block_size_); + buffer_block_num_++; + if (buffer_block_num_ == buffer_max_block_num_) { + std::ofstream writer(raw_data_file_name_.c_str(), std::ios::app); + writer.write(buffer_.get(), buffer_max_block_num_ * block_size_); + writer.flush(); + buffer_block_num_ = 0; + file_block_num_ += buffer_max_block_num_; + } +} +} // namespace faiss diff --git a/thirdparty/faiss/faiss/utils/data_backup_file.h b/thirdparty/faiss/faiss/utils/data_backup_file.h new file mode 100644 index 000000000..a328d4f96 --- /dev/null +++ b/thirdparty/faiss/faiss/utils/data_backup_file.h @@ -0,0 +1,31 @@ +#pragma once +#include +#include +#include +#include +#include +namespace faiss { + /** + * @brief BackDataFileHandler is a temporary file structure, and the temperary file only exist in the life cycle of the struct. + * It is used to back up the original data of the index for the needs of data reconstruction with no loss. + */ + struct DataBackFileHandler { + public: + DataBackFileHandler(std::string prefix, size_t block_size); + ~DataBackFileHandler(); + void ReadDataBlock(char* data, size_t block_id); + void AppendDataBlock(const char* data); + inline bool FileExist(); + + private: + std::shared_mutex file_mtx_; + size_t buffer_size_; + size_t buffer_res_size_; + std::unique_ptr buffer_; + std::string raw_data_file_name_; + size_t block_size_; + size_t file_block_num_; + size_t buffer_block_num_; + size_t buffer_max_block_num_; + }; +} // namespace faiss \ No newline at end of file