Skip to content

Commit 3088f6c

Browse files
committed
change to inverted_sparse
Signed-off-by: Keming <kemingyang@tensorchord.ai>
1 parent 60a707f commit 3088f6c

File tree

6 files changed

+58
-45
lines changed

6 files changed

+58
-45
lines changed

crates/base/src/index.rs

+13-3
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ impl IndexOptions {
124124
..
125125
}),
126126
) => Ok(()),
127-
(VectorKind::SVecf32, DistanceKind::Dot, IndexingOptions::Inverted()) => Ok(()),
127+
(VectorKind::SVecf32, DistanceKind::Dot, IndexingOptions::InvertedSparse(_)) => Ok(()),
128128
_ => Err(ValidationError::new("not valid index options")),
129129
}
130130
}
@@ -261,7 +261,7 @@ pub enum IndexingOptions {
261261
Flat(FlatIndexingOptions),
262262
Ivf(IvfIndexingOptions),
263263
Hnsw(HnswIndexingOptions),
264-
Inverted(),
264+
InvertedSparse(InvertedSparseIndexingOptions),
265265
}
266266

267267
impl IndexingOptions {
@@ -297,11 +297,21 @@ impl Validate for IndexingOptions {
297297
Self::Flat(x) => x.validate(),
298298
Self::Ivf(x) => x.validate(),
299299
Self::Hnsw(x) => x.validate(),
300-
Self::Inverted() => Ok(()),
300+
Self::InvertedSparse(_) => Ok(()),
301301
}
302302
}
303303
}
304304

305+
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
306+
#[serde(deny_unknown_fields)]
307+
pub struct InvertedSparseIndexingOptions {}
308+
309+
impl Default for InvertedSparseIndexingOptions {
310+
fn default() -> Self {
311+
Self {}
312+
}
313+
}
314+
305315
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
306316
#[serde(deny_unknown_fields)]
307317
pub struct FlatIndexingOptions {

crates/index/src/indexing/sealed.rs

+8-8
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,15 @@ use base::operator::*;
44
use base::search::*;
55
use flat::Flat;
66
use hnsw::Hnsw;
7-
use inverted::Inverted;
7+
use inverted::InvertedSparse;
88
use ivf::Ivf;
99
use std::path::Path;
1010

1111
pub enum SealedIndexing<O: Op> {
1212
Flat(Flat<O>),
1313
Ivf(Ivf<O>),
1414
Hnsw(Hnsw<O>),
15-
Inverted(Inverted<O>),
15+
InvertedSparse(InvertedSparse<O>),
1616
}
1717

1818
impl<O: Op> SealedIndexing<O> {
@@ -25,7 +25,7 @@ impl<O: Op> SealedIndexing<O> {
2525
IndexingOptions::Flat(_) => Self::Flat(Flat::create(path, options, source)),
2626
IndexingOptions::Ivf(_) => Self::Ivf(Ivf::create(path, options, source)),
2727
IndexingOptions::Hnsw(_) => Self::Hnsw(Hnsw::create(path, options, source)),
28-
IndexingOptions::Inverted() => Self::Inverted(Inverted::create(path, options, source)),
28+
IndexingOptions::InvertedSparse(_) => Self::InvertedSparse(InvertedSparse::create(path, options, source)),
2929
}
3030
}
3131

@@ -34,7 +34,7 @@ impl<O: Op> SealedIndexing<O> {
3434
IndexingOptions::Flat(_) => Self::Flat(Flat::open(path)),
3535
IndexingOptions::Ivf(_) => Self::Ivf(Ivf::open(path)),
3636
IndexingOptions::Hnsw(_) => Self::Hnsw(Hnsw::open(path)),
37-
IndexingOptions::Inverted() => Self::Inverted(Inverted::open(path)),
37+
IndexingOptions::InvertedSparse(_) => Self::InvertedSparse(InvertedSparse::open(path)),
3838
}
3939
}
4040

@@ -47,7 +47,7 @@ impl<O: Op> SealedIndexing<O> {
4747
SealedIndexing::Flat(x) => x.vbase(vector, opts),
4848
SealedIndexing::Ivf(x) => x.vbase(vector, opts),
4949
SealedIndexing::Hnsw(x) => x.vbase(vector, opts),
50-
SealedIndexing::Inverted(x) => x.vbase(vector, opts),
50+
SealedIndexing::InvertedSparse(x) => x.vbase(vector, opts),
5151
}
5252
}
5353

@@ -56,7 +56,7 @@ impl<O: Op> SealedIndexing<O> {
5656
SealedIndexing::Flat(x) => x.len(),
5757
SealedIndexing::Ivf(x) => x.len(),
5858
SealedIndexing::Hnsw(x) => x.len(),
59-
SealedIndexing::Inverted(x) => x.len(),
59+
SealedIndexing::InvertedSparse(x) => x.len(),
6060
}
6161
}
6262

@@ -65,7 +65,7 @@ impl<O: Op> SealedIndexing<O> {
6565
SealedIndexing::Flat(x) => x.vector(i),
6666
SealedIndexing::Ivf(x) => x.vector(i),
6767
SealedIndexing::Hnsw(x) => x.vector(i),
68-
SealedIndexing::Inverted(x) => x.vector(i),
68+
SealedIndexing::InvertedSparse(x) => x.vector(i),
6969
}
7070
}
7171

@@ -74,7 +74,7 @@ impl<O: Op> SealedIndexing<O> {
7474
SealedIndexing::Flat(x) => x.payload(i),
7575
SealedIndexing::Ivf(x) => x.payload(i),
7676
SealedIndexing::Hnsw(x) => x.payload(i),
77-
SealedIndexing::Inverted(x) => x.payload(i),
77+
SealedIndexing::InvertedSparse(x) => x.payload(i),
7878
}
7979
}
8080
}

crates/index/src/lib.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ use common::dir_ops::sync_walk_from_dir;
2525
use common::file_atomic::FileAtomic;
2626
use crossbeam::atomic::AtomicCell;
2727
use crossbeam::channel::Sender;
28-
use inverted::operator::OperatorInverted;
28+
use inverted::operator::OperatorInvertedSparse;
2929
use ivf::operator::OperatorIvf;
3030
use parking_lot::Mutex;
3131
use quantization::operator::OperatorQuantization;
@@ -43,11 +43,11 @@ use thiserror::Error;
4343
use validator::Validate;
4444

4545
pub trait Op:
46-
Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInverted
46+
Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInvertedSparse
4747
{
4848
}
4949

50-
impl<T: Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInverted> Op
50+
impl<T: Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInvertedSparse> Op
5151
for T
5252
{
5353
}

crates/index/src/segment/sealed.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ impl<O: Op> SealedSegment<O> {
121121
SealedIndexing::Flat(x) => x,
122122
SealedIndexing::Ivf(x) => x,
123123
SealedIndexing::Hnsw(x) => x,
124-
SealedIndexing::Inverted(x) => x,
124+
SealedIndexing::InvertedSparse(x) => x,
125125
}
126126
}
127127
}

crates/inverted/src/lib.rs

+9-9
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
pub mod operator;
44

5-
use self::operator::OperatorInverted;
5+
use self::operator::OperatorInvertedSparse;
66
use base::index::{IndexOptions, SearchOptions};
77
use base::operator::Borrowed;
88
use base::scalar::{ScalarLike, F32};
@@ -20,15 +20,15 @@ use std::path::Path;
2020
const ZERO: F32 = F32(0.0);
2121

2222
#[allow(dead_code)]
23-
pub struct Inverted<O: OperatorInverted> {
23+
pub struct InvertedSparse<O: OperatorInvertedSparse> {
2424
storage: O::Storage,
2525
payloads: MmapArray<Payload>,
2626
indexes: Json<Vec<u32>>,
2727
offsets: Json<Vec<u32>>,
2828
scores: Json<Vec<F32>>,
2929
}
3030

31-
impl<O: OperatorInverted> Inverted<O> {
31+
impl<O: OperatorInvertedSparse> InvertedSparse<O> {
3232
pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
3333
let remapped = RemappedCollection::from_source(source);
3434
from_nothing(path, options, &remapped)
@@ -83,12 +83,12 @@ impl<O: OperatorInverted> Inverted<O> {
8383
}
8484
}
8585

86-
fn from_nothing<O: OperatorInverted>(
86+
fn from_nothing<O: OperatorInvertedSparse>(
8787
path: impl AsRef<Path>,
8888
_: IndexOptions,
8989
collection: &impl Collection<O>,
90-
) -> Inverted<O> {
91-
create_dir(path.as_ref()).expect("failed to create path for inverted index");
90+
) -> InvertedSparse<O> {
91+
create_dir(path.as_ref()).expect("failed to create path for inverted sparse index");
9292

9393
let mut token_collection = BTreeMap::new();
9494
for i in 0..collection.len() {
@@ -110,7 +110,7 @@ fn from_nothing<O: OperatorInverted>(
110110
let json_offset = Json::create(path.as_ref().join("offsets"), offsets);
111111
let json_score = Json::create(path.as_ref().join("scores"), scores);
112112
sync_dir(path);
113-
Inverted {
113+
InvertedSparse {
114114
storage,
115115
payloads,
116116
indexes: json_index,
@@ -119,13 +119,13 @@ fn from_nothing<O: OperatorInverted>(
119119
}
120120
}
121121

122-
fn open<O: OperatorInverted>(path: impl AsRef<Path>) -> Inverted<O> {
122+
fn open<O: OperatorInvertedSparse>(path: impl AsRef<Path>) -> InvertedSparse<O> {
123123
let storage = O::Storage::open(path.as_ref().join("storage"));
124124
let payloads = MmapArray::open(path.as_ref().join("payloads"));
125125
let offsets = Json::open(path.as_ref().join("offsets"));
126126
let indexes = Json::open(path.as_ref().join("indexes"));
127127
let scores = Json::open(path.as_ref().join("scores"));
128-
Inverted {
128+
InvertedSparse {
129129
storage,
130130
payloads,
131131
indexes,

crates/inverted/src/operator.rs

+24-21
Original file line number | Diff line number | Diff line change
@@ -3,35 +3,38 @@ use base::scalar::F32;
33
use quantization::operator::OperatorQuantization;
44
use storage::OperatorStorage;
55

6-
pub trait OperatorInverted: OperatorQuantization + OperatorStorage {
7-
fn to_index_vec(vec: Borrowed<'_, Self>) -> Vec<(u32, F32)>;
6+
use std::iter::{Empty, zip};
7+
8+
pub trait OperatorInvertedSparse: OperatorQuantization + OperatorStorage {
9+
fn to_index_vec(vec: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)>;
810
}
911

10-
impl OperatorInverted for SVecf32Dot {
11-
fn to_index_vec(vec: Borrowed<'_, Self>) -> Vec<(u32, F32)> {
12-
std::iter::zip(vec.indexes().to_vec(), vec.values().to_vec()).collect()
12+
impl OperatorInvertedSparse for SVecf32Dot {
13+
fn to_index_vec(vec: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)> {
14+
zip(vec.indexes().to_vec(), vec.values().to_vec())
1315
}
1416
}
1517

16-
macro_rules! unimpl_operator_inverted {
18+
macro_rules! unimpl_operator_inverted_sparse {
1719
($t:ty) => {
18-
impl OperatorInverted for $t {
19-
fn to_index_vec(_: Borrowed<'_, Self>) -> Vec<(u32, F32)> {
20-
unimplemented!()
20+
impl OperatorInvertedSparse for $t {
21+
fn to_index_vec(_: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)> {
22+
#![allow(unreachable_code)]
23+
unimplemented!() as Empty<(u32, F32)>
2124
}
2225
}
2326
};
2427
}
2528

26-
unimpl_operator_inverted!(SVecf32Cos);
27-
unimpl_operator_inverted!(SVecf32L2);
28-
unimpl_operator_inverted!(BVecf32Cos);
29-
unimpl_operator_inverted!(BVecf32Dot);
30-
unimpl_operator_inverted!(BVecf32Jaccard);
31-
unimpl_operator_inverted!(BVecf32L2);
32-
unimpl_operator_inverted!(Vecf32Cos);
33-
unimpl_operator_inverted!(Vecf32Dot);
34-
unimpl_operator_inverted!(Vecf32L2);
35-
unimpl_operator_inverted!(Vecf16Cos);
36-
unimpl_operator_inverted!(Vecf16Dot);
37-
unimpl_operator_inverted!(Vecf16L2);
29+
unimpl_operator_inverted_sparse!(SVecf32Cos);
30+
unimpl_operator_inverted_sparse!(SVecf32L2);
31+
unimpl_operator_inverted_sparse!(BVecf32Cos);
32+
unimpl_operator_inverted_sparse!(BVecf32Dot);
33+
unimpl_operator_inverted_sparse!(BVecf32Jaccard);
34+
unimpl_operator_inverted_sparse!(BVecf32L2);
35+
unimpl_operator_inverted_sparse!(Vecf32Cos);
36+
unimpl_operator_inverted_sparse!(Vecf32Dot);
37+
unimpl_operator_inverted_sparse!(Vecf32L2);
38+
unimpl_operator_inverted_sparse!(Vecf16Cos);
39+
unimpl_operator_inverted_sparse!(Vecf16Dot);
40+
unimpl_operator_inverted_sparse!(Vecf16L2);

0 commit comments

Comments
 (0)