Skip to content

Vector index query doesn't work on nullable type #17746

@vitalif

Description

@vitalif

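The vector index optimisation for Knn::CosineDistance is not applied when the target vector argument has a nullable (optional) type, for example the result of String::HexDecode. The query then fails with "sorting doesn't contain distance" unless the argument is wrapped in Unwrap.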

-- Setup for the reproduction: a table keyed by id whose `embedding` column
-- stores the serialized vector as a plain string.
create table facts (
    id uint64,
    embedding string,
    primary key (id)
);

-- Insert a single row. The embedding is the 4-element float list [1, 2, 3, 4]
-- serialized with Knn::ToBinaryStringFloat; Untag removes the 'FloatVector' tag
-- from the result before it is stored in the string column.
insert into facts (id, embedding) values (123, Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector'));

-- Add a global vector_kmeans_tree index on `embedding`, configured for cosine
-- distance over 4-dimensional float vectors with 4 clusters.
alter table facts add index my_index global using vector_kmeans_tree on (embedding)
with (distance=cosine, vector_type=float, vector_dimension=4, clusters=4);

-- Works: baseline indexed query. The target vector is built in place with
-- Knn::ToBinaryStringFloat + Untag, and the query reads through `view my_index`.
select * from facts view my_index
order by Knn::CosineDistance(embedding, Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector'))
limit 10;

-- OK: sanity check that the hex literal used below decodes to the same value as
-- the serialized vector above. The hex bytes are the little-endian float32
-- values 1, 2, 3, 4 followed by a trailing 0x01 byte (presumably the
-- vector-format marker; confirm against the Knn docs).
select String::HexDecode('0000803F00000040000040400000804001') == Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector');

-- Does not work: same indexed query as the baseline, but the target vector
-- comes directly from String::HexDecode. Reported error:
-- "Given predicate is not suitable for used index: my_index, because sorting doesn't contain distance"
select * from facts view my_index
order by Knn::CosineDistance(embedding, String::HexDecode('0000803F00000040000040400000804001'))
limit 10;

-- Works but with a full scan: the same ordering expression without
-- `view my_index`, so the vector index is not requested at all.
select * from facts
order by Knn::CosineDistance(embedding, String::HexDecode('0000803F00000040000040400000804001'))
limit 10;

-- Works: workaround. Wrapping the String::HexDecode result in Unwrap makes the
-- indexed query succeed (presumably because the argument is no longer of a
-- nullable type, as the issue title suggests).
select * from facts view my_index
order by Knn::CosineDistance(embedding, Unwrap(String::HexDecode('0000803F00000040000040400000804001')))
limit 10;

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions