-
Notifications
You must be signed in to change notification settings - Fork 718
Closed
Description
Knn::CosineDistance index optimisation doesn't work when the target vector argument has a nullable (optional) type
-- Table under test: rows are keyed by id; the vector is kept in `embedding`.
create table facts (
    id        uint64,
    embedding string,
    primary key (id)
);
-- Seed one row: a 4-element float vector serialized by Knn::ToBinaryStringFloat,
-- with the 'FloatVector' tag removed via Untag.
insert into facts (id, embedding)
values (
    123,
    Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector')
);
-- Add a global vector_kmeans_tree index on `embedding` configured for cosine distance.
alter table facts
    add index my_index global using vector_kmeans_tree
    on (embedding)
    with (
        distance=cosine,
        vector_type=float,
        vector_dimension=4,
        clusters=4
    );
-- Works: indexed query whose target vector is built with Knn::ToBinaryStringFloat + Untag.
select *
from facts view my_index
order by
    Knn::CosineDistance(
        embedding,
        Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector')
    )
limit 10;
-- OK: compares the hex-decoded literal with the vector serialized by Knn::ToBinaryStringFloat.
select
    String::HexDecode('0000803F00000040000040400000804001')
    == Untag(Knn::ToBinaryStringFloat([1.f, 2.f, 3.f, 4.f]), 'FloatVector');
-- Does not work: indexed query whose target vector is the raw String::HexDecode result.
-- Reported error:
-- "Given predicate is not suitable for used index: my_index, because sorting doesn't contain distance"
select *
from facts view my_index
order by
    Knn::CosineDistance(
        embedding,
        String::HexDecode('0000803F00000040000040400000804001')
    )
limit 10;
-- Works but with a full scan: same ordering expression, queried without `view my_index`.
select *
from facts
order by
    Knn::CosineDistance(
        embedding,
        String::HexDecode('0000803F00000040000040400000804001')
    )
limit 10;
-- Works: indexed query once the String::HexDecode result is wrapped in Unwrap.
select *
from facts view my_index
order by
    Knn::CosineDistance(
        embedding,
        Unwrap(String::HexDecode('0000803F00000040000040400000804001'))
    )
limit 10;
Metadata
Metadata
Assignees
Labels
No labels