Skip to content

Commit

Permalink
Handle arbitrary types in bloom filter synopsis
Browse files Browse the repository at this point in the history
  • Loading branch information
lava authored and Benno Evers committed May 26, 2021
1 parent a260850 commit 62c404a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
3 changes: 3 additions & 0 deletions libvast/src/partition_synopsis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ void partition_synopsis::add(const table_slice& slice,
auto add_column = [&](const synopsis_ptr& syn) {
for (size_t row = 0; row < slice.rows(); ++row) {
auto view = slice.at(row, col, type);
// TODO: It would probably make sense to allow `nil` in the
// synopsis API, so we can treat queries like `x == nil` just
// like normal queries.
if (!caf::holds_alternative<caf::none_t>(view))
syn->add(std::move(view));
}
Expand Down
15 changes: 15 additions & 0 deletions libvast/test/bloom_filter_synopsis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,18 @@ TEST(bloom filter synopsis) {
verify(make_data_view(integer{2}), {N, N, N, N, N, N, T, N, N, N, N, N});
verify(make_data_view(integer{42}), {N, N, N, N, N, N, F, N, N, N, N, N});
}

TEST(bloom filter synopsis - wrong lookup type) {
bloom_filter_parameters xs;
xs.m = 1_k;
xs.p = 0.1;
auto bf = unbox(make_bloom_filter<xxhash64>(std::move(xs)));
bloom_filter_synopsis<std::string, xxhash64> synopsis{string_type{},
std::move(bf)};
auto r1
= synopsis.lookup(relational_operator::equal, make_data_view(caf::none));
CHECK_EQUAL(r1, std::nullopt);
auto r2
= synopsis.lookup(relational_operator::equal, make_data_view(integer{17}));
CHECK_EQUAL(r2, false);
}
15 changes: 14 additions & 1 deletion libvast/vast/bloom_filter_synopsis.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class bloom_filter_synopsis : public synopsis {
}

void add(data_view x) override {
VAST_ASSERT(caf::holds_alternative<view<T>>(x), "invalid data");
bloom_filter_.add(caf::get<view<T>>(x));
}

Expand All @@ -42,12 +43,24 @@ class bloom_filter_synopsis : public synopsis {
default:
return {};
case relational_operator::equal:
// TODO: We should treat 'nil' as a normal value and
// hash it, so we can exclude synopsis where all values
// are non-nil.
if (caf::holds_alternative<view<caf::none_t>>(rhs))
return {};
if (!caf::holds_alternative<view<T>>(rhs))
return false;
return bloom_filter_.lookup(caf::get<view<T>>(rhs));
case relational_operator::in: {
if (auto xs = caf::get_if<view<list>>(&rhs)) {
for (auto x : **xs)
for (auto x : **xs) {
if (caf::holds_alternative<view<caf::none_t>>(x))
return {};
if (!caf::holds_alternative<view<T>>(x))
continue;
if (bloom_filter_.lookup(caf::get<view<T>>(x)))
return true;
}
return false;
}
return {};
Expand Down

0 comments on commit 62c404a

Please sign in to comment.