Skip to content

Commit

Permalink
Add function that adds #index=hash to Zeek fields
Browse files Browse the repository at this point in the history
  • Loading branch information
mavam committed Jan 8, 2020
1 parent a5a4617 commit cc1b8e6
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions libvast/src/format/zeek.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,23 @@ void print_header(const type& t, std::ostream& out) {
out << '\n';
}

/// Marks Zeek connection/file identifier columns for hash indexing.
///
/// Walks the fields of `layout` in order and, for every field whose type holds
/// a `string_type` and whose name is exactly "uid" or "fuid", emits a debug
/// message and assigns the attribute `index=hash` to the field's type.
///
/// @param layout The record layout whose fields are modified in place.
void add_hash_index_attribute(record_type& layout) {
  // TODO: this name-based heuristic is deliberately minimal and should be
  // extended. Known gaps:
  //   - zeek.files.conn_uids is a set of strings; there the *inner* index
  //     needs to carry the #index=hash tag.
  //   - zeek.x509.id uses the name "id" instead of "uid".
  // There are many more cases that still need to be considered.
  for (auto& field : layout.fields) {
    // Only string-typed columns are candidates.
    if (!caf::holds_alternative<string_type>(field.type))
      continue;
    // Of those, only the well-known identifier names qualify.
    if (field.name != "uid" && field.name != "fuid")
      continue;
    VAST_DEBUG_ANON("using hash index for field", field.name);
    // NOTE(review): presumably this sets the field type's attribute list to
    // exactly {index=hash} -- confirm against type::attributes.
    field.type.attributes({{"index", "hash"}});
  }
}

} // namespace

reader::reader(caf::atom_value table_slice_type,
Expand Down Expand Up @@ -439,6 +456,8 @@ caf::error reader::parse_header() {
std::distance(layout_.fields.begin(), i), "as event timestamp");
i->type.attributes({{"timestamp"}});
}
// Add #index=hash attribute for fields where it makes sense.
add_hash_index_attribute(layout_);
// After having modified layout attributes, we no longer make changes to the
// type and can now safely copy it.
type_ = layout_;
Expand Down

0 comments on commit cc1b8e6

Please sign in to comment.