Skip to content

Commit

Permalink
Merge pull request #765
Browse files Browse the repository at this point in the history
Revert "Annotate schemas with #index=hash"
  • Loading branch information
lava authored Feb 25, 2020
2 parents 2d34ea3 + e0c62b3 commit a60fc23
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 77 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ Every entry has a category for which we use the following visual abbreviations:

## [Unreleased]

- 🔄 Hash indices have been disabled again due to a performance regression.
[#765](https://github.com/tenzir/vast/pull/765)

- 🔄 The option `--directory` has been replaced by `--db-directory` and
`--log-directory`, which set directories for persistent state and log files
respectively. The default log file path has changed from `vast.db/log` to
Expand Down
3 changes: 1 addition & 2 deletions libvast/src/format/pcap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ inline type make_packet_type() {
{"dst", address_type{}},
{"sport", port_type{}},
{"dport", port_type{}},
{"community_id",
string_type{}.attributes({{"index", "hash"}})},
{"community_id", string_type{}},
{"payload", string_type{}.attributes({{"skip"}})}}
.name("pcap.packet");
}
Expand Down
20 changes: 0 additions & 20 deletions libvast/src/format/zeek.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,24 +172,6 @@ void print_header(const type& t, std::ostream& out) {
out << '\n';
}

/// Tags selected fields of a layout with the `#index=hash` attribute.
/// A field is selected when it holds a `string_type` and is named `uid`,
/// `fuid`, or `community_id`.
/// @param layout The record layout whose fields are annotated in place.
void add_hash_index_attribute(record_type& layout) {
  // TODO: do more than this simple heuristic. For example, also consider
  // zeek.files.conn_uids, which is a set of strings. The inner index needs to
  // have the #index=hash tag. There are a lot more cases that we need to
  // consider, such as zeek.x509.id (instead of uid).
  const auto wants_hash_index = [](const auto& candidate) {
    if (!caf::holds_alternative<string_type>(candidate.type))
      return false;
    return candidate.name == "uid" || candidate.name == "fuid"
           || candidate.name == "community_id";
  };
  // Visit every field in declaration order and annotate the matching ones.
  for (auto& field : layout.fields) {
    if (!wants_hash_index(field))
      continue;
    VAST_DEBUG_ANON("using hash index for field", field.name);
    field.type.attributes({{"index", "hash"}});
  }
}

} // namespace

reader::reader(caf::atom_value table_slice_type,
Expand Down Expand Up @@ -474,8 +456,6 @@ caf::error reader::parse_header() {
std::distance(layout_.fields.begin(), i), "as event timestamp");
i->type.attributes({{"timestamp"}});
}
// Add #index=hash attribute for fields where it makes sense.
add_hash_index_attribute(layout_);
// After having modified layout attributes, we no longer make changes to the
// type and can now safely copy it.
type_ = layout_;
Expand Down
36 changes: 18 additions & 18 deletions schema/argus.schema
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ type argus.record = record{
// Standard fields that are always present.
StartTime: time #timestamp, // stime
Flgs: string, // flgs
Proto: string #index=hash, // proto
Proto: string #id, // proto
SrcAddr: string, // saddr (MAC or IP)
Sport: count, // sport (TODO: switch to port type)
Dir: string #index=hash, // dir
Dir: string #id, // dir
DstAddr: string, // daddr (MAC or IP)
Dport: count, // dport (TODO: switch to port type)
TotPkts: count, // pkts
TotBytes: count, // bytes
State: string #index=hash, // state
State: string #id, // state
// Optional fields that are present when invoking ra(1) with -s +F where F is
// the field name from the man page of ra(1).
SrcId: string #index=hash, // srcid
SrcId: string #id, // srcid
Rank: count, // rank
LastTime: time, // ltime
Trans: count, // trans
Expand All @@ -42,17 +42,17 @@ type argus.record = record{
dTos: count, // dtos
sDSb: string, // sdsb
dDSb: string, // ddsb
sCo: string #index=hash, // sco
dCo: string #index=hash, // dco
sCo: string #id, // sco
dCo: string #id, // dco
sTtl: count #max=255, // sttl
dTtl: count #max=255, // dttl
sHops: count, // shops
dHops: count, // dhops
sIpId: string #index=hash #hex, // sipid
dIpId: string #index=hash #hex, // dipid
sMpls: string #index=hash, // smpls
dMpls: string #index=hash, // dmpls
AutoId: string #index=hash, // autoid
sIpId: string #id #hex, // sipid
dIpId: string #id #hex, // dipid
sMpls: string #id, // smpls
dMpls: string #id, // dmpls
AutoId: string #id, // autoid
sAS: count, // sas
dAS: count, // das
iAS: count, // ias
Expand Down Expand Up @@ -115,12 +115,12 @@ type argus.record = record{
dstUdata: string, // duser
SrcWin: count, // swin
DstWin: count, // dwin
sVlan: string #index=hash, // svlan
dVlan: string #index=hash, // dvlan
sVid: string #index=hash, // svid
dVid: string #index=hash, // dvid
sVpri: string #index=hash, // svpri
dVpri: string #index=hash, // dvpri
sVlan: string #id, // svlan
dVlan: string #id, // dvlan
sVid: string #id, // svid
dVid: string #id, // dvid
sVpri: string #id, // svpri
dVpri: string #id, // dvpri
SRange: time, // srng
ERange: time, // erng
SrcTCPBase: count, // stcpb
Expand Down Expand Up @@ -154,5 +154,5 @@ type argus.record = record{
LDelay: duration #unit=s, // ldelay
sEnc: string, // senc
dEnc: string, // denc
IcmpId: string #index=hash // icmpid
IcmpId: string #id // icmpid
}
74 changes: 37 additions & 37 deletions schema/suricata.schema
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
type suricata.component.common = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash
community_id: string
}

type suricata.component.flow = record{
Expand All @@ -30,20 +30,20 @@ type suricata.component.app_proto = record{

type suricata.alert = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
alert: record{
app_proto: string,
action: enum{allowed, blocked},
gid: count #index=hash,
signature_id: count #index=hash,
gid: count,
signature_id: count,
rev: count,
signature: string,
category: string,
Expand All @@ -61,64 +61,64 @@ type suricata.alert = record{

type suricata.dhcp = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
dhcp: record{
type: string,
id: count #index=hash,
client_mac: string #index=hash,
id: count,
client_mac: string,
assigned_ip: addr,
client_ip: addr,
dhcp_type: string,
assigned_ip: addr,
client_id: string #index=hash,
client_id: string,
hostname: string,
params: vector<string>
}
}

type suricata.dns = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
dns: record{
type: enum{answer, query},
id: count #index=hash,
id: count,
flags: string,
rrname: string,
rrtype: string,
rcode: string,
rdata: string,
ttl: count,
tx_id: count #index=hash
tx_id: count
}
}

type suricata.http = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
http: record{
hostname: string,
url: string,
Expand All @@ -132,32 +132,32 @@ type suricata.http = record{
redirect: string,
length: count
},
tx_id: count #index=hash
tx_id: count
}

type suricata.fileinfo = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
fileinfo: record{
filename: string,
magic: string,
gaps: bool,
state: string,
md5: string #index=hash,
sha1: string #index=hash,
sha256: string #index=hash,
md5: string,
sha1: string,
sha256: string,
stored: bool,
file_id: count #index=hash,
file_id: count,
size: count,
tx_id: count #index=hash
tx_id: count
},
http: record{
hostname: string,
Expand All @@ -177,30 +177,30 @@ type suricata.fileinfo = record{

type suricata.flow = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
flow: suricata.component.flow,
app_proto: string
}

type suricata.netflow = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
netflow: record{
pkts: count,
bytes: count,
Expand All @@ -213,16 +213,16 @@ type suricata.netflow = record{

type suricata.smtp =record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
tx_id: count #index=hash,
community_id: string,
tx_id: count,
smtp: record{
helo: string,
mail_from: string,
Expand All @@ -233,32 +233,32 @@ type suricata.smtp =record{
from: string,
to: vector<string>,
attachment: vector<string>,
url: vector<string>
url: vector<string>
}
}

type suricata.tls = record{
timestamp: time #timestamp,
flow_id: count #index=hash,
flow_id: count,
pcap_cnt: count,
src_ip: addr,
src_port: port,
dest_ip: addr,
dest_port: port,
proto: string,
event_type: string,
community_id: string #index=hash,
community_id: string,
tls: record{
subject: string,
issuerdn: string,
serial: string,
fingerprint: string,
ja3: record{
hash: string #index=hash,
hash: string,
string: string
},
ja3s: record{
hash: string #index=hash,
hash: string,
string: string
},
notbefore: time,
Expand Down

0 comments on commit a60fc23

Please sign in to comment.