Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment ac rust/v2 #9896

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/Cargo.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ brotli = "~3.4.0"
hkdf = "~0.12.3"
aes = "~0.7.5"
aes-gcm = "~0.9.4"
aho-corasick = "1.1.2"

der-parser = "~8.2.0"
kerberos-parser = { version = "~0.7.1", default_features = false }
Expand Down
157 changes: 157 additions & 0 deletions rust/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,160 @@ pub unsafe extern "C" fn rs_to_hex_sep(
// overwrites last separator with final null char
oslice[3 * islice.len() - 1] = 0;
}

use aho_corasick::AhoCorasick;
use std::collections::HashMap;

/// Settings recorded for one pattern registered with the Aho-Corasick matcher.
#[derive(Clone, Debug)]
struct AhoCorasickPatternData {
    /// Pattern bytes exactly as they were registered.
    pat: Vec<u8>,
    /// Ids (`sids`) associated with this pattern.
    sids: Vec<u32>,
    /// Whether the pattern was registered as case insensitive.
    ci: bool,
    /// Offset setting registered with the pattern.
    offset: u16,
    /// Depth setting registered with the pattern.
    depth: u16,
}

impl AhoCorasickPatternData {
    /// Bundles a pattern together with its settings.
    ///
    /// Note the argument order (`pat`, `ci`, `sids`, ...) differs from the field order.
    fn new(pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) -> Self {
        AhoCorasickPatternData {
            pat,
            sids,
            ci,
            offset,
            depth,
        }
    }
}

/// Collects patterns and their settings before the searchable state is built.
#[derive(Default)]
pub struct AhoCorasickStateBuilder {
    /// vector of patterns. The final pattern id will depend on the position in this
    /// vector, starting at 0.
    patterns: Vec<Vec<u8>>,
    /// Id handed to the next pattern that gets added; equals the number of patterns
    /// added so far.
    pattern_id: u32,
    /// Hash of patterns with their settings, keyed by pattern id. Will be copied to
    /// `AhoCorasickState` in the prepare step.
    pattern_data: HashMap<u32, AhoCorasickPatternData>,
    /// track if we have case insensitive patterns. If so, we need to tell AC and
    /// do a bit more work in validation.
    has_ci: bool,
}

impl AhoCorasickStateBuilder {
    /// Creates an empty builder.
    fn new() -> Self {
        Self::default()
    }

    /// Registers one pattern together with its settings.
    ///
    /// The pattern receives the next free id, which matches its position in
    /// `patterns`. `has_ci` is latched to true as soon as one case insensitive
    /// pattern is added.
    fn add_pattern(&mut self, pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) {
        self.has_ci |= ci;

        let pattern_id = self.pattern_id;
        self.pattern_id += 1;

        // The pattern list needs its own copy; the original buffer is then moved into
        // the settings entry instead of being cloned a second time.
        self.patterns.push(pat.clone());
        self.pattern_data.insert(
            pattern_id,
            AhoCorasickPatternData::new(pat, ci, sids, offset, depth),
        );
    }
}

/// Allocates an empty `AhoCorasickStateBuilder` on the heap and returns it to the
/// caller as an opaque pointer.
///
/// Ownership moves to the caller; the pointer is meant to be released with
/// `rs_mpm_acrs_free_builder`.
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_new_builder() -> *mut std::os::raw::c_void {
    let builder = Box::new(AhoCorasickStateBuilder::new());
    Box::into_raw(builder) as *mut std::os::raw::c_void
}

/// Releases a builder previously returned by `rs_mpm_acrs_new_builder`.
///
/// A NULL pointer is accepted and ignored. Any other pointer must have come from
/// `rs_mpm_acrs_new_builder` and must not be used again after this call.
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_free_builder(state: *mut std::os::raw::c_void) {
    // Reconstructing a Box from NULL is undefined behaviour, so bail out first.
    if state.is_null() {
        return;
    }
    // SAFETY: the pointer is non-NULL and, per the contract above, was produced by
    // `Box::into_raw` on an `AhoCorasickStateBuilder` and has not been freed yet.
    drop(unsafe { Box::from_raw(state as *mut AhoCorasickStateBuilder) });
}

/// Copies a pattern and its sid list out of caller memory and registers them with
/// the builder.
///
/// Returns 0 on success and -1 if `pat` is NULL, or if `sids` is NULL while
/// `sids_len` is non-zero. A NULL `sids` with `sids_len == 0` is treated as an empty
/// sid list.
///
/// # Safety
///
/// When non-NULL, `pat` must point to `pat_len` readable bytes and `sids` to
/// `sids_len` readable `u32` values. Both buffers are copied, so they only need to
/// stay valid for the duration of the call.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_add_pattern(state: &mut AhoCorasickStateBuilder,
    pat: *mut u8, pat_len: u16, sids: *mut u32, sids_len: u32, ci: bool, offset: u16, depth: u16) -> i32 {
    // Building a slice from a NULL pointer is undefined behaviour even for length 0,
    // so reject unusable input before touching the buffers.
    if pat.is_null() || (sids.is_null() && sids_len > 0) {
        return -1;
    }
    // SAFETY: `pat` is non-NULL and the caller guarantees `pat_len` readable bytes.
    let p = unsafe { build_slice!(pat, pat_len as usize) };
    let s: &[u32] = if sids.is_null() {
        &[]
    } else {
        // SAFETY: `sids` is non-NULL and the caller guarantees `sids_len` readable values.
        unsafe { build_slice!(sids, sids_len as usize) }
    };
    state.add_pattern(p.to_vec(), ci, s.to_vec(), offset, depth);
    0
}

/// Searchable matcher state built from an `AhoCorasickStateBuilder`.
pub struct AhoCorasickState {
    /// Number of patterns the automaton was built from.
    pattern_cnt: u32,
    /// Per-pattern settings, keyed by pattern id.
    pattern_data: HashMap<u32, AhoCorasickPatternData>,
    /// True if at least one pattern is case insensitive. The automaton is then built
    /// case insensitive as a whole and case sensitive patterns are re-checked in
    /// `search`.
    has_ci: bool,
    /// The compiled automaton.
    ac: AhoCorasick,
}

impl AhoCorasickState {
    /// build the AC state from the builder
    ///
    /// The builder is left untouched; its pattern settings are cloned into the new
    /// state.
    ///
    /// # Panics
    ///
    /// Panics if the automaton cannot be built from the builder's patterns.
    fn prepare(builder: &AhoCorasickStateBuilder) -> Self {
        let ac = AhoCorasick::builder()
            .ascii_case_insensitive(builder.has_ci)
            .build(&builder.patterns)
            .expect("failed to build Aho-Corasick automaton from the registered patterns");
        Self {
            pattern_cnt: builder.pattern_id,
            pattern_data: builder.pattern_data.clone(),
            has_ci: builder.has_ci,
            ac,
        }
    }

    /// Search for the patterns. Returns number of matches.
    /// Per pattern found sids are only appended once.
    ///
    /// The returned count includes repeated hits of an already reported pattern and
    /// hits that are later rejected by the offset/depth checks; only hits failing the
    /// exact-case check are excluded.
    ///
    /// TODO review match_cnt logic. In general it's tuned to the unittests now, but it leads to
    /// some inefficiency. Could make sense to check the bool array first instead of doing the
    /// hash map lookup.
    fn search(&self, haystack: &[u8], sids: &mut Vec<u32>) -> u32 {
        SCLogDebug!("haystack {:?}: looking for {} patterns. Has CI {}", haystack, self.pattern_cnt, self.has_ci);
        let mut match_cnt = 0;
        // one flag per pattern id, set once that pattern's sids have been reported
        let mut matches = vec![false; self.pattern_cnt as usize];
        for mat in self.ac.find_overlapping_iter(haystack) {
            let pat_id = mat.pattern();
            let data = self
                .pattern_data
                .get(&pat_id.as_u32())
                .expect("every pattern id known to the automaton has a settings entry");
            // The automaton is case insensitive as soon as one pattern is, so a case
            // sensitive pattern has to be compared byte for byte here.
            if self.has_ci && !data.ci {
                let found = &haystack[mat.start()..mat.end()];
                if found != data.pat {
                    SCLogDebug!("pattern {:?} failed: not an exact match", pat_id);
                    continue;
                }
            }
            match_cnt += 1;

            /* bail if we found this pattern before */
            // TODO would prefer to do this first, but this messes up match_cnt.
            if matches[pat_id] {
                SCLogDebug!("pattern {:?} already found", pat_id);
                continue;
            }
            /* enforce offset and depth */
            if data.offset as usize > mat.start() {
                SCLogDebug!("pattern {:?} failed: found before offset", pat_id);
                continue;
            }
            // depth 0 means "no depth limit"
            if data.depth != 0 && mat.end() > data.depth as usize {
                SCLogDebug!("pattern {:?} failed: after depth", pat_id);
                continue;
            }
            matches[pat_id] = true;
            SCLogDebug!("match! {:?}: {:?}", pat_id, data);
            // copy straight from the stored list; no temporary Vec needed
            sids.extend_from_slice(&data.sids);
        }
        match_cnt
    }
}

/// Builds the searchable `AhoCorasickState` from `builder` and returns it to the
/// caller as an opaque heap pointer.
///
/// The builder is only borrowed. The returned pointer is owned by the caller and is
/// meant to be released with `rs_mpm_acrs_state_free`.
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_prepare_builder(builder: &AhoCorasickStateBuilder) -> *mut std::os::raw::c_void {
    let prepared = Box::new(AhoCorasickState::prepare(builder));
    Box::into_raw(prepared) as *mut std::os::raw::c_void
}
/// Releases a state previously returned by `rs_mpm_acrs_prepare_builder`.
///
/// A NULL pointer is accepted and ignored. Any other pointer must have come from
/// `rs_mpm_acrs_prepare_builder` and must not be used again after this call.
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_state_free(state: *mut std::os::raw::c_void) {
    // Reconstructing a Box from NULL is undefined behaviour, so bail out first.
    if state.is_null() {
        return;
    }
    // SAFETY: the pointer is non-NULL and, per the contract above, was produced by
    // `Box::into_raw` on an `AhoCorasickState` and has not been freed yet.
    drop(unsafe { Box::from_raw(state as *mut AhoCorasickState) });
}

/// Searches `data` for all patterns in `state` and reports the collected sids through
/// `cb`.
///
/// `cb` is invoked at most once, and only if at least one sid was collected. It
/// receives `cbdata`, a pointer to the sid array and the array length. The array is
/// only valid for the duration of the callback.
///
/// Returns the match count computed by `AhoCorasickState::search`, or 0 if `data` is
/// NULL.
///
/// # Safety
///
/// When non-NULL, `data` must point to `data_len` readable bytes. `cb` must be safe
/// to call with `cbdata` and the sid array described above.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_search(state: &AhoCorasickState, data: *const u8, data_len: u32,
    cb: unsafe extern "C" fn(*mut std::os::raw::c_void, *const u32, u32),
    cbdata: *mut std::os::raw::c_void) -> u32
{
    // Building a slice from a NULL pointer is undefined behaviour even for length 0.
    if data.is_null() {
        return 0;
    }
    let mut sids: Vec<u32> = Vec::new();
    // SAFETY: `data` is non-NULL and the caller guarantees `data_len` readable bytes.
    let data = unsafe { build_slice!(data, data_len as usize) };
    let matches = state.search(data, &mut sids);
    if !sids.is_empty() {
        // SAFETY: `sids` outlives the call and the pointer/length pair describes it
        // exactly; the caller vouches for `cb` and `cbdata`.
        unsafe { cb(cbdata, sids.as_ptr(), sids.len() as u32); };
    }
    matches
}
2 changes: 2 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ noinst_HEADERS = \
util-mpm-ac-bs.h \
util-mpm-ac.h \
util-mpm-ac-ks.h \
util-mpm-ac-rs.h \
util-mpm.h \
util-mpm-hs.h \
util-napatech.h \
Expand Down Expand Up @@ -1181,6 +1182,7 @@ libsuricata_c_a_SOURCES = \
util-mpm-ac.c \
util-mpm-ac-ks.c \
util-mpm-ac-ks-small.c \
util-mpm-ac-rs.c \
util-mpm.c \
util-mpm-hs.c \
util-napatech.c \
Expand Down
2 changes: 2 additions & 0 deletions src/util-mpm-ac-bs.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,8 @@ int SCACBSPreparePatterns(MpmCtx *mpm_ctx)
/* ACPatternList now owns this memory */
ctx->pid_pat_list[ctx->parray[i]->id].sids_size = ctx->parray[i]->sids_size;
ctx->pid_pat_list[ctx->parray[i]->id].sids = ctx->parray[i]->sids;
ctx->parray[i]->sids = NULL;
ctx->parray[i]->sids_size = 0;
}

/* prepare the state table required by AC */
Expand Down
Loading
Loading