From 2ff8fcb37023b35fa0c533d415498d56e2173477 Mon Sep 17 00:00:00 2001 From: Ludwig Stecher Date: Sun, 5 Nov 2023 23:54:58 +0100 Subject: [PATCH] chore: more fuzzing improvements --- pomsky-lib/afl-fuzz/.gitignore | 1 + pomsky-lib/afl-fuzz/Cargo.lock | 1 + pomsky-lib/afl-fuzz/Cargo.toml | 1 + pomsky-lib/afl-fuzz/ignored_errors.txt | 5 + pomsky-lib/afl-fuzz/justfile | 3 +- pomsky-lib/afl-fuzz/src/main.rs | 162 ++++++++++++++++++------- pomsky-lib/src/options.rs | 2 +- 7 files changed, 126 insertions(+), 49 deletions(-) create mode 100644 pomsky-lib/afl-fuzz/ignored_errors.txt diff --git a/pomsky-lib/afl-fuzz/.gitignore b/pomsky-lib/afl-fuzz/.gitignore index c793eae..f592116 100644 --- a/pomsky-lib/afl-fuzz/.gitignore +++ b/pomsky-lib/afl-fuzz/.gitignore @@ -1,4 +1,5 @@ target out out.txt +errors.txt log.txt diff --git a/pomsky-lib/afl-fuzz/Cargo.lock b/pomsky-lib/afl-fuzz/Cargo.lock index 3c021bc..9adc38f 100644 --- a/pomsky-lib/afl-fuzz/Cargo.lock +++ b/pomsky-lib/afl-fuzz/Cargo.lock @@ -21,6 +21,7 @@ dependencies = [ "afl", "arbitrary", "pomsky", + "regex", "regex-test", ] diff --git a/pomsky-lib/afl-fuzz/Cargo.toml b/pomsky-lib/afl-fuzz/Cargo.toml index 693e694..fdb6c9e 100644 --- a/pomsky-lib/afl-fuzz/Cargo.toml +++ b/pomsky-lib/afl-fuzz/Cargo.toml @@ -10,6 +10,7 @@ edition = "2021" [dependencies] afl = "0.14.3" arbitrary = "1.3.2" +regex = "1" regex-test = { path = "../../regex-test" } pomsky = { path = "..", features = ["arbitrary"] } diff --git a/pomsky-lib/afl-fuzz/ignored_errors.txt b/pomsky-lib/afl-fuzz/ignored_errors.txt new file mode 100644 index 0000000..a00306a --- /dev/null +++ b/pomsky-lib/afl-fuzz/ignored_errors.txt @@ -0,0 +1,5 @@ +Ruby|Oniguruma error: never ending recursion +Rust|empty character classes are not allowed +Rust|Compiled regex exceeds size limit +PCRE|error compiling pattern at offset \d+: lookbehind assertion is not fixed length +Py|look-behind requires fixed-width pattern \ No newline at end of file diff --git 
a/pomsky-lib/afl-fuzz/justfile b/pomsky-lib/afl-fuzz/justfile index 28e6e1a..2120ff0 100644 --- a/pomsky-lib/afl-fuzz/justfile +++ b/pomsky-lib/afl-fuzz/justfile @@ -1,2 +1,3 @@ tmin input: - AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz \ No newline at end of file + rm log.txt + FUZZ_LOG=1 AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz \ No newline at end of file diff --git a/pomsky-lib/afl-fuzz/src/main.rs b/pomsky-lib/afl-fuzz/src/main.rs index 2177df5..00282dc 100644 --- a/pomsky-lib/afl-fuzz/src/main.rs +++ b/pomsky-lib/afl-fuzz/src/main.rs @@ -1,7 +1,12 @@ -use std::sync::OnceLock; +use std::collections::HashMap; +use std::fs::{self, File, OpenOptions}; +use std::io::Write as _; +use std::path::Path; +use std::{env, sync::OnceLock}; use arbitrary::{Arbitrary, Unstructured}; -use pomsky::{features::PomskyFeatures, options::RegexFlavor, Expr}; +use pomsky::{options::RegexFlavor, Expr}; +use regex::RegexSet; use regex_test::{Outcome, RegexTest}; fn get_test() -> &'static RegexTest { @@ -9,45 +14,76 @@ fn get_test() -> &'static RegexTest { TEST.get_or_init(RegexTest::new) } -#[allow(unused)] macro_rules! debug { - (type) => { - () - }; - (init: $input:expr, $options:expr) => { - () + ($file:expr $(, $s:expr)* $(,)?) => { + if let Some(f) = $file { + write!(f $(, $s)*).unwrap(); + } }; - ($file:expr $(, $s:expr)* $(,)?) => {}; } -#[cfg(FALSE)] // comment this attribute to enable debugging while using `cargo afl tmin` -macro_rules! debug { - (type) => { std::fs::File }; - (init: $input:expr, $options:expr) => {{ - let mut file = std::fs::OpenOptions::new().create(true).append(true).open("./log.txt").unwrap(); - use std::io::Write as _; - write!(file, "\n{:?} -- {:?}\n", $input, $options).unwrap(); - file - }}; - ($file:expr $(, $s:expr)* $(,)?) 
=> {{ - use std::io::Write as _; - write!($file $(, $s)*).unwrap(); - }}; -} +fn main() { + let mut f = if let Ok("1") = env::var("FUZZ_LOG").as_deref() { + let file = OpenOptions::new().create(true).append(true).open("./log.txt").unwrap(); + Some(file) + } else { + None + }; + let mut ef = Some(OpenOptions::new().create(true).append(true).open("./errors.txt").unwrap()); + let f = &mut f; + let ef = &mut ef; -type DebugFile = debug!(type); + let ignored_errors = parse_ignored_errors(); -fn main() { afl::fuzz!(|data: &[u8]| { let mut u = Unstructured::new(data); if let Ok((input, compile_options)) = Arbitrary::arbitrary(&mut u) { - #[allow(clippy::let_unit_value)] - let mut _f = debug!(init: input, compile_options); + let _: &str = input; + let input: String = input.chars().fold(String::new(), |mut acc, c| match c { + // increase likelihood of generating these key words and important sequences by chance + 'à' => acc + " Codepoint ", + 'á' => acc + " Grapheme ", + 'â' => acc + " Start ", + 'ã' => acc + " End ", + 'ä' => acc + " lazy ", + 'å' => acc + " greedy ", + 'æ' => acc + " enable ", + 'ç' => acc + " disable ", + 'è' => acc + " unicode ", + 'é' => acc + " test {", + 'ê' => acc + " match ", + 'ë' => acc + " reject ", + 'ì' => acc + " in ", + 'í' => acc + " as ", + 'î' => acc + " if ", + 'ï' => acc + " else ", + 'ð' => acc + " regex ", + 'ñ' => acc + " recursion ", + 'ò' => acc + " range ", + 'ó' => acc + " base ", + 'ô' => acc + " let ", + 'õ' => acc + " U+1FEFF ", + 'ö' => acc + ":bla(", + 'ø' => acc + "::bla ", + 'ù' => acc + "<< ", + 'ú' => acc + ">> ", + 'û' => acc + "'test'", + 'ü' => acc + "atomic", + 'ý' => acc + " U+FEFF ", + // 'þ' => acc + "", + // 'ÿ' => acc + "", + _ => { + acc.push(c); + acc + } + }); - let result = Expr::parse_and_compile(input, compile_options); + debug!(f, "\n{:?} -- {:?}\n", input, compile_options); + + let result = Expr::parse_and_compile(&input, compile_options); if let (Some(regex), _warnings, _tests) = result { - debug!(_f, " 
compiled;"); + debug!(f, " compiled;"); let features = compile_options.allowed_features; @@ -59,42 +95,74 @@ fn main() { && !regex.is_empty() && features == { features }.regexes(false) { - debug!(_f, " check"); - check(&regex, features, compile_options.flavor, _f); + debug!(f, " check"); + check(&regex, &ignored_errors, compile_options.flavor, f, ef); } else { - debug!(_f, " skipped (too long or `regex` feature enabled)\n"); + debug!(f, " SKIPPED (too long or `regex` feature enabled)"); } } else { - debug!(_f, " returned error\n"); + debug!(f, " returned error"); } } }); } -fn check(regex: &str, features: PomskyFeatures, flavor: RegexFlavor, mut _f: DebugFile) { +fn check( + regex: &str, + ignored_errors: &HashMap<RegexFlavor, RegexSet>, + flavor: RegexFlavor, + f: &mut Option<File>, + ef: &mut Option<File>, +) { let test = get_test(); let outcome = match flavor { - // Pomsky currently doesn't check if loobehind has repetitions, so we don't check some - // regexes - RegexFlavor::Java if features == { features }.lookbehind(false) => test.test_java(regex), + RegexFlavor::Java => test.test_java(regex), RegexFlavor::JavaScript => test.test_js(regex), RegexFlavor::Ruby => test.test_ruby(regex), RegexFlavor::Rust => test.test_rust(regex), - RegexFlavor::Python if features == { features }.lookbehind(false) => { - test.test_python(regex) - } - RegexFlavor::Pcre if features == { features }.lookbehind(false) => test.test_pcre(regex), + RegexFlavor::Python => test.test_python(regex), + RegexFlavor::Pcre => test.test_pcre(regex), RegexFlavor::DotNet => test.test_dotnet(regex), _ => Outcome::Success, }; if let Outcome::Error(e) = outcome { - if flavor == RegexFlavor::Rust - && e.trim().ends_with("error: empty character classes are not allowed") - { - // This is on my radar, but more difficult to fix! 
- return; + let e = e.trim(); + if let Some(ignored_errors) = ignored_errors.get(&flavor) { + if ignored_errors.is_match(e) { + debug!(f, " {regex:?} ({flavor:?}) ERROR IGNORED: {e}"); + return; + } } - debug!(_f, " {regex:?} ({flavor:?}) failed:\n{e}"); + + debug!(ef, "{flavor:?}|{regex:?}|{e}\n"); + debug!(f, " {regex:?} ({flavor:?}) failed:\n{e}"); panic!("Regex {regex:?} is invalid in the {flavor:?} flavor:\n{e}"); } } + +fn parse_ignored_errors() -> HashMap<RegexFlavor, RegexSet> { + let ignored_err_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("ignored_errors.txt"); + let ignored_errors = fs::read_to_string(ignored_err_path).unwrap(); + let ignored_errors = ignored_errors + .lines() + .filter_map(|line| { + Some(match line.split_once('|') { + Some(("JS" | "JavaScript", err)) => (RegexFlavor::JavaScript, err), + Some(("Java", err)) => (RegexFlavor::Java, err), + Some(("Py" | "Python", err)) => (RegexFlavor::Python, err), + Some(("PCRE", err)) => (RegexFlavor::Pcre, err), + Some((".NET" | "DotNet", err)) => (RegexFlavor::DotNet, err), + Some(("Ruby", err)) => (RegexFlavor::Ruby, err), + Some(("Rust", err)) => (RegexFlavor::Rust, err), + Some((invalid, _)) => panic!("Flavor {invalid} is invalid"), + None => return None, + }) + }) + .fold(HashMap::new(), |mut acc, (flavor, err)| { + let v: &mut Vec<&str> = acc.entry(flavor).or_default(); + v.push(err); + acc + }); + + ignored_errors.into_iter().map(|(k, v)| (k, RegexSet::new(v).unwrap())).collect() +} diff --git a/pomsky-lib/src/options.rs b/pomsky-lib/src/options.rs index 4a8f093..79661e2 100644 --- a/pomsky-lib/src/options.rs +++ b/pomsky-lib/src/options.rs @@ -33,7 +33,7 @@ impl Default for CompileOptions { /// A regex flavor is a regex engine or a set of regex engines that are similar /// enough that they can be treated the same for the purpose of writing regexes. 
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[non_exhaustive] pub enum RegexFlavor {