Skip to content

Commit

Permalink
chore: more fuzzing improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Nov 5, 2023
1 parent e73778f commit 2ff8fcb
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 49 deletions.
1 change: 1 addition & 0 deletions pomsky-lib/afl-fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
target
out
out.txt
errors.txt
log.txt
1 change: 1 addition & 0 deletions pomsky-lib/afl-fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pomsky-lib/afl-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ edition = "2021"
[dependencies]
afl = "0.14.3"
arbitrary = "1.3.2"
regex = "1"
regex-test = { path = "../../regex-test" }
pomsky = { path = "..", features = ["arbitrary"] }

Expand Down
5 changes: 5 additions & 0 deletions pomsky-lib/afl-fuzz/ignored_errors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Ruby|Oniguruma error: never ending recursion
Rust|empty character classes are not allowed
Rust|Compiled regex exceeds size limit
PCRE|error compiling pattern at offset \d+: lookbehind assertion is not fixed length
Py|look-behind requires fixed-width pattern
3 changes: 2 additions & 1 deletion pomsky-lib/afl-fuzz/justfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Runs `cargo afl tmin` on the file given as `input`, writing the result to out.txt.
# NOTE(review): `rm log.txt` has no `-f`, so it presumably fails (and stops the recipe) when
# log.txt does not exist yet — confirm, and consider `rm -f log.txt`.
tmin input:
AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz
rm log.txt
FUZZ_LOG=1 AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz
162 changes: 115 additions & 47 deletions pomsky-lib/afl-fuzz/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,89 @@
use std::sync::OnceLock;
use std::collections::HashMap;
use std::fs::{self, File, OpenOptions};
use std::io::Write as _;
use std::path::Path;
use std::{env, sync::OnceLock};

use arbitrary::{Arbitrary, Unstructured};
use pomsky::{features::PomskyFeatures, options::RegexFlavor, Expr};
use pomsky::{options::RegexFlavor, Expr};
use regex::RegexSet;
use regex_test::{Outcome, RegexTest};

/// Returns the shared [`RegexTest`] instance, constructing it with `RegexTest::new` the
/// first time this function is called and handing out the same reference afterwards.
fn get_test() -> &'static RegexTest {
    static SHARED: OnceLock<RegexTest> = OnceLock::new();
    SHARED.get_or_init(RegexTest::new)
}

// Logging helper, invoked in this file as `debug!(file, "format string", args...)`.
// NOTE(review): this span appears to come from a rendered diff, so arms belonging to
// different revisions of the macro are presumably shown together here — confirm against
// the real file before relying on the exact set of arms.
#[allow(unused)]
macro_rules! debug {
// `debug!(type)` expands to `()`.
(type) => {
()
};
// `debug!(init: input, options)` expands to `()`; both arguments are discarded.
(init: $input:expr, $options:expr) => {
()
// Writes the format string and arguments to the file if `$file` matches `Some(f)`, and
// panics via `unwrap` if the write fails; does nothing when the pattern does not match.
($file:expr $(, $s:expr)* $(,)?) => {
if let Some(f) = $file {
write!(f $(, $s)*).unwrap();
}
};
// Accepts the same arguments as the arm above but expands to nothing.
($file:expr $(, $s:expr)* $(,)?) => {};
}

// File-backed variant of `debug!`. It is gated behind `#[cfg(FALSE)]`; per the trailing
// comment on the attribute, commenting the attribute out enables this variant.
#[cfg(FALSE)] // comment this attribute to enable debugging while using `cargo afl tmin`
macro_rules! debug {
// `debug!(type)` expands to the type of the log file handle.
(type) => { std::fs::File };
// Opens `./log.txt` for appending (creating it if needed), writes a header containing the
// `Debug` form of the input and the options, and evaluates to the opened file.
// Panics if opening or writing fails.
(init: $input:expr, $options:expr) => {{
let mut file = std::fs::OpenOptions::new().create(true).append(true).open("./log.txt").unwrap();
use std::io::Write as _;
write!(file, "\n{:?} -- {:?}\n", $input, $options).unwrap();
file
}};
// Writes a formatted message to `$file`; panics if the write fails.
($file:expr $(, $s:expr)* $(,)?) => {{
use std::io::Write as _;
write!($file $(, $s)*).unwrap();
}};
}
// Fuzzing entry point: turns the raw bytes supplied by `afl::fuzz!` into a Pomsky input and
// compile options, compiles the input, and hands the resulting regex to `check`.
// NOTE(review): this span appears to come from a rendered diff, so statements from two
// revisions of `main` are presumably shown together (note the two `fn main() {` lines and
// the hunk marker in the middle) — confirm against the real file.
fn main() {
// The log file `./log.txt` is only opened when the `FUZZ_LOG` environment variable is "1".
let mut f = if let Ok("1") = env::var("FUZZ_LOG").as_deref() {
let file = OpenOptions::new().create(true).append(true).open("./log.txt").unwrap();
Some(file)
} else {
None
};
// `./errors.txt` is always opened for appending (created if needed); panics on failure.
let mut ef = Some(OpenOptions::new().create(true).append(true).open("./errors.txt").unwrap());
// Rebind both handles as `&mut` references; these are what `debug!` and `check` receive.
let f = &mut f;
let ef = &mut ef;

type DebugFile = debug!(type);
// Load the per-flavor ignore patterns once, before entering `afl::fuzz!`.
let ignored_errors = parse_ignored_errors();

fn main() {
afl::fuzz!(|data: &[u8]| {
let mut u = Unstructured::new(data);
// Nothing happens for byte sequences from which no (input, options) pair can be derived.
if let Ok((input, compile_options)) = Arbitrary::arbitrary(&mut u) {
#[allow(clippy::let_unit_value)]
let mut _f = debug!(init: input, compile_options);
// Fixes the type of `input` to `&str`.
let _: &str = input;
// Rebuild the input, replacing the characters listed below with fixed text snippets;
// every other character is copied unchanged.
let input: String = input.chars().fold(String::new(), |mut acc, c| match c {
// increase likelihood of generating these key words and important sequences by chance
'à' => acc + " Codepoint ",
'á' => acc + " Grapheme ",
'â' => acc + " Start ",
'ã' => acc + " End ",
'ä' => acc + " lazy ",
'å' => acc + " greedy ",
'æ' => acc + " enable ",
'ç' => acc + " disable ",
'è' => acc + " unicode ",
'é' => acc + " test {",
'ê' => acc + " match ",
'ë' => acc + " reject ",
'ì' => acc + " in ",
'í' => acc + " as ",
'î' => acc + " if ",
'ï' => acc + " else ",
'ð' => acc + " regex ",
'ñ' => acc + " recursion ",
'ò' => acc + " range ",
'ó' => acc + " base ",
'ô' => acc + " let ",
'õ' => acc + " U+1FEFF ",
'ö' => acc + ":bla(",
'ø' => acc + "::bla ",
'ù' => acc + "<< ",
'ú' => acc + ">> ",
'û' => acc + "'test'",
'ü' => acc + "atomic",
'ý' => acc + " U+FEFF ",
// 'þ' => acc + "",
// 'ÿ' => acc + "",
_ => {
acc.push(c);
acc
}
});

let result = Expr::parse_and_compile(input, compile_options);
// Each log entry starts with a newline followed by the input and the options.
debug!(f, "\n{:?} -- {:?}\n", input, compile_options);

let result = Expr::parse_and_compile(&input, compile_options);

// Only results whose first element is `Some(regex)` are examined further.
if let (Some(regex), _warnings, _tests) = result {
debug!(_f, " compiled;");
debug!(f, " compiled;");

let features = compile_options.allowed_features;

Expand All @@ -59,42 +95,74 @@ fn main() {
// The regex must be non-empty, and `features` must be unchanged by `.regexes(false)` —
// presumably meaning the `regex` feature is not enabled (see the skip message below).
&& !regex.is_empty()
&& features == { features }.regexes(false)
{
debug!(_f, " check");
check(&regex, features, compile_options.flavor, _f);
debug!(f, " check");
check(&regex, &ignored_errors, compile_options.flavor, f, ef);
} else {
debug!(_f, " skipped (too long or `regex` feature enabled)\n");
debug!(f, " SKIPPED (too long or `regex` feature enabled)");
}
} else {
debug!(_f, " returned error\n");
debug!(f, " returned error");
}
}
});
}

// Runs `regex` through the `RegexTest` method that corresponds to `flavor` and panics if an
// error is reported that is not explicitly ignored.
// NOTE(review): this span appears to come from a rendered diff, so two revisions of the
// signature and body are presumably shown together — confirm against the real file.
fn check(regex: &str, features: PomskyFeatures, flavor: RegexFlavor, mut _f: DebugFile) {
fn check(
regex: &str,
ignored_errors: &HashMap<RegexFlavor, RegexSet>,
flavor: RegexFlavor,
f: &mut Option<File>,
ef: &mut Option<File>,
) {
let test = get_test();
// Flavors without an arm below are treated as `Outcome::Success`.
let outcome = match flavor {
// Pomsky currently doesn't check if lookbehind has repetitions, so we don't check some
// regexes
RegexFlavor::Java if features == { features }.lookbehind(false) => test.test_java(regex),
RegexFlavor::Java => test.test_java(regex),
RegexFlavor::JavaScript => test.test_js(regex),
RegexFlavor::Ruby => test.test_ruby(regex),
RegexFlavor::Rust => test.test_rust(regex),
RegexFlavor::Python if features == { features }.lookbehind(false) => {
test.test_python(regex)
}
RegexFlavor::Pcre if features == { features }.lookbehind(false) => test.test_pcre(regex),
RegexFlavor::Python => test.test_python(regex),
RegexFlavor::Pcre => test.test_pcre(regex),
RegexFlavor::DotNet => test.test_dotnet(regex),
_ => Outcome::Success,
};
if let Outcome::Error(e) = outcome {
// Hard-coded exemption: a Rust error ending in this message returns without panicking.
if flavor == RegexFlavor::Rust
&& e.trim().ends_with("error: empty character classes are not allowed")
{
// This is on my radar, but more difficult to fix!
return;
let e = e.trim();
// If any pattern registered for this flavor matches the trimmed error, log it and return.
if let Some(ignored_errors) = ignored_errors.get(&flavor) {
if ignored_errors.is_match(e) {
debug!(f, " {regex:?} ({flavor:?}) ERROR IGNORED: {e}");
return;
}
}
debug!(_f, " {regex:?} ({flavor:?}) failed:\n{e}");

// Record the failure as `flavor|regex|error` in the error file, log it, then panic.
debug!(ef, "{flavor:?}|{regex:?}|{e}\n");
debug!(f, " {regex:?} ({flavor:?}) failed:\n{e}");
panic!("Regex {regex:?} is invalid in the {flavor:?} flavor:\n{e}");
}
}

/// Reads `ignored_errors.txt` from the crate's manifest directory and groups its error
/// patterns by regex flavor, compiling each group into one [`RegexSet`].
///
/// Every line is split at its first `|`: the part before it names the flavor (`JS` or
/// `JavaScript`, `Java`, `Py` or `Python`, `PCRE`, `.NET` or `DotNet`, `Ruby`, `Rust`) and
/// the part after it is the pattern. Lines that contain no `|` are skipped.
///
/// # Panics
///
/// Panics if the file cannot be read, if a line names an unknown flavor, or if the patterns
/// of a flavor are rejected by `RegexSet::new`.
fn parse_ignored_errors() -> HashMap<RegexFlavor, RegexSet> {
    let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("ignored_errors.txt");
    let contents = fs::read_to_string(path).unwrap();

    // First pass: collect the raw pattern strings for each flavor.
    let mut patterns_by_flavor: HashMap<RegexFlavor, Vec<&str>> = HashMap::new();
    for line in contents.lines() {
        let Some((name, pattern)) = line.split_once('|') else {
            continue;
        };
        let flavor = match name {
            "JS" | "JavaScript" => RegexFlavor::JavaScript,
            "Java" => RegexFlavor::Java,
            "Py" | "Python" => RegexFlavor::Python,
            "PCRE" => RegexFlavor::Pcre,
            ".NET" | "DotNet" => RegexFlavor::DotNet,
            "Ruby" => RegexFlavor::Ruby,
            "Rust" => RegexFlavor::Rust,
            invalid => panic!("Flavor {invalid} is invalid"),
        };
        patterns_by_flavor.entry(flavor).or_default().push(pattern);
    }

    // Second pass: compile each flavor's patterns into a single set.
    let mut compiled = HashMap::new();
    for (flavor, patterns) in patterns_by_flavor {
        compiled.insert(flavor, RegexSet::new(patterns).unwrap());
    }
    compiled
}
2 changes: 1 addition & 1 deletion pomsky-lib/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl Default for CompileOptions {

/// A regex flavor is a regex engine or a set of regex engines that are similar
/// enough that they can be treated the same for the purpose of writing regexes.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum RegexFlavor {
Expand Down

0 comments on commit 2ff8fcb

Please sign in to comment.