Skip to content

Commit

Permalink
[red-knot] Add fuzzer to catch panics for invalid syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Dec 3, 2024
1 parent f1b2e85 commit 94698f8
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 9 deletions.
9 changes: 9 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"]
cargo-fuzz = true

[dependencies]
red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" }
red_knot_vendored = { path = "../crates/red_knot_vendored" }
ruff_db = { path = "../crates/ruff_db" }
ruff_linter = { path = "../crates/ruff_linter" }
ruff_python_ast = { path = "../crates/ruff_python_ast" }
ruff_python_codegen = { path = "../crates/ruff_python_codegen" }
Expand All @@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"}
ruff_text_size = { path = "../crates/ruff_text_size" }

libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" }
similar = { version = "2.5.0" }
tracing = { version = "0.1.40" }

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

[[bin]]
name = "red_knot_check_invalid_syntax"
path = "fuzz_targets/red_knot_check_invalid_syntax.rs"

[[bin]]
name = "ruff_parse_simple"
path = "fuzz_targets/ruff_parse_simple.rs"
Expand Down
9 changes: 9 additions & 0 deletions fuzz/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect
them in different ways. While there is implementation-specific documentation in the source code
itself, each harness is briefly described below.

### `red_knot_check_invalid_syntax`

This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source
file with invalid syntax. This rejects any corpus entries that is already valid Python code.
Currently, this is limited to syntax errors that's produced by Ruff's Python parser which means
that it does not cover all possible syntax errors (https://github.com/astral-sh/ruff/issues/11934).
A possible workaround for now would be to bypass the parser and run the type checker on all inputs
regardless of syntax errors.

### `ruff_parse_simple`

This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing
Expand Down
1 change: 1 addition & 0 deletions fuzz/corpus/red_knot_check_invalid_syntax
131 changes: 131 additions & 0 deletions fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//! Fuzzer harness that runs the type checker to catch for panics for source code containing
//! syntax errors.
#![no_main]

use libfuzzer_sys::{fuzz_target, Corpus};

use red_knot_python_semantic::types::check_types;
use red_knot_python_semantic::{
Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings,
};
use ruff_db::files::{system_path_to_file, File, Files};
use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use ruff_python_parser::{parse_unchecked, Mode};

/// Database that can be used for testing.
///
/// Uses an in memory filesystem and it stubs out the vendored files by default.
#[salsa::db]
struct TestDb {
storage: salsa::Storage<Self>,
files: Files,
system: TestSystem,
vendored: VendoredFileSystem,
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}

impl TestDb {
fn new() -> Self {
Self {
storage: salsa::Storage::default(),
system: TestSystem::default(),
vendored: red_knot_vendored::file_system().clone(),
events: std::sync::Arc::default(),
files: Files::default(),
}
}
}

#[salsa::db]
impl SourceDb for TestDb {
fn vendored(&self) -> &VendoredFileSystem {
&self.vendored
}

fn system(&self) -> &dyn System {
&self.system
}

fn files(&self) -> &Files {
&self.files
}
}

impl DbWithTestSystem for TestDb {
fn test_system(&self) -> &TestSystem {
&self.system
}

fn test_system_mut(&mut self) -> &mut TestSystem {
&mut self.system
}
}

impl Upcast<dyn SourceDb> for TestDb {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}

#[salsa::db]
impl SemanticDb for TestDb {
fn is_file_open(&self, file: File) -> bool {
!file.path(self).is_vendored_path()
}
}

#[salsa::db]
impl salsa::Database for TestDb {
fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
let event = event();
tracing::trace!("event: {:?}", event);
let mut events = self.events.lock().unwrap();
events.push(event);
}
}

fn setup_db() -> TestDb {
let db = TestDb::new();

let src_root = SystemPathBuf::from("/src");
db.memory_file_system()
.create_directory_all(&src_root)
.unwrap();

Program::from_settings(
&db,
&ProgramSettings {
target_version: PythonVersion::default(),
search_paths: SearchPathSettings::new(src_root),
},
)
.expect("Valid search path settings");

db
}

fn do_fuzz(case: &[u8]) -> Corpus {
let Ok(code) = std::str::from_utf8(case) else {
return Corpus::Reject;
};

let parsed = parse_unchecked(code, Mode::Module);
if parsed.is_valid() {
return Corpus::Reject;
}

let mut db = setup_db();
db.write_file("/src/a.py", code).unwrap();
let file = system_path_to_file(&db, "/src/a.py").unwrap();
check_types(&db, file);

Corpus::Keep
}

fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
34 changes: 25 additions & 9 deletions fuzz/init-fuzzer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,32 @@ fi

if [ ! -d corpus/ruff_fix_validity ]; then
mkdir -p corpus/ruff_fix_validity
read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
echo
cd corpus/ruff_fix_validity
if [[ $REPLY =~ ^[Yy]$ ]]; then
curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz

(
cd corpus/ruff_fix_validity

read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
fi

# Build a smaller corpus in addition to the (optional) larger corpus
curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz
cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace"
cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter"
cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter"
cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser"

# Delete all non-Python files
find . -type f -not -name "*.py" -delete
)

if [[ "$OSTYPE" == "darwin"* ]]; then
cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5
else
cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
fi
curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz
cp -r "../../../crates/ruff_linter/resources/test" .
cd -
cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
fi

echo "Done! You are ready to fuzz."

0 comments on commit 94698f8

Please sign in to comment.