diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 05dfe6fca7510..16bb02c209096 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,6 +32,8 @@ jobs: # Flag that is raised when any code is changed # This is superset of the linter and formatter code: ${{ steps.changed.outputs.code_any_changed }} + # Flag that is raised when any code that affects the fuzzer is changed + fuzz: ${{ steps.changed.outputs.fuzz_any_changed }} steps: - uses: actions/checkout@v4 with: @@ -79,6 +81,11 @@ jobs: - python/** - .github/workflows/ci.yaml + fuzz: + - fuzz/Cargo.toml + - fuzz/Cargo.lock + - fuzz/fuzz_targets/** + code: - "**/*" - "!**/*.md" @@ -287,7 +294,7 @@ jobs: name: "cargo fuzz build" runs-on: ubuntu-latest needs: determine_changes - if: ${{ github.ref == 'refs/heads/main' }} + if: ${{ github.ref == 'refs/heads/main' || needs.determine_changes.outputs.fuzz == 'true' }} timeout-minutes: 10 steps: - uses: actions/checkout@v4 diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 24c0e57e3e85b..02eb20df65b63 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"] cargo-fuzz = true [dependencies] +red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" } +red_knot_vendored = { path = "../crates/red_knot_vendored" } +ruff_db = { path = "../crates/ruff_db" } ruff_linter = { path = "../crates/ruff_linter" } ruff_python_ast = { path = "../crates/ruff_python_ast" } ruff_python_codegen = { path = "../crates/ruff_python_codegen" } @@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"} ruff_text_size = { path = "../crates/ruff_text_size" } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } +salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" } similar = { version = "2.5.0" } +tracing = { version = "0.1.40" } # Prevent this from interfering 
with workspaces [workspace] members = ["."] +[[bin]] +name = "red_knot_check_invalid_syntax" +path = "fuzz_targets/red_knot_check_invalid_syntax.rs" + [[bin]] name = "ruff_parse_simple" path = "fuzz_targets/ruff_parse_simple.rs" diff --git a/fuzz/README.md b/fuzz/README.md index 1b91e57d05c1f..5265149b8d8bf 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect them in different ways. While there is implementation-specific documentation in the source code itself, each harness is briefly described below. +### `red_knot_check_invalid_syntax` + +This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source +file with invalid syntax. This rejects any corpus entries that are already valid Python code. +Currently, this is limited to syntax errors that are produced by Ruff's Python parser, which means +that it does not cover all possible syntax errors (). +A possible workaround for now would be to bypass the parser and run the type checker on all inputs +regardless of syntax errors. + ### `ruff_parse_simple` This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing diff --git a/fuzz/corpus/red_knot_check_invalid_syntax b/fuzz/corpus/red_knot_check_invalid_syntax new file mode 120000 index 0000000000000..38dc5bc1ea310 --- /dev/null +++ b/fuzz/corpus/red_knot_check_invalid_syntax @@ -0,0 +1 @@ +ruff_fix_validity \ No newline at end of file diff --git a/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs new file mode 100644 index 0000000000000..70e4fc55fab26 --- /dev/null +++ b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs @@ -0,0 +1,143 @@ +//! Fuzzer harness that runs the type checker to catch panics for source code containing +//! syntax errors. 
+ +#![no_main] + +use std::sync::{Mutex, OnceLock}; + +use libfuzzer_sys::{fuzz_target, Corpus}; + +use red_knot_python_semantic::types::check_types; +use red_knot_python_semantic::{ + Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings, +}; +use ruff_db::files::{system_path_to_file, File, Files}; +use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem}; +use ruff_db::vendored::VendoredFileSystem; +use ruff_db::{Db as SourceDb, Upcast}; +use ruff_python_parser::{parse_unchecked, Mode}; + +/// Database that can be used for testing. +/// +/// Uses an in memory filesystem and it stubs out the vendored files by default. +#[salsa::db] +struct TestDb { + storage: salsa::Storage<Self>, + files: Files, + system: TestSystem, + vendored: VendoredFileSystem, + events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>, +} + +impl TestDb { + fn new() -> Self { + Self { + storage: salsa::Storage::default(), + system: TestSystem::default(), + vendored: red_knot_vendored::file_system().clone(), + events: std::sync::Arc::default(), + files: Files::default(), + } + } +} + +#[salsa::db] +impl SourceDb for TestDb { + fn vendored(&self) -> &VendoredFileSystem { + &self.vendored + } + + fn system(&self) -> &dyn System { + &self.system + } + + fn files(&self) -> &Files { + &self.files + } +} + +impl DbWithTestSystem for TestDb { + fn test_system(&self) -> &TestSystem { + &self.system + } + + fn test_system_mut(&mut self) -> &mut TestSystem { + &mut self.system + } +} + +impl Upcast<dyn SourceDb> for TestDb { + fn upcast(&self) -> &(dyn SourceDb + 'static) { + self + } + fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) { + self + } +} + +#[salsa::db] +impl SemanticDb for TestDb { + fn is_file_open(&self, file: File) -> bool { + !file.path(self).is_vendored_path() + } +} + +#[salsa::db] +impl salsa::Database for TestDb { + fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) { + let event = event(); + tracing::trace!("event: {:?}", event); + let mut events = 
self.events.lock().unwrap(); + events.push(event); + } +} + +fn setup_db() -> TestDb { + let db = TestDb::new(); + + let src_root = SystemPathBuf::from("/src"); + db.memory_file_system() + .create_directory_all(&src_root) + .unwrap(); + + Program::from_settings( + &db, + &ProgramSettings { + target_version: PythonVersion::default(), + search_paths: SearchPathSettings::new(src_root), + }, + ) + .expect("Valid search path settings"); + + db +} + +static TEST_DB: OnceLock<Mutex<TestDb>> = OnceLock::new(); + +fn do_fuzz(case: &[u8]) -> Corpus { + let Ok(code) = std::str::from_utf8(case) else { + return Corpus::Reject; + }; + + let parsed = parse_unchecked(code, Mode::Module); + if parsed.is_valid() { + return Corpus::Reject; + } + + let mut db = TEST_DB + .get_or_init(|| Mutex::new(setup_db())) + .lock() + .unwrap(); + + for path in &["/src/a.py", "/src/a.pyi"] { + db.write_file(path, code).unwrap(); + let file = system_path_to_file(&*db, path).unwrap(); + check_types(&*db, file); + db.memory_file_system().remove_file(path).unwrap(); + file.sync(&mut *db); + } + + Corpus::Keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 77d4cdfcb30bf..a652897b267d6 100755 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -11,16 +11,32 @@ fi if [ ! -d corpus/ruff_fix_validity ]; then mkdir -p corpus/ruff_fix_validity - read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r - echo - cd corpus/ruff_fix_validity - if [[ $REPLY =~ ^[Yy]$ ]]; then - curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz + + ( + cd corpus/ruff_fix_validity + + read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) 
[Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz + fi + + # Build a smaller corpus in addition to the (optional) larger corpus + curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz + cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace" + cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter" + cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter" + cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser" + + # Delete all non-Python files + find . -type f -not -name "*.py" -delete + ) + + if [[ "$OSTYPE" == "darwin"* ]]; then + cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5 + else + cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 fi - curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz - cp -r "../../../crates/ruff_linter/resources/test" . - cd - - cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 fi echo "Done! You are ready to fuzz."