astral-sh · dhruvmanila · Dec 4, 2024 · Nov 28, 2024 · Dec 3, 2024 · Dec 4, 2024
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -32,6 +32,8 @@ jobs:
       # Flag that is raised when any code is changed
       # This is superset of the linter and formatter
       code: ${{ steps.changed.outputs.code_any_changed }}
+      # Flag that is raised when any code that affects the fuzzer is changed
+      fuzz: ${{ steps.changed.outputs.fuzz_any_changed }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -79,6 +81,11 @@ jobs:
               - python/**
               - .github/workflows/ci.yaml
 
+            fuzz:
+              - fuzz/Cargo.toml
+              - fuzz/Cargo.lock
+              - fuzz/fuzz_targets/**
+
             code:
               - "**/*"
               - "!**/*.md"
@@ -287,7 +294,7 @@ jobs:
     name: "cargo fuzz build"
     runs-on: ubuntu-latest
     needs: determine_changes
-    if: ${{ github.ref == 'refs/heads/main' }}
+    if: ${{ github.ref == 'refs/heads/main' || needs.determine_changes.outputs.fuzz == 'true' }}
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@v4

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"]
 cargo-fuzz = true
 
 [dependencies]
+red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" }
+red_knot_vendored = { path = "../crates/red_knot_vendored" }
+ruff_db = { path = "../crates/ruff_db" }
 ruff_linter = { path = "../crates/ruff_linter" }
 ruff_python_ast = { path = "../crates/ruff_python_ast" }
 ruff_python_codegen = { path = "../crates/ruff_python_codegen" }
@@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"}
 ruff_text_size = { path = "../crates/ruff_text_size" }
 
 libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
+salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" }
 similar = { version = "2.5.0" }
+tracing = { version = "0.1.40" }
 
 # Prevent this from interfering with workspaces
 [workspace]
 members = ["."]
 
+[[bin]]
+name = "red_knot_check_invalid_syntax"
+path = "fuzz_targets/red_knot_check_invalid_syntax.rs"
+
 [[bin]]
 name = "ruff_parse_simple"
 path = "fuzz_targets/ruff_parse_simple.rs"

diff --git a/fuzz/README.md b/fuzz/README.md
@@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect
 them in different ways. While there is implementation-specific documentation in the source code
 itself, each harness is briefly described below.
 
+### `red_knot_check_invalid_syntax`
+
+This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source
+file with invalid syntax. It rejects any corpus entry that is already valid Python code.
+Currently, this is limited to syntax errors that are produced by Ruff's Python parser, which means
+that it does not cover all possible syntax errors (<https://github.com/astral-sh/ruff/issues/11934>).
+A possible workaround for now would be to bypass the parser and run the type checker on all inputs
+regardless of syntax errors.
+
 ### `ruff_parse_simple`
 
 This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing

diff --git a/fuzz/corpus/red_knot_check_invalid_syntax b/fuzz/corpus/red_knot_check_invalid_syntax
@@ -0,0 +1 @@
+ruff_fix_validity
diff --git a/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs b/fuzz/fuzz_targets/red_knot_check_invalid_syntax.rs
@@ -0,0 +1,131 @@
+//! Fuzzer harness that runs the type checker to catch panics on source code containing
+//! syntax errors.
+
+#![no_main]
+
+use libfuzzer_sys::{fuzz_target, Corpus};
+
+use red_knot_python_semantic::types::check_types;
+use red_knot_python_semantic::{
+    Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings,
+};
+use ruff_db::files::{system_path_to_file, File, Files};
+use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem};
+use ruff_db::vendored::VendoredFileSystem;
+use ruff_db::{Db as SourceDb, Upcast};
+use ruff_python_parser::{parse_unchecked, Mode};
+
+/// Salsa database used by this fuzz target.
+///
+/// Combines a [`TestSystem`] with a clone of the vendored file system exposed by
+/// `red_knot_vendored::file_system()` (see `TestDb::new`).
+#[salsa::db]
+struct TestDb {
+    // Salsa storage for this database.
+    storage: salsa::Storage<Self>,
+    // File registry handed out by `SourceDb::files`.
+    files: Files,
+    // System implementation handed out by `SourceDb::system` and `DbWithTestSystem`.
+    system: TestSystem,
+    // Vendored file system handed out by `SourceDb::vendored`.
+    vendored: VendoredFileSystem,
+    // Log of every event reported through `salsa::Database::salsa_event`.
+    events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
+}
+
+impl TestDb {
+    /// Creates a database whose storage, file registry, system and event log are all
+    /// default-initialized, paired with a clone of the vendored file system returned by
+    /// `red_knot_vendored::file_system()`.
+    fn new() -> Self {
+        let vendored = red_knot_vendored::file_system().clone();
+
+        Self {
+            storage: Default::default(),
+            files: Default::default(),
+            system: Default::default(),
+            vendored,
+            events: Default::default(),
+        }
+    }
+}
+
+// Exposes the database's vendored file system, system and file registry through the
+// `SourceDb` trait (imported from `ruff_db` as `Db`).
+#[salsa::db]
+impl SourceDb for TestDb {
+    /// Returns the vendored file system stored in this database.
+    fn vendored(&self) -> &VendoredFileSystem {
+        &self.vendored
+    }
+
+    /// Returns the [`TestSystem`] as a `System` trait object.
+    fn system(&self) -> &dyn System {
+        &self.system
+    }
+
+    /// Returns the file registry stored in this database.
+    fn files(&self) -> &Files {
+        &self.files
+    }
+}
+
+// Gives access to the concrete `TestSystem`; `setup_db` and `do_fuzz` call
+// `memory_file_system` and `write_file` on the database, presumably provided by this trait.
+impl DbWithTestSystem for TestDb {
+    /// Returns a shared reference to the underlying [`TestSystem`].
+    fn test_system(&self) -> &TestSystem {
+        &self.system
+    }
+
+    /// Returns an exclusive reference to the underlying [`TestSystem`].
+    fn test_system_mut(&mut self) -> &mut TestSystem {
+        &mut self.system
+    }
+}
+
+// Allows the concrete database to be viewed as a `dyn SourceDb` trait object.
+impl Upcast<dyn SourceDb> for TestDb {
+    /// Returns `self` as a shared `dyn SourceDb` reference.
+    fn upcast(&self) -> &(dyn SourceDb + 'static) {
+        self
+    }
+    /// Returns `self` as an exclusive `dyn SourceDb` reference.
+    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
+        self
+    }
+}
+
+#[salsa::db]
+impl SemanticDb for TestDb {
+    /// Reports a file as open unless its path is a vendored path.
+    fn is_file_open(&self, file: File) -> bool {
+        let is_vendored = file.path(self).is_vendored_path();
+        !is_vendored
+    }
+}
+
+#[salsa::db]
+impl salsa::Database for TestDb {
+    /// Materializes the event, logs it at trace level and appends it to the event log.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the mutex guarding the event log is poisoned.
+    fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
+        let recorded = event();
+        tracing::trace!("event: {:?}", recorded);
+        self.events.lock().unwrap().push(recorded);
+    }
+}
+
+/// Builds a fresh [`TestDb`] containing a `/src` directory and a [`Program`] whose search
+/// path settings are created from that directory.
+///
+/// # Panics
+///
+/// Panics if the directory cannot be created or if the program settings are rejected.
+fn setup_db() -> TestDb {
+    let database = TestDb::new();
+    let root = SystemPathBuf::from("/src");
+
+    database
+        .memory_file_system()
+        .create_directory_all(&root)
+        .unwrap();
+
+    // `root` is moved into the settings, so the directory is created beforehand.
+    let settings = ProgramSettings {
+        target_version: PythonVersion::default(),
+        search_paths: SearchPathSettings::new(root),
+    };
+    Program::from_settings(&database, &settings).expect("Valid search path settings");
+
+    database
+}
+
+/// Runs the type checker on a single fuzz input.
+///
+/// Returns [`Corpus::Reject`] when the input is not valid UTF-8 or when the parser reports it
+/// as valid. Otherwise the source is written to `/src/a.py` in a fresh database, type checked,
+/// and [`Corpus::Keep`] is returned.
+///
+/// # Panics
+///
+/// Panics if the file cannot be written or looked up.
+fn do_fuzz(case: &[u8]) -> Corpus {
+    const TARGET_PATH: &str = "/src/a.py";
+
+    let source = match std::str::from_utf8(case) {
+        Ok(text) => text,
+        Err(_) => return Corpus::Reject,
+    };
+
+    // Only inputs with syntax errors are of interest to this harness.
+    let module = parse_unchecked(source, Mode::Module);
+    if module.is_valid() {
+        return Corpus::Reject;
+    }
+
+    let mut db = setup_db();
+    db.write_file(TARGET_PATH, source).unwrap();
+    let target = system_path_to_file(&db, TARGET_PATH).unwrap();
+    check_types(&db, target);
+
+    Corpus::Keep
+}
+
+// Registers `do_fuzz` as the fuzz target; its `Corpus` result is returned to the fuzzer.
+fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
diff --git a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh
@@ -11,16 +11,32 @@ fi
 
 if [ ! -d corpus/ruff_fix_validity ]; then
   mkdir -p corpus/ruff_fix_validity
-  read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
-  echo
-  cd corpus/ruff_fix_validity
-  if [[ $REPLY =~ ^[Yy]$ ]]; then
-    curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
+
+  (
+    cd corpus/ruff_fix_validity
+
+    read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+      curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
+    fi
+
+    # Build a smaller corpus in addition to the (optional) larger corpus
+    curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz
+    cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace"
+    cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter"
+    cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter"
+    cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser"
+
+    # Delete all non-Python files
+    find . -type f -not -name "*.py" -delete
-    find . -type f -not -name "*.py" -delete
+    find . -type f -not \( -name "*.py" -or -name "*.pyi" \) -delete
-    find . -type f -not -name "*.py" -delete
+    find . -type f -not -regex '.*\.pyi?' -delete
-    find . -type f -not -name "*.py" -delete
+    find . -type f -not \( -name "*.py" -or -name "*.pyi" \) -delete
-    find . -type f -not -name "*.py" -delete
+    find . -type f -not -regex '.*\.pyi?' -delete
+  )
+
+  if [[ "$OSTYPE" == "darwin"* ]]; then
+    cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5
+  else
+    cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
   fi
-  curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz
-  cp -r "../../../crates/ruff_linter/resources/test" .
-  cd -
-  cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
 fi
 
 echo "Done! You are ready to fuzz."