From cd3c2f773ff4dac76b5447f0e1fd71f7134a96b0 Mon Sep 17 00:00:00 2001 From: konsti Date: Fri, 15 Dec 2023 15:15:46 +0100 Subject: [PATCH] Prevent invalid utf8 indexing in cell magic detection (#9146) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The example below used to panic because we tried to split at 2 bytes in the 4-bytes character `转`. ```python def sample_func(xx): """ 转置 (transpose) """ return xx.T ``` Fixes #9145 Fixes https://github.com/astral-sh/ruff-vscode/issues/362 The second commit is a small test refactoring. --- .../jupyter/cell/unicode_magic_gh9145.json | 19 +++++ crates/ruff_notebook/src/cell.rs | 79 +++++++++---------- crates/ruff_notebook/src/notebook.rs | 28 ++++--- 3 files changed, 72 insertions(+), 54 deletions(-) create mode 100644 crates/ruff_notebook/resources/test/fixtures/jupyter/cell/unicode_magic_gh9145.json diff --git a/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/unicode_magic_gh9145.json b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/unicode_magic_gh9145.json new file mode 100644 index 0000000000000..38455b3c3df65 --- /dev/null +++ b/crates/ruff_notebook/resources/test/fixtures/jupyter/cell/unicode_magic_gh9145.json @@ -0,0 +1,19 @@ +{ + "execution_count": null, + "cell_type": "code", + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "def sample_func(xx):\n", + " \"\"\"\n", + " 转置 (transpose)\n", + " \"\"\"\n", + " return xx.T", + "# https://github.com/astral-sh/ruff-vscode/issues/362", + "DEFAULT_SYSTEM_PROMPT = (", + " \"Ты — Сайга, русскоязычный автоматический ассистент. \"", + " \"Ты разговариваешь с людьми и помогаешь им.\"", + ")" + ] +} diff --git a/crates/ruff_notebook/src/cell.rs b/crates/ruff_notebook/src/cell.rs index caa4cb3204a03..eefc18918b5cb 100644 --- a/crates/ruff_notebook/src/cell.rs +++ b/crates/ruff_notebook/src/cell.rs @@ -171,48 +171,43 @@ impl Cell { // Detect cell magics (which operate on multiple lines). lines.any(|line| { - line.split_whitespace().next().is_some_and(|first| { - if first.len() < 2 { - return false; - } - let (token, command) = first.split_at(2); - // These cell magics are special in that the lines following them are valid - // Python code and the variables defined in that scope are available to the - // rest of the notebook. - // - // For example: - // - // Cell 1: - // ```python - // x = 1 - // ``` - // - // Cell 2: - // ```python - // %%time - // y = x - // ``` - // - // Cell 3: - // ```python - // print(y) # Here, `y` is available. - // ``` - // - // This is to avoid false positives when these variables are referenced - // elsewhere in the notebook. - token == "%%" - && !matches!( - command, - "capture" - | "debug" - | "prun" - | "pypy" - | "python" - | "python3" - | "time" - | "timeit" - ) - }) + let Some(first) = line.split_whitespace().next() else { + return false; + }; + if first.len() < 2 { + return false; + } + let Some(command) = first.strip_prefix("%%") else { + return false; + }; + // These cell magics are special in that the lines following them are valid + // Python code and the variables defined in that scope are available to the + // rest of the notebook. + // + // For example: + // + // Cell 1: + // ```python + // x = 1 + // ``` + // + // Cell 2: + // ```python + // %%time + // y = x + // ``` + // + // Cell 3: + // ```python + // print(y) # Here, `y` is available. + // ``` + // + // This is to avoid false positives when these variables are referenced + // elsewhere in the notebook. + !matches!( + command, + "capture" | "debug" | "prun" | "pypy" | "python" | "python3" | "time" | "timeit" + ) }) } } diff --git a/crates/ruff_notebook/src/notebook.rs b/crates/ruff_notebook/src/notebook.rs index 7c9e8356b79d3..ddc558ba21b87 100644 --- a/crates/ruff_notebook/src/notebook.rs +++ b/crates/ruff_notebook/src/notebook.rs @@ -421,17 +421,18 @@ mod tests { )); } - #[test_case(Path::new("markdown.json"), false; "markdown")] - #[test_case(Path::new("only_magic.json"), true; "only_magic")] - #[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")] - #[test_case(Path::new("only_code.json"), true; "only_code")] - #[test_case(Path::new("cell_magic.json"), false; "cell_magic")] - #[test_case(Path::new("valid_cell_magic.json"), true; "valid_cell_magic")] - #[test_case(Path::new("automagic.json"), false; "automagic")] - #[test_case(Path::new("automagics.json"), false; "automagics")] - #[test_case(Path::new("automagic_before_code.json"), false; "automagic_before_code")] - #[test_case(Path::new("automagic_after_code.json"), true; "automagic_after_code")] - fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> { + #[test_case("markdown", false)] + #[test_case("only_magic", true)] + #[test_case("code_and_magic", true)] + #[test_case("only_code", true)] + #[test_case("cell_magic", false)] + #[test_case("valid_cell_magic", true)] + #[test_case("automagic", false)] + #[test_case("automagics", false)] + #[test_case("automagic_before_code", false)] + #[test_case("automagic_after_code", true)] + #[test_case("unicode_magic_gh9145", true)] + fn test_is_valid_code_cell(cell: &str, expected: bool) -> Result<()> { /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory. fn read_jupyter_cell(path: impl AsRef) -> Result { let path = notebook_path("cell").join(path); @@ -439,7 +440,10 @@ mod tests { Ok(serde_json::from_str(&source_code)?) } - assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected); + assert_eq!( + read_jupyter_cell(format!("{cell}.json"))?.is_valid_code_cell(), + expected + ); Ok(()) }