From 07a168b6e4c227579a280dc3f7b36a78dd0df7e0 Mon Sep 17 00:00:00 2001 From: Zsolt Dollenstein Date: Sun, 28 May 2023 23:02:19 +0200 Subject: [PATCH 1/2] Fix parsing of code without trailing newlines When the input doesn't have a trailing newline, but the last line had exactly as many bytes as the current indentation level, the tokenizer didn't emit a fake newline, causing parse errors (the grammar expects newlines to conform with the Python spec). I don't see any reason for fake newlines to be omitted in these cases, so this PR removes that condition from the tokenizer. Reported in #930. --- native/libcst/src/lib.rs | 16 ++++++++++++++++ native/libcst/src/tokenizer/core/mod.rs | 5 +---- native/libcst/src/tokenizer/tests.rs | 13 +++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/native/libcst/src/lib.rs b/native/libcst/src/lib.rs index 6d2bdd99c..9bd6bb998 100644 --- a/native/libcst/src/lib.rs +++ b/native/libcst/src/lib.rs @@ -149,6 +149,22 @@ mod test { parse_module("def g(a, b): ...", None).expect("parse error"); } + #[test] + fn test_single_statement_with_no_newline() { + for src in &[ + "(\n \\\n)", + "(\n \\\n)", + "(\n '''\n''')", + "del _", + "if _:\n '''\n)'''", + "if _:\n ('''\n''')", + "if _:\n '''\n '''", + "if _:\n '''\n ''' ", + ] { + parse_module(src, None).unwrap_or_else(|e| panic!("'{}' doesn't parse: {}", src, e)); + } + } + #[test] fn bol_offset_first_line() { assert_eq!(0, bol_offset("hello", 1)); diff --git a/native/libcst/src/tokenizer/core/mod.rs b/native/libcst/src/tokenizer/core/mod.rs index 7c0f0788e..fc6268978 100644 --- a/native/libcst/src/tokenizer/core/mod.rs +++ b/native/libcst/src/tokenizer/core/mod.rs @@ -334,10 +334,7 @@ impl<'t> TokState<'t> { return match self.text_pos.peek() { // Check for EOF now None => { - if self.missing_nl_before_eof - && self.text_pos.byte_column_number() != self.bol_width - && !self.blank_line - { + if self.missing_nl_before_eof && !self.blank_line { self.at_bol = true;
self.missing_nl_before_eof = false; Ok(TokType::Newline) diff --git a/native/libcst/src/tokenizer/tests.rs b/native/libcst/src/tokenizer/tests.rs index 4e8ce4d3e..06518bc67 100644 --- a/native/libcst/src/tokenizer/tests.rs +++ b/native/libcst/src/tokenizer/tests.rs @@ -718,6 +718,19 @@ fn test_fake_newline() { ); } +#[test] +fn test_fake_newline_when_at_bol() { + assert_eq!( + tokenize_with_end_marker("(\n \\\n)", &default_config()), + Ok(vec![ + (TokType::Op, "("), + (TokType::Op, ")"), + (TokType::Newline, ""), + (TokType::EndMarker, "") + ]) + ) +} + #[test] fn test_no_fake_newline_for_empty_input() { assert_eq!( From ef1801ac80e4f26481e2cdf82f8e422144d47869 Mon Sep 17 00:00:00 2001 From: Zsolt Dollenstein Date: Mon, 17 Jul 2023 21:06:40 +0100 Subject: [PATCH 2/2] Don't swallow trailing whitespace --- native/libcst/src/nodes/module.rs | 8 ++------ native/libcst/tests/fixtures/trailing_whitespace.py | 5 +++++ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 native/libcst/tests/fixtures/trailing_whitespace.py diff --git a/native/libcst/src/nodes/module.rs b/native/libcst/src/nodes/module.rs index 656b7d390..ff9a2a733 100644 --- a/native/libcst/src/nodes/module.rs +++ b/native/libcst/src/nodes/module.rs @@ -77,12 +77,8 @@ impl<'r, 'a> Inflate<'a> for DeflatedModule<'r, 'a> { } } if let Some(num) = last_indented { - if num + 1 == footer.len() { - footer = vec![]; - } else { - let (_, rest) = footer.split_at(num + 1); - footer = rest.to_vec(); - } + let (_, rest) = footer.split_at(num); + footer = rest.to_vec(); } } else { swap(&mut header, &mut footer); diff --git a/native/libcst/tests/fixtures/trailing_whitespace.py b/native/libcst/tests/fixtures/trailing_whitespace.py new file mode 100644 index 000000000..5a01c197e --- /dev/null +++ b/native/libcst/tests/fixtures/trailing_whitespace.py @@ -0,0 +1,5 @@ + + +x = 42 +print(x) + \ No newline at end of file