diff --git a/Cargo.lock b/Cargo.lock
index 6c6730ee922a6..2bbd5602a3327 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2001,7 +2001,7 @@ dependencies = [
 [[package]]
 name = "ruff_text_size"
 version = "0.0.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "schemars",
  "serde",
@@ -2072,7 +2072,7 @@ dependencies = [
 [[package]]
 name = "rustpython-ast"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "is-macro",
  "num-bigint",
@@ -2083,7 +2083,7 @@ dependencies = [
 [[package]]
 name = "rustpython-format"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "bitflags 2.2.1",
  "itertools",
@@ -2095,7 +2095,7 @@ dependencies = [
 [[package]]
 name = "rustpython-literal"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "hexf-parse",
  "lexical-parse-float",
@@ -2106,7 +2106,7 @@ dependencies = [
 [[package]]
 name = "rustpython-parser"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "anyhow",
  "itertools",
@@ -2128,7 +2128,7 @@ dependencies = [
 [[package]]
 name = "rustpython-parser-core"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/Parser.git?rev=27e3873dc2a3b0d652cc106bc9ddcede4b01806d#27e3873dc2a3b0d652cc106bc9ddcede4b01806d"
+source = "git+https://github.com/RustPython/Parser.git?rev=735c06d5f43da05d191f32442224f082f1d202ee#735c06d5f43da05d191f32442224f082f1d202ee"
 dependencies = [
  "ruff_text_size",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 05ea8b8bb545b..3d9046ff4b0a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,10 +31,10 @@ proc-macro2 = { version = "1.0.51" }
 quote = { version = "1.0.23" }
 regex = { version = "1.7.1" }
 rustc-hash = { version = "1.1.0" }
-ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d" }
-rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "27e3873dc2a3b0d652cc106bc9ddcede4b01806d", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
+ruff_text_size = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" }
+rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
 schemars = { version = "0.8.12" }
 serde = { version = "1.0.152", features = ["derive"] }
 serde_json = { version = "1.0.93", features = ["preserve_order"] }
"https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" } +rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" } +rustpython-literal = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee" } +rustpython-parser = { git = "https://github.com/RustPython/Parser.git", rev = "735c06d5f43da05d191f32442224f082f1d202ee", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] } schemars = { version = "0.8.12" } serde = { version = "1.0.152", features = ["derive"] } serde_json = { version = "1.0.93", features = ["preserve_order"] } diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs index d9ff322a7371b..ca1a35c565aaa 100644 --- a/crates/ruff/src/checkers/logical_lines.rs +++ b/crates/ruff/src/checkers/logical_lines.rs @@ -168,7 +168,8 @@ mod tests { let contents = r#" x = 1 y = 2 -z = x + 1"#; +z = x + 1"# + .trim(); let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) @@ -189,7 +190,8 @@ x = [ 3, ] y = 2 -z = x + 1"#; +z = x + 1"# + .trim(); let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) @@ -216,7 +218,8 @@ z = x + 1"#; let contents = r#" def f(): x = 1 -f()"#; +f()"# + .trim(); let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) @@ -231,7 +234,8 @@ def f(): """Docstring goes here.""" # Comment goes here. x = 1 -f()"#; +f()"# + .trim(); let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = LogicalLines::from_tokens(&lxr, &locator) diff --git a/crates/ruff/src/doc_lines.rs b/crates/ruff/src/doc_lines.rs index 38e09c6ea16da..c85c3c871febc 100644 --- a/crates/ruff/src/doc_lines.rs +++ b/crates/ruff/src/doc_lines.rs @@ -3,7 +3,7 @@ use std::iter::FusedIterator; -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::TextSize; use rustpython_parser::ast::{self, Constant, Expr, Ranged, Stmt, Suite}; use rustpython_parser::lexer::LexResult; use rustpython_parser::Tok; @@ -13,24 +13,19 @@ use ruff_python_ast::source_code::Locator; use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor}; /// Extract doc lines (standalone comments) from a token sequence. -pub(crate) fn doc_lines_from_tokens<'a>( - lxr: &'a [LexResult], - locator: &'a Locator<'a>, -) -> DocLines<'a> { - DocLines::new(lxr, locator) +pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines { + DocLines::new(lxr) } pub(crate) struct DocLines<'a> { inner: std::iter::Flatten>, - locator: &'a Locator<'a>, prev: TextSize, } impl<'a> DocLines<'a> { - fn new(lxr: &'a [LexResult], locator: &'a Locator) -> Self { + fn new(lxr: &'a [LexResult]) -> Self { Self { inner: lxr.iter().flatten(), - locator, prev: TextSize::default(), } } @@ -46,15 +41,11 @@ impl Iterator for DocLines<'_> { match tok { Tok::Comment(..) 
diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs
index fa87d8cb12a28..50b308cefdfae 100644
--- a/crates/ruff/src/linter.rs
+++ b/crates/ruff/src/linter.rs
@@ -88,7 +88,7 @@ pub fn check_path(
     let use_doc_lines = settings.rules.enabled(Rule::DocLineTooLong);
     let mut doc_lines = vec![];
     if use_doc_lines {
-        doc_lines.extend(doc_lines_from_tokens(&tokens, locator));
+        doc_lines.extend(doc_lines_from_tokens(&tokens));
     }
 
     // Run the token-based rules.
diff --git a/crates/ruff/src/rules/flake8_todos/rules.rs b/crates/ruff/src/rules/flake8_todos/rules.rs
index d23208d0e3ca6..126083635028a 100644
--- a/crates/ruff/src/rules/flake8_todos/rules.rs
+++ b/crates/ruff/src/rules/flake8_todos/rules.rs
@@ -309,16 +309,22 @@ pub(crate) fn todos(tokens: &[LexResult], settings: &Settings) -> Vec<Diagnostic> {
         // TD-003
         let mut has_issue_link = false;
         while let Some((token, token_range)) = iter.peek() {
-            if let Tok::Comment(comment) = token {
-                if detect_tag(comment, token_range.start()).is_some() {
-                    break;
+            match token {
+                Tok::Comment(comment) => {
+                    if detect_tag(comment, token_range.start()).is_some() {
+                        break;
+                    }
+                    if ISSUE_LINK_REGEX_SET.is_match(comment) {
+                        has_issue_link = true;
+                        break;
+                    }
+                }
+                Tok::Newline | Tok::NonLogicalNewline => {
+                    continue;
                 }
-                if ISSUE_LINK_REGEX_SET.is_match(comment) {
-                    has_issue_link = true;
+                _ => {
                     break;
                 }
-            } else {
-                break;
             }
         }
         if !has_issue_link {
diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs
index 3057ab5daec79..c9e2aab2d0b1a 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs
@@ -89,8 +89,7 @@ impl<'a> LogicalLines<'a> {
         let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
         let mut parens: u32 = 0;
 
-        let mut iter = tokens.iter().flatten().peekable();
-        while let Some((token, range)) = iter.next() {
+        for (token, range) in tokens.iter().flatten() {
             let token_kind = TokenKind::from_token(token);
             builder.push_token(token_kind, *range);
 
@@ -101,24 +100,6 @@ impl<'a> LogicalLines<'a> {
                 TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens -= 1;
                 }
-                TokenKind::Comment if parens == 0 => {
-                    // If a comment is followed by a newline, ignore it, and we'll build the line
-                    // when we process the newline. Otherwise, we'll end up creating one logical
-                    // line here, and then another, empty logical line when we process the newline.
-                    //
-                    // The lexer will always emit a newline after a comment _unless_ the comment
-                    // appears at the start of a logical line.
-                    if let Some((token, ..)) = iter.peek() {
-                        let token_kind = TokenKind::from_token(token);
-                        if matches!(
-                            token_kind,
-                            TokenKind::Newline | TokenKind::NonLogicalNewline
-                        ) {
-                            continue;
-                        }
-                    }
-                    builder.finish_line();
-                }
                 TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
                     builder.finish_line();
                 }
diff --git a/crates/ruff_python_ast/src/source_code/indexer.rs b/crates/ruff_python_ast/src/source_code/indexer.rs
index f6a954fb30738..dd6c459c0083a 100644
--- a/crates/ruff_python_ast/src/source_code/indexer.rs
+++ b/crates/ruff_python_ast/src/source_code/indexer.rs
@@ -35,17 +35,17 @@ impl Indexer {
 
         // Get the trivia between the previous and the current token and detect any newlines.
         // This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
-        // between any two tokens that form a continuation nor multiple newlines in a row.
-        // That's why we have to extract the newlines "manually".
+        // between any two tokens that form a continuation. That's why we have to extract the
+        // newlines "manually".
         for (index, text) in trivia.match_indices(['\n', '\r']) {
             if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
                 continue;
             }
 
-            // Newlines after a comment or new-line never form a continuation.
+            // Newlines after a newline never form a continuation.
             if !matches!(
                 prev_token,
-                Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
+                Some(Tok::Newline | Tok::NonLogicalNewline) | None
             ) {
                 continuation_lines.push(line_start);
             }
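The `flake8_todos`, `LogicalLines`, and `Indexer` changes above all lean on the same upstream behavior: at rev `735c06d`, the lexer (with the `full-lexer` feature pinned in the workspace `Cargo.toml`) emits a newline token after each comment and a `NonLogicalNewline` for blank lines and newlines inside brackets, so none of these passes need to rescan the source text. A quick way to inspect that token stream (editor's sketch, not part of the diff):

```rust
use rustpython_parser::{lexer::lex, Mode};

fn main() {
    let source = "x = 1  # trailing comment\n\n# standalone comment\ny = 2\n";
    // Each comment is now followed by a Newline/NonLogicalNewline token,
    // and the blank line shows up as its own NonLogicalNewline.
    for (tok, range) in lex(source, Mode::Module).flatten() {
        println!("{range:?}: {tok:?}");
    }
}
```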
diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs
index bc579f2c2b08d..5a7d357eb19a2 100644
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -28,7 +28,7 @@ pub fn fmt(contents: &str) -> Result<Formatted<ASTFormatContext>> {
     let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
 
     // Extract trivia.
-    let trivia = trivia::extract_trivia_tokens(&tokens, contents);
+    let trivia = trivia::extract_trivia_tokens(&tokens);
 
     // Parse the AST.
     let python_ast = ruff_rustpython::parse_program_tokens(tokens, "<filename>")?;
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__tests__black_test__comment_after_escaped_newline_py.snap b/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__tests__black_test__comment_after_escaped_newline_py.snap
deleted file mode 100644
index 92e670eafabb9..0000000000000
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__tests__black_test__comment_after_escaped_newline_py.snap
+++ /dev/null
@@ -1,62 +0,0 @@
----
-source: crates/ruff_python_formatter/src/lib.rs
-expression: snapshot
-input_file: crates/ruff_python_formatter/resources/test/fixtures/black/simple_cases/comment_after_escaped_newline.py
----
-## Input
-
-```py
-def bob(): \
-    # pylint: disable=W9016
-    pass
-
-
-def bobtwo(): \
-    \
-    # some comment here
-    pass
-```
-
-## Black Differences
-
-```diff
---- Black
-+++ Ruff
-@@ -1,6 +1,8 @@
--def bob(): # pylint: disable=W9016
-+def bob():
-+    # pylint: disable=W9016
-     pass
- 
- 
--def bobtwo(): # some comment here
-+def bobtwo():
-+    # some comment here
-     pass
-```
-
-## Ruff Output
-
-```py
-def bob():
-    # pylint: disable=W9016
-    pass
-
-
-def bobtwo():
-    # some comment here
-    pass
-```
-
-## Black Output
-
-```py
-def bob(): # pylint: disable=W9016
-    pass
-
-
-def bobtwo(): # some comment here
-    pass
-```
-
-
diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_formatter/src/trivia.rs
index 7f553e50bff0a..8bc00de5bd79a 100644
--- a/crates/ruff_python_formatter/src/trivia.rs
+++ b/crates/ruff_python_formatter/src/trivia.rs
@@ -2,7 +2,6 @@ use ruff_text_size::{TextRange, TextSize};
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
-use std::ops::Add;
 
 use crate::cst::{
     Alias, Arg, Body, BoolOp, CmpOp, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
@@ -190,49 +189,25 @@ impl Trivia {
     }
 }
 
-pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken> {
+pub fn extract_trivia_tokens(lxr: &[LexResult]) -> Vec<TriviaToken> {
     let mut tokens = vec![];
-    let mut prev_end = TextSize::default();
     let mut prev_tok: Option<(&Tok, TextRange)> = None;
     let mut prev_semantic_tok: Option<(&Tok, TextRange)> = None;
     let mut parens = vec![];
 
     for (tok, range) in lxr.iter().flatten() {
-        // Add empty lines.
-        let trivia = &text[TextRange::new(prev_end, range.start())];
-        let bytes = trivia.as_bytes();
-
-        let mut bytes_iter = bytes.iter().enumerate();
-
-        let mut after_new_line =
-            matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
-
-        while let Some((index, byte)) = bytes_iter.next() {
-            let len = match byte {
-                b'\r' if bytes.get(index + 1) == Some(&b'\n') => {
-                    bytes_iter.next();
-                    TextSize::from(2)
-                }
-                b'\n' | b'\r' => TextSize::from(1),
-                _ => {
-                    // Must be whitespace or the parser would generate a token
-                    continue;
-                }
-            };
+        let after_new_line = matches!(prev_tok, Some((Tok::Newline | Tok::NonLogicalNewline, _)));
 
-            if after_new_line {
-                let new_line_start = prev_end.add(TextSize::try_from(index).unwrap());
-                tokens.push(TriviaToken {
-                    range: TextRange::new(new_line_start, new_line_start.add(len)),
-                    kind: TriviaTokenKind::EmptyLine,
-                });
-            } else {
-                after_new_line = true;
-            }
+        // Add empty lines.
+        if after_new_line && matches!(tok, Tok::NonLogicalNewline) {
+            tokens.push(TriviaToken {
+                range: *range,
+                kind: TriviaTokenKind::EmptyLine,
+            });
         }
 
         // Add comments.
-        if let Tok::Comment(_) = tok {
+        if matches!(tok, Tok::Comment(..)) {
             tokens.push(TriviaToken {
                 range: *range,
                 // Used to use prev_non-newline_tok
@@ -293,8 +268,6 @@ pub fn extract_trivia_tokens(lxr: &[LexResult], text: &str) -> Vec<TriviaToken>
         ) {
             prev_semantic_tok = Some((tok, *range));
         }
-
-        prev_end = range.end();
     }
     tokens
 }
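After this change, `extract_trivia_tokens` classifies an empty line purely from token adjacency: a `NonLogicalNewline` that immediately follows another newline token spans a blank line, so the old byte-by-byte scan over the inter-token source text can go away. A standalone mirror of that rule (editor's sketch; `empty_line_ranges` is an illustrative name, not part of this diff):

```rust
use ruff_text_size::TextRange;
use rustpython_parser::{lexer::lex, Mode, Tok};

/// Returns the ranges of blank lines, mirroring the new empty-line
/// detection in `extract_trivia_tokens`: a `NonLogicalNewline` right
/// after another newline token means the line in between was empty.
fn empty_line_ranges(source: &str) -> Vec<TextRange> {
    let mut ranges = Vec::new();
    let mut prev_was_newline = false;
    for (tok, range) in lex(source, Mode::Module).flatten() {
        if prev_was_newline && matches!(tok, Tok::NonLogicalNewline) {
            ranges.push(range);
        }
        prev_was_newline = matches!(tok, Tok::Newline | Tok::NonLogicalNewline);
    }
    ranges
}
```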