From ff3587795566316f24e2ca30574d4e0d0dde9691 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 11 Mar 2023 23:21:19 -0500 Subject: [PATCH] Implement an iterator for universal newlines --- Cargo.lock | 1 + crates/ruff/src/autofix/helpers.rs | 6 +- crates/ruff/src/checkers/noqa.rs | 3 +- crates/ruff/src/checkers/physical_lines.rs | 3 +- crates/ruff/src/noqa.rs | 5 +- .../src/rules/flake8_simplify/rules/ast_if.rs | 5 +- .../rules/flake8_simplify/rules/ast_with.rs | 5 +- crates/ruff/src/rules/isort/helpers.rs | 3 +- .../rules/invalid_escape_sequence.rs | 3 +- .../pycodestyle/rules/lambda_assignment.rs | 8 +- .../rules/no_newline_at_end_of_file.rs | 8 +- crates/ruff/src/rules/pydocstyle/helpers.rs | 3 +- .../pydocstyle/rules/blank_after_summary.rs | 5 +- .../rules/blank_before_after_class.rs | 10 +- .../rules/blank_before_after_function.rs | 13 +- .../pydocstyle/rules/ends_with_period.rs | 5 +- .../pydocstyle/rules/ends_with_punctuation.rs | 5 +- .../ruff/src/rules/pydocstyle/rules/indent.rs | 4 +- .../rules/multi_line_summary_start.rs | 6 +- .../rules/newline_after_last_paragraph.rs | 9 +- .../rules/pydocstyle/rules/no_signature.rs | 3 +- .../rules/no_surrounding_whitespace.rs | 4 +- .../pydocstyle/rules/non_imperative_mood.rs | 3 +- .../src/rules/pydocstyle/rules/one_liner.rs | 4 +- .../src/rules/pydocstyle/rules/sections.rs | 6 +- .../rules/pydocstyle/rules/triple_quotes.rs | 4 +- crates/ruff_python_ast/Cargo.toml | 1 + crates/ruff_python_ast/src/helpers.rs | 3 +- crates/ruff_python_ast/src/str.rs | 23 +- crates/ruff_python_ast/src/whitespace.rs | 201 +++++++++++++++++- .../ruff_python_formatter/src/cst/helpers.rs | 6 +- 31 files changed, 281 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e2cf3db5b5ff0..096b14e0c5be07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2132,6 +2132,7 @@ dependencies = [ "rustc-hash", "rustpython-common", "rustpython-parser", + "serde", "smallvec", ] diff --git a/crates/ruff/src/autofix/helpers.rs b/crates/ruff/src/autofix/helpers.rs index 8ff322b821c71f..a10d2bb7125939 100644 --- a/crates/ruff/src/autofix/helpers.rs +++ b/crates/ruff/src/autofix/helpers.rs @@ -10,7 +10,7 @@ use ruff_diagnostics::Fix; use ruff_python_ast::helpers; use ruff_python_ast::helpers::to_absolute; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::NewlineWithTrailingNewline; use crate::cst::helpers::compose_module_path; use crate::cst::matchers::match_module; @@ -100,7 +100,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result /// of a multi-statement line. fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { let contents = locator.skip(stmt.end_location.unwrap()); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); if trimmed.starts_with(';') { let column = line @@ -123,7 +123,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location { let start_location = Location::new(semicolon.row(), semicolon.column() + 1); let contents = locator.skip(start_location); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); // Skip past any continuations. if trimmed.starts_with('\\') { diff --git a/crates/ruff/src/checkers/noqa.rs b/crates/ruff/src/checkers/noqa.rs index f6dfcd52b9556b..e6017c86d455c9 100644 --- a/crates/ruff/src/checkers/noqa.rs +++ b/crates/ruff/src/checkers/noqa.rs @@ -6,6 +6,7 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::codes::NoqaCode; use crate::noqa; @@ -38,7 +39,7 @@ pub fn check_noqa( // Indices of diagnostics that were ignored by a `noqa` directive. let mut ignored_diagnostics = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = UniversalNewlineIterator::from(contents).collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index 442cac67f530b5..4bc5046e92f16f 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -4,6 +4,7 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; use ruff_python_ast::source_code::Stylist; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::registry::Rule; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; @@ -56,7 +57,7 @@ pub fn check_physical_lines( let mut commented_lines_iter = commented_lines.iter().peekable(); let mut doc_lines_iter = doc_lines.iter().peekable(); - for (index, line) in contents.lines().enumerate() { + for (index, line) in UniversalNewlineIterator::from(contents).enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) .is_some() diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index ad973602110e86..fa02ab7b444b93 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -14,6 +14,7 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::Diagnostic; use ruff_python_ast::source_code::{LineEnding, Locator}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::codes::NoqaCode; use crate::registry::{AsRule, Rule}; @@ -181,7 +182,7 @@ fn add_noqa_inner( // Codes that are globally exempted (within the current file). let mut file_exemptions: Vec = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = UniversalNewlineIterator::from(contents).collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { @@ -263,7 +264,7 @@ fn add_noqa_inner( let mut count: usize = 0; let mut output = String::new(); - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in lines.into_iter().enumerate() { match matches_by_line.get(&lineno) { None => { output.push_str(line); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index 38587687d7b5ca..14737d4457222e 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -10,6 +10,7 @@ use ruff_python_ast::helpers::{ has_comments_in, unparse_expr, unparse_stmt, }; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -281,9 +282,7 @@ pub fn nested_if_statements( if fixable && checker.patch(diagnostic.kind.rule()) { match fix_if::fix_nested_if_statements(checker.locator, checker.stylist, stmt) { Ok(fix) => { - if fix - .content - .lines() + if UniversalNewlineIterator::from(&fix.content) .all(|line| line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 800a5830a37563..6f26aa3edf10bb 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -6,6 +6,7 @@ use ruff_diagnostics::{AutofixKind, Availability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -113,9 +114,7 @@ pub fn multiple_with_statements( with_stmt, ) { Ok(fix) => { - if fix - .content - .lines() + if UniversalNewlineIterator::from(&fix.content) .all(|line| line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index f1ca5396f189a6..5a0c6b7b88fa85 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -3,6 +3,7 @@ use rustpython_parser::{lexer, Mode, Tok}; use ruff_python_ast::helpers::is_docstring_stmt; use ruff_python_ast::source_code::Locator; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use super::types::TrailingComma; @@ -62,7 +63,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool { // # Direct comment. // def f(): pass let mut seen_blank = false; - for line in locator.take(stmt.location).lines().rev() { + for line in UniversalNewlineIterator::from(locator.take(stmt.location)).rev() { let line = line.trim(); if seen_blank { if line.starts_with('#') { diff --git a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index dd79f339d349da..88684aee8aa5f2 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -6,6 +6,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; /// ## What it does /// Checks for invalid escape sequences. @@ -76,7 +77,7 @@ pub fn invalid_escape_sequence( let body = &text[(quote_pos + quote.len())..(text.len() - quote.len())]; if !prefix.contains('r') { - for (row_offset, line) in body.lines().enumerate() { + for (row_offset, line) in UniversalNewlineIterator::from(body).enumerate() { let chars: Vec = line.chars().collect(); for col_offset in 0..chars.len() { if chars[col_offset] != '\\' { diff --git a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs index 335a9dba13f4d2..082ce0fc28676e 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs @@ -5,7 +5,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{match_leading_content, match_trailing_content, unparse_stmt}; use ruff_python_ast::source_code::Stylist; use ruff_python_ast::types::{Range, ScopeKind}; -use ruff_python_ast::whitespace::leading_space; +use ruff_python_ast::whitespace::{leading_space, UniversalNewlineIterator}; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -85,9 +85,9 @@ pub fn lambda_assignment(checker: &mut Checker, target: &Expr, value: &Expr, stm )); let indentation = &leading_space(first_line); let mut indented = String::new(); - for (idx, line) in function(id, args, body, checker.stylist) - .lines() - .enumerate() + for (idx, line) in + UniversalNewlineIterator::from(&function(id, args, body, checker.stylist)) + .enumerate() { if idx == 0 { indented.push_str(line); diff --git a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs index e5411cc53c772c..fd30a330b25a4c 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs @@ -4,6 +4,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::source_code::Stylist; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; /// ## What it does /// Checks for files missing a new line at the end of the file. @@ -41,12 +42,13 @@ pub fn no_newline_at_end_of_file( contents: &str, autofix: bool, ) -> Option { - if !contents.ends_with('\n') { + if !contents.ends_with(['\n', '\r']) { // Note: if `lines.last()` is `None`, then `contents` is empty (and so we don't // want to raise W292 anyway). - if let Some(line) = contents.lines().last() { + if let Some(line) = UniversalNewlineIterator::from(contents).last() { // Both locations are at the end of the file (and thus the same). - let location = Location::new(contents.lines().count(), line.len()); + let location = + Location::new(UniversalNewlineIterator::from(contents).count(), line.len()); let mut diagnostic = Diagnostic::new(NoNewLineAtEndOfFile, Range::new(location, location)); if autofix { diff --git a/crates/ruff/src/rules/pydocstyle/helpers.rs b/crates/ruff/src/rules/pydocstyle/helpers.rs index b6261b1361fbff..06d38a123f1f55 100644 --- a/crates/ruff/src/rules/pydocstyle/helpers.rs +++ b/crates/ruff/src/rules/pydocstyle/helpers.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use ruff_python_ast::cast; use ruff_python_ast::helpers::{map_callable, to_call_path}; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::{Definition, DefinitionKind}; @@ -10,7 +11,7 @@ use crate::docstrings::definition::{Definition, DefinitionKind}; pub fn logical_line(content: &str) -> Option { // Find the first logical line. let mut logical_line = None; - for (i, line) in content.lines().enumerate() { + for (i, line) in UniversalNewlineIterator::from(content).enumerate() { if line.trim().is_empty() { // Empty line. If this is the line _after_ the first logical line, stop. if logical_line.is_some() { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs index 2e39195bd03ba9..17d5f33f1d880a 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs @@ -1,6 +1,7 @@ use ruff_diagnostics::{AutofixKind, Availability, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -45,7 +46,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { let mut lines_count = 1; let mut blanks_count = 0; - for line in body.trim().lines().skip(1) { + for line in UniversalNewlineIterator::from(body.trim()).skip(1) { lines_count += 1; if line.trim().is_empty() { blanks_count += 1; @@ -64,7 +65,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { if blanks_count > 1 { // Find the "summary" line (defined as the first non-blank line). let mut summary_line = 0; - for line in body.lines() { + for line in UniversalNewlineIterator::from(body) { if line.trim().is_empty() { summary_line += 1; } else { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs index 9659fca6c1e870..ed3cfcf54f685b 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs @@ -1,6 +1,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -74,8 +75,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { .locator .slice(Range::new(parent.location, docstring.expr.location)); - let blank_lines_before = before - .lines() + let blank_lines_before = UniversalNewlineIterator::from(before) .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -137,16 +137,14 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { parent.end_location.unwrap(), )); - let all_blank_after = after - .lines() + let all_blank_after = UniversalNewlineIterator::from(after) .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { return; } - let blank_lines_after = after - .lines() + let blank_lines_after = UniversalNewlineIterator::from(after) .skip(1) .take_while(|line| line.trim().is_empty()) .count(); diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs index 701808c70aa6b8..e887ff99402ec9 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs @@ -4,6 +4,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -66,8 +67,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) .locator .slice(Range::new(parent.location, docstring.expr.location)); - let blank_lines_before = before - .lines() + let blank_lines_before = UniversalNewlineIterator::from(before) .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -101,8 +101,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) )); // If the docstring is only followed by blank and commented lines, abort. - let all_blank_after = after - .lines() + let all_blank_after = UniversalNewlineIterator::from(after) .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { @@ -110,16 +109,14 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) } // Count the number of blank lines after the docstring. - let blank_lines_after = after - .lines() + let blank_lines_after = UniversalNewlineIterator::from(after) .skip(1) .take_while(|line| line.trim().is_empty()) .count(); // Avoid violations for blank lines followed by inner functions or classes. if blank_lines_after == 1 - && after - .lines() + && UniversalNewlineIterator::from(after) .skip(1 + blank_lines_after) .find(|line| !line.trim_start().starts_with('#')) .map_or(false, |line| INNER_FUNCTION_OR_CLASS_REGEX.is_match(line)) diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs index e4d963feff6baf..e32568cc9d811d 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs @@ -4,6 +4,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -31,7 +32,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = UniversalNewlineIterator::from(body.trim()).next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. @@ -55,7 +56,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = UniversalNewlineIterator::from(body).nth(index).unwrap(); let trimmed = line.trim_end(); if !trimmed.ends_with('.') { diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs index f163afc6ad18f1..78e16106493167 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs @@ -4,6 +4,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -31,7 +32,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = UniversalNewlineIterator::from(body.trim()).next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. @@ -55,7 +56,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = UniversalNewlineIterator::from(body).nth(index).unwrap(); let trimmed = line.trim_end(); if !(trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?')) { let mut diagnostic = Diagnostic::new(EndsInPunctuation, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/indent.rs b/crates/ruff/src/rules/pydocstyle/rules/indent.rs index 475720d40ed9d1..ff87f824420487 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/indent.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/indent.rs @@ -3,7 +3,7 @@ use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -53,7 +53,7 @@ pub fn indent(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; // Split the docstring into lines. - let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() <= 1 { return; } diff --git a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs index c2d2ec7ea818fe..5abbb022c46eee 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::{is_triple_quote, leading_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator}; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -42,10 +42,10 @@ pub fn multi_line_summary_start(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if LinesWithTrailingNewline::from(body).nth(1).is_none() { + if NewlineWithTrailingNewline::from(body).nth(1).is_none() { return; }; - let mut content_lines = contents.lines(); + let mut content_lines = UniversalNewlineIterator::from(contents); let Some(first_line) = content_lines .next() else diff --git a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs index b3f49732e2cbe7..7e4e3f4bb8ca8a 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator}; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -29,12 +29,15 @@ pub fn newline_after_last_paragraph(checker: &mut Checker, docstring: &Docstring let body = docstring.body; let mut line_count = 0; - for line in LinesWithTrailingNewline::from(body) { + for line in NewlineWithTrailingNewline::from(body) { if !line.trim().is_empty() { line_count += 1; } if line_count > 1 { - if let Some(last_line) = contents.lines().last().map(str::trim) { + if let Some(last_line) = UniversalNewlineIterator::from(contents) + .last() + .map(str::trim) + { if last_line != "\"\"\"" && last_line != "'''" { let mut diagnostic = Diagnostic::new(NewLineAfterLastParagraph, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs index 6e404dc223b1e6..5ed7b7ed995745 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs @@ -3,6 +3,7 @@ use rustpython_parser::ast::StmtKind; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -32,7 +33,7 @@ pub fn no_signature(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; - let Some(first_line) = body.trim().lines().next() else { + let Some(first_line) = UniversalNewlineIterator::from(body.trim()).next() else { return; }; if !first_line.contains(&format!("{name}(")) { diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs index 609bbe1e8ba733..726f7ea2406128 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -28,7 +28,7 @@ pub fn no_surrounding_whitespace(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let mut lines = LinesWithTrailingNewline::from(body); + let mut lines = NewlineWithTrailingNewline::from(body); let Some(line) = lines.next() else { return; }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs index 9beae70e9f6c92..73304300924e3d 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs @@ -9,6 +9,7 @@ use ruff_python_ast::cast; use ruff_python_ast::helpers::to_call_path; use ruff_python_ast::types::{CallPath, Range}; use ruff_python_ast::visibility::{is_property, is_test}; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -48,7 +49,7 @@ pub fn non_imperative_mood( let body = docstring.body; // Find first line, disregarding whitespace. - let line = match body.trim().lines().next() { + let line = match UniversalNewlineIterator::from(body.trim()).next() { Some(line) => line.trim(), None => return, }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs index 7ecb70f70df24d..e4399a6e7eac89 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs @@ -2,7 +2,7 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::NewlineWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -26,7 +26,7 @@ impl AlwaysAutofixableViolation for FitsOnOneLine { pub fn one_liner(checker: &mut Checker, docstring: &Docstring) { let mut line_count = 0; let mut non_empty_line_count = 0; - for line in LinesWithTrailingNewline::from(docstring.body) { + for line in NewlineWithTrailingNewline::from(docstring.body) { line_count += 1; if !line.trim().is_empty() { non_empty_line_count += 1; diff --git a/crates/ruff/src/rules/pydocstyle/rules/sections.rs b/crates/ruff/src/rules/pydocstyle/rules/sections.rs index 3680dc30267a2d..a8096c99ae6907 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/sections.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/sections.rs @@ -10,7 +10,7 @@ use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::identifier_range; use ruff_python_ast::types::Range; use ruff_python_ast::visibility::is_staticmethod; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; +use ruff_python_ast::whitespace::NewlineWithTrailingNewline; use ruff_python_ast::{cast, whitespace}; use crate::checkers::ast::Checker; @@ -273,7 +273,7 @@ impl AlwaysAutofixableViolation for NoBlankLinesBetweenHeaderAndContent { pub fn sections(checker: &mut Checker, docstring: &Docstring, convention: Option<&Convention>) { let body = docstring.body; - let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() < 2 { return; } @@ -923,7 +923,7 @@ fn parameters_section(checker: &mut Checker, docstring: &Docstring, context: &Se // Join line continuations, then resplit by line. let adjusted_following_lines = context.following_lines.join("\n").replace("\\\n", ""); - let lines: Vec<&str> = LinesWithTrailingNewline::from(&adjusted_following_lines).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(&adjusted_following_lines).collect(); for i in 1..lines.len() { let current_line = lines[i - 1]; diff --git a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs index 720b70fe810e6a..0bfafd693051aa 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs @@ -1,6 +1,7 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -20,8 +21,7 @@ pub fn triple_quotes(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let Some(first_line) = contents - .lines() + let Some(first_line) = UniversalNewlineIterator::from(contents) .next() .map(str::to_lowercase) else { diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index 087bcaabb909bb..635a315c80ef7e 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -24,4 +24,5 @@ regex = { workspace = true } rustc-hash = { workspace = true } rustpython-common = { workspace = true } rustpython-parser = { workspace = true } +serde = { workspace = true } smallvec = { version = "1.10.0" } diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 047e03c59b0308..eb848e3e36eee4 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -18,6 +18,7 @@ use crate::source_code::{Generator, Indexer, Locator, Stylist}; use crate::types::{Binding, BindingKind, CallPath, Range}; use crate::visitor; use crate::visitor::Visitor; +use crate::whitespace::UniversalNewlineIterator; /// Create an `Expr` with default location from an `ExprKind`. pub fn create_expr(node: ExprKind) -> Expr { @@ -1125,7 +1126,7 @@ pub fn end_of_statement(stmt: &Stmt, locator: &Locator) -> Location { } // Otherwise, find the end of the last line that's "part of" the statement. - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in UniversalNewlineIterator::from(contents).enumerate() { if line.ends_with('\\') { continue; } diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs index 1a332b474cfe6a..34651f00afeb54 100644 --- a/crates/ruff_python_ast/src/str.rs +++ b/crates/ruff_python_ast/src/str.rs @@ -40,19 +40,18 @@ pub fn raw_contents(contents: &str) -> &str { /// Return the leading quote for a string or byte literal (e.g., `"""`). pub fn leading_quote(content: &str) -> Option<&str> { - if let Some(first_line) = content.lines().next() { - for pattern in TRIPLE_QUOTE_STR_PREFIXES - .iter() - .chain(TRIPLE_QUOTE_BYTE_PREFIXES) - .chain(SINGLE_QUOTE_STR_PREFIXES) - .chain(SINGLE_QUOTE_BYTE_PREFIXES) - { - if first_line.starts_with(pattern) { - return Some(pattern); + TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + .find_map(|pattern| { + if content.starts_with(pattern) { + Some(*pattern) + } else { + None } - } - } - None + }) } /// Return the trailing quote string for a string or byte literal (e.g., `"""`). diff --git a/crates/ruff_python_ast/src/whitespace.rs b/crates/ruff_python_ast/src/whitespace.rs index 8a779421c14b47..4b716714d2436e 100644 --- a/crates/ruff_python_ast/src/whitespace.rs +++ b/crates/ruff_python_ast/src/whitespace.rs @@ -1,5 +1,3 @@ -use std::str::Lines; - use rustpython_parser::ast::{Located, Location}; use crate::source_code::Locator; @@ -40,17 +38,17 @@ pub fn clean(indentation: &str) -> String { .collect() } -/// Like `str#lines`, but includes a trailing newline as an empty line. -pub struct LinesWithTrailingNewline<'a> { +/// Like `UniversalNewlineIterator`, but includes a trailing newline as an empty line. +pub struct NewlineWithTrailingNewline<'a> { trailing: Option<&'a str>, - underlying: Lines<'a>, + underlying: UniversalNewlineIterator<'a>, } -impl<'a> LinesWithTrailingNewline<'a> { - pub fn from(input: &'a str) -> LinesWithTrailingNewline<'a> { - LinesWithTrailingNewline { - underlying: input.lines(), - trailing: if input.ends_with('\n') { +impl<'a> NewlineWithTrailingNewline<'a> { + pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> { + NewlineWithTrailingNewline { + underlying: UniversalNewlineIterator::from(input), + trailing: if input.ends_with(['\r', '\n']) { Some("") } else { None @@ -59,7 +57,7 @@ impl<'a> LinesWithTrailingNewline<'a> { } } -impl<'a> Iterator for LinesWithTrailingNewline<'a> { +impl<'a> Iterator for NewlineWithTrailingNewline<'a> { type Item = &'a str; #[inline] @@ -74,3 +72,184 @@ impl<'a> Iterator for LinesWithTrailingNewline<'a> { next } } + +/// Like `str#lines`, but accommodates LF, CRLF, and CR line endings, +/// the latter of which are not supported by `str#lines`. +pub struct UniversalNewlineIterator<'a> { + text: &'a str, + forward_position: usize, + backwards_position: usize, +} + +impl<'a> UniversalNewlineIterator<'a> { + pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> { + // Like `str#lines`, we want to ignore any trailing newline. + let mut backwards_position = text.len(); + if text.ends_with("\r\n") { + backwards_position -= 2; + } else if text.ends_with(['\r', '\n']) { + backwards_position -= 1; + } + + UniversalNewlineIterator { + text, + forward_position: 0, + backwards_position, + } + } +} + +impl<'a> Iterator for UniversalNewlineIterator<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.forward_position == self.text.len() { + return None; + } + + let mut next_pos = None; + let mut line_end = None; + + for (i, c) in self.text[self.forward_position..].char_indices() { + match c { + '\r' => { + if let Some('\n') = self.text.chars().nth(i + self.forward_position + 1) { + next_pos = Some(i + self.forward_position + 2); + line_end = Some(i + self.forward_position); + } else { + next_pos = Some(i + self.forward_position + 1); + line_end = Some(i + self.forward_position); + } + break; + } + '\n' => { + next_pos = Some(i + self.forward_position + 1); + line_end = Some(i + self.forward_position); + break; + } + _ => {} + } + } + + if let Some(line_end_pos) = line_end { + let line = &self.text[self.forward_position..line_end_pos]; + self.forward_position = next_pos.unwrap_or(line_end_pos); + Some(line) + } else { + let line = &self.text[self.forward_position..]; + self.forward_position = self.text.len(); + Some(line) + } + } +} + +impl<'a> DoubleEndedIterator for UniversalNewlineIterator<'a> { + #[inline] + fn next_back(&mut self) -> Option { + if self.backwards_position == 0 { + return None; + } + + let mut next_pos = None; + let mut line_start = None; + + for (i, c) in self.text[..self.backwards_position].char_indices().rev() { + match c { + '\r' => { + if let Some('\n') = self.text.chars().nth(i - 1) { + next_pos = Some(i - 1); + line_start = Some(i + 1); + } else { + next_pos = Some(i); + line_start = Some(i + 1); + } + break; + } + '\n' => { + next_pos = Some(i); + line_start = Some(i + 1); + break; + } + _ => {} + } + } + + if let Some(line_start_pos) = line_start { + let line = &self.text[line_start_pos..self.backwards_position]; + self.backwards_position = next_pos.unwrap_or(line_start_pos); + Some(line) + } else { + let line = &self.text[..self.backwards_position]; + self.backwards_position = 0; + Some(line) + } + } +} + +#[cfg(test)] +mod tests { + use super::UniversalNewlineIterator; + + #[test] + fn universal_newlines_forward() { + let text = "foo\nbar\n\r\nbaz\rbop"; + let mut lines = UniversalNewlineIterator::from(text); + + assert_eq!(Some("foo"), lines.next()); + assert_eq!(Some("bar"), lines.next()); + assert_eq!(Some(""), lines.next()); + assert_eq!(Some("baz"), lines.next()); + assert_eq!(Some("bop"), lines.next()); + + assert_eq!(None, lines.next()); + + let text = "foo\nbar\n\r\nbaz\rbop\n"; + let mut lines = UniversalNewlineIterator::from(text); + + assert_eq!(Some("foo"), lines.next()); + assert_eq!(Some("bar"), lines.next()); + assert_eq!(Some(""), lines.next()); + assert_eq!(Some("baz"), lines.next()); + assert_eq!(Some("bop"), lines.next()); + + assert_eq!(None, lines.next()); + + let text = "foo\nbar\n\r\nbaz\rbop\n\n"; + let mut lines = UniversalNewlineIterator::from(text); + + assert_eq!(Some("foo"), lines.next()); + assert_eq!(Some("bar"), lines.next()); + assert_eq!(Some(""), lines.next()); + assert_eq!(Some("baz"), lines.next()); + assert_eq!(Some("bop"), lines.next()); + assert_eq!(Some(""), lines.next()); + + assert_eq!(None, lines.next()); + } + + #[test] + fn universal_newlines_backwards() { + let text = "foo\nbar\n\r\nbaz\rbop"; + let mut lines = UniversalNewlineIterator::from(text).rev(); + + assert_eq!(Some("bop"), lines.next()); + assert_eq!(Some("baz"), lines.next()); + assert_eq!(Some(""), lines.next()); + assert_eq!(Some("bar"), lines.next()); + assert_eq!(Some("foo"), lines.next()); + + assert_eq!(None, lines.next()); + + let text = "foo\nbar\n\r\nbaz\rbop\n"; + let mut lines = UniversalNewlineIterator::from(text).rev(); + + assert_eq!(Some("bop"), lines.next()); + assert_eq!(Some("baz"), lines.next()); + assert_eq!(Some(""), lines.next()); + assert_eq!(Some("bar"), lines.next()); + assert_eq!(Some("foo"), lines.next()); + + assert_eq!(None, lines.next()); + } +} diff --git a/crates/ruff_python_formatter/src/cst/helpers.rs b/crates/ruff_python_formatter/src/cst/helpers.rs index f25a9dd903e8a6..b30e9a89b9fe2c 100644 --- a/crates/ruff_python_formatter/src/cst/helpers.rs +++ b/crates/ruff_python_formatter/src/cst/helpers.rs @@ -2,6 +2,7 @@ use rustpython_parser::ast::Location; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; +use ruff_python_ast::whitespace::UniversalNewlineIterator; /// Return `true` if the given string is a radix literal (e.g., `0b101`). pub fn is_radix_literal(content: &str) -> bool { @@ -96,7 +97,10 @@ pub fn expand_indented_block( // Compound statement: from the colon to the end of the block. let mut offset = 0; - for (index, line) in contents[end_index..].lines().skip(1).enumerate() { + for (index, line) in UniversalNewlineIterator::from(&contents[end_index..]) + .skip(1) + .enumerate() + { if line.is_empty() { continue; }