Skip to content

Commit

Permalink
Move W605 to the AST checker
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed May 13, 2024
1 parent 6a26b08 commit 1f748b8
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 53 deletions.
5 changes: 4 additions & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/string_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use ruff_python_ast::StringLike;

use crate::checkers::ast::Checker;
use crate::codes::Rule;
use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, ruff};
use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, pycodestyle, ruff};

/// Run lint rules over a [`StringLike`] syntax nodes.
pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
Expand Down Expand Up @@ -36,4 +36,7 @@ pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
if checker.enabled(Rule::AvoidableEscapedQuote) && checker.settings.flake8_quotes.avoid_escape {
flake8_quotes::rules::avoidable_escaped_quote(checker, string_like);
}
if checker.enabled(Rule::InvalidEscapeSequence) {
pycodestyle::rules::invalid_escape_sequence(checker, string_like);
}
}
12 changes: 0 additions & 12 deletions crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,6 @@ pub(crate) fn check_tokens(
pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, indexer);
}

if settings.rules.enabled(Rule::InvalidEscapeSequence) {
for (tok, range) in tokens.iter().flatten() {
pycodestyle::rules::invalid_escape_sequence(
&mut diagnostics,
locator,
indexer,
tok,
*range,
);
}
}

if settings.rules.enabled(Rule::TabIndentation) {
pycodestyle::rules::tab_indentation(&mut diagnostics, locator, indexer);
}
Expand Down
1 change: 0 additions & 1 deletion crates/ruff_linter/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ impl Rule {
| Rule::InvalidCharacterNul
| Rule::InvalidCharacterSub
| Rule::InvalidCharacterZeroWidthSpace
| Rule::InvalidEscapeSequence
| Rule::InvalidTodoCapitalization
| Rule::InvalidTodoTag
| Rule::LineContainsFixme
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ use memchr::memchr_iter;

use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_python_parser::Tok;
use ruff_python_ast::{AnyStringKind, FStringElement, StringLike, StringLikePart};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use crate::checkers::ast::Checker;
use crate::fix::edits::pad_start;

/// ## What it does
Expand Down Expand Up @@ -59,38 +59,83 @@ impl AlwaysFixableViolation for InvalidEscapeSequence {
}

/// W605
pub(crate) fn invalid_escape_sequence(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
indexer: &Indexer,
token: &Tok,
token_range: TextRange,
) {
let (token_source_code, string_start_location, kind) = match token {
Tok::FStringMiddle { kind, .. } => {
if kind.is_raw_string() {
return;
}
let Some(f_string_range) = indexer.fstring_ranges().innermost(token_range.start())
else {
return;
};
(locator.slice(token_range), f_string_range.start(), kind)
pub(crate) fn invalid_escape_sequence(checker: &mut Checker, string_like: StringLike) {
let locator = checker.locator();

for part in string_like.parts() {
if part.is_raw() {
continue;
}
Tok::String { kind, .. } => {
if kind.is_raw_string() {
return;
match part {
StringLikePart::String(string_literal) => {
check(
&mut checker.diagnostics,
locator,
string_literal.start(),
string_literal.range(),
AnyStringKind::from(string_literal.flags),
);
}
StringLikePart::Bytes(bytes_literal) => {
check(
&mut checker.diagnostics,
locator,
bytes_literal.start(),
bytes_literal.range(),
AnyStringKind::from(bytes_literal.flags),
);
}
StringLikePart::FString(f_string) => {
let kind = AnyStringKind::from(f_string.flags);
for element in f_string.elements.iter() {
match element {
FStringElement::Literal(literal) => {
check(
&mut checker.diagnostics,
locator,
f_string.start(),
literal.range(),
kind,
);
}
FStringElement::Expression(expression) => {
let Some(format_spec) = expression.format_spec.as_ref() else {
continue;
};
for literal in format_spec.elements.literals() {
check(
&mut checker.diagnostics,
locator,
f_string.start(),
literal.range(),
kind,
);
}
}
}
}
}
(locator.slice(token_range), token_range.start(), kind)
}
_ => return,
};
}
}

fn check(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
// Start position of the expression that contains the source range. This is used to generate
// the fix when the source range is part of the expression like in f-string which contains
// other f-string literal elements.
expr_start: TextSize,
// Range in the source code to perform the check on.
source_range: TextRange,
kind: AnyStringKind,
) {
let source = locator.slice(source_range);
let mut contains_valid_escape_sequence = false;
let mut invalid_escape_chars = Vec::new();

let mut prev = None;
let bytes = token_source_code.as_bytes();
let bytes = source.as_bytes();
for i in memchr_iter(b'\\', bytes) {
// If the previous character was also a backslash, skip.
if prev.is_some_and(|prev| prev == i - 1) {
Expand All @@ -100,9 +145,9 @@ pub(crate) fn invalid_escape_sequence(

prev = Some(i);

let next_char = match token_source_code[i + 1..].chars().next() {
let next_char = match source[i + 1..].chars().next() {
Some(next_char) => next_char,
None if token.is_f_string_middle() => {
None if kind.is_f_string() => {
// If we're at the end of a f-string middle token, the next character
// is actually emitted as a different token. For example,
//
Expand All @@ -124,7 +169,7 @@ pub(crate) fn invalid_escape_sequence(
// f-string. This means that if there's a `FStringMiddle` token and we
// encounter a `\` character, then the next character is always going to
// be part of the f-string.
if let Some(next_char) = locator.after(token_range.end()).chars().next() {
if let Some(next_char) = locator.after(source_range.end()).chars().next() {
next_char
} else {
continue;
Expand Down Expand Up @@ -172,15 +217,14 @@ pub(crate) fn invalid_escape_sequence(
continue;
}

let location = token_range.start() + TextSize::try_from(i).unwrap();
let location = source_range.start() + TextSize::try_from(i).unwrap();
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
invalid_escape_chars.push(InvalidEscapeChar {
ch: next_char,
range,
});
}

let mut invalid_escape_sequence = Vec::new();
if contains_valid_escape_sequence {
// Escape with backslash.
for invalid_escape_char in &invalid_escape_chars {
Expand All @@ -195,7 +239,7 @@ pub(crate) fn invalid_escape_sequence(
r"\".to_string(),
invalid_escape_char.start() + TextSize::from(1),
)));
invalid_escape_sequence.push(diagnostic);
diagnostics.push(diagnostic);
}
} else {
// Turn into raw string.
Expand All @@ -212,8 +256,8 @@ pub(crate) fn invalid_escape_sequence(
// Replace the Unicode prefix with `r`.
diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
"r".to_string(),
string_start_location,
string_start_location + TextSize::from(1),
expr_start,
expr_start + TextSize::from(1),
)));
} else {
// Insert the `r` prefix.
Expand All @@ -222,17 +266,15 @@ pub(crate) fn invalid_escape_sequence(
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
// `returnr"foo"` is not.
Fix::safe_edit(Edit::insertion(
pad_start("r".to_string(), string_start_location, locator),
string_start_location,
pad_start("r".to_string(), expr_start, locator),
expr_start,
)),
);
}

invalid_escape_sequence.push(diagnostic);
diagnostics.push(diagnostic);
}
}

diagnostics.extend(invalid_escape_sequence);
}

#[derive(Debug, PartialEq, Eq)]
Expand Down
11 changes: 11 additions & 0 deletions crates/ruff_python_ast/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,17 @@ pub enum StringLikePart<'a> {
FString(&'a ast::FString),
}

impl StringLikePart<'_> {
/// Returns `true` if this part is a raw string i.e., the literal is prefixed with `r`.
pub fn is_raw(&self) -> bool {
match self {
StringLikePart::String(string) => string.flags.prefix().is_raw(),
StringLikePart::Bytes(bytes) => bytes.flags.prefix().is_raw(),
StringLikePart::FString(f_string) => f_string.flags.prefix().is_raw(),
}
}
}

impl<'a> From<&'a ast::StringLiteral> for StringLikePart<'a> {
fn from(value: &'a ast::StringLiteral) -> Self {
StringLikePart::String(value)
Expand Down

0 comments on commit 1f748b8

Please sign in to comment.