Skip to content

Commit

Permalink
[ruff] Extend unnecessary-regular-expression to non-literal strings…
Browse files Browse the repository at this point in the history
… (`RUF055`) (#14679)

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
  • Loading branch information
ntBre and AlexWaygood authored Dec 3, 2024
1 parent 81bfcc9 commit 62e358e
Show file tree
Hide file tree
Showing 7 changed files with 387 additions and 165 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,21 @@ def dashrepl(matchobj):
"",
s, # string
)

# A diagnostic should not be emitted for `sub` replacements with backreferences or
# most other ASCII escapes
re.sub(r"a", r"\g<0>\g<0>\g<0>", "a")
re.sub(r"a", r"\1", "a")
re.sub(r"a", r"\s", "a")

# Escapes like \n are "processed":
# `re.sub(r"a", r"\n", some_string)` is fixed to `some_string.replace("a", "\n")`
# *not* `some_string.replace("a", "\\n")`.
# We currently emit diagnostics for some of these without fixing them.
re.sub(r"a", "\n", "a")
re.sub(r"a", r"\n", "a")
re.sub(r"a", "\a", "a")
re.sub(r"a", r"\a", "a")

re.sub(r"a", "\?", "a")
re.sub(r"a", r"\?", "a")
17 changes: 17 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Test that RUF055 can follow a single str assignment for both the pattern and
the replacement argument to re.sub
"""

import re

pat1 = "needle"

re.sub(pat1, "", haystack)

# aliases are not followed, so this one should not trigger the rule
if pat4 := pat1:
re.sub(pat4, "", haystack)

# also works for the `repl` argument in sub
repl = "new"
re.sub(r"abc", repl, haystack)
3 changes: 2 additions & 1 deletion crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ mod tests {
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048_1.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_concat.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use ruff_diagnostics::{AlwaysFixableViolation, Applicability, Diagnostic, Edit, Fix};
use itertools::Itertools;
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, Identifier,
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, ExprStringLiteral,
Identifier,
};
use ruff_python_semantic::analyze::typing::find_binding_value;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::TextRange;

Expand Down Expand Up @@ -53,17 +56,19 @@ use crate::checkers::ast::Checker;
/// - [Python Regular Expression HOWTO: Common Problems - Use String Methods](https://docs.python.org/3/howto/regex.html#use-string-methods)
#[derive(ViolationMetadata)]
pub(crate) struct UnnecessaryRegularExpression {
replacement: String,
replacement: Option<String>,
}

impl AlwaysFixableViolation for UnnecessaryRegularExpression {
impl Violation for UnnecessaryRegularExpression {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
"Plain string pattern passed to `re` function".to_string()
}

fn fix_title(&self) -> String {
format!("Replace with `{}`", self.replacement)
fn fix_title(&self) -> Option<String> {
Some(format!("Replace with `{}`", self.replacement.as_ref()?))
}
}

Expand All @@ -90,8 +95,8 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
return;
};

// For now, restrict this rule to string literals
let Some(string_lit) = re_func.pattern.as_string_literal_expr() else {
// For now, restrict this rule to string literals and variables that can be resolved to literals
let Some(string_lit) = resolve_string_literal(re_func.pattern, semantic) else {
return;
};

Expand All @@ -110,33 +115,36 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
// we can proceed with the str method replacement
let new_expr = re_func.replacement();

let repl = checker.generator().expr(&new_expr);
let diagnostic = Diagnostic::new(
let repl = new_expr.map(|expr| checker.generator().expr(&expr));
let mut diagnostic = Diagnostic::new(
UnnecessaryRegularExpression {
replacement: repl.clone(),
},
call.range,
);

let fix = Fix::applicable_edit(
Edit::range_replacement(repl, call.range),
if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Applicability::Unsafe
} else {
Applicability::Safe
},
);
if let Some(repl) = repl {
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_replacement(repl, call.range),
if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Applicability::Unsafe
} else {
Applicability::Safe
},
));
}

checker.diagnostics.push(diagnostic.with_fix(fix));
checker.diagnostics.push(diagnostic);
}

/// The `re` functions supported by this rule.
#[derive(Debug)]
enum ReFuncKind<'a> {
Sub { repl: &'a Expr },
// Only `Some` if it's a fixable `re.sub()` call
Sub { repl: Option<&'a Expr> },
Match,
Search,
Fullmatch,
Expand All @@ -152,7 +160,7 @@ struct ReFunc<'a> {

impl<'a> ReFunc<'a> {
fn from_call_expr(
semantic: &SemanticModel,
semantic: &'a SemanticModel,
call: &'a ExprCall,
func_name: &str,
) -> Option<Self> {
Expand All @@ -173,11 +181,32 @@ impl<'a> ReFunc<'a> {
// version
("sub", 3) => {
let repl = call.arguments.find_argument("repl", 1)?;
if !repl.is_string_literal_expr() {
return None;
let lit = resolve_string_literal(repl, semantic)?;
let mut fixable = true;
for (c, next) in lit.value.chars().tuple_windows() {
// `\0` (or any other ASCII digit) and `\g` have special meaning in `repl` strings.
// Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes
// `re.PatternError` to be raised at runtime.
//
// If we see that the escaped character is an alphanumeric ASCII character,
// we should only emit a diagnostic suggesting to replace the `re.sub()` call with
// `str.replace`if we can detect that the escaped character is one that is both
// valid in a `repl` string *and* does not have any special meaning in a REPL string.
//
// It's out of scope for this rule to change invalid `re.sub()` calls into something
// that would not raise an exception at runtime. They should be left as-is.
if c == '\\' && next.is_ascii_alphanumeric() {
if "abfnrtv".contains(next) {
fixable = false;
} else {
return None;
}
}
}
Some(ReFunc {
kind: ReFuncKind::Sub { repl },
kind: ReFuncKind::Sub {
repl: fixable.then_some(repl),
},
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 2)?,
})
Expand All @@ -201,20 +230,20 @@ impl<'a> ReFunc<'a> {
}
}

fn replacement(&self) -> Expr {
fn replacement(&self) -> Option<Expr> {
match self.kind {
// string.replace(pattern, repl)
ReFuncKind::Sub { repl } => {
self.method_expr("replace", vec![self.pattern.clone(), repl.clone()])
}
ReFuncKind::Sub { repl } => repl
.cloned()
.map(|repl| self.method_expr("replace", vec![self.pattern.clone(), repl])),
// string.startswith(pattern)
ReFuncKind::Match => self.method_expr("startswith", vec![self.pattern.clone()]),
ReFuncKind::Match => Some(self.method_expr("startswith", vec![self.pattern.clone()])),
// pattern in string
ReFuncKind::Search => self.compare_expr(CmpOp::In),
ReFuncKind::Search => Some(self.compare_expr(CmpOp::In)),
// string == pattern
ReFuncKind::Fullmatch => self.compare_expr(CmpOp::Eq),
ReFuncKind::Fullmatch => Some(self.compare_expr(CmpOp::Eq)),
// string.split(pattern)
ReFuncKind::Split => self.method_expr("split", vec![self.pattern.clone()]),
ReFuncKind::Split => Some(self.method_expr("split", vec![self.pattern.clone()])),
}
}

Expand Down Expand Up @@ -248,3 +277,23 @@ impl<'a> ReFunc<'a> {
})
}
}

/// Try to resolve `name` to an [`ExprStringLiteral`] in `semantic`.
fn resolve_string_literal<'a>(
name: &'a Expr,
semantic: &'a SemanticModel,
) -> Option<&'a ExprStringLiteral> {
if name.is_string_literal_expr() {
return name.as_string_literal_expr();
}

if let Some(name_expr) = name.as_name_expr() {
let binding = semantic.binding(semantic.only_binding(name_expr)?);
let value = find_binding_value(binding, semantic)?;
if value.is_string_literal_expr() {
return value.as_string_literal_expr();
}
}

None
}

This file was deleted.

Loading

0 comments on commit 62e358e

Please sign in to comment.