From dd42961dd9d85a3e70baf1acf0bdd60db4393455 Mon Sep 17 00:00:00 2001 From: Auguste Lalande Date: Thu, 9 May 2024 08:36:20 -0400 Subject: [PATCH] [`pylint`] Detect `pathlib.Path.open` calls in `unspecified-encoding` (`PLW1514`) (#11288) ## Summary Resolves #11263 Detect `pathlib.Path.open` calls which do not specify a file encoding. ## Test Plan Test cases added to fixture. --------- Co-authored-by: Dhruv Manilawala --- .../fixtures/pylint/unspecified_encoding.py | 25 +++ .../pylint/rules/unspecified_encoding.rs | 166 ++++++++++++------ ...ests__PLW1514_unspecified_encoding.py.snap | 81 +++++++++ ...nspecified_encoding_python39_or_lower.snap | 99 +++++++++++ 4 files changed, 322 insertions(+), 49 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py index 01c12c9fc4c77..fca823bcbc8a2 100644 --- a/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py +++ b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py @@ -69,3 +69,28 @@ def func(*args, **kwargs): (("test.txt")), # comment ) + +# pathlib +from pathlib import Path + +# Errors. +Path("foo.txt").open() +Path("foo.txt").open("w") +text = Path("foo.txt").read_text() +Path("foo.txt").write_text(text) + +# Non-errors. +Path("foo.txt").open(encoding="utf-8") +Path("foo.txt").open("wb") +Path("foo.txt").open(*args) +Path("foo.txt").open(**kwargs) +text = Path("foo.txt").read_text(encoding="utf-8") +text = Path("foo.txt").read_text(*args) +text = Path("foo.txt").read_text(**kwargs) +Path("foo.txt").write_text(text, encoding="utf-8") +Path("foo.txt").write_text(text, *args) +Path("foo.txt").write_text(text, **kwargs) + +# Violation but not detectable +x = Path("foo.txt") +x.open() diff --git a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs index 801a3b8f8b02b..c5f6b9370343c 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs @@ -1,11 +1,12 @@ +use std::fmt::{Display, Formatter}; + use anyhow::Result; -use ast::StringLiteralFlags; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast as ast; use ruff_python_ast::name::QualifiedName; -use ruff_python_ast::Expr; +use ruff_python_ast::{self as ast, Expr, StringLiteralFlags}; +use ruff_python_semantic::SemanticModel; use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; @@ -43,7 +44,7 @@ use crate::settings::types::PythonVersion; #[violation] pub struct UnspecifiedEncoding { function_name: String, - mode: Mode, + mode: ModeArgument, } impl AlwaysFixableViolation for UnspecifiedEncoding { @@ -55,10 +56,10 @@ impl AlwaysFixableViolation for UnspecifiedEncoding { } = self; match mode { - Mode::Supported => { + ModeArgument::Supported => { format!("`{function_name}` in text mode without explicit `encoding` argument") } - Mode::Unsupported => { + ModeArgument::Unsupported => { format!("`{function_name}` without explicit `encoding` argument") } } @@ -71,11 +72,9 @@ impl AlwaysFixableViolation for UnspecifiedEncoding { /// PLW1514 pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) { - let Some((function_name, mode)) = checker - .semantic() - .resolve_qualified_name(&call.func) - .filter(|qualified_name| is_violation(call, qualified_name)) - .map(|qualified_name| (qualified_name.to_string(), Mode::from(&qualified_name))) + let Some((function_name, mode)) = Callee::try_from_call_expression(call, checker.semantic()) + .filter(|segments| is_violation(call, segments)) + .map(|segments| (segments.to_string(), segments.mode_argument())) else { return; }; @@ -97,6 +96,68 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) checker.diagnostics.push(diagnostic); } +/// Represents the path of the function or method being called. +enum Callee<'a> { + /// Fully-qualified symbol name of the callee. + Qualified(QualifiedName<'a>), + /// Attribute value for the `pathlib.Path(...)` call e.g., `open` in + /// `pathlib.Path(...).open(...)`. + Pathlib(&'a str), +} + +impl<'a> Callee<'a> { + fn try_from_call_expression( + call: &'a ast::ExprCall, + semantic: &'a SemanticModel, + ) -> Option { + if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = call.func.as_ref() { + // Check for `pathlib.Path(...).open(...)` or equivalent + if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() { + if semantic + .resolve_qualified_name(func) + .is_some_and(|qualified_name| { + matches!(qualified_name.segments(), ["pathlib", "Path"]) + }) + { + return Some(Callee::Pathlib(attr)); + } + } + } + + if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) { + return Some(Callee::Qualified(qualified_name)); + } + + None + } + + fn mode_argument(&self) -> ModeArgument { + match self { + Callee::Qualified(qualified_name) => match qualified_name.segments() { + ["" | "codecs" | "_io", "open"] => ModeArgument::Supported, + ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { + ModeArgument::Supported + } + ["io" | "_io", "TextIOWrapper"] => ModeArgument::Unsupported, + _ => ModeArgument::Unsupported, + }, + Callee::Pathlib(attr) => match *attr { + "open" => ModeArgument::Supported, + _ => ModeArgument::Unsupported, + }, + } + } +} + +impl Display for Callee<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Callee::Qualified(qualified_name) => f.write_str(&qualified_name.to_string()), + Callee::Pathlib(attr) => f.write_str(&format!("pathlib.Path(...).{attr}")), + } + } +} + /// Generate an [`Edit`] for Python 3.10 and later. fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix { Fix::unsafe_edit(add_argument( @@ -146,7 +207,7 @@ fn is_binary_mode(expr: &Expr) -> Option { } /// Returns `true` if the given call lacks an explicit `encoding`. -fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool { +fn is_violation(call: &ast::ExprCall, qualified_name: &Callee) -> bool { // If we have something like `*args`, which might contain the encoding argument, abort. if call.arguments.args.iter().any(Expr::is_starred_expr) { return false; @@ -160,54 +221,61 @@ fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool { { return false; } - match qualified_name.segments() { - ["" | "codecs" | "_io", "open"] => { - if let Some(mode_arg) = call.arguments.find_argument("mode", 1) { - if is_binary_mode(mode_arg).unwrap_or(true) { - // binary mode or unknown mode is no violation - return false; + match qualified_name { + Callee::Qualified(qualified_name) => match qualified_name.segments() { + ["" | "codecs" | "_io", "open"] => { + if let Some(mode_arg) = call.arguments.find_argument("mode", 1) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } } + // else mode not specified, defaults to text mode + call.arguments.find_argument("encoding", 3).is_none() } - // else mode not specified, defaults to text mode - call.arguments.find_argument("encoding", 3).is_none() - } - ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { - let mode_pos = usize::from(qualified_name.segments()[1] == "SpooledTemporaryFile"); - if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) { - if is_binary_mode(mode_arg).unwrap_or(true) { - // binary mode or unknown mode is no violation + ["tempfile", tempfile_class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] => + { + let mode_pos = usize::from(*tempfile_class == "SpooledTemporaryFile"); + if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } else { + // defaults to binary mode return false; } - } else { - // defaults to binary mode - return false; + call.arguments + .find_argument("encoding", mode_pos + 2) + .is_none() } - call.arguments - .find_argument("encoding", mode_pos + 2) - .is_none() - } - ["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(), - _ => false, + ["io" | "_io", "TextIOWrapper"] => { + call.arguments.find_argument("encoding", 1).is_none() + } + _ => false, + }, + Callee::Pathlib(attr) => match *attr { + "open" => { + if let Some(mode_arg) = call.arguments.find_argument("mode", 0) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } + // else mode not specified, defaults to text mode + call.arguments.find_argument("encoding", 2).is_none() + } + "read_text" => call.arguments.find_argument("encoding", 0).is_none(), + "write_text" => call.arguments.find_argument("encoding", 1).is_none(), + _ => false, + }, } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Mode { +enum ModeArgument { /// The call supports a `mode` argument. Supported, /// The call does not support a `mode` argument. Unsupported, } - -impl From<&QualifiedName<'_>> for Mode { - fn from(value: &QualifiedName<'_>) -> Self { - match value.segments() { - ["" | "codecs" | "_io", "open"] => Mode::Supported, - ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { - Mode::Supported - } - ["io" | "_io", "TextIOWrapper"] => Mode::Unsupported, - _ => Mode::Unsupported, - } - } -} diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap index 9ceaf09daf820..200c1e380cdf6 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap @@ -352,5 +352,86 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit ` 69 |+ (("test.txt")), encoding="locale", 70 70 | # comment 71 71 | ) +72 72 | +unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument + | +76 | # Errors. +77 | Path("foo.txt").open() + | ^^^^^^^^^^^^^^^^^^^^ PLW1514 +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +74 74 | from pathlib import Path +75 75 | +76 76 | # Errors. +77 |-Path("foo.txt").open() + 77 |+Path("foo.txt").open(encoding="locale") +78 78 | Path("foo.txt").open("w") +79 79 | text = Path("foo.txt").read_text() +80 80 | Path("foo.txt").write_text(text) + +unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument + | +76 | # Errors. +77 | Path("foo.txt").open() +78 | Path("foo.txt").open("w") + | ^^^^^^^^^^^^^^^^^^^^ PLW1514 +79 | text = Path("foo.txt").read_text() +80 | Path("foo.txt").write_text(text) + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +75 75 | +76 76 | # Errors. +77 77 | Path("foo.txt").open() +78 |-Path("foo.txt").open("w") + 78 |+Path("foo.txt").open("w", encoding="locale") +79 79 | text = Path("foo.txt").read_text() +80 80 | Path("foo.txt").write_text(text) +81 81 | + +unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument + | +77 | Path("foo.txt").open() +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() + | ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +80 | Path("foo.txt").write_text(text) + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +76 76 | # Errors. +77 77 | Path("foo.txt").open() +78 78 | Path("foo.txt").open("w") +79 |-text = Path("foo.txt").read_text() + 79 |+text = Path("foo.txt").read_text(encoding="locale") +80 80 | Path("foo.txt").write_text(text) +81 81 | +82 82 | # Non-errors. + +unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument + | +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() +80 | Path("foo.txt").write_text(text) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +81 | +82 | # Non-errors. + | + = help: Add explicit `encoding` argument +ℹ Unsafe fix +77 77 | Path("foo.txt").open() +78 78 | Path("foo.txt").open("w") +79 79 | text = Path("foo.txt").read_text() +80 |-Path("foo.txt").write_text(text) + 80 |+Path("foo.txt").write_text(text, encoding="locale") +81 81 | +82 82 | # Non-errors. +83 83 | Path("foo.txt").open(encoding="utf-8") diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__unspecified_encoding_python39_or_lower.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__unspecified_encoding_python39_or_lower.snap index 1390eeede0cf3..f58de408517eb 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__unspecified_encoding_python39_or_lower.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__unspecified_encoding_python39_or_lower.snap @@ -473,5 +473,104 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit ` 70 |+ (("test.txt")), encoding=locale.getpreferredencoding(False), 70 71 | # comment 71 72 | ) +72 73 | +unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument + | +76 | # Errors. +77 | Path("foo.txt").open() + | ^^^^^^^^^^^^^^^^^^^^ PLW1514 +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +72 72 | +73 73 | # pathlib +74 74 | from pathlib import Path + 75 |+import locale +75 76 | +76 77 | # Errors. +77 |-Path("foo.txt").open() + 78 |+Path("foo.txt").open(encoding=locale.getpreferredencoding(False)) +78 79 | Path("foo.txt").open("w") +79 80 | text = Path("foo.txt").read_text() +80 81 | Path("foo.txt").write_text(text) + +unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument + | +76 | # Errors. +77 | Path("foo.txt").open() +78 | Path("foo.txt").open("w") + | ^^^^^^^^^^^^^^^^^^^^ PLW1514 +79 | text = Path("foo.txt").read_text() +80 | Path("foo.txt").write_text(text) + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +72 72 | +73 73 | # pathlib +74 74 | from pathlib import Path + 75 |+import locale +75 76 | +76 77 | # Errors. +77 78 | Path("foo.txt").open() +78 |-Path("foo.txt").open("w") + 79 |+Path("foo.txt").open("w", encoding=locale.getpreferredencoding(False)) +79 80 | text = Path("foo.txt").read_text() +80 81 | Path("foo.txt").write_text(text) +81 82 | + +unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument + | +77 | Path("foo.txt").open() +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() + | ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +80 | Path("foo.txt").write_text(text) + | + = help: Add explicit `encoding` argument + +ℹ Unsafe fix +72 72 | +73 73 | # pathlib +74 74 | from pathlib import Path + 75 |+import locale +75 76 | +76 77 | # Errors. +77 78 | Path("foo.txt").open() +78 79 | Path("foo.txt").open("w") +79 |-text = Path("foo.txt").read_text() + 80 |+text = Path("foo.txt").read_text(encoding=locale.getpreferredencoding(False)) +80 81 | Path("foo.txt").write_text(text) +81 82 | +82 83 | # Non-errors. + +unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument + | +78 | Path("foo.txt").open("w") +79 | text = Path("foo.txt").read_text() +80 | Path("foo.txt").write_text(text) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +81 | +82 | # Non-errors. + | + = help: Add explicit `encoding` argument +ℹ Unsafe fix +72 72 | +73 73 | # pathlib +74 74 | from pathlib import Path + 75 |+import locale +75 76 | +76 77 | # Errors. +77 78 | Path("foo.txt").open() +78 79 | Path("foo.txt").open("w") +79 80 | text = Path("foo.txt").read_text() +80 |-Path("foo.txt").write_text(text) + 81 |+Path("foo.txt").write_text(text, encoding=locale.getpreferredencoding(False)) +81 82 | +82 83 | # Non-errors. +83 84 | Path("foo.txt").open(encoding="utf-8")