From cd82b83f8969d3aff48ab7af2f9df0bf4c6ea824 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 17 May 2023 12:20:58 -0400 Subject: [PATCH] Avoid triggering `pd#at` and friends on non-subscripts (#4474) --- crates/ruff/src/checkers/ast/mod.rs | 6 +- crates/ruff/src/rules/pandas_vet/helpers.rs | 61 +++++++++--- crates/ruff/src/rules/pandas_vet/mod.rs | 26 +++++ .../ruff/src/rules/pandas_vet/rules/attr.rs | 47 +++++++++ .../rules/{check_call.rs => call.rs} | 42 ++------ .../src/rules/pandas_vet/rules/check_attr.rs | 96 ------------------- crates/ruff/src/rules/pandas_vet/rules/mod.rs | 14 +-- .../src/rules/pandas_vet/rules/subscript.rs | 66 +++++++++++++ 8 files changed, 207 insertions(+), 151 deletions(-) create mode 100644 crates/ruff/src/rules/pandas_vet/rules/attr.rs rename crates/ruff/src/rules/pandas_vet/rules/{check_call.rs => call.rs} (65%) delete mode 100644 crates/ruff/src/rules/pandas_vet/rules/check_attr.rs create mode 100644 crates/ruff/src/rules/pandas_vet/rules/subscript.rs diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index dba5a695c2fbb..7f182d450d9b6 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -2352,6 +2352,8 @@ where { flake8_simplify::rules::use_capital_environment_variables(self, expr); } + + pandas_vet::rules::subscript(self, value, expr); } Expr::Tuple(ast::ExprTuple { elts, @@ -2525,7 +2527,7 @@ where if self.settings.rules.enabled(Rule::PrivateMemberAccess) { flake8_self::rules::private_member_access(self, expr); } - pandas_vet::rules::check_attr(self, attr, value, expr); + pandas_vet::rules::attr(self, attr, value, expr); } Expr::Call(ast::ExprCall { func, @@ -3009,7 +3011,7 @@ where .into_iter(), ); } - pandas_vet::rules::check_call(self, func); + pandas_vet::rules::call(self, func); if self.settings.rules.enabled(Rule::PandasUseOfPdMerge) { if let Some(diagnostic) = pandas_vet::rules::use_of_pd_merge(func) { diff --git a/crates/ruff/src/rules/pandas_vet/helpers.rs b/crates/ruff/src/rules/pandas_vet/helpers.rs index cbe66b476a0cf..45db1a5d0b8c7 100644 --- a/crates/ruff/src/rules/pandas_vet/helpers.rs +++ b/crates/ruff/src/rules/pandas_vet/helpers.rs @@ -1,18 +1,51 @@ +use ruff_python_semantic::binding::{BindingKind, Importation}; +use ruff_python_semantic::context::Context; +use rustpython_parser::ast; use rustpython_parser::ast::Expr; -/// Return `true` if an `Expr` _could_ be a `DataFrame`. This rules out -/// obviously-wrong cases, like constants and literals. -pub(crate) const fn is_dataframe_candidate(expr: &Expr) -> bool { - !matches!( - expr, +pub(crate) enum Resolution { + /// The expression resolves to an irrelevant expression type (e.g., a constant). + IrrelevantExpression, + /// The expression resolves to an irrelevant binding (e.g., a function definition). + IrrelevantBinding, + /// The expression resolves to a relevant local binding (e.g., a variable). + RelevantLocal, + /// The expression resolves to the `pandas` module itself. + PandasModule, +} + +/// Test an [`Expr`] for relevance to Pandas-related operations. +pub(crate) fn test_expression(expr: &Expr, context: &Context) -> Resolution { + match expr { Expr::Constant(_) - | Expr::Tuple(_) - | Expr::List(_) - | Expr::Set(_) - | Expr::Dict(_) - | Expr::SetComp(_) - | Expr::ListComp(_) - | Expr::DictComp(_) - | Expr::GeneratorExp(_) - ) + | Expr::Tuple(_) + | Expr::List(_) + | Expr::Set(_) + | Expr::Dict(_) + | Expr::SetComp(_) + | Expr::ListComp(_) + | Expr::DictComp(_) + | Expr::GeneratorExp(_) => Resolution::IrrelevantExpression, + Expr::Name(ast::ExprName { id, .. }) => { + context + .find_binding(id) + .map_or(Resolution::IrrelevantBinding, |binding| { + match binding.kind { + BindingKind::Annotation + | BindingKind::Argument + | BindingKind::Assignment + | BindingKind::NamedExprAssignment + | BindingKind::Binding + | BindingKind::LoopVar + | BindingKind::Global + | BindingKind::Nonlocal => Resolution::RelevantLocal, + BindingKind::Importation(Importation { + full_name: module, .. + }) if module == "pandas" => Resolution::PandasModule, + _ => Resolution::IrrelevantBinding, + } + }) + } + _ => Resolution::RelevantLocal, + } } diff --git a/crates/ruff/src/rules/pandas_vet/mod.rs b/crates/ruff/src/rules/pandas_vet/mod.rs index 91bd2fabe3471..cc610c5ad6cdb 100644 --- a/crates/ruff/src/rules/pandas_vet/mod.rs +++ b/crates/ruff/src/rules/pandas_vet/mod.rs @@ -105,6 +105,32 @@ mod tests { x = pd.DataFrame() index = x.loc[:, ['B', 'A']] "#, &[]; "PD008_pass")] + #[test_case(r#" + import io + import zipfile + + + class MockBinaryFile(io.BytesIO): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def close(self): + pass # Don't allow closing the file, it would clear the buffer + + + zip_buffer = MockBinaryFile() + + with zipfile.ZipFile(zip_buffer, "w") as zf: + zf.writestr("dir/file.txt", "This is a test") + + # Reset the BytesIO object's cursor to the start. + zip_buffer.seek(0) + + with zipfile.ZipFile(zip_buffer, "r") as zf: + zpath = zipfile.Path(zf, "/") + + dir_name, file_name = zpath.at.split("/") + "#, &[]; "PD008_pass_on_attr")] #[test_case(r#" import pandas as pd x = pd.DataFrame() diff --git a/crates/ruff/src/rules/pandas_vet/rules/attr.rs b/crates/ruff/src/rules/pandas_vet/rules/attr.rs new file mode 100644 index 0000000000000..67311ad8a0460 --- /dev/null +++ b/crates/ruff/src/rules/pandas_vet/rules/attr.rs @@ -0,0 +1,47 @@ +use rustpython_parser::ast::{Expr, Ranged}; + +use ruff_diagnostics::Violation; +use ruff_diagnostics::{Diagnostic, DiagnosticKind}; +use ruff_macros::{derive_message_formats, violation}; + +use crate::checkers::ast::Checker; +use crate::registry::Rule; +use crate::rules::pandas_vet::helpers::{test_expression, Resolution}; + +#[violation] +pub struct PandasUseOfDotValues; + +impl Violation for PandasUseOfDotValues { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use `.to_numpy()` instead of `.values`") + } +} + +pub(crate) fn attr(checker: &mut Checker, attr: &str, value: &Expr, attr_expr: &Expr) { + let rules = &checker.settings.rules; + let violation: DiagnosticKind = match attr { + "values" if rules.enabled(Rule::PandasUseOfDotValues) => PandasUseOfDotValues.into(), + _ => return, + }; + + // Avoid flagging on function calls (e.g., `df.values()`). + if let Some(parent) = checker.ctx.expr_parent() { + if matches!(parent, Expr::Call(_)) { + return; + } + } + + // Avoid flagging on non-DataFrames (e.g., `{"a": 1}.values`), and on irrelevant bindings + // (like imports). + if !matches!( + test_expression(value, &checker.ctx), + Resolution::RelevantLocal + ) { + return; + } + + checker + .diagnostics + .push(Diagnostic::new(violation, attr_expr.range())); +} diff --git a/crates/ruff/src/rules/pandas_vet/rules/check_call.rs b/crates/ruff/src/rules/pandas_vet/rules/call.rs similarity index 65% rename from crates/ruff/src/rules/pandas_vet/rules/check_call.rs rename to crates/ruff/src/rules/pandas_vet/rules/call.rs index c2aa897cbd696..2d0396d1fc12a 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/check_call.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/call.rs @@ -3,11 +3,10 @@ use rustpython_parser::ast::{self, Expr, Ranged}; use ruff_diagnostics::Violation; use ruff_diagnostics::{Diagnostic, DiagnosticKind}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_semantic::binding::{BindingKind, Importation}; use crate::checkers::ast::Checker; use crate::registry::Rule; -use crate::rules::pandas_vet::helpers::is_dataframe_candidate; +use crate::rules::pandas_vet::helpers::{test_expression, Resolution}; #[violation] pub struct PandasUseOfDotIsNull; @@ -61,9 +60,11 @@ impl Violation for PandasUseOfDotStack { } } -pub(crate) fn check_call(checker: &mut Checker, func: &Expr) { +pub(crate) fn call(checker: &mut Checker, func: &Expr) { let rules = &checker.settings.rules; - let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else {return}; + let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else { + return; + }; let violation: DiagnosticKind = match attr.as_str() { "isnull" if rules.enabled(Rule::PandasUseOfDotIsNull) => PandasUseOfDotIsNull.into(), "notnull" if rules.enabled(Rule::PandasUseOfDotNotNull) => PandasUseOfDotNotNull.into(), @@ -77,37 +78,14 @@ pub(crate) fn check_call(checker: &mut Checker, func: &Expr) { _ => return, }; - if !is_dataframe_candidate(value) { + // Ignore irrelevant bindings (like imports). + if !matches!( + test_expression(value, &checker.ctx), + Resolution::RelevantLocal | Resolution::PandasModule + ) { return; } - // If the target is a named variable, avoid triggering on - // irrelevant bindings (like non-Pandas imports). - if let Expr::Name(ast::ExprName { id, .. }) = value.as_ref() { - if checker.ctx.find_binding(id).map_or(true, |binding| { - if let BindingKind::Importation(Importation { - full_name: module, .. - }) = &binding.kind - { - module != &"pandas" - } else { - matches!( - binding.kind, - BindingKind::Builtin - | BindingKind::ClassDefinition - | BindingKind::FunctionDefinition - | BindingKind::Export(..) - | BindingKind::FutureImportation - | BindingKind::Importation(..) - | BindingKind::FromImportation(..) - | BindingKind::SubmoduleImportation(..) - ) - } - }) { - return; - } - } - checker .diagnostics .push(Diagnostic::new(violation, func.range())); diff --git a/crates/ruff/src/rules/pandas_vet/rules/check_attr.rs b/crates/ruff/src/rules/pandas_vet/rules/check_attr.rs deleted file mode 100644 index c392a99c7830e..0000000000000 --- a/crates/ruff/src/rules/pandas_vet/rules/check_attr.rs +++ /dev/null @@ -1,96 +0,0 @@ -use rustpython_parser::ast::{self, Expr, Ranged}; - -use ruff_diagnostics::Violation; -use ruff_diagnostics::{Diagnostic, DiagnosticKind}; -use ruff_macros::{derive_message_formats, violation}; -use ruff_python_semantic::binding::BindingKind; - -use crate::checkers::ast::Checker; -use crate::registry::Rule; -use crate::rules::pandas_vet::helpers::is_dataframe_candidate; - -#[violation] -pub struct PandasUseOfDotIx; - -impl Violation for PandasUseOfDotIx { - #[derive_message_formats] - fn message(&self) -> String { - format!("`.ix` is deprecated; use more explicit `.loc` or `.iloc`") - } -} - -#[violation] -pub struct PandasUseOfDotAt; - -impl Violation for PandasUseOfDotAt { - #[derive_message_formats] - fn message(&self) -> String { - format!("Use `.loc` instead of `.at`. If speed is important, use numpy.") - } -} - -#[violation] -pub struct PandasUseOfDotIat; - -impl Violation for PandasUseOfDotIat { - #[derive_message_formats] - fn message(&self) -> String { - format!("Use `.iloc` instead of `.iat`. If speed is important, use numpy.") - } -} - -#[violation] -pub struct PandasUseOfDotValues; - -impl Violation for PandasUseOfDotValues { - #[derive_message_formats] - fn message(&self) -> String { - format!("Use `.to_numpy()` instead of `.values`") - } -} - -pub(crate) fn check_attr(checker: &mut Checker, attr: &str, value: &Expr, attr_expr: &Expr) { - let rules = &checker.settings.rules; - let violation: DiagnosticKind = match attr { - "ix" if rules.enabled(Rule::PandasUseOfDotIx) => PandasUseOfDotIx.into(), - "at" if rules.enabled(Rule::PandasUseOfDotAt) => PandasUseOfDotAt.into(), - "iat" if rules.enabled(Rule::PandasUseOfDotIat) => PandasUseOfDotIat.into(), - "values" if rules.enabled(Rule::PandasUseOfDotValues) => PandasUseOfDotValues.into(), - _ => return, - }; - - // Avoid flagging on function calls (e.g., `df.values()`). - if let Some(parent) = checker.ctx.expr_parent() { - if matches!(parent, Expr::Call(_)) { - return; - } - } - // Avoid flagging on non-DataFrames (e.g., `{"a": 1}.values`). - if !is_dataframe_candidate(value) { - return; - } - - // If the target is a named variable, avoid triggering on - // irrelevant bindings (like imports). - if let Expr::Name(ast::ExprName { id, .. }) = value { - if checker.ctx.find_binding(id).map_or(true, |binding| { - matches!( - binding.kind, - BindingKind::Builtin - | BindingKind::ClassDefinition - | BindingKind::FunctionDefinition - | BindingKind::Export(..) - | BindingKind::FutureImportation - | BindingKind::Importation(..) - | BindingKind::FromImportation(..) - | BindingKind::SubmoduleImportation(..) - ) - }) { - return; - } - } - - checker - .diagnostics - .push(Diagnostic::new(violation, attr_expr.range())); -} diff --git a/crates/ruff/src/rules/pandas_vet/rules/mod.rs b/crates/ruff/src/rules/pandas_vet/rules/mod.rs index a4dfb12e5826e..917c9a2efab6d 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/mod.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/mod.rs @@ -1,16 +1,16 @@ pub(crate) use assignment_to_df::{assignment_to_df, PandasDfVariableName}; -pub(crate) use check_attr::{ - check_attr, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx, PandasUseOfDotValues, -}; -pub(crate) use check_call::{ - check_call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack, +pub(crate) use attr::{attr, PandasUseOfDotValues}; +pub(crate) use call::{ + call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack, PandasUseOfDotReadTable, PandasUseOfDotStack, }; pub(crate) use inplace_argument::{inplace_argument, PandasUseOfInplaceArgument}; pub(crate) use pd_merge::{use_of_pd_merge, PandasUseOfPdMerge}; +pub(crate) use subscript::{subscript, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx}; pub(crate) mod assignment_to_df; -pub(crate) mod check_attr; -pub(crate) mod check_call; +pub(crate) mod attr; +pub(crate) mod call; pub(crate) mod inplace_argument; pub(crate) mod pd_merge; +pub(crate) mod subscript; diff --git a/crates/ruff/src/rules/pandas_vet/rules/subscript.rs b/crates/ruff/src/rules/pandas_vet/rules/subscript.rs new file mode 100644 index 0000000000000..1ee7147a0b51f --- /dev/null +++ b/crates/ruff/src/rules/pandas_vet/rules/subscript.rs @@ -0,0 +1,66 @@ +use rustpython_parser::ast::{self, Expr, Ranged}; + +use ruff_diagnostics::Violation; +use ruff_diagnostics::{Diagnostic, DiagnosticKind}; +use ruff_macros::{derive_message_formats, violation}; + +use crate::checkers::ast::Checker; +use crate::registry::Rule; +use crate::rules::pandas_vet::helpers::{test_expression, Resolution}; + +#[violation] +pub struct PandasUseOfDotIx; + +impl Violation for PandasUseOfDotIx { + #[derive_message_formats] + fn message(&self) -> String { + format!("`.ix` is deprecated; use more explicit `.loc` or `.iloc`") + } +} + +#[violation] +pub struct PandasUseOfDotAt; + +impl Violation for PandasUseOfDotAt { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use `.loc` instead of `.at`. If speed is important, use NumPy.") + } +} + +#[violation] +pub struct PandasUseOfDotIat; + +impl Violation for PandasUseOfDotIat { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use `.iloc` instead of `.iat`. If speed is important, use NumPy.") + } +} + +pub(crate) fn subscript(checker: &mut Checker, value: &Expr, expr: &Expr) { + let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = value else { + return; + }; + + let rules = &checker.settings.rules; + let violation: DiagnosticKind = match attr.as_str() { + "ix" if rules.enabled(Rule::PandasUseOfDotIx) => PandasUseOfDotIx.into(), + "at" if rules.enabled(Rule::PandasUseOfDotAt) => PandasUseOfDotAt.into(), + "iat" if rules.enabled(Rule::PandasUseOfDotIat) => PandasUseOfDotIat.into(), + _ => return, + }; + + // Avoid flagging on non-DataFrames (e.g., `{"a": 1}.at[0]`), and on irrelevant bindings + // (like imports). + if !matches!( + test_expression(value, &checker.ctx), + Resolution::RelevantLocal + ) { + return; + } + + checker + .diagnostics + .push(Diagnostic::new(violation, expr.range())); +}