Skip to content

Commit

Permalink
Avoid triggering pd#at and friends on non-subscripts (#4474)
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh authored May 17, 2023
1 parent 39fb2cc commit cd82b83
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 151 deletions.
6 changes: 4 additions & 2 deletions crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,8 @@ where
{
flake8_simplify::rules::use_capital_environment_variables(self, expr);
}

pandas_vet::rules::subscript(self, value, expr);
}
Expr::Tuple(ast::ExprTuple {
elts,
Expand Down Expand Up @@ -2525,7 +2527,7 @@ where
if self.settings.rules.enabled(Rule::PrivateMemberAccess) {
flake8_self::rules::private_member_access(self, expr);
}
pandas_vet::rules::check_attr(self, attr, value, expr);
pandas_vet::rules::attr(self, attr, value, expr);
}
Expr::Call(ast::ExprCall {
func,
Expand Down Expand Up @@ -3009,7 +3011,7 @@ where
.into_iter(),
);
}
pandas_vet::rules::check_call(self, func);
pandas_vet::rules::call(self, func);

if self.settings.rules.enabled(Rule::PandasUseOfPdMerge) {
if let Some(diagnostic) = pandas_vet::rules::use_of_pd_merge(func) {
Expand Down
61 changes: 47 additions & 14 deletions crates/ruff/src/rules/pandas_vet/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,51 @@
use ruff_python_semantic::binding::{BindingKind, Importation};
use ruff_python_semantic::context::Context;
use rustpython_parser::ast;
use rustpython_parser::ast::Expr;

/// Return `true` if an `Expr` _could_ be a `DataFrame`. This rules out
/// obviously-wrong cases, like constants and literals.
pub(crate) const fn is_dataframe_candidate(expr: &Expr) -> bool {
!matches!(
expr,
pub(crate) enum Resolution {
/// The expression resolves to an irrelevant expression type (e.g., a constant).
IrrelevantExpression,
/// The expression resolves to an irrelevant binding (e.g., a function definition).
IrrelevantBinding,
/// The expression resolves to a relevant local binding (e.g., a variable).
RelevantLocal,
/// The expression resolves to the `pandas` module itself.
PandasModule,
}

/// Test an [`Expr`] for relevance to Pandas-related operations.
pub(crate) fn test_expression(expr: &Expr, context: &Context) -> Resolution {
match expr {
Expr::Constant(_)
| Expr::Tuple(_)
| Expr::List(_)
| Expr::Set(_)
| Expr::Dict(_)
| Expr::SetComp(_)
| Expr::ListComp(_)
| Expr::DictComp(_)
| Expr::GeneratorExp(_)
)
| Expr::Tuple(_)
| Expr::List(_)
| Expr::Set(_)
| Expr::Dict(_)
| Expr::SetComp(_)
| Expr::ListComp(_)
| Expr::DictComp(_)
| Expr::GeneratorExp(_) => Resolution::IrrelevantExpression,
Expr::Name(ast::ExprName { id, .. }) => {
context
.find_binding(id)
.map_or(Resolution::IrrelevantBinding, |binding| {
match binding.kind {
BindingKind::Annotation
| BindingKind::Argument
| BindingKind::Assignment
| BindingKind::NamedExprAssignment
| BindingKind::Binding
| BindingKind::LoopVar
| BindingKind::Global
| BindingKind::Nonlocal => Resolution::RelevantLocal,
BindingKind::Importation(Importation {
full_name: module, ..
}) if module == "pandas" => Resolution::PandasModule,
_ => Resolution::IrrelevantBinding,
}
})
}
_ => Resolution::RelevantLocal,
}
}
26 changes: 26 additions & 0 deletions crates/ruff/src/rules/pandas_vet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,32 @@ mod tests {
x = pd.DataFrame()
index = x.loc[:, ['B', 'A']]
"#, &[]; "PD008_pass")]
#[test_case(r#"
import io
import zipfile
class MockBinaryFile(io.BytesIO):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def close(self):
pass # Don't allow closing the file, it would clear the buffer
zip_buffer = MockBinaryFile()
with zipfile.ZipFile(zip_buffer, "w") as zf:
zf.writestr("dir/file.txt", "This is a test")
# Reset the BytesIO object's cursor to the start.
zip_buffer.seek(0)
with zipfile.ZipFile(zip_buffer, "r") as zf:
zpath = zipfile.Path(zf, "/")
dir_name, file_name = zpath.at.split("/")
"#, &[]; "PD008_pass_on_attr")]
#[test_case(r#"
import pandas as pd
x = pd.DataFrame()
Expand Down
47 changes: 47 additions & 0 deletions crates/ruff/src/rules/pandas_vet/rules/attr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use rustpython_parser::ast::{Expr, Ranged};

use ruff_diagnostics::Violation;
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
use ruff_macros::{derive_message_formats, violation};

use crate::checkers::ast::Checker;
use crate::registry::Rule;
use crate::rules::pandas_vet::helpers::{test_expression, Resolution};

#[violation]
pub struct PandasUseOfDotValues;

impl Violation for PandasUseOfDotValues {
#[derive_message_formats]
fn message(&self) -> String {
format!("Use `.to_numpy()` instead of `.values`")
}
}

pub(crate) fn attr(checker: &mut Checker, attr: &str, value: &Expr, attr_expr: &Expr) {
let rules = &checker.settings.rules;
let violation: DiagnosticKind = match attr {
"values" if rules.enabled(Rule::PandasUseOfDotValues) => PandasUseOfDotValues.into(),
_ => return,
};

// Avoid flagging on function calls (e.g., `df.values()`).
if let Some(parent) = checker.ctx.expr_parent() {
if matches!(parent, Expr::Call(_)) {
return;
}
}

// Avoid flagging on non-DataFrames (e.g., `{"a": 1}.values`), and on irrelevant bindings
// (like imports).
if !matches!(
test_expression(value, &checker.ctx),
Resolution::RelevantLocal
) {
return;
}

checker
.diagnostics
.push(Diagnostic::new(violation, attr_expr.range()));
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ use rustpython_parser::ast::{self, Expr, Ranged};
use ruff_diagnostics::Violation;
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::binding::{BindingKind, Importation};

use crate::checkers::ast::Checker;
use crate::registry::Rule;
use crate::rules::pandas_vet::helpers::is_dataframe_candidate;
use crate::rules::pandas_vet::helpers::{test_expression, Resolution};

#[violation]
pub struct PandasUseOfDotIsNull;
Expand Down Expand Up @@ -61,9 +60,11 @@ impl Violation for PandasUseOfDotStack {
}
}

pub(crate) fn check_call(checker: &mut Checker, func: &Expr) {
pub(crate) fn call(checker: &mut Checker, func: &Expr) {
let rules = &checker.settings.rules;
let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else {return};
let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else {
return;
};
let violation: DiagnosticKind = match attr.as_str() {
"isnull" if rules.enabled(Rule::PandasUseOfDotIsNull) => PandasUseOfDotIsNull.into(),
"notnull" if rules.enabled(Rule::PandasUseOfDotNotNull) => PandasUseOfDotNotNull.into(),
Expand All @@ -77,37 +78,14 @@ pub(crate) fn check_call(checker: &mut Checker, func: &Expr) {
_ => return,
};

if !is_dataframe_candidate(value) {
// Ignore irrelevant bindings (like imports).
if !matches!(
test_expression(value, &checker.ctx),
Resolution::RelevantLocal | Resolution::PandasModule
) {
return;
}

// If the target is a named variable, avoid triggering on
// irrelevant bindings (like non-Pandas imports).
if let Expr::Name(ast::ExprName { id, .. }) = value.as_ref() {
if checker.ctx.find_binding(id).map_or(true, |binding| {
if let BindingKind::Importation(Importation {
full_name: module, ..
}) = &binding.kind
{
module != &"pandas"
} else {
matches!(
binding.kind,
BindingKind::Builtin
| BindingKind::ClassDefinition
| BindingKind::FunctionDefinition
| BindingKind::Export(..)
| BindingKind::FutureImportation
| BindingKind::Importation(..)
| BindingKind::FromImportation(..)
| BindingKind::SubmoduleImportation(..)
)
}
}) {
return;
}
}

checker
.diagnostics
.push(Diagnostic::new(violation, func.range()));
Expand Down
96 changes: 0 additions & 96 deletions crates/ruff/src/rules/pandas_vet/rules/check_attr.rs

This file was deleted.

14 changes: 7 additions & 7 deletions crates/ruff/src/rules/pandas_vet/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
pub(crate) use assignment_to_df::{assignment_to_df, PandasDfVariableName};
pub(crate) use check_attr::{
check_attr, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx, PandasUseOfDotValues,
};
pub(crate) use check_call::{
check_call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack,
pub(crate) use attr::{attr, PandasUseOfDotValues};
pub(crate) use call::{
call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack,
PandasUseOfDotReadTable, PandasUseOfDotStack,
};
pub(crate) use inplace_argument::{inplace_argument, PandasUseOfInplaceArgument};
pub(crate) use pd_merge::{use_of_pd_merge, PandasUseOfPdMerge};
pub(crate) use subscript::{subscript, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx};

pub(crate) mod assignment_to_df;
pub(crate) mod check_attr;
pub(crate) mod check_call;
pub(crate) mod attr;
pub(crate) mod call;
pub(crate) mod inplace_argument;
pub(crate) mod pd_merge;
pub(crate) mod subscript;
Loading

0 comments on commit cd82b83

Please sign in to comment.