Skip to content

Commit

Permalink
Unrolled build for rust-lang#115257
Browse files Browse the repository at this point in the history
Rollup merge of rust-lang#115257 - Urgau:invalid-utf8-walk-up-hir, r=Nilstrieb

Improve invalid UTF-8 lint by finding the expression initializer

This PR introduce a small mechanism to walk up the HIR through bindings, if/else, consts, ... when trying lint on invalid UTF-8.

Fixes rust-lang#115208
  • Loading branch information
rust-timer committed Sep 21, 2023
2 parents e4a361a + f156d3b commit 04e5f50
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 50 deletions.
48 changes: 48 additions & 0 deletions compiler/rustc_lint/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,54 @@ impl<'tcx> LateContext<'tcx> {
tcx.try_normalize_erasing_regions(self.param_env, proj).ok()
})
}

/// If the given expression is a local binding, find the initializer expression.
/// If that initializer expression is another local or **outside** (`const`/`static`)
/// binding, find its initializer again.
///
/// This process repeats as long as possible (but usually no more than once).
/// Type-check adjustments are not taken in account in this function.
///
/// Examples:
/// ```
/// const ABC: i32 = 1;
/// // ^ output
/// let def = ABC;
/// dbg!(def);
/// // ^^^ input
///
/// // or...
/// let abc = 1;
/// let def = abc + 2;
/// // ^^^^^^^ output
/// dbg!(def);
/// // ^^^ input
/// ```
pub fn expr_or_init<'a>(&self, mut expr: &'a hir::Expr<'tcx>) -> &'a hir::Expr<'tcx> {
expr = expr.peel_blocks();

while let hir::ExprKind::Path(ref qpath) = expr.kind
&& let Some(parent_node) = match self.qpath_res(qpath, expr.hir_id) {
Res::Local(hir_id) => self.tcx.hir().find_parent(hir_id),
Res::Def(_, def_id) => self.tcx.hir().get_if_local(def_id),
_ => None,
}
&& let Some(init) = match parent_node {
hir::Node::Expr(expr) => Some(expr),
hir::Node::Local(hir::Local { init, .. }) => *init,
hir::Node::Item(item) => match item.kind {
hir::ItemKind::Const(.., body_id) | hir::ItemKind::Static(.., body_id) => {
Some(self.tcx.hir().body(body_id).value)
}
_ => None
}
_ => None
}
{
expr = init.peel_blocks();
}
expr
}
}

impl<'tcx> abi::HasDataLayout for LateContext<'tcx> {
Expand Down
25 changes: 14 additions & 11 deletions compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::str::Utf8Error;

use rustc_ast::{BorrowKind, LitKind};
use rustc_ast::LitKind;
use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;
Expand All @@ -11,7 +11,7 @@ use crate::{LateContext, LateLintPass, LintContext};
declare_lint! {
/// The `invalid_from_utf8_unchecked` lint checks for calls to
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
/// with an invalid UTF-8 literal.
/// with a known invalid UTF-8 value.
///
/// ### Example
///
Expand All @@ -36,7 +36,7 @@ declare_lint! {
declare_lint! {
/// The `invalid_from_utf8` lint checks for calls to
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
/// with an invalid UTF-8 literal.
/// with a known invalid UTF-8 value.
///
/// ### Example
///
Expand Down Expand Up @@ -67,8 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
{
let lint = |utf8_error: Utf8Error| {
let label = arg.span;
let lint = |label, utf8_error: Utf8Error| {
let method = diag_item.as_str().strip_prefix("str_").unwrap();
let method = format!("std::str::{method}");
let valid_up_to = utf8_error.valid_up_to();
Expand All @@ -78,22 +77,26 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
expr.span,
if is_unchecked_variant {
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
} else {
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
}
)
};

match &arg.kind {
let mut init = cx.expr_or_init(arg);
while let ExprKind::AddrOf(.., inner) = init.kind {
init = cx.expr_or_init(inner);
}
match init.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes)
{
lint(utf8_error);
lint(init.span, utf8_error);
}
},
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
ExprKind::Array(args) => {
let elements = args.iter().map(|e|{
match &e.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
Expand All @@ -108,7 +111,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
if let Some(elements) = elements
&& let Err(utf8_error) = std::str::from_utf8(&elements)
{
lint(utf8_error);
lint(init.span, utf8_error);
}
}
_ => {}
Expand Down
27 changes: 27 additions & 0 deletions tests/ui/lint/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// check-pass

#![feature(inline_const)]
#![feature(concat_bytes)]

#![warn(invalid_from_utf8_unchecked)]
#![warn(invalid_from_utf8)]

Expand Down Expand Up @@ -90,4 +92,29 @@ pub fn from_utf8() {
}
}

pub fn from_utf8_with_indirections() {
let mut a = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8_mut(&mut a);
//~^ WARN calls to `std::str::from_utf8_mut`
let mut b = &mut a;
let mut c = b;
std::str::from_utf8_mut(c);
//~^ WARN calls to `std::str::from_utf8_mut`
let mut c = &[99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(c);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(&INVALID_1);
//~^ WARN calls to `std::str::from_utf8`
static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(&INVALID_2);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
std::str::from_utf8(INVALID_3);
//~^ WARN calls to `std::str::from_utf8`
const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
std::str::from_utf8(INVALID_4);
//~^ WARN calls to `std::str::from_utf8`
}

fn main() {}
135 changes: 96 additions & 39 deletions tests/ui/lint/invalid_from_utf8.stderr
Original file line number Diff line number Diff line change
@@ -1,110 +1,167 @@
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:19:9
--> $DIR/invalid_from_utf8.rs:21:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:4:9
--> $DIR/invalid_from_utf8.rs:6:9
|
LL | #![warn(invalid_from_utf8_unchecked)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:21:9
--> $DIR/invalid_from_utf8.rs:23:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:39:9
--> $DIR/invalid_from_utf8.rs:41:9
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:41:9
--> $DIR/invalid_from_utf8.rs:43:9
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:43:9
--> $DIR/invalid_from_utf8.rs:45:9
|
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:45:9
--> $DIR/invalid_from_utf8.rs:47:9
|
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:62:9
--> $DIR/invalid_from_utf8.rs:64:9
|
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:5:9
--> $DIR/invalid_from_utf8.rs:7:9
|
LL | #![warn(invalid_from_utf8)]
| ^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:64:9
--> $DIR/invalid_from_utf8.rs:66:9
|
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:82:9
--> $DIR/invalid_from_utf8.rs:84:9
|
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:84:9
--> $DIR/invalid_from_utf8.rs:86:9
|
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
| ^^^^^^^^^^^^^^^^^^^^^---------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:86:9
--> $DIR/invalid_from_utf8.rs:88:9
|
LL | std::str::from_utf8(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:88:9
--> $DIR/invalid_from_utf8.rs:90:9
|
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: 12 warnings emitted
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:97:5
|
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8_mut(&mut a);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:101:5
|
LL | let mut a = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
...
LL | std::str::from_utf8_mut(c);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:104:5
|
LL | let mut c = &[99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(c);
| ^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:107:5
|
LL | const INVALID_1: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(&INVALID_1);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:110:5
|
LL | static INVALID_2: [u8; 7] = [99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(&INVALID_2);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:113:5
|
LL | const INVALID_3: &'static [u8; 7] = &[99, 108, 130, 105, 112, 112, 121];
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(INVALID_3);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:116:5
|
LL | const INVALID_4: &'static [u8; 7] = { &[99, 108, 130, 105, 112, 112, 121] };
| ---------------------------------- the literal was valid UTF-8 up to the 2 bytes
LL | std::str::from_utf8(INVALID_4);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: 19 warnings emitted

0 comments on commit 04e5f50

Please sign in to comment.