From 857f5dd475ca33c825446fb3235e39b928dbf998 Mon Sep 17 00:00:00 2001 From: ninad Date: Sun, 14 Apr 2024 20:17:44 +0530 Subject: [PATCH 1/4] Don't inline integer literals when out of range --- compiler/rustc_ast_lowering/src/format.rs | 200 +++++++++++------- .../ui/fmt/no-inline-literals-out-of-range.rs | 8 + .../no-inline-literals-out-of-range.stderr | 40 ++++ 3 files changed, 171 insertions(+), 77 deletions(-) create mode 100644 tests/ui/fmt/no-inline-literals-out-of-range.rs create mode 100644 tests/ui/fmt/no-inline-literals-out-of-range.stderr diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index d2c3b8b2d5652..a37e0832b6fcf 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -12,6 +12,34 @@ use rustc_span::{ }; use std::borrow::Cow; +fn int_ty_max(int_ty: IntTy) -> Option { + match int_ty { + // isize is platform-dependent, so we should use + // TyCtxt.data_layout.pointer_size + // This is available, for example, from a LoweringContext + IntTy::Isize => None, + IntTy::I8 => Some(i8::MAX as u128), + IntTy::I16 => Some(i16::MAX as u128), + IntTy::I32 => Some(i32::MAX as u128), + IntTy::I64 => Some(i64::MAX as u128), + IntTy::I128 => Some(i128::MAX as u128), + } +} + +fn uint_ty_max(uint_ty: UintTy) -> Option { + match uint_ty { + // usize is platform-dependent, so we should use + // TyCtxt.data_layout.pointer_size + // This is available, for example, from a LoweringContext + UintTy::Usize => None, + UintTy::U8 => Some(u8::MAX as u128), + UintTy::U16 => Some(u16::MAX as u128), + UintTy::U32 => Some(u32::MAX as u128), + UintTy::U64 => Some(u64::MAX as u128), + UintTy::U128 => Some(u128::MAX as u128), + } +} + impl<'hir> LoweringContext<'_, 'hir> { pub(crate) fn lower_format_args(&mut self, sp: Span, fmt: &FormatArgs) -> hir::ExprKind<'hir> { // Never call the const constructor of `fmt::Arguments` if the @@ -20,10 +48,104 @@ impl<'hir> LoweringContext<'_, 'hir> { let mut fmt = Cow::Borrowed(fmt); if self.tcx.sess.opts.unstable_opts.flatten_format_args { fmt = flatten_format_args(fmt); - fmt = inline_literals(fmt); + fmt = self.inline_literals(fmt); } expand_format_args(self, sp, &fmt, allow_const) } + + /// Try to convert a literal into an interned string + fn try_inline_lit(&self, lit: token::Lit) -> Option { + match LitKind::from_token_lit(lit) { + Ok(LitKind::Str(s, _)) => Some(s), + Ok(LitKind::Int(n, ty)) => { + // platform-dependent usize and isize MAX + let usize_bits = self.tcx.data_layout.pointer_size.bits(); + let usize_max = if usize_bits >= 128 { u128::MAX } else { 1u128 << usize_bits - 1 }; + let isize_max = usize_max >> 1; + match ty { + // unsuffixed integer literals are assumed to be i32's + LitIntType::Unsuffixed => (n <= i32::MAX as u128).then_some(Symbol::intern(&n.to_string())), + LitIntType::Signed(int_ty) => { + let max_literal = int_ty_max(int_ty).unwrap_or(isize_max); + (n <= max_literal).then_some(Symbol::intern(&n.to_string())) + } + LitIntType::Unsigned(uint_ty) => { + let max_literal = uint_ty_max(uint_ty).unwrap_or(usize_max); + (n <= max_literal).then_some(Symbol::intern(&n.to_string())) + } + } + } + _ => None, + } + } + + /// Inline literals into the format string. + /// + /// Turns + /// + /// `format_args!("Hello, {}! {} {}", "World", 123, x)` + /// + /// into + /// + /// `format_args!("Hello, World! 123 {}", x)`. + fn inline_literals<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> { + let mut was_inlined = vec![false; fmt.arguments.all_args().len()]; + let mut inlined_anything = false; + + for i in 0..fmt.template.len() { + let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i] else { continue }; + let Ok(arg_index) = placeholder.argument.index else { continue }; + + let mut literal = None; + + if let FormatTrait::Display = placeholder.format_trait + && placeholder.format_options == Default::default() + && let arg = fmt.arguments.all_args()[arg_index].expr.peel_parens_and_refs() + && let ExprKind::Lit(lit) = arg.kind + { + literal = self.try_inline_lit(lit); + } + + if let Some(literal) = literal { + // Now we need to mutate the outer FormatArgs. + // If this is the first time, this clones the outer FormatArgs. + let fmt = fmt.to_mut(); + // Replace the placeholder with the literal. + fmt.template[i] = FormatArgsPiece::Literal(literal); + was_inlined[arg_index] = true; + inlined_anything = true; + } + } + + // Remove the arguments that were inlined. + if inlined_anything { + let fmt = fmt.to_mut(); + + let mut remove = was_inlined; + + // Don't remove anything that's still used. + for_all_argument_indexes(&mut fmt.template, |index| remove[*index] = false); + + // Drop all the arguments that are marked for removal. + let mut remove_it = remove.iter(); + fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true)); + + // Calculate the mapping of old to new indexes for the remaining arguments. + let index_map: Vec = remove + .into_iter() + .scan(0, |i, remove| { + let mapped = *i; + *i += !remove as usize; + Some(mapped) + }) + .collect(); + + // Correct the indexes that refer to arguments that have shifted position. + for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]); + } + + fmt + } } /// Flattens nested `format_args!()` into one. @@ -103,82 +225,6 @@ fn flatten_format_args(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> { fmt } -/// Inline literals into the format string. -/// -/// Turns -/// -/// `format_args!("Hello, {}! {} {}", "World", 123, x)` -/// -/// into -/// -/// `format_args!("Hello, World! 123 {}", x)`. -fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> { - let mut was_inlined = vec![false; fmt.arguments.all_args().len()]; - let mut inlined_anything = false; - - for i in 0..fmt.template.len() { - let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i] else { continue }; - let Ok(arg_index) = placeholder.argument.index else { continue }; - - let mut literal = None; - - if let FormatTrait::Display = placeholder.format_trait - && placeholder.format_options == Default::default() - && let arg = fmt.arguments.all_args()[arg_index].expr.peel_parens_and_refs() - && let ExprKind::Lit(lit) = arg.kind - { - if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind - && let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit) - { - literal = Some(s); - } else if let token::LitKind::Integer = lit.kind - && let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit) - { - literal = Some(Symbol::intern(&n.to_string())); - } - } - - if let Some(literal) = literal { - // Now we need to mutate the outer FormatArgs. - // If this is the first time, this clones the outer FormatArgs. - let fmt = fmt.to_mut(); - // Replace the placeholder with the literal. - fmt.template[i] = FormatArgsPiece::Literal(literal); - was_inlined[arg_index] = true; - inlined_anything = true; - } - } - - // Remove the arguments that were inlined. - if inlined_anything { - let fmt = fmt.to_mut(); - - let mut remove = was_inlined; - - // Don't remove anything that's still used. - for_all_argument_indexes(&mut fmt.template, |index| remove[*index] = false); - - // Drop all the arguments that are marked for removal. - let mut remove_it = remove.iter(); - fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true)); - - // Calculate the mapping of old to new indexes for the remaining arguments. - let index_map: Vec = remove - .into_iter() - .scan(0, |i, remove| { - let mapped = *i; - *i += !remove as usize; - Some(mapped) - }) - .collect(); - - // Correct the indexes that refer to arguments that have shifted position. - for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]); - } - - fmt -} - #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] enum ArgumentType { Format(FormatTrait), diff --git a/tests/ui/fmt/no-inline-literals-out-of-range.rs b/tests/ui/fmt/no-inline-literals-out-of-range.rs new file mode 100644 index 0000000000000..05a0b64f77d79 --- /dev/null +++ b/tests/ui/fmt/no-inline-literals-out-of-range.rs @@ -0,0 +1,8 @@ +fn main() { + format_args!("{}", 0x8f_i8); // issue #115423 + //~^ ERROR literal out of range for `i8` + format_args!("{}", 0xffff_ffff_u8); // issue #116633 + //~^ ERROR literal out of range for `u8` + format_args!("{}", 0xffff_ffff); // treat unsuffixed literals as i32 + //~^ ERROR literal out of range for `i32` +} diff --git a/tests/ui/fmt/no-inline-literals-out-of-range.stderr b/tests/ui/fmt/no-inline-literals-out-of-range.stderr new file mode 100644 index 0000000000000..22104d6134f4b --- /dev/null +++ b/tests/ui/fmt/no-inline-literals-out-of-range.stderr @@ -0,0 +1,40 @@ +error: literal out of range for `i8` + --> $DIR/no-inline-literals-out-of-range.rs:2:24 + | +LL | format_args!("{}", 0x8f_i8); // issue #115423 + | ^^^^^^^ + | + = note: the literal `0x8f_i8` (decimal `143`) does not fit into the type `i8` and will become `-113i8` + = note: `#[deny(overflowing_literals)]` on by default +help: consider using the type `u8` instead + | +LL | format_args!("{}", 0x8f_u8); // issue #115423 + | ~~~~~~~ +help: to use as a negative number (decimal `-113`), consider using the type `u8` for the literal and cast it to `i8` + | +LL | format_args!("{}", 0x8f_u8 as i8); // issue #115423 + | ~~~~~~~~~~~~~ + +error: literal out of range for `u8` + --> $DIR/no-inline-literals-out-of-range.rs:4:24 + | +LL | format_args!("{}", 0xffff_ffff_u8); // issue #116633 + | ^^^^^^^^^^^^^^ help: consider using the type `u32` instead: `0xffff_ffff_u32` + | + = note: the literal `0xffff_ffff_u8` (decimal `4294967295`) does not fit into the type `u8` and will become `255u8` + +error: literal out of range for `i32` + --> $DIR/no-inline-literals-out-of-range.rs:6:24 + | +LL | format_args!("{}", 0xffff_ffff); // treat unsuffixed literals as i32 + | ^^^^^^^^^^^ + | + = note: the literal `0xffff_ffff` (decimal `4294967295`) does not fit into the type `i32` and will become `-1i32` + = help: consider using the type `u32` instead +help: to use as a negative number (decimal `-1`), consider using the type `u32` for the literal and cast it to `i32` + | +LL | format_args!("{}", 0xffff_ffffu32 as i32); // treat unsuffixed literals as i32 + | ~~~~~~~~~~~~~~~~~~~~~ + +error: aborting due to 3 previous errors + From 757f5bb3dcb46e4954cf69e6c47c1ae1806f2a9b Mon Sep 17 00:00:00 2001 From: ninad Date: Sun, 14 Apr 2024 20:29:30 +0530 Subject: [PATCH 2/4] Fix formatting --- compiler/rustc_ast_lowering/src/format.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index a37e0832b6fcf..2d8f462675259 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -64,7 +64,9 @@ impl<'hir> LoweringContext<'_, 'hir> { let isize_max = usize_max >> 1; match ty { // unsuffixed integer literals are assumed to be i32's - LitIntType::Unsuffixed => (n <= i32::MAX as u128).then_some(Symbol::intern(&n.to_string())), + LitIntType::Unsuffixed => { + (n <= i32::MAX as u128).then_some(Symbol::intern(&n.to_string())) + } LitIntType::Signed(int_ty) => { let max_literal = int_ty_max(int_ty).unwrap_or(isize_max); (n <= max_literal).then_some(Symbol::intern(&n.to_string())) From 38c4885c39647a49892138b898725225e9e9fc44 Mon Sep 17 00:00:00 2001 From: ninad Date: Sun, 14 Apr 2024 21:34:14 +0530 Subject: [PATCH 3/4] Add more test cases --- .../ui/fmt/no-inline-literals-out-of-range.rs | 6 +++++ .../no-inline-literals-out-of-range.stderr | 24 +++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tests/ui/fmt/no-inline-literals-out-of-range.rs b/tests/ui/fmt/no-inline-literals-out-of-range.rs index 05a0b64f77d79..d2532cdfbff61 100644 --- a/tests/ui/fmt/no-inline-literals-out-of-range.rs +++ b/tests/ui/fmt/no-inline-literals-out-of-range.rs @@ -1,8 +1,14 @@ +//@ only-64bit + fn main() { format_args!("{}", 0x8f_i8); // issue #115423 //~^ ERROR literal out of range for `i8` format_args!("{}", 0xffff_ffff_u8); // issue #116633 //~^ ERROR literal out of range for `u8` + format_args!("{}", 0xffff_ffff_ffff_ffff_ffff_usize); + //~^ ERROR literal out of range for `usize` + format_args!("{}", 0x8000_0000_0000_0000_isize); + //~^ ERROR literal out of range for `isize` format_args!("{}", 0xffff_ffff); // treat unsuffixed literals as i32 //~^ ERROR literal out of range for `i32` } diff --git a/tests/ui/fmt/no-inline-literals-out-of-range.stderr b/tests/ui/fmt/no-inline-literals-out-of-range.stderr index 22104d6134f4b..78ec4888c27a3 100644 --- a/tests/ui/fmt/no-inline-literals-out-of-range.stderr +++ b/tests/ui/fmt/no-inline-literals-out-of-range.stderr @@ -1,5 +1,5 @@ error: literal out of range for `i8` - --> $DIR/no-inline-literals-out-of-range.rs:2:24 + --> $DIR/no-inline-literals-out-of-range.rs:4:24 | LL | format_args!("{}", 0x8f_i8); // issue #115423 | ^^^^^^^ @@ -16,15 +16,31 @@ LL | format_args!("{}", 0x8f_u8 as i8); // issue #115423 | ~~~~~~~~~~~~~ error: literal out of range for `u8` - --> $DIR/no-inline-literals-out-of-range.rs:4:24 + --> $DIR/no-inline-literals-out-of-range.rs:6:24 | LL | format_args!("{}", 0xffff_ffff_u8); // issue #116633 | ^^^^^^^^^^^^^^ help: consider using the type `u32` instead: `0xffff_ffff_u32` | = note: the literal `0xffff_ffff_u8` (decimal `4294967295`) does not fit into the type `u8` and will become `255u8` +error: literal out of range for `usize` + --> $DIR/no-inline-literals-out-of-range.rs:8:24 + | +LL | format_args!("{}", 0xffff_ffff_ffff_ffff_ffff_usize); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: the literal `0xffff_ffff_ffff_ffff_ffff_usize` (decimal `1208925819614629174706175`) does not fit into the type `usize` and will become `18446744073709551615usize` + +error: literal out of range for `isize` + --> $DIR/no-inline-literals-out-of-range.rs:10:24 + | +LL | format_args!("{}", 0x8000_0000_0000_0000_isize); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: the literal `0x8000_0000_0000_0000_isize` (decimal `9223372036854775808`) does not fit into the type `isize` and will become `-9223372036854775808isize` + error: literal out of range for `i32` - --> $DIR/no-inline-literals-out-of-range.rs:6:24 + --> $DIR/no-inline-literals-out-of-range.rs:12:24 | LL | format_args!("{}", 0xffff_ffff); // treat unsuffixed literals as i32 | ^^^^^^^^^^^ @@ -36,5 +52,5 @@ help: to use as a negative number (decimal `-1`), consider using the type `u32` LL | format_args!("{}", 0xffff_ffffu32 as i32); // treat unsuffixed literals as i32 | ~~~~~~~~~~~~~~~~~~~~~ -error: aborting due to 3 previous errors +error: aborting due to 5 previous errors From 4c8d210ab8f84f22e82df7efa0537c7aeb4237d0 Mon Sep 17 00:00:00 2001 From: ninad Date: Mon, 15 Apr 2024 00:39:28 +0530 Subject: [PATCH 4/4] Improve semantics of int_ty_max functions --- compiler/rustc_ast_lowering/src/format.rs | 60 ++++++++++------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 2d8f462675259..ca4604c60c580 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -12,34 +12,6 @@ use rustc_span::{ }; use std::borrow::Cow; -fn int_ty_max(int_ty: IntTy) -> Option { - match int_ty { - // isize is platform-dependent, so we should use - // TyCtxt.data_layout.pointer_size - // This is available, for example, from a LoweringContext - IntTy::Isize => None, - IntTy::I8 => Some(i8::MAX as u128), - IntTy::I16 => Some(i16::MAX as u128), - IntTy::I32 => Some(i32::MAX as u128), - IntTy::I64 => Some(i64::MAX as u128), - IntTy::I128 => Some(i128::MAX as u128), - } -} - -fn uint_ty_max(uint_ty: UintTy) -> Option { - match uint_ty { - // usize is platform-dependent, so we should use - // TyCtxt.data_layout.pointer_size - // This is available, for example, from a LoweringContext - UintTy::Usize => None, - UintTy::U8 => Some(u8::MAX as u128), - UintTy::U16 => Some(u16::MAX as u128), - UintTy::U32 => Some(u32::MAX as u128), - UintTy::U64 => Some(u64::MAX as u128), - UintTy::U128 => Some(u128::MAX as u128), - } -} - impl<'hir> LoweringContext<'_, 'hir> { pub(crate) fn lower_format_args(&mut self, sp: Span, fmt: &FormatArgs) -> hir::ExprKind<'hir> { // Never call the const constructor of `fmt::Arguments` if the @@ -58,21 +30,17 @@ impl<'hir> LoweringContext<'_, 'hir> { match LitKind::from_token_lit(lit) { Ok(LitKind::Str(s, _)) => Some(s), Ok(LitKind::Int(n, ty)) => { - // platform-dependent usize and isize MAX - let usize_bits = self.tcx.data_layout.pointer_size.bits(); - let usize_max = if usize_bits >= 128 { u128::MAX } else { 1u128 << usize_bits - 1 }; - let isize_max = usize_max >> 1; match ty { // unsuffixed integer literals are assumed to be i32's LitIntType::Unsuffixed => { (n <= i32::MAX as u128).then_some(Symbol::intern(&n.to_string())) } LitIntType::Signed(int_ty) => { - let max_literal = int_ty_max(int_ty).unwrap_or(isize_max); + let max_literal = self.int_ty_max(int_ty); (n <= max_literal).then_some(Symbol::intern(&n.to_string())) } LitIntType::Unsigned(uint_ty) => { - let max_literal = uint_ty_max(uint_ty).unwrap_or(usize_max); + let max_literal = self.uint_ty_max(uint_ty); (n <= max_literal).then_some(Symbol::intern(&n.to_string())) } } @@ -81,6 +49,30 @@ impl<'hir> LoweringContext<'_, 'hir> { } } + /// Get the maximum value of int_ty. It is platform-dependent due to the byte size of isize + fn int_ty_max(&self, int_ty: IntTy) -> u128 { + match int_ty { + IntTy::Isize => self.tcx.data_layout.pointer_size.signed_int_max() as u128, + IntTy::I8 => i8::MAX as u128, + IntTy::I16 => i16::MAX as u128, + IntTy::I32 => i32::MAX as u128, + IntTy::I64 => i64::MAX as u128, + IntTy::I128 => i128::MAX as u128, + } + } + + /// Get the maximum value of uint_ty. It is platform-dependent due to the byte size of usize + fn uint_ty_max(&self, uint_ty: UintTy) -> u128 { + match uint_ty { + UintTy::Usize => self.tcx.data_layout.pointer_size.unsigned_int_max(), + UintTy::U8 => u8::MAX as u128, + UintTy::U16 => u16::MAX as u128, + UintTy::U32 => u32::MAX as u128, + UintTy::U64 => u64::MAX as u128, + UintTy::U128 => u128::MAX as u128, + } + } + /// Inline literals into the format string. /// /// Turns