Skip to content

Commit

Permalink
Auto merge of rust-lang#99322 - GKFX:const-int-parse, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
Make {integer}::from_str_radix constant

This commit makes FromStr on integers constant so that `const x: u32 = "23".parse();` works. More practical use-case is with environment variables at build time as discussed in rust-lang/rfcs#1907.

Tracking issue rust-lang#59133.

ACP: rust-lang/libs-team#74
  • Loading branch information
bors committed Mar 30, 2024
2 parents 70714e3 + 3855b8b commit 8df7e72
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 166 deletions.
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@
#![feature(const_heap)]
#![feature(const_hint_assert_unchecked)]
#![feature(const_index_range_slice_index)]
#![feature(const_int_from_str)]
#![feature(const_intrinsic_copy)]
#![feature(const_intrinsic_forget)]
#![feature(const_ipv4)]
Expand Down
3 changes: 2 additions & 1 deletion library/core/src/num/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ pub enum IntErrorKind {
impl ParseIntError {
/// Outputs the detailed cause of parsing an integer failing.
#[must_use]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
#[stable(feature = "int_error_matching", since = "1.55.0")]
pub fn kind(&self) -> &IntErrorKind {
pub const fn kind(&self) -> &IntErrorKind {
&self.kind
}
}
Expand Down
26 changes: 0 additions & 26 deletions library/core/src/num/int_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,6 @@ macro_rules! int_impl {
#[stable(feature = "int_bits_const", since = "1.53.0")]
pub const BITS: u32 = <$UnsignedT>::BITS;

/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` or `-` sign followed by digits.
/// Leading and trailing whitespace represent an error. Digits are a subset of these characters,
/// depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($SelfT), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
from_str_radix(src, radix)
}

/// Returns the number of ones in the binary representation of `self`.
///
/// # Examples
Expand Down
293 changes: 183 additions & 110 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use crate::ascii;
use crate::hint;
use crate::intrinsics;
use crate::mem;
use crate::ops::{Add, Mul, Sub};
use crate::str::FromStr;

// Used because the `?` operator is not allowed in a const context.
Expand Down Expand Up @@ -1386,144 +1385,218 @@ pub enum FpCategory {
Normal,
}

#[doc(hidden)]
trait FromStrRadixHelper:
PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
{
const MIN: Self;
fn from_u32(u: u32) -> Self;
fn checked_mul(&self, other: u32) -> Option<Self>;
fn checked_sub(&self, other: u32) -> Option<Self>;
fn checked_add(&self, other: u32) -> Option<Self>;
}

macro_rules! from_str_radix_int_impl {
($($t:ty)*) => {$(
#[stable(feature = "rust1", since = "1.0.0")]
impl FromStr for $t {
type Err = ParseIntError;
fn from_str(src: &str) -> Result<Self, ParseIntError> {
from_str_radix(src, 10)
<$t>::from_str_radix(src, 10)
}
}
)*}
}
from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }

macro_rules! impl_helper_for {
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
const MIN: Self = Self::MIN;
#[inline]
fn from_u32(u: u32) -> Self { u as Self }
#[inline]
fn checked_mul(&self, other: u32) -> Option<Self> {
Self::checked_mul(*self, other as Self)
}
#[inline]
fn checked_sub(&self, other: u32) -> Option<Self> {
Self::checked_sub(*self, other as Self)
}
#[inline]
fn checked_add(&self, other: u32) -> Option<Self> {
Self::checked_add(*self, other as Self)
}
})*)
}
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }

/// Determines if a string of text of that length of that radix could be guaranteed to be
/// stored in the given type T.
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
/// is done at runtime.
#[doc(hidden)]
#[inline(always)]
#[unstable(issue = "none", feature = "std_internals")]
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
pub const fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
}

fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
use self::IntErrorKind::*;
use self::ParseIntError as PIE;
#[track_caller]
const fn from_str_radix_panic_ct(_radix: u32) -> ! {
panic!("from_str_radix_int: must lie in the range `[2, 36]`");
}

assert!(
(2..=36).contains(&radix),
"from_str_radix_int: must lie in the range `[2, 36]` - found {}",
radix
);
#[track_caller]
fn from_str_radix_panic_rt(radix: u32) -> ! {
panic!("from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix);
}

if src.is_empty() {
return Err(PIE { kind: Empty });
#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
#[cfg_attr(feature = "panic_immediate_abort", inline)]
#[cold]
#[track_caller]
const fn from_str_radix_assert(radix: u32) {
if 2 > radix || radix > 36 {
// The only difference between these two functions is their panic message.
intrinsics::const_eval_select((radix,), from_str_radix_panic_ct, from_str_radix_panic_rt);
}
}

let is_signed_ty = T::from_u32(0) > T::MIN;

// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
// other than a valid ascii digit for the given radix, including the first-byte
// of multi-byte sequences
let src = src.as_bytes();
macro_rules! from_str_radix {
($($int_ty:ty)+) => {$(
impl $int_ty {
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` sign
/// followed by digits.
/// Leading and trailing whitespace represent an error.
/// Digits are a subset of these characters, depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($int_ty), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
pub const fn from_str_radix(src: &str, radix: u32) -> Result<$int_ty, ParseIntError> {
use self::IntErrorKind::*;
use self::ParseIntError as PIE;

from_str_radix_assert(radix);

if src.is_empty() {
return Err(PIE { kind: Empty });
}

let (is_positive, digits) = match src[0] {
b'+' | b'-' if src[1..].is_empty() => {
return Err(PIE { kind: InvalidDigit });
}
b'+' => (true, &src[1..]),
b'-' if is_signed_ty => (false, &src[1..]),
_ => (true, src),
};
#[allow(unused_comparisons)]
let is_signed_ty = 0 > <$int_ty>::MIN;

// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
// other than a valid ascii digit for the given radix, including the first-byte
// of multi-byte sequences
let src = src.as_bytes();

let (is_positive, mut digits) = match src {
[b'+' | b'-'] => {
return Err(PIE { kind: InvalidDigit });
}
[b'+', rest @ ..] => (true, rest),
[b'-', rest @ ..] if is_signed_ty => (false, rest),
_ => (true, src),
};

let mut result = 0;

macro_rules! unwrap_or_PIE {
($option:expr, $kind:ident) => {
match $option {
Some(value) => value,
None => return Err(PIE { kind: $kind }),
}
};
}

let mut result = T::from_u32(0);

if can_not_overflow::<T>(radix, is_signed_ty, digits) {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
macro_rules! run_unchecked_loop {
($unchecked_additive_op:expr) => {
for &c in digits {
result = result * T::from_u32(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = $unchecked_additive_op(result, T::from_u32(x));
if can_not_overflow::<$int_ty>(radix, is_signed_ty, digits) {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
macro_rules! run_unchecked_loop {
($unchecked_additive_op:tt) => {{
while let [c, rest @ ..] = digits {
result = result * (radix as $int_ty);
let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit);
result = result $unchecked_additive_op (x as $int_ty);
digits = rest;
}
}};
}
if is_positive {
run_unchecked_loop!(+)
} else {
run_unchecked_loop!(-)
};
} else {
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:ident) => {{
while let [c, rest @ ..] = digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix as $int_ty);
let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit) as $int_ty;
result = unwrap_or_PIE!(mul, $overflow_err);
result = unwrap_or_PIE!(<$int_ty>::$checked_additive_op(result, x), $overflow_err);
digits = rest;
}
}};
}
if is_positive {
run_checked_loop!(checked_add, PosOverflow)
} else {
run_checked_loop!(checked_sub, NegOverflow)
};
}
};
Ok(result)
}
}
if is_positive {
run_unchecked_loop!(<T as core::ops::Add>::add)
} else {
run_unchecked_loop!(<T as core::ops::Sub>::sub)
};
} else {
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:expr) => {
for &c in digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = mul.ok_or_else($overflow_err)?;
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
}
};
)+}
}

from_str_radix! { i8 u8 i16 u16 i32 u32 i64 u64 i128 u128 }

// Re-use the relevant implementation of from_str_radix for isize and usize to avoid outputting two
// identical functions.
macro_rules! from_str_radix_size_impl {
($($t:ident $size:ty),*) => {$(
impl $size {
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` sign
/// followed by digits.
/// Leading and trailing whitespace represent an error.
/// Digits are a subset of these characters, depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($size), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
pub const fn from_str_radix(src: &str, radix: u32) -> Result<$size, ParseIntError> {
match <$t>::from_str_radix(src, radix) {
Ok(x) => Ok(x as $size),
Err(e) => Err(e),
}
}
if is_positive {
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
} else {
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
};
}
Ok(result)
})*}
}

#[cfg(target_pointer_width = "16")]
from_str_radix_size_impl! { i16 isize, u16 usize }
#[cfg(target_pointer_width = "32")]
from_str_radix_size_impl! { i32 isize, u32 usize }
#[cfg(target_pointer_width = "64")]
from_str_radix_size_impl! { i64 isize, u64 usize }
Loading

0 comments on commit 8df7e72

Please sign in to comment.