Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the comments for hexadecimal float parsing. #1888

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 70 additions & 35 deletions crates/wast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ macro_rules! float {
let signif_bits = width - 1 - $exp_bits;
let signif_mask = (1 << exp_offset) - 1;
let bias = (1 << ($exp_bits - 1)) - 1;
let msb = 1 << neg_offset;

let (hex, integral, fractional, exponent_str) = match val {
// Infinity is when the exponent bits are all set and
Expand Down Expand Up @@ -497,53 +498,77 @@ macro_rules! float {
return Some(float.to_bits());
}

// Parsing hex floats is... hard! I don't really know what most of
// this below does. It was copied from Gecko's implementation in
// `WasmTextToBinary.cpp`. Would love comments on this if you have
// them!
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");
let negative = integral.starts_with('-');
// Parse a hexadecimal floating-point value.
//
// The main loop here is simpler than for parsing decimal floats,
// because we can just parse hexadecimal digits and then shift
// their bits into place in the significand. But in addition to
// that, we also need to handle non-normalized representations,
// where the integral part is not "1", to convert them to
// normalized results, to round, in case we get more digits than
// the target format supports, and to handle overflow and subnormal
// cases.

// Get slices of digits for the integral and fractional parts. We
// can trivially skip any leading zeros in the integral part.
let is_negative = integral.starts_with('-');
let integral = integral.trim_start_matches('-').trim_start_matches('0');
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");

// Do a bunch of work up front to locate the first non-zero digit
// to determine the initial exponent. There's a number of
// adjustments depending on where the digit was found, but the
// general idea here is that I'm not really sure why things are
// calculated the way they are but it should match Gecko.
// Locate the first non-zero digit to determine the initial exponent.
//
// If there's no integral part, skip past leading zeros so that
// something like "0x.0000000000000000000002" doesn't cause us to hit
// a shift overflow when we try to shift the value into place. We'll
// adjust the exponent below to account for these skipped zeros.
let fractional_no_leading = fractional.trim_start_matches('0');
let fractional_iter = if integral.is_empty() {
fractional_no_leading.chars()
} else {
fractional.chars()
};

// Create a unified iterator over the digits of the integral part
// followed by the digits of the fractional part. The boolean value
// indicates which of these parts we're in.
let mut digits = integral.chars()
.map(|c| (to_hex(c) as $int, false))
.chain(fractional_iter.map(|c| (to_hex(c) as $int, true)));

// Compute the number of leading zeros in the first non-zero digit,
// since if the first digit is not "1" we'll need to adjust for
// normalization.
let lead_nonzero_digit = match digits.next() {
Some((c, _)) => c,
// No digits? Must be `+0` or `-0`, being careful to handle the
// sign encoding here.
None if negative => return Some(1 << (width - 1)),
// No non-zero digits? Must be `+0` or `-0`, being careful to
// handle the sign encoding here.
None if is_negative => return Some(msb),
None => return Some(0),
};
let mut significand = 0 as $int;
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;

// Prepare for the main parsing loop. Calculate the initial values
// of `exponent` and `significand` based on what we've seen so far.
let mut exponent = if !integral.is_empty() {
1
} else {
// Adjust the exponent digits to account for any leading zeros
// in the fractional part that we skipped above.
-((fractional.len() - fractional_no_leading.len() + 1) as i32) + 1
};
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;
let mut significand_pos = (width - (4 - (lz as usize))) as isize;
assert!(significand_pos >= 0);
significand |= lead_nonzero_digit << significand_pos;
let mut significand: $int = lead_nonzero_digit << significand_pos;
let mut discarded_extra_nonzero = false;

assert!(significand_pos >= 0, "$int should be at least 4 bits wide");

// Adjust for leading zeros in the first digit.
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;

// Now that we've got an anchor in the string we parse the remaining
// digits. Again, not entirely sure why everything is the way it is
// here! This is copied frmo gecko.
let mut discarded_extra_nonzero = false;
for (digit, is_fractional) in digits {
if !is_fractional {
// hexadecimal digits.
for (digit, in_fractional) in digits {
if !in_fractional {
exponent += 4;
}
if significand_pos > -4 {
Expand All @@ -560,12 +585,19 @@ macro_rules! float {
}
}

debug_assert!(significand != 0, "The case of no non-zero digits should have been handled above");

// Parse the exponent string, which despite this being a hexadecimal
// syntax, is a decimal number, and add it the exponent we've
// computed from the potentially non-normalized significand.
exponent = exponent.checked_add(match exponent_str {
Some(s) => s.parse::<i32>().ok()?,
None => 0,
})?;
debug_assert!(significand != 0);

// Encode the exponent and significand. Also calculate the bits of
// the significand which are discarded, as we'll use them to
// determine if we need to round up.
let (encoded_exponent, encoded_significand, discarded_significand) =
if exponent <= -bias {
// Underflow to subnormal or zero.
Expand Down Expand Up @@ -598,26 +630,29 @@ macro_rules! float {
)
};

// Combine the encoded exponent and encoded significand to produce
// the raw result, except for the sign bit, which we'll apply at
// the end.
let bits = encoded_exponent | encoded_significand;

// Apply rounding. If this overflows the significand, it carries
// into the exponent bit according to the magic of the IEEE 754
// encoding.
//
// Or rather, the comment above is what Gecko says so it's copied
// here too.
let msb = 1 << (width - 1);
// Apply rounding. Do an integer add of `0` or `1` on the raw
// result, depending on whether rounding is needed. Rounding can
// lead to a floating-point overflow, but we don't need to
// special-case that here because it turns out that IEEE 754 floats
// are encoded such that when an integer add of `1` carries into
// the bits of the exponent field, it produces the correct encoding
// for infinity.
let bits = bits
+ (((discarded_significand & msb != 0)
&& ((discarded_significand & !msb != 0) ||
discarded_extra_nonzero ||
// ties to even
(encoded_significand & 1 != 0))) as $int);

// Just before we return the bits be sure to handle the sign bit we
// Just before we return the bits, be sure to handle the sign bit we
// found at the beginning.
let bits = if negative {
bits | (1 << (width - 1))
let bits = if is_negative {
bits | msb
} else {
bits
};
Expand Down