Skip to content

Commit

Permalink
Improve the comments for hexadecimal float parsing. (#1888)
Browse files Browse the repository at this point in the history
* Improve the comments for hexadecimal float parsing.

Replace the comments requesting better comments with better comments,
and tidy up the code.

* Fix a typo.
  • Loading branch information
sunfishcode authored Oct 31, 2024
1 parent 5a7419d commit 6a76d04
Showing 1 changed file with 70 additions and 35 deletions.
105 changes: 70 additions & 35 deletions crates/wast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ macro_rules! float {
let signif_bits = width - 1 - $exp_bits;
let signif_mask = (1 << exp_offset) - 1;
let bias = (1 << ($exp_bits - 1)) - 1;
let msb = 1 << neg_offset;

let (hex, integral, fractional, exponent_str) = match val {
// Infinity is when the exponent bits are all set and
Expand Down Expand Up @@ -497,53 +498,77 @@ macro_rules! float {
return Some(float.to_bits());
}

// Parsing hex floats is... hard! I don't really know what most of
// this below does. It was copied from Gecko's implementation in
// `WasmTextToBinary.cpp`. Would love comments on this if you have
// them!
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");
let negative = integral.starts_with('-');
// Parse a hexadecimal floating-point value.
//
// The main loop here is simpler than for parsing decimal floats,
// because we can just parse hexadecimal digits and then shift
// their bits into place in the significand. But in addition to
// that, we also need to handle non-normalized representations,
// where the integral part is not "1", to convert them to
// normalized results, to round, in case we get more digits than
// the target format supports, and to handle overflow and subnormal
// cases.

// Get slices of digits for the integral and fractional parts. We
// can trivially skip any leading zeros in the integral part.
let is_negative = integral.starts_with('-');
let integral = integral.trim_start_matches('-').trim_start_matches('0');
let fractional = fractional.as_ref().map(|s| &**s).unwrap_or("");

// Do a bunch of work up front to locate the first non-zero digit
// to determine the initial exponent. There's a number of
// adjustments depending on where the digit was found, but the
// general idea here is that I'm not really sure why things are
// calculated the way they are but it should match Gecko.
// Locate the first non-zero digit to determine the initial exponent.
//
// If there's no integral part, skip past leading zeros so that
// something like "0x.0000000000000000000002" doesn't cause us to hit
// a shift overflow when we try to shift the value into place. We'll
// adjust the exponent below to account for these skipped zeros.
let fractional_no_leading = fractional.trim_start_matches('0');
let fractional_iter = if integral.is_empty() {
fractional_no_leading.chars()
} else {
fractional.chars()
};

// Create a unified iterator over the digits of the integral part
// followed by the digits of the fractional part. The boolean value
// indicates which of these parts we're in.
let mut digits = integral.chars()
.map(|c| (to_hex(c) as $int, false))
.chain(fractional_iter.map(|c| (to_hex(c) as $int, true)));

// Compute the number of leading zeros in the first non-zero digit,
// since if the first digit is not "1" we'll need to adjust for
// normalization.
let lead_nonzero_digit = match digits.next() {
Some((c, _)) => c,
// No digits? Must be `+0` or `-0`, being careful to handle the
// sign encoding here.
None if negative => return Some(1 << (width - 1)),
// No non-zero digits? Must be `+0` or `-0`, being careful to
// handle the sign encoding here.
None if is_negative => return Some(msb),
None => return Some(0),
};
let mut significand = 0 as $int;
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;

// Prepare for the main parsing loop. Calculate the initial values
// of `exponent` and `significand` based on what we've seen so far.
let mut exponent = if !integral.is_empty() {
1
} else {
// Adjust the exponent digits to account for any leading zeros
// in the fractional part that we skipped above.
-((fractional.len() - fractional_no_leading.len() + 1) as i32) + 1
};
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;
let mut significand_pos = (width - (4 - (lz as usize))) as isize;
assert!(significand_pos >= 0);
significand |= lead_nonzero_digit << significand_pos;
let mut significand: $int = lead_nonzero_digit << significand_pos;
let mut discarded_extra_nonzero = false;

assert!(significand_pos >= 0, "$int should be at least 4 bits wide");

// Adjust for leading zeros in the first digit.
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;

// Now that we've got an anchor in the string we parse the remaining
// digits. Again, not entirely sure why everything is the way it is
// here! This is copied frmo gecko.
let mut discarded_extra_nonzero = false;
for (digit, is_fractional) in digits {
if !is_fractional {
// hexadecimal digits.
for (digit, in_fractional) in digits {
if !in_fractional {
exponent += 4;
}
if significand_pos > -4 {
Expand All @@ -560,12 +585,19 @@ macro_rules! float {
}
}

debug_assert!(significand != 0, "The case of no non-zero digits should have been handled above");

// Parse the exponent string, which despite this being a hexadecimal
// syntax, is a decimal number, and add it the exponent we've
// computed from the potentially non-normalized significand.
exponent = exponent.checked_add(match exponent_str {
Some(s) => s.parse::<i32>().ok()?,
None => 0,
})?;
debug_assert!(significand != 0);

// Encode the exponent and significand. Also calculate the bits of
// the significand which are discarded, as we'll use them to
// determine if we need to round up.
let (encoded_exponent, encoded_significand, discarded_significand) =
if exponent <= -bias {
// Underflow to subnormal or zero.
Expand Down Expand Up @@ -598,26 +630,29 @@ macro_rules! float {
)
};

// Combine the encoded exponent and encoded significand to produce
// the raw result, except for the sign bit, which we'll apply at
// the end.
let bits = encoded_exponent | encoded_significand;

// Apply rounding. If this overflows the significand, it carries
// into the exponent bit according to the magic of the IEEE 754
// encoding.
//
// Or rather, the comment above is what Gecko says so it's copied
// here too.
let msb = 1 << (width - 1);
// Apply rounding. Do an integer add of `0` or `1` on the raw
// result, depending on whether rounding is needed. Rounding can
// lead to a floating-point overflow, but we don't need to
// special-case that here because it turns out that IEEE 754 floats
// are encoded such that when an integer add of `1` carries into
// the bits of the exponent field, it produces the correct encoding
// for infinity.
let bits = bits
+ (((discarded_significand & msb != 0)
&& ((discarded_significand & !msb != 0) ||
discarded_extra_nonzero ||
// ties to even
(encoded_significand & 1 != 0))) as $int);

// Just before we return the bits be sure to handle the sign bit we
// Just before we return the bits, be sure to handle the sign bit we
// found at the beginning.
let bits = if negative {
bits | (1 << (width - 1))
let bits = if is_negative {
bits | msb
} else {
bits
};
Expand Down

0 comments on commit 6a76d04

Please sign in to comment.