Skip to content

Commit

Permalink
Split Int into Int and MinInt
Browse files Browse the repository at this point in the history
`MinInt` contains the basic methods that are only needed by integers
involved in widening operations, i.e. big integers. `Int` retains all
other operations and convenience methods.
  • Loading branch information
tgross35 committed May 15, 2024
1 parent 7f33201 commit 9c6fcb5
Show file tree
Hide file tree
Showing 12 changed files with 210 additions and 172 deletions.
22 changes: 11 additions & 11 deletions src/float/add.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Returns `a + b`
fn add<F: Float>(a: F, b: F) -> F
Expand Down Expand Up @@ -57,17 +57,17 @@ where
}

// zero + anything = anything
if a_abs == Int::ZERO {
if a_abs == MinInt::ZERO {
// but we need to get the sign right for zero + zero
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}

// anything + zero = anything
if b_abs == Int::ZERO {
if b_abs == MinInt::ZERO {
return a;
}
}
Expand Down Expand Up @@ -113,10 +113,10 @@ where
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent).cast();
if align != Int::ZERO {
if align != MinInt::ZERO {
if align < bits {
let sticky =
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
b_significand = (b_significand >> align.cast()) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
Expand All @@ -125,8 +125,8 @@ where
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == Int::ZERO {
return F::from_repr(Int::ZERO);
if a_significand == MinInt::ZERO {
return F::from_repr(MinInt::ZERO);
}

// If partial cancellation occured, we need to left-shift the result
Expand All @@ -143,8 +143,8 @@ where

// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != Int::ZERO {
let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
if a_significand & implicit_bit << 4 != MinInt::ZERO {
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
Expand All @@ -160,7 +160,7 @@ where
// need to shift the significand.
let shift = (1 - a_exponent).cast();
let sticky =
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
a_significand = a_significand >> shift.cast() | sticky;
a_exponent = 0;
}
Expand Down
2 changes: 1 addition & 1 deletion src/float/cmp.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(unreachable_code)]

use crate::float::Float;
use crate::int::Int;
use crate::int::MinInt;

#[derive(Clone, Copy)]
enum Result {
Expand Down
79 changes: 46 additions & 33 deletions src/float/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#![allow(clippy::needless_return)]

use crate::float::Float;
use crate::int::{CastInto, DInt, HInt, Int};
use crate::int::{CastInto, DInt, HInt, Int, MinInt};

use super::HalfRep;

fn div32<F: Float>(a: F, b: F) -> F
where
Expand Down Expand Up @@ -454,15 +456,20 @@ where

fn div64<F: Float>(a: F, b: F) -> F
where
u32: CastInto<F::Int>,
F::Int: CastInto<u32>,
i32: CastInto<F::Int>,
F::Int: CastInto<i32>,
u64: CastInto<F::Int>,
F::Int: CastInto<HalfRep<F>>,
F::Int: From<HalfRep<F>>,
F::Int: From<u8>,
F::Int: CastInto<u64>,
i64: CastInto<F::Int>,
F::Int: CastInto<i64>,
F::Int: HInt,
F::Int: HInt + DInt,
u16: CastInto<F::Int>,
i32: CastInto<F::Int>,
i64: CastInto<F::Int>,
u32: CastInto<F::Int>,
u64: CastInto<F::Int>,
u64: CastInto<HalfRep<F>>,
{
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
Expand All @@ -471,7 +478,7 @@ where
let one = F::Int::ONE;
let zero = F::Int::ZERO;
let hw = F::BITS / 2;
let lo_mask = u64::MAX >> hw;
let lo_mask = F::Int::MAX >> hw;

let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
Expand Down Expand Up @@ -616,21 +623,23 @@ where

let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u32 =
(CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
);

// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.

// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));

// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u32 = {
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
let x_uq0_hw: HalfRep<F> = {
let mut x_uq0_hw: HalfRep<F> =
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// dbg!(x_uq0_hw);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
Expand Down Expand Up @@ -661,8 +670,9 @@ where
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u32 =
0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
));
// dbg!(corr_uq1_hw);

// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
Expand All @@ -677,7 +687,9 @@ where
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
>> (hw - 1))
.cast();
// dbg!(x_uq0_hw);
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
Expand Down Expand Up @@ -707,7 +719,7 @@ where
// be not below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u32)
x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
};

// Error estimations for full-precision iterations are calculated just
Expand All @@ -717,7 +729,7 @@ where
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
let blo: F::Int = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
Expand All @@ -726,19 +738,20 @@ where
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u64(
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
.wrapping_sub(one)
.wrapping_neg(); // account for *possible* carry
let lo_corr: F::Int = corr_uq1 & lo_mask;
let hi_corr: F::Int = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
.wrapping_sub(2))
.cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
.wrapping_sub(F::Int::from(2u8));
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work

Expand All @@ -755,8 +768,8 @@ where
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
let x_uq0: F::Int = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
x_uq0
};
Expand Down Expand Up @@ -806,7 +819,7 @@ where
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?

let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).

// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
Expand Down Expand Up @@ -868,7 +881,7 @@ where
// r = a - b * q
let abs_result = if written_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= ((written_exponent as u64) << significand_bits).cast();
ret |= written_exponent.cast() << significand_bits;
residual <<= 1;
ret
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/float/extend.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
fn extend<F: Float, R: Float>(a: F) -> R
Expand Down
7 changes: 5 additions & 2 deletions src/float/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use core::ops;

use super::int::Int;
use crate::int::{DInt, Int, MinInt};

pub mod add;
pub mod cmp;
Expand All @@ -12,6 +12,9 @@ pub mod pow;
pub mod sub;
pub mod trunc;

/// Wrapper to extract the integer type half of the float's size
pub(crate) type HalfRep<F> = <<F as Float>::Int as DInt>::H;

public_test_dep! {
/// Trait for some basic operations on floats
#[allow(dead_code)]
Expand Down Expand Up @@ -60,7 +63,7 @@ pub(crate) trait Float:
/// A mask for the significand
const SIGNIFICAND_MASK: Self::Int;

// The implicit bit of the float format
/// The implicit bit of the float format
const IMPLICIT_BIT: Self::Int;

/// A mask for the exponent
Expand Down
2 changes: 1 addition & 1 deletion src/float/mul.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, DInt, HInt, Int};
use crate::int::{CastInto, DInt, HInt, Int, MinInt};

fn mul<F: Float>(a: F, b: F) -> F
where
Expand Down
2 changes: 1 addition & 1 deletion src/float/trunc.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::float::Float;
use crate::int::{CastInto, Int};
use crate::int::{CastInto, Int, MinInt};

fn trunc<F: Float, R: Float>(a: F) -> R
where
Expand Down
10 changes: 5 additions & 5 deletions src/int/addsub.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::int::{DInt, Int};
use crate::int::{DInt, Int, MinInt};

trait UAddSub: DInt {
trait UAddSub: DInt + Int {
fn uadd(self, other: Self) -> Self {
let (lo, carry) = self.lo().overflowing_add(other.lo());
let hi = self.hi().wrapping_add(other.hi());
Expand All @@ -22,7 +22,7 @@ impl UAddSub for u128 {}

trait AddSub: Int
where
<Self as Int>::UnsignedInt: UAddSub,
<Self as MinInt>::UnsignedInt: UAddSub,
{
fn add(self, other: Self) -> Self {
Self::from_unsigned(self.unsigned().uadd(other.unsigned()))
Expand All @@ -37,7 +37,7 @@ impl AddSub for i128 {}

trait Addo: AddSub
where
<Self as Int>::UnsignedInt: UAddSub,
<Self as MinInt>::UnsignedInt: UAddSub,
{
fn addo(self, other: Self) -> (Self, bool) {
let sum = AddSub::add(self, other);
Expand All @@ -50,7 +50,7 @@ impl Addo for u128 {}

trait Subo: AddSub
where
<Self as Int>::UnsignedInt: UAddSub,
<Self as MinInt>::UnsignedInt: UAddSub,
{
fn subo(self, other: Self) -> (Self, bool) {
let sum = AddSub::sub(self, other);
Expand Down
Loading

0 comments on commit 9c6fcb5

Please sign in to comment.