diff --git a/src/int/mod.rs b/src/int/mod.rs
index 1ab49f5a..5814f6e1 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -4,9 +4,9 @@
 pub mod addsub;
 pub mod mul;
 pub mod shift;
+pub mod sdiv;
 mod specialized_div_rem;
 pub mod udiv;
-pub mod sdiv;
 
 /// Trait for some basic operations on integers
 pub trait Int:
diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs
index f75f2ca9..af75cbdb 100644
--- a/src/int/sdiv.rs
+++ b/src/int/sdiv.rs
@@ -38,7 +38,7 @@ intrinsics! {
     pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 {
         i64_div_rem(a, b).1
     }
-    
+
     #[aapcs_on_arm]
     /// Returns `n / d` and sets `*rem = n % d`
     pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 {
@@ -52,7 +52,7 @@ intrinsics! {
     pub extern "C" fn __divti3(a: i128, b: i128) -> i128 {
         i128_div_rem(a, b).0
     }
-    
+
     #[win64_128bit_abi_hack]
     /// Returns `n % d`
     pub extern "C" fn __modti3(a: i128, b: i128) -> i128 {
diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs
index 133d8249..3a663dcf 100644
--- a/src/int/specialized_div_rem/asymmetric.rs
+++ b/src/int/specialized_div_rem/asymmetric.rs
@@ -14,7 +14,7 @@ macro_rules! impl_asymmetric {
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This is optimized for dividing integers with the same bitwidth as the largest operand in
         /// an asymmetrically sized division. For example, the x86-64 `divq` assembly instruction
         /// can divide a 128 bit integer by a 64 bit integer if the quotient fits in 64 bits.
@@ -129,7 +129,7 @@ macro_rules! impl_asymmetric {
             }
             // Note that this is a large $uD multiplication being used here
             let mut rem = duo - ((quo as $uD) * div);
-            
+
             if rem >= div {
                 quo += 1;
                 rem -= div;
@@ -140,7 +140,7 @@
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This is optimized for dividing integers with the same bitwidth as the largest operand in
         /// an asymmetrically sized division. For example, the x86-64 `divq` assembly instruction
         /// can divide a 128 bit integer by a 64 bit integer if the quotient fits in 64 bits.
diff --git a/src/int/specialized_div_rem/binary_long.rs b/src/int/specialized_div_rem/binary_long.rs
index dc80b75f..dc1b261e 100644
--- a/src/int/specialized_div_rem/binary_long.rs
+++ b/src/int/specialized_div_rem/binary_long.rs
@@ -11,7 +11,7 @@ macro_rules! impl_binary_long {
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This uses binary shift long division only, and is designed for CPUs without fast
         /// multiplication or division hardware.
         ///
@@ -79,7 +79,7 @@
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This uses binary shift long division only, and is designed for CPUs without fast
         /// multiplication or division hardware.
         ///
diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs
index bd6fc117..86bb2d6a 100644
--- a/src/int/specialized_div_rem/delegate.rs
+++ b/src/int/specialized_div_rem/delegate.rs
@@ -14,7 +14,7 @@ macro_rules! impl_delegate {
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This uses binary shift long division, but if it can delegates work to a smaller
         /// division. This function is used for CPUs with a register size smaller than the division
         /// size, and that do not have fast multiplication or division hardware. For CPUs with a
@@ -168,7 +168,7 @@ macro_rules! impl_delegate {
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This uses binary shift long division, but if it can delegates work to a smaller
         /// division. This function is used for CPUs with a register size smaller than the division
         /// size, and that do not have fast multiplication or division hardware. For CPUs with a
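Both `impl_binary_long!` and `impl_delegate!` above describe binary shift long division in their doc comments. For readers unfamiliar with the technique, here is a minimal restoring-division sketch for `u32` (an illustration of the idea only: `u32_div_rem_long` is a hypothetical name, and the macros generate far more optimized code with normalization shifts and delegation to half-width divisions):

    // Restoring binary long division: shift bits of `duo` into `rem` from the
    // top down; whenever `div` fits into `rem`, subtract it and set the
    // corresponding quotient bit.
    fn u32_div_rem_long(duo: u32, div: u32) -> (u32, u32) {
        assert!(div != 0, "division by zero");
        let mut quo: u32 = 0;
        // a wider accumulator so that `rem << 1` cannot overflow when `div`
        // is near `u32::MAX`
        let mut rem: u64 = 0;
        for i in (0..32).rev() {
            rem = (rem << 1) | (((duo >> i) & 1) as u64);
            if rem >= div as u64 {
                rem -= div as u64;
                quo |= 1 << i;
            }
        }
        (quo, rem as u32)
    }

As a quick check, `u32_div_rem_long(76543210, 213)` gives `(359357, 169)`, matching `(76543210 / 213, 76543210 % 213)`.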
diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs
index 9e55cb26..f628ca9d 100644
--- a/src/int/specialized_div_rem/mod.rs
+++ b/src/int/specialized_div_rem/mod.rs
@@ -19,7 +19,7 @@
 //   be used which calls `u32_div_rem_binary_long`. The `impl_binary_long!` macro uses
 //   no half division, so the chain of calls ends there.
 // - LLVM supplies 16 bit divisions and smaller
-// 
+//
 // I could replace all `u64_by_u64_div_rem` in the macros by `u64_div_rem` and likewise
 // for `u32_by_u32_div_rem`, but that would mean that hardware divisions would never be
 // used.
@@ -79,7 +79,7 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
         : "{rax}"(duo_lo), "{rdx}"(duo_hi), "r"(div)
         : "rax", "rdx"
     );
-    return (quo, rem)
+    return (quo, rem);
 }
 
 // use `_asymmetric` instead of `_trifecta`, because x86_64 supplies the `divq` instruction
diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs
index 12652b6f..1e1db3d0 100644
--- a/src/int/specialized_div_rem/trifecta.rs
+++ b/src/int/specialized_div_rem/trifecta.rs
@@ -13,13 +13,13 @@ macro_rules! impl_trifecta {
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This is optimized for division of two integers with bit widths twice as large
         /// as the largest hardware integer division supported. Note that some architectures supply
         /// a division of an integer larger than register size by a regular sized integer (e.x.
         /// x86_64 has a `divq` instruction which can divide a 128 bit integer by a 64 bit integer).
         /// In that case, the `_asymmetric` algorithm should be used instead of this one.
-        /// 
+        ///
         /// This is called the trifecta algorithm because it uses three main algorithms: short
         /// division for small divisors, the "mul or mul - 1" algorithm for when the divisor is
         /// large enough for the quotient to be determined to be one of two values via only one
@@ -92,10 +92,10 @@
                     tmp.1 as $uD
                 )
             }
-            
+
             // relative leading significant bits, cannot overflow because of above branches
             let rel_leading_sb = div_lz.wrapping_sub(duo_lz);
-            
+
             // `{2^n, 2^div_sb} <= duo < 2^n_d`
             // `1 <= div < {2^duo_sb, 2^(n_d - 1)}`
             // short division branch
@@ -122,7 +122,7 @@
                     rem_1 as $uD
                 )
             }
-            
+
             // `{2^n, 2^div_sb} <= duo < 2^n_d`
             // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}`
             // `mul` or `mul - 1` branch
@@ -266,11 +266,11 @@
             // correct it. This long division algorithm has been carefully constructed to always
             // underguess the quotient by slim margins. This allows different subalgorithms
             // to be blindly jumped to without needing an extra correction step.
-            // 
+            //
             // The only problem is that it will not work
             // for many ranges of `duo` and `div`. Fortunately, the short division,
             // mul or mul - 1 algorithms, and simple divisions happen to exactly fill these gaps.
-            // 
+            //
             // For an example, consider the division of 76543210 by 213 and assume that `n_h`
             // is equal to two decimal digits (note: we are working with base 10 here for
             // readability).
@@ -399,13 +399,13 @@
 
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        /// 
+        ///
         /// This is optimized for division of two integers with bit widths twice as large
         /// as the largest hardware integer division supported. Note that some architectures supply
         /// a division of an integer larger than register size by a regular sized integer (e.x.
         /// x86_64 has a `divq` instruction which can divide a 128 bit integer by a 64 bit integer).
         /// In that case, the `_asymmetric` algorithm should be used instead of this one.
-        /// 
+        ///
         /// This is called the trifecta algorithm because it uses three main algorithms: short
         /// division for small divisors, the "mul or mul - 1" algorithm for when the divisor is
         /// large enough for the quotient to be determined to be one of two values via only one
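A side note on the `return (quo, rem);` fix in `u128_by_u64_div_rem` above: that function still uses the old LLVM-style `asm!` string syntax. A sketch of how the same `divq` wrapper might look in the `asm!` syntax that was later stabilized (an assumed port, not part of this patch; the safety contract is unchanged: the caller must guarantee `div != 0` and a quotient that fits in 64 bits, or the instruction raises a divide exception):

    #[cfg(target_arch = "x86_64")]
    unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
        let quo: u64;
        let rem: u64;
        // `div r64` divides the 128 bit value in rdx:rax by the register
        // operand, leaving the quotient in rax and the remainder in rdx
        core::arch::asm!(
            "div {d}",
            d = in(reg) div,
            inout("rax") duo as u64 => quo,
            inout("rdx") (duo >> 64) as u64 => rem,
            options(pure, nomem, nostack),
        );
        (quo, rem)
    }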
diff --git a/src/int/udiv.rs b/src/int/udiv.rs
index 1fd02899..198ad004 100644
--- a/src/int/udiv.rs
+++ b/src/int/udiv.rs
@@ -12,13 +12,13 @@ intrinsics! {
     pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
         u32_div_rem(n, d).0
     }
-    
+
     #[maybe_use_optimized_c_shim]
     /// Returns `n % d`
     pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 {
         u32_div_rem(n, d).1
     }
-    
+
     #[maybe_use_optimized_c_shim]
    /// Returns `n / d` and sets `*rem = n % d`
     pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
@@ -28,19 +28,19 @@
         }
         quo_rem.0
     }
-    
+
     #[maybe_use_optimized_c_shim]
     /// Returns `n / d`
     pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 {
         u64_div_rem(n, d).0
     }
-    
+
     #[maybe_use_optimized_c_shim]
     /// Returns `n % d`
    pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 {
         u64_div_rem(n, d).1
     }
-    
+
     /// Returns `n / d` and sets `*rem = n % d`
     pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 {
         let quo_rem = u64_div_rem(n, d);
@@ -49,7 +49,7 @@
         }
         quo_rem.0
     }
-    
+
     #[win64_128bit_abi_hack]
     /// Returns `n / d`
     pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 {
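Because each intrinsic above funnels into a single `u*_div_rem` call, code that needs both the quotient and the remainder should prefer the `__udivmod*` entry points over a separate division and modulo. A usage sketch (in practice these symbols are called by compiler-generated code, e.g. when lowering `u64` division on 32 bit targets, rather than by hand):

    let mut rem: u64 = 0;
    let quo = __udivmoddi4(76543210, 213, Some(&mut rem));
    assert_eq!((quo, rem), (359357, 169)); // (76543210 / 213, 76543210 % 213)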