JuliaLang · nsajko · May 7, 2023 · oscardssmith · Jan 14, 2024 · nsajko
diff --git a/base/Base.jl b/base/Base.jl
@@ -284,6 +284,7 @@ include("rounding.jl")
 include("div.jl")
 include("rawbigints.jl")
 include("float.jl")
+include("rational_to_float.jl")
 include("twiceprecision.jl")
 include("complex.jl")
 include("rational.jl")

diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
@@ -3049,6 +3049,25 @@ julia> 4/2
 julia> 4.5/2
 2.25
 ```
+
+This function may convert integer arguments to a floating-point number type
+([`AbstractFloat`](@ref)), potentially resulting in a loss of accuracy. To avoid this,
+instead construct a [`Rational`](@ref) from the arguments, then convert the resulting
+rational number to a specific floating-point type of your choice:
+
+```jldoctest
+julia> n = 100000000000000000
+100000000000000000
+
+julia> m = n + 6
+100000000000000006
+
+julia> n/m
+1.0
+
+julia> Float64(n//m)  # `//` constructs a `Rational`
+0.9999999999999999
+```
 """
 /(x, y)
 

diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -19,7 +19,8 @@ import
         isone, big, _string_n, decompose, minmax, _precision_with_base_2,
         sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
         uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
-        RawBigIntRoundingIncrementHelper, truncated, RawBigInt
+        RawBigIntRoundingIncrementHelper, truncated, RawBigInt, RationalToFloat,
+        rational_to_floating_point
 
 
 using .Base.Libc
@@ -317,12 +318,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloa
 BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
     BigFloat(Float64(x), r; precision=precision)
 
-function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
-    setprecision(BigFloat, precision) do
-        setrounding_raw(BigFloat, r) do
-            BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
+function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
+    ccall(
+        (:mpfr_set_z_2exp, libmpfr),
+        Int32,
+        (Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
+        z, n, exp, rm,
+    )
+    nothing
+end
+
+function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    s = Int8(sign(num))
+    sb = signbit(s)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+
+    if is_regular
+        let rtfc = RationalToFloat.to_float_components
+            c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
+            ret = BigFloat(precision = prec)
+            mpfr_romo = convert(MPFRRoundingMode, romo)
+            set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
+            ret
         end
-    end
+    else
+        if is_zero
+            BigFloat(false, MPFRRoundToZero, precision = prec)
+        elseif is_inf
+            BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
+        else
+            BigFloat(precision = prec)
+        end
+    end::BigFloat
+end
+
+function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
+    rational_to_floating_point(BigFloat, x, r, precision)
+end
+
+function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
+                  precision::Integer = DEFAULT_PRECISION[])
+    rational_to_floating_point(BigFloat, x, r, precision)
 end
 
 function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat))

diff --git a/base/rational.jl b/base/rational.jl
@@ -151,19 +151,19 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
 (::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
     throw(InexactError(nameof(T), T, x)))
 
-AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
-function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
-    P = promote_type(T,S)
-    convert(T, convert(P,x.num)/convert(P,x.den))::T
-end
- # avoid spurious overflow (#52394).  (Needed for UInt16 or larger;
- # we also include Int16 for consistency of accuracy.)
-Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) =
-    Float16(Float32(x))
-Float16(x::Rational{<:Union{Int128,UInt128}}) =
-    Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
-Float32(x::Rational{<:Union{Int128,UInt128}}) =
-    Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
+function rational_to_floating_point(::Type{F}, x, rm, prec) where {F}
+    nd = (numerator(x), denominator(x))
+    RationalToFloat.to_floating_point(F, nd..., rm, prec)::F
+end
+
+function (::Type{F})(x::Rational, rm::RoundingMode = RoundNearest) where {F<:AbstractFloat}
+    rational_to_floating_point(F, x, rm, precision(F))::F
+end
+
+function AbstractFloat(x::Q) where {Q<:Rational}
+    T = float(Q)
+    T(x)::T::AbstractFloat
+end
 
 function Rational{T}(x::AbstractFloat) where T<:Integer
     r = rationalize(T, x, tol=0)

diff --git a/base/rational_to_float.jl b/base/rational_to_float.jl
@@ -0,0 +1,209 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module RationalToFloat
+
+# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
+# to compile to a single instruction, because the semantics correspond
+# to what hardware usually provides.
+function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
+    @inline begin
+        mask = 8*sizeof(T) - 1
+        c = b & mask
+        shift(a, c)
+    end
+end
+
+machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")
+
+# Fallback for `BigInt` etc.
+machine_shift(shift::S, a, b) where {S} = shift(a, b)
+
+# Arguments are positive integers.
+function div_significand_with_remainder(num, den, minimum_significand_size)
+    clamped = x -> max(zero(x), x)::typeof(x)
+    bw = Base.top_set_bit  # bit width
+    shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
+    t = machine_shift(<<, num, shift)
+    (divrem(t, den, RoundToZero)..., shift)
+end
+
+# `divrem(n, 1<<k, RoundToZero)`
+function divrem_2(n, k)
+    quo = machine_shift(>>>, n,   k)
+    tmp = machine_shift(<<,  quo, k)
+    rem = n - tmp
+    (quo, rem)
+end
+
+function to_float_components_impl(num, den, precision, max_subnormal_exp)
+    # `+1` because we need an extra, "round", bit for some rounding modes.
+    #
+    # TODO: as a performance optimization, only do this when required
+    #       by the rounding mode
+    prec_p_1 = precision + true
+
+    (quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
+    width = Base.top_set_bit(quo0)
+    excess_width = width - prec_p_1
+    exp = width - shift - true
+
+    exp_underflow = if isnothing(max_subnormal_exp)
+        zero(exp)
+    else
+        let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
+            (signbit(d) ? z : d + true)::T
+        end
+    end
+
+    (quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
+    integral_significand = quo1 >>> true
+    round_bit = quo1 % Bool
+    sticky_bit = !iszero(rem1) | !iszero(rem0)
+
+    (; integral_significand, exponent = exp, round_bit, sticky_bit)
+end
+
+struct RoundingIncrementHelper
+    final_bit::Bool
+    round_bit::Bool
+    sticky_bit::Bool
+end
+
+(h::RoundingIncrementHelper)(::Base.Rounding.FinalBit) = h.final_bit
+(h::RoundingIncrementHelper)(::Base.Rounding.RoundBit) = h.round_bit
+(h::RoundingIncrementHelper)(::Base.Rounding.StickyBit) = h.sticky_bit
+
+function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit)
+    overflows = (x, p) -> x == machine_shift(<<, one(x), p)
+    t = to_float_components_impl(num, den, precision, max_subnormal_exp)
+    raw_significand = t.integral_significand
+    rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
+    incr = Base.Rounding.correct_rounding_requires_increment(rh, romo, sign_bit)
+    rounded = raw_significand + incr
+    (integral_significand, exponent) = let exp = t.exponent
+        if overflows(rounded, precision)
+            (rounded >>> true, exp + true)
+        else
+            (rounded, exp)
+        end
+    end
+    (; integral_significand, exponent)
+end
+
+function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
+    to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
+end
+
+function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    sb = signbit(num)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+    if is_regular
+        let
+            c = to_float_components(S, num, den, prec, nothing, rm, sb)
+            signif = T(c.integral_significand)::T
+            x = ldexp(signif, c.exponent - prec + true)::T
+            sb ? -x : x
+        end::T
+    else
+        if is_zero
+            zero(T)::T
+        elseif is_inf
+            T(Inf)::T
+        else
+            T(NaN)::T
+        end
+    end::T
+end
+
+function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
+    to_floating_point_fallback(T, S, num, den, rm, prec)
+end
+
+function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    sb = signbit(num)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+    Rnd = Base.Rounding
+    (rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
+        (false, false)
+    else
+        let from = Rnd.rounds_away_from_zero(rm, sb)
+            (!from, from)
+        end
+    end::NTuple{2,Bool}
+    exp_max = Base.exponent_max(T)
+    exp_min = Base.exponent_min(T)
+    ieee_repr = Base.ieee754_representation
+    repr_zero = ieee_repr(T, sb, Val(:zero))
+    repr_inf  = ieee_repr(T, sb, Val(:inf))
+    repr_nan  = ieee_repr(T, sb, Val(:nan))
+    U = typeof(repr_zero)
+    repr_zero::U
+    repr_inf::U
+    repr_nan::U
+
+    ret_u = if is_regular
+        let
+            c = to_float_components(S, num, den, prec, exp_min - 1, rm, sb)
+            exp = c.exponent
+            exp_diff = exp - exp_min
+            is_normal = 0 ≤ exp_diff
+            exp_is_huge_p = exp_max < exp
+            exp_is_huge_n = signbit(exp_diff + prec)
+            rounds_to_inf  = exp_is_huge_p & !rm_is_to_zero
+            rounds_to_zero = exp_is_huge_n & !rm_is_from_zero
+
+            if !rounds_to_zero & !exp_is_huge_p
+                let signif = (c.integral_significand % U) & Base.significand_mask(T)
+                    exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
+                    ieee_repr(T, sb, exp_field, signif)::U
+                end
+            elseif rounds_to_zero
+                repr_zero
+            elseif rounds_to_inf
+                repr_inf
+            else
+                ieee_repr(T, sb, Val(:omega))
+            end
+        end
+    else
+        if is_zero
+            repr_zero
+        elseif is_inf
+            repr_inf
+        else
+            repr_nan
+        end
+    end::U
+
+    reinterpret(T, ret_u)::T
+end
+
+# `BigInt` is a safe default.
+to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt
+
+const BitIntegerOrBool  = Union{Bool,Base.BitInteger}
+
+# As an optimization, use an integer type narrower than `BigInt` when possible.
+function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
+    Max = if sizeof(F) ≤ sizeof(S)
+        S
+    else
+        (S <: Signed) ? Base.inttype(F) : Base.uinttype(F)
+    end
+    widen(Max)
+end
+
+function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
+    S = to_float_promote_type(F, T)
+    to_floating_point_impl(F, S, num, den, rm, prec)
+end
+
+end
diff --git a/test/choosetests.jl b/test/choosetests.jl
@@ -11,7 +11,7 @@ const TESTNAMES = [
         "char", "strings", "triplequote", "unicode", "intrinsics",
         "dict", "hashing", "iobuffer", "staged", "offsetarray",
         "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
-        "intfuncs", "simdloop", "vecelement", "rational",
+        "intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
         "bitarray", "copy", "math", "fastmath", "functional", "iterators",
         "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
         "sorting", "spawn", "backtrace", "exceptions",