Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Base: correctly rounded floats constructed from rationals #49749

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ include("rounding.jl")
include("div.jl")
include("rawbigints.jl")
include("float.jl")
include("rational_to_float.jl")
include("twiceprecision.jl")
include("complex.jl")
include("rational.jl")
Expand Down
19 changes: 19 additions & 0 deletions base/docs/basedocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3049,6 +3049,25 @@ julia> 4/2
julia> 4.5/2
2.25
```

This function may convert integer arguments to a floating-point number type
([`AbstractFloat`](@ref)), potentially resulting in a loss of accuracy. To avoid this,
instead construct a [`Rational`](@ref) from the arguments, then convert the resulting
rational number to a specific floating-point type of your choice:

```jldoctest
julia> n = 100000000000000000
100000000000000000

julia> m = n + 6
100000000000000006

julia> n/m
1.0

julia> Float64(n//m) # `//` constructs a `Rational`
0.9999999999999999
```
"""
/(x, y)

Expand Down
52 changes: 46 additions & 6 deletions base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ import
isone, big, _string_n, decompose, minmax, _precision_with_base_2,
sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
RawBigIntRoundingIncrementHelper, truncated, RawBigInt
RawBigIntRoundingIncrementHelper, truncated, RawBigInt, RationalToFloat,
rational_to_floating_point


using .Base.Libc
Expand Down Expand Up @@ -317,12 +318,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloa
BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
BigFloat(Float64(x), r; precision=precision)

function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
setprecision(BigFloat, precision) do
setrounding_raw(BigFloat, r) do
BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
ccall(
(:mpfr_set_z_2exp, libmpfr),
Int32,
(Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
z, n, exp, rm,
)
nothing
end

function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
num_is_zero = iszero(num)
den_is_zero = iszero(den)
s = Int8(sign(num))
sb = signbit(s)
is_zero = num_is_zero & !den_is_zero
is_inf = !num_is_zero & den_is_zero
is_regular = !num_is_zero & !den_is_zero

if is_regular
let rtfc = RationalToFloat.to_float_components
c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
ret = BigFloat(precision = prec)
mpfr_romo = convert(MPFRRoundingMode, romo)
set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
ret
end
end
else
if is_zero
BigFloat(false, MPFRRoundToZero, precision = prec)
elseif is_inf
BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
else
BigFloat(precision = prec)
end
end::BigFloat
end

function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
rational_to_floating_point(BigFloat, x, r, precision)
end

function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
precision::Integer = DEFAULT_PRECISION[])
rational_to_floating_point(BigFloat, x, r, precision)
end

function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat))
Expand Down
26 changes: 13 additions & 13 deletions base/rational.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,19 +151,19 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
(::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
throw(InexactError(nameof(T), T, x)))

AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
P = promote_type(T,S)
convert(T, convert(P,x.num)/convert(P,x.den))::T
end
# avoid spurious overflow (#52394). (Needed for UInt16 or larger;
# we also include Int16 for consistency of accuracy.)
Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) =
Float16(Float32(x))
Float16(x::Rational{<:Union{Int128,UInt128}}) =
Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
Float32(x::Rational{<:Union{Int128,UInt128}}) =
Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
function rational_to_floating_point(::Type{F}, x, rm, prec) where {F}
nd = (numerator(x), denominator(x))
RationalToFloat.to_floating_point(F, nd..., rm, prec)::F
end

function (::Type{F})(x::Rational, rm::RoundingMode = RoundNearest) where {F<:AbstractFloat}
rational_to_floating_point(F, x, rm, precision(F))::F
end

function AbstractFloat(x::Q) where {Q<:Rational}
T = float(Q)
T(x)::T::AbstractFloat
end

function Rational{T}(x::AbstractFloat) where T<:Integer
r = rationalize(T, x, tol=0)
Expand Down
209 changes: 209 additions & 0 deletions base/rational_to_float.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

module RationalToFloat

# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
# to compile to a single instruction, because the semantics correspond
# to what hardware usually provides.
function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
@inline begin
mask = 8*sizeof(T) - 1
c = b & mask
shift(a, c)
end
end
nsajko marked this conversation as resolved.
Show resolved Hide resolved

machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")

# Fallback for `BigInt` etc.
machine_shift(shift::S, a, b) where {S} = shift(a, b)

# Arguments are positive integers.
function div_significand_with_remainder(num, den, minimum_significand_size)
clamped = x -> max(zero(x), x)::typeof(x)
bw = Base.top_set_bit # bit width
shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
t = machine_shift(<<, num, shift)
(divrem(t, den, RoundToZero)..., shift)
end

# `divrem(n, 1<<k, RoundToZero)`
function divrem_2(n, k)
quo = machine_shift(>>>, n, k)
tmp = machine_shift(<<, quo, k)
rem = n - tmp
(quo, rem)
end

function to_float_components_impl(num, den, precision, max_subnormal_exp)
# `+1` because we need an extra, "round", bit for some rounding modes.
#
# TODO: as a performance optimization, only do this when required
# by the rounding mode
prec_p_1 = precision + true

(quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
width = Base.top_set_bit(quo0)
excess_width = width - prec_p_1
exp = width - shift - true

exp_underflow = if isnothing(max_subnormal_exp)
zero(exp)
else
let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
(signbit(d) ? z : d + true)::T
end
end

(quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
integral_significand = quo1 >>> true
round_bit = quo1 % Bool
sticky_bit = !iszero(rem1) | !iszero(rem0)

(; integral_significand, exponent = exp, round_bit, sticky_bit)
end

struct RoundingIncrementHelper
final_bit::Bool
round_bit::Bool
sticky_bit::Bool
end

(h::RoundingIncrementHelper)(::Base.Rounding.FinalBit) = h.final_bit
(h::RoundingIncrementHelper)(::Base.Rounding.RoundBit) = h.round_bit
(h::RoundingIncrementHelper)(::Base.Rounding.StickyBit) = h.sticky_bit

function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit)
nsajko marked this conversation as resolved.
Show resolved Hide resolved
overflows = (x, p) -> x == machine_shift(<<, one(x), p)
t = to_float_components_impl(num, den, precision, max_subnormal_exp)
raw_significand = t.integral_significand
rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
incr = Base.Rounding.correct_rounding_requires_increment(rh, romo, sign_bit)
rounded = raw_significand + incr
(integral_significand, exponent) = let exp = t.exponent
if overflows(rounded, precision)
(rounded >>> true, exp + true)
else
(rounded, exp)
end
end
(; integral_significand, exponent)
end

function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this method need to exist? if this is purely for internal use, surely the caller can be responsible for converting num to the type they want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also used in mpfr.jl

to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
end

function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
num_is_zero = iszero(num)
den_is_zero = iszero(den)
sb = signbit(num)
is_zero = num_is_zero & !den_is_zero
is_inf = !num_is_zero & den_is_zero
is_regular = !num_is_zero & !den_is_zero
if is_regular
let
c = to_float_components(S, num, den, prec, nothing, rm, sb)
signif = T(c.integral_significand)::T
x = ldexp(signif, c.exponent - prec + true)::T
sb ? -x : x
end::T
else
if is_zero
zero(T)::T
elseif is_inf
T(Inf)::T
else
T(NaN)::T
end
end::T
end

function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
to_floating_point_fallback(T, S, num, den, rm, prec)
end

function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
num_is_zero = iszero(num)
den_is_zero = iszero(den)
sb = signbit(num)
is_zero = num_is_zero & !den_is_zero
is_inf = !num_is_zero & den_is_zero
Comment on lines +130 to +131

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here I think it would be natural to make an early return on these checks. So replace them with

num_is_zero & !den_is_zero && return zero(T)
!num_is_zero & den_is_zero && return sb * T(Inf)
num_is_zero & den_is_zero && return T(NaN)

That removes the need for the if-statement further down and , which, for me, simplifies the flow.

Similar things could be done in the other implementations, though it makes less of a difference there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to avoid return when possible. It's a jump/goto statement, breaking nice control-flow properties known as structured programming. I do use jumps in control flow, but only when necessary.

Copy link
Contributor Author

@nsajko nsajko Jan 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used to be a Go nut (as in "Golang aficionado"), so I see where you're coming from, though.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's only a matter of style of course. I'm not going to fight you over it 😄

is_regular = !num_is_zero & !den_is_zero
Rnd = Base.Rounding
(rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
(false, false)
else
let from = Rnd.rounds_away_from_zero(rm, sb)
(!from, from)
end
end::NTuple{2,Bool}
exp_max = Base.exponent_max(T)
exp_min = Base.exponent_min(T)
ieee_repr = Base.ieee754_representation
repr_zero = ieee_repr(T, sb, Val(:zero))
repr_inf = ieee_repr(T, sb, Val(:inf))
repr_nan = ieee_repr(T, sb, Val(:nan))
U = typeof(repr_zero)
repr_zero::U
repr_inf::U
repr_nan::U

ret_u = if is_regular
let
c = to_float_components(S, num, den, prec, exp_min - 1, rm, sb)
exp = c.exponent
exp_diff = exp - exp_min
is_normal = 0 ≤ exp_diff
exp_is_huge_p = exp_max < exp
exp_is_huge_n = signbit(exp_diff + prec)
rounds_to_inf = exp_is_huge_p & !rm_is_to_zero
rounds_to_zero = exp_is_huge_n & !rm_is_from_zero

if !rounds_to_zero & !exp_is_huge_p
let signif = (c.integral_significand % U) & Base.significand_mask(T)
exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
ieee_repr(T, sb, exp_field, signif)::U
end
elseif rounds_to_zero
repr_zero
elseif rounds_to_inf
repr_inf
else
ieee_repr(T, sb, Val(:omega))
end
end
else
if is_zero
repr_zero
elseif is_inf
repr_inf
else
repr_nan
end
end::U

reinterpret(T, ret_u)::T
end

# `BigInt` is a safe default.
to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt

const BitIntegerOrBool = Union{Bool,Base.BitInteger}
Copy link

@Joel-Dahne Joel-Dahne Jan 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This variable is only used once, I think it is more natural to just use the union directly. Though in this case I guess it does make the method declaration below slightly too long to fit on one line.

Copy link
Contributor Author

@nsajko nsajko Jan 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My reasoning here was basically:

  1. I'm not exactly sure what's the preferred way to break lines (or format code in general) in Julia source in the Julia project, so I prefer to avoid it completely
  2. Adding a constant is basically free (as long as it's not huge, doesn't mess with precompilation somehow, etc.)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand, it also explains some of the style of code in other places!


# As an optimization, use an integer type narrower than `BigInt` when possible.
function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
Max = if sizeof(F) ≤ sizeof(S)
S
else
(S <: Signed) ? Base.inttype(F) : Base.uinttype(F)
end
widen(Max)
end

function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
S = to_float_promote_type(F, T)
to_floating_point_impl(F, S, num, den, rm, prec)
end

end
2 changes: 1 addition & 1 deletion test/choosetests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const TESTNAMES = [
"char", "strings", "triplequote", "unicode", "intrinsics",
"dict", "hashing", "iobuffer", "staged", "offsetarray",
"arrayops", "tuple", "reduce", "reducedim", "abstractarray",
"intfuncs", "simdloop", "vecelement", "rational",
"intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
"bitarray", "copy", "math", "fastmath", "functional", "iterators",
"operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
"sorting", "spawn", "backtrace", "exceptions",
Expand Down
Loading