From 63830a6f2050e61b7b2aca78e2462487fd3f59d0 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Tue, 6 Dec 2022 12:15:39 -0700 Subject: [PATCH] Generalize Bool parse method to AbstractString (#47782) * Generalize Bool parse method to AbstractString Fixes https://github.com/JuliaStrings/InlineStrings.jl/issues/57. We currently have a specialization for `parse(Bool, ::Union{String, SubString{String})` where `true` and `false` are parsed appropriately. The restriction to `Union{String, SubString{String}}`, however, means we don't get this behavior for other `AbstractString`s. In the linked issue above, for InlineStrings, we end up going through the generic integer parsing codepath which results in an `InexactError` when we try to do `Bool(10)`. The proposal in this PR takes advantage of the fact that there is only the 2 comparisons where we do `_memcmp` that require the input string to be "dense" (in memory), and otherwise, we just do a comparison against a `SubString` of the input string. Relatedly, I've wanted to introduce the concept of an abstrac type like: ```julia abstract type MemoryAddressableString <: AbstractString ``` where the additional required interface would be being able to call `pointer(::MemoryAddressableString)`, since a lot of our string algorithms depend on doing these kind of pointer operations and hence makes it quite a pain to implement your own custom string type. * Apply suggestions from code review Co-authored-by: Stefan Karpinski Co-authored-by: Nick Robinson Co-authored-by: Stefan Karpinski Co-authored-by: Nick Robinson --- base/parse.jl | 15 ++++++++++----- test/parse.jl | 10 ++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/base/parse.jl b/base/parse.jl index 1c911c96e1479..e5b3d2ae3bc90 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -176,7 +176,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos:: return n end -function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}}, +function tryparse_internal(::Type{Bool}, sbuff::AbstractString, startpos::Int, endpos::Int, base::Integer, raise::Bool) if isempty(sbuff) raise && throw(ArgumentError("input string is empty")) @@ -202,10 +202,15 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}}, end len = endpos - startpos + 1 - p = pointer(sbuff) + startpos - 1 - GC.@preserve sbuff begin - (len == 4) && (0 == _memcmp(p, "true", 4)) && (return true) - (len == 5) && (0 == _memcmp(p, "false", 5)) && (return false) + if sbuff isa Union{String, SubString{String}} + p = pointer(sbuff) + startpos - 1 + GC.@preserve sbuff begin + (len == 4) && (0 == _memcmp(p, "true", 4)) && (return true) + (len == 5) && (0 == _memcmp(p, "false", 5)) && (return false) + end + else + (len == 4) && (SubString(sbuff, startpos:startpos+3) == "true") && (return true) + (len == 5) && (SubString(sbuff, startpos:startpos+4) == "false") && (return false) end if raise diff --git a/test/parse.jl b/test/parse.jl index 7cf6d059f0a71..69092b2c4188d 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -41,6 +41,16 @@ Base.iterate(::Issue29451String, i::Integer=1) = i == 1 ? ('0', 2) : nothing @test Issue29451String() == "0" @test parse(Int, Issue29451String()) == 0 +# https://github.com/JuliaStrings/InlineStrings.jl/issues/57 +struct InlineStringIssue57 <: AbstractString end +Base.ncodeunits(::InlineStringIssue57) = 4 +Base.lastindex(::InlineStringIssue57) = 4 +Base.isvalid(::InlineStringIssue57, i::Integer) = 0 < i < 5 +Base.iterate(::InlineStringIssue57, i::Integer=1) = i == 1 ? ('t', 2) : i == 2 ? ('r', 3) : i == 3 ? ('u', 4) : i == 4 ? ('e', 5) : nothing +Base.:(==)(::SubString{InlineStringIssue57}, x::String) = x == "true" + +@test parse(Bool, InlineStringIssue57()) + @testset "Issue 20587, T=$T" for T in Any[BigInt, Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8] T === BigInt && continue # TODO: make BigInt pass this test for s in ["", " ", " "]