From 219729c0419420e4981c5c095159eee98e0f34eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= 
Date: Tue, 31 Oct 2017 18:53:20 +0100
Subject: [PATCH] make next behave like nextind for String

---
Review notes (text between "---" and the diffstat is ignored by git-am):

* base/strings/string.jl: when fewer than `trailing` bytes remain after the
  lead byte at `i`, the new code scans bytes (i+1):l and returns
  ('\ufffd', j) for the first byte `j` that is not a valid continuation,
  or ('\ufffd', l+1) if every remaining byte is a continuation byte.
  In the decoding loop, the first byte that is not a valid continuation
  ends the character early with ('\ufffd', i).
* Every return in this code path is a (Char, next-index) tuple; the early
  return inside the scan loop therefore needs the comma between '\ufffd'
  and `j` (without it the line does not parse).
* test/strings/basic.jl: the new testset walks a string built from "1"
  followed by 2^16 random bytes and checks that the index returned by
  `next` equals the one returned by `nextind` at every step.
* NOTE(review): line breaks and leading whitespace in this patch were
  reconstructed from a single-line copy; confirm the context-line
  indentation against the target tree before applying.

 base/strings/string.jl | 20 +++++++++++++++-----
 test/strings/basic.jl  | 14 ++++++++++++++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/base/strings/string.jl b/base/strings/string.jl
index 45d1a05b352ca..5fdf82c8764e6 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -231,12 +231,22 @@ end
     end
     trailing = utf8_trailing[b + 1]
     if l < i + trailing
-        return '\ufffd', i+1
-    end
-    c::UInt32 = 0
-    @inbounds for j = 1:(trailing + 1)
+        for j in (i+1):l
+            @inbounds if !is_valid_continuation(codeunit(s, j))
+                return '\ufffd', j
+            end
+        end
+        return '\ufffd', l+1
+    end
+    @inbounds c::UInt32 = codeunit(s, i)
+    i += 1
+    @inbounds for j = 1:trailing
+        b = codeunit(s, i)
+        if !is_valid_continuation(b)
+            return '\ufffd', i
+        end
         c <<= 6
-        c += codeunit(s, i)
+        c += b
         i += 1
     end
     c -= utf8_offset[trailing + 1]
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 0c7cdb54824ae..8fe6b07e1cc7e 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -198,10 +198,24 @@ end
     @test lcfirst("")==""
     @test lcfirst("*")=="*"
 end
+
+# test next for invalid UTF-8 AbstractStrings
+@testset "next for invalid UTF-8" begin
+    srand(1)
+    let s = "1"*String(rand(0x00:0xff, 2^16)), i = 1, j = 1
+        while !done(s, i)
+            c, i = next(s, i)
+            j = nextind(s, j)
+            @test i == j
+        end
+    end
+end
+
 # test AbstractString functions at beginning of string.jl
 struct tstStringType <: AbstractString
     data::Array{UInt8,1}
 end
+
 @testset "AbstractString functions" begin
     tstr = tstStringType(Vector{UInt8}("12"))
     @test_throws ErrorException endof(tstr)