Skip to content

Commit

Permalink
Base.transcode to replace utf8to16 and utf16to8 (#16974)
Browse files: browse the repository at this point in the history
  • Loading branch information
StefanKarpinski authored Jun 16, 2016
1 parent d634100 commit 00ba80d
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 29 deletions.
10 changes: 6 additions & 4 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,15 @@ if is_windows()
function cwstring(s::AbstractString)
bytes = String(s).data
0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
return push!(utf8to16(bytes), 0)
return push!(transcode(UInt16, bytes), 0)
end
end

# conversions between UTF-8 and UTF-16 for Windows APIs
# transcoding between data in UTF-8 and UTF-16 for Windows APIs

function utf8to16(src::Vector{UInt8})
transcode{T<:Union{UInt8,UInt16}}(::Type{T}, src::Vector{T}) = src

function transcode(::Type{UInt16}, src::Vector{UInt8})
dst = UInt16[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down Expand Up @@ -162,7 +164,7 @@ function utf8to16(src::Vector{UInt8})
return dst
end

function utf16to8(src::Vector{UInt16})
function transcode(::Type{UInt8}, src::Vector{UInt16})
dst = UInt8[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down
4 changes: 2 additions & 2 deletions base/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function access_env(onError::Function, str::AbstractString)
error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage()))
end
pop!(val) # NUL
return String(utf16to8(val))
return String(transcode(UInt8, val))
end

function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true)
Expand Down Expand Up @@ -97,7 +97,7 @@ function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}})
len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
buf = Array{UInt16}(len)
unsafe_copy!(pointer(buf), pos, len)
env = String(utf16to8(buf))
env = String(transcode(UInt8, buf))
m = match(r"^(=?[^=]+)=(.*)$"s, env)
if m === nothing
error("malformed environment entry: $env")
Expand Down
4 changes: 2 additions & 2 deletions base/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ function tempdir()
error("GetTempPath failed: $(Libc.FormatMessage())")
end
resize!(temppath,lentemppath)
return String(utf16to8(temppath))
return String(transcode(UInt8, temppath))
end
tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique)
const temp_prefix = cwstring("jl_")
Expand All @@ -216,7 +216,7 @@ function tempname(temppath::AbstractString,uunique::UInt32)
error("GetTempFileName failed: $(Libc.FormatMessage())")
end
resize!(tname,lentname)
return String(utf16to8(tname))
return String(transcode(UInt8, tname))
end
function mktemp(parent=tempdir())
filename = tempname(parent, UInt32(0))
Expand Down
4 changes: 2 additions & 2 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ export File,
import Base:
UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
nb_available, position, read, read!, readavailable, seek, seekend, show,
skip, stat, unsafe_read, unsafe_write, utf16to8, utf8to16, uv_error,
uvhandle, uvtype, write
skip, stat, unsafe_read, unsafe_write, transcode, uv_error, uvhandle,
uvtype, write

if is_windows()
import Base: cwstring
Expand Down
2 changes: 1 addition & 1 deletion base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ elseif is_windows()
len = 0
while unsafe_load(plock, len+1) != 0; len += 1; end
# get Vector{UInt16}, transcode data to UTF-8, make a String of it
s = String(utf16to8(unsafe_wrap(Array, plock, len)))
s = String(transcode(UInt8, unsafe_wrap(Array, plock, len)))
systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock))
return s
end
Expand Down
8 changes: 4 additions & 4 deletions base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

module Libc

import Base: transcode

export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc,
errno, strerror, flush_cstdio, systemsleep, time
errno, strerror, flush_cstdio, systemsleep, time, transcode
if is_windows()
export GetLastError, FormatMessage
end

import Base: utf16to8

include(string(length(Core.ARGS)>=2?Core.ARGS[2]:"","errno_h.jl")) # include($BUILDROOT/base/errno_h.jl)

## RawFD ##
Expand Down Expand Up @@ -277,7 +277,7 @@ if is_windows()
buf = Array{UInt16}(len)
unsafe_copy!(pointer(buf), p, len)
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
return String(utf16to8(buf))
return String(transcode(UInt8, buf))
end
end

Expand Down
4 changes: 2 additions & 2 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ function realpath(path::AbstractString)
systemerror(:realpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return String(utf16to8(buf))
x && return String(transcode(UInt8, buf))
end
end

Expand All @@ -150,7 +150,7 @@ function longpath(path::AbstractString)
systemerror(:longpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return String(utf16to8(buf))
x && return String(transcode(UInt8, buf))
end
end

Expand Down
24 changes: 12 additions & 12 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ whos(IOBuffer(), Tmp14173) # warm up
@test @allocated(whos(IOBuffer(), Tmp14173)) < 10000

## test transcoding between UTF-8 and UTF-16 in both directions (for Windows APIs)
import Base: utf8to16, utf16to8
import Base.Libc: transcode

# empty arrays
@test utf8to16(UInt8[]) == UInt16[]
@test utf16to8(UInt16[]) == UInt8[]
@test transcode(UInt16, UInt8[]) == UInt16[]
@test transcode(UInt8, UInt16[]) == UInt8[]

# UTF-8-like sequences
V8 = [
Expand Down Expand Up @@ -304,15 +304,15 @@ I8 = [(s,map(UInt16,s)) for s in X8]

for (X,Y,Z) in ((V8,V8,V8), (I8,V8,I8), (V8,I8,V8), (V8,V8,I8), (I8,V8,V8))
for (a8, a16) in X
@test utf8to16(a8) == a16
@test transcode(UInt16, a8) == a16
for (b8, b16) in Y
ab8 = [a8; b8]
ab16 = [a16; b16]
@test utf8to16(ab8) == ab16
@test transcode(UInt16, ab8) == ab16
for (c8, c16) in Z
abc8 = [ab8; c8]
abc16 = [ab16; c16]
@test utf8to16(abc8) == abc16
@test transcode(UInt16, abc8) == abc16
end
end
end
Expand Down Expand Up @@ -359,18 +359,18 @@ I16 = [

for (X,Y,Z) in ((V16,V16,V16), (I16,V16,I16), (V16,I16,V16), (V16,V16,I16), (I16,V16,V16))
for (a16, a8) in X
@test utf16to8(a16) == a8
@test utf8to16(a8) == a16
@test transcode(UInt8, a16) == a8
@test transcode(UInt16, a8) == a16
for (b16, b8) in Y
ab16 = [a16; b16]
ab8 = [a8; b8]
@test utf16to8(ab16) == ab8
@test utf8to16(ab8) == ab16
@test transcode(UInt8, ab16) == ab8
@test transcode(UInt16, ab8) == ab16
for (c16, c8) in Z
abc16 = [ab16; c16]
abc8 = [ab8; c8]
@test utf16to8(abc16) == abc8
@test utf8to16(abc8) == abc16
@test transcode(UInt8, abc16) == abc8
@test transcode(UInt16, abc8) == abc16
end
end
end
Expand Down

0 comments on commit 00ba80d

Please sign in to comment.