diff --git a/base/c.jl b/base/c.jl index 97a1815b7d23c..ab643f211f646 100644 --- a/base/c.jl +++ b/base/c.jl @@ -130,18 +130,21 @@ end # transcoding between data in UTF-8 and UTF-16 for Windows APIs """ - Base.transcode(T,src::Vector{U}) + transcode(T, src) -Transcodes unicode data `src` to a different encoding, where `U` and `T` are the integers -denoting the input and output code units. Currently supported are UTF-8 and UTF-16, which -are denoted by integers `UInt8` and `UInt16`, respectively. - -NULs are handled like any other character (i.e. the output will be NUL-terminated if and -only if the `src` is). +Convert string data between Unicode encodings. `src` is either a +`String` or a `Vector{UIntXX}` of UTF-XX code units, where +`XX` is 8 or 16. `T` indicates the encoding of the return value: +`String` to return a (UTF-8 encoded) `String` or `UIntXX` +to return a `Vector{UIntXX}` of the UTF-`XX` data. """ function transcode end + transcode{T<:Union{UInt8,UInt16}}(::Type{T}, src::Vector{T}) = src transcode(::Type{Int32}, src::Vector{UInt32}) = reinterpret(Int32, src) +transcode(::Type{String}, src::String) = src # resolves the ambiguity between the two generic methods below +transcode(T, src::String) = transcode(T, src.data) +transcode(::Type{String}, src) = String(transcode(UInt8, src)) function transcode(::Type{UInt16}, src::Vector{UInt8}) dst = UInt16[] diff --git a/base/env.jl b/base/env.jl index cb21ecbe0dcf4..0f41a5bbf170e 100644 --- a/base/env.jl +++ b/base/env.jl @@ -19,7 +19,7 @@ function access_env(onError::Function, str::AbstractString) error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage())) end pop!(val) # NUL - return String(transcode(UInt8, val)) + return transcode(String, val) end function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true) @@ -97,7 +97,7 @@ function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}}) len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos) buf = Array{UInt16}(len) unsafe_copy!(pointer(buf), pos, len) - env = String(transcode(UInt8, buf)) + env = transcode(String, buf) m 
= match(r"^(=?[^=]+)=(.*)$"s, env) if m === nothing error("malformed environment entry: $env") diff --git a/base/exports.jl b/base/exports.jl index 8641e82cd8fba..d3046082fdb84 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -874,6 +874,7 @@ export strip, strwidth, summary, + transcode, ucfirst, unescape_string, uppercase, diff --git a/base/file.jl b/base/file.jl index 97aa73a31aee3..396ba739f2cd4 100644 --- a/base/file.jl +++ b/base/file.jl @@ -203,7 +203,7 @@ function tempdir() error("GetTempPath failed: $(Libc.FormatMessage())") end resize!(temppath,lentemppath) - return String(transcode(UInt8, temppath)) + return transcode(String, temppath) end tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique) const temp_prefix = cwstring("jl_") @@ -216,7 +216,7 @@ function tempname(temppath::AbstractString,uunique::UInt32) error("GetTempFileName failed: $(Libc.FormatMessage())") end resize!(tname,lentname) - return String(transcode(UInt8, tname)) + return transcode(String, tname) end function mktemp(parent=tempdir()) filename = tempname(parent, UInt32(0)) diff --git a/base/interactiveutil.jl b/base/interactiveutil.jl index ce4274d2551a1..6ac54783f30e4 100644 --- a/base/interactiveutil.jl +++ b/base/interactiveutil.jl @@ -150,7 +150,7 @@ elseif is_windows() len = 0 while unsafe_load(plock, len+1) != 0; len += 1; end # get Vector{UInt16}, transcode data to UTF-8, make a String of it - s = String(transcode(UInt8, unsafe_wrap(Array, plock, len))) + s = transcode(String, unsafe_wrap(Array, plock, len)) systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock)) return s end diff --git a/base/libc.jl b/base/libc.jl index 6943d457ebde7..8020147d42ce1 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -277,7 +277,7 @@ if is_windows() buf = Array{UInt16}(len) unsafe_copy!(pointer(buf), p, len) ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p) - return String(transcode(UInt8, buf)) + return transcode(String, buf) end end 
diff --git a/base/path.jl b/base/path.jl index 494250dc29683..d19779734913a 100644 --- a/base/path.jl +++ b/base/path.jl @@ -136,7 +136,7 @@ function realpath(path::AbstractString) systemerror(:realpath, n == 0) x = n < length(buf) # is the buffer big enough? resize!(buf, n) # shrink if x, grow if !x - x && return String(transcode(UInt8, buf)) + x && return transcode(String, buf) end end @@ -150,7 +150,7 @@ function longpath(path::AbstractString) systemerror(:longpath, n == 0) x = n < length(buf) # is the buffer big enough? resize!(buf, n) # shrink if x, grow if !x - x && return String(transcode(UInt8, buf)) + x && return transcode(String, buf) end end diff --git a/test/misc.jl b/test/misc.jl index e3ae6b72e8ce6..dea52228a7105 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -209,7 +209,6 @@ whos(IOBuffer(), Tmp14173) # warm up @test @allocated(whos(IOBuffer(), Tmp14173)) < 10000 ## test conversion from UTF-8 to UTF-16 (for Windows APIs) -import Base.Libc: transcode # empty arrays @test transcode(UInt16, UInt8[]) == UInt16[] @@ -376,6 +375,13 @@ for (X,Y,Z) in ((V16,V16,V16), (I16,V16,I16), (V16,I16,V16), (V16,V16,I16), (I16 end end +let s = "abcα🐨\0x\0" + for T in (UInt8, UInt16) + @test transcode(T, s) == transcode(T, s.data) + @test transcode(String, transcode(T, s)) == s + end +end + # clipboard functionality if is_windows() for str in ("Hello, world.", "∀ x ∃ y", "")