JuliaLang · StefanKarpinski · Mar 7, 2018 · Mar 1, 2018 · Mar 1, 2018 · Mar 2, 2018
diff --git a/NEWS.md b/NEWS.md
@@ -458,6 +458,9 @@ Library improvements
   * The function `thisind(s::AbstractString, i::Integer)` returns the largest valid index
     less or equal than `i` in the string `s` or `0` if no such index exists ([#24414]).
 
+  * `Char` is now a subtype of `AbstractChar`, and most of the functions that
+    take character arguments now accept any `AbstractChar` ([#26286]).
+
   * `Irrational` is now a subtype of `AbstractIrrational` ([#24245]).
 
   * Introduced the `empty` function, the functional pair to `empty!` which returns a new,

diff --git a/base/arrayshow.jl b/base/arrayshow.jl
@@ -39,7 +39,7 @@ methods. By default returns a string of the same width as original with a
 centered cdot, used in printing of structural zeros of structured matrices.
 Accept keyword args `c` for alternate single character marker.
 """
-function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
+function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅')
     N = length(s)
     return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
 end

diff --git a/base/boot.jl b/base/boot.jl
@@ -143,7 +143,7 @@ export
     Signed, Int, Int8, Int16, Int32, Int64, Int128,
     Unsigned, UInt, UInt8, UInt16, UInt32, UInt64, UInt128,
     # string types
-    Char, AbstractString, String, IO,
+    AbstractChar, Char, AbstractString, String, IO,
     # errors
     ErrorException, BoundsError, DivideError, DomainError, Exception,
     InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
@@ -177,7 +177,8 @@ primitive type Float32 <: AbstractFloat 32 end
 primitive type Float64 <: AbstractFloat 64 end
 
 #primitive type Bool <: Integer 8 end
-primitive type Char 32 end
+abstract type AbstractChar end
+primitive type Char <: AbstractChar 32 end
 
 primitive type Int8    <: Signed   8 end
 #primitive type UInt8   <: Unsigned 8 end
@@ -460,7 +461,7 @@ function write(io::IO, x::String)
 end
 
 show(io::IO, @nospecialize x) = ccall(:jl_static_show, Cvoid, (Ptr{Cvoid}, Any), io_pointer(io), x)
-print(io::IO, x::Char) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
+print(io::IO, x::AbstractChar) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
 print(io::IO, x::String) = (write(io, x); nothing)
 print(io::IO, @nospecialize x) = show(io, x)
 print(io::IO, @nospecialize(x), @nospecialize a...) = (print(io, x); print(io, a...))
@@ -701,9 +702,10 @@ UInt32(x::BuiltinInts)  = toUInt32(x)::UInt32
 UInt64(x::BuiltinInts)  = toUInt64(x)::UInt64
 UInt128(x::BuiltinInts) = toUInt128(x)::UInt128
 
-Char(x::Number) = Char(UInt32(x))
-Char(x::Char) = x
-(::Type{T})(x::Char) where {T<:Number} = T(UInt32(x))
+(::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
+(::Type{AbstractChar})(x::Number) = Char(x)
+(::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(UInt32(x))
+(::Type{T})(x::T) where {T<:AbstractChar} = x
 
 (::Type{T})(x::T) where {T<:Number} = x
 

diff --git a/base/char.jl b/base/char.jl
@@ -1,12 +1,50 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct InvalidCharError <: Exception
-    char::Char
+"""
+The `AbstractChar` type is the supertype of all character implementations
+in Julia.  A character represents a Unicode code point, and can be converted
+to/from `UInt32` in order to obtain the numerical value of the code point.
+These numerical values determine how characters are compared with `<` and `==`,
+for example.
+
+A given `AbstractChar` subtype may be capable of representing only a subset
+of Unicode, in which case conversion from an unsupported `UInt32` value
+may throw an error.  Conversely, the built-in [`Char`](@ref) type represents
+a *superset* of Unicode (in order to losslessly encode invalid byte streams),
+in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
+The [`isvalid`](@ref) function can be used to check which code points are
+representable in a given `AbstractChar` type.
+
+Internally, an `AbstractChar` type may use a variety of encodings.  Conversion
+to `UInt32` will not reveal this encoding because it always returns the
+Unicode value of the character.  (Typically, the raw encoding can be obtained
+via [`reinterpret`](@ref).)  Character I/O uses UTF-8 by default for all
+character types, regardless of their internal encoding.
+"""
+AbstractChar
+
+"""
+    Char(c::Union{Number,AbstractChar})
+
+`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
+of characters in Julia.  `Char` is the type used for character literals like `'x'`
+and it is also the element type of [`String`](@ref).
+
+In order to losslessly represent arbitrary byte streams stored in a `String`,
+a `Char` value may store information that cannot be converted to a Unicode
+code point — converting such a `Char` to `UInt32` will throw an error.
+The [`isvalid(c::Char)`](@ref) function can be used to query whether `c`
+represents a valid Unicode character.
+"""
+Char
+
+struct InvalidCharError{T<:AbstractChar} <: Exception
+    char::T
 end
 struct CodePointError <: Exception
     code::Integer
 end
-@noinline invalid_char(c::Char) = throw(InvalidCharError(c))
+@noinline invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
 @noinline code_point_err(u::UInt32) = throw(CodePointError(u))
 
 function ismalformed(c::Char)
@@ -24,6 +62,11 @@ function isoverlong(c::Char)
     is_overlong_enc(u)
 end
 
+# fallback: other AbstractChar types, by default, are assumed
+#           not to support malformed or overlong encodings.
+ismalformed(c::AbstractChar) = false
+isoverlong(c::AbstractChar) = false
+
 function UInt32(c::Char)
     # TODO: use optimized inline LLVM
     u = reinterpret(UInt32, c)
@@ -69,50 +112,56 @@ function Char(b::Union{Int8,UInt8})
     0 ≤ b ≤ 0x7f ? reinterpret(Char, (b % UInt32) << 24) : Char(UInt32(b))
 end
 
-convert(::Type{Char}, x::Number) = Char(x)
-convert(::Type{T}, x::Char) where {T<:Number} = T(x)
+convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char
+convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x)
+convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x)
 
-rem(x::Char, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)
+rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)
 
 typemax(::Type{Char}) = reinterpret(Char, typemax(UInt32))
 typemin(::Type{Char}) = reinterpret(Char, typemin(UInt32))
 
-size(c::Char) = ()
-size(c::Char,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
-ndims(c::Char) = 0
-ndims(::Type{Char}) = 0
-length(c::Char) = 1
-firstindex(c::Char) = 1
-lastindex(c::Char) = 1
-getindex(c::Char) = c
-getindex(c::Char, i::Integer) = i == 1 ? c : throw(BoundsError())
-getindex(c::Char, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
-first(c::Char) = c
-last(c::Char) = c
-eltype(::Type{Char}) = Char
-
-start(c::Char) = false
-next(c::Char, state) = (c, true)
-done(c::Char, state) = state
-isempty(c::Char) = false
-in(x::Char, y::Char) = x == y
+size(c::AbstractChar) = ()
+size(c::AbstractChar,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
+ndims(c::AbstractChar) = 0
+ndims(::Type{<:AbstractChar}) = 0
+length(c::AbstractChar) = 1
+firstindex(c::AbstractChar) = 1
+lastindex(c::AbstractChar) = 1
+getindex(c::AbstractChar) = c
+getindex(c::AbstractChar, i::Integer) = i == 1 ? c : throw(BoundsError())
+getindex(c::AbstractChar, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
+first(c::AbstractChar) = c
+last(c::AbstractChar) = c
+eltype(::Type{T}) where {T<:AbstractChar} = T
+
+start(c::AbstractChar) = false
+next(c::AbstractChar, state) = (c, true)
+done(c::AbstractChar, state) = state
+isempty(c::AbstractChar) = false
+in(x::AbstractChar, y::AbstractChar) = x == y
 
 ==(x::Char, y::Char) = reinterpret(UInt32, x) == reinterpret(UInt32, y)
 isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
 hash(x::Char, h::UInt) =
     hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
-widen(::Type{Char}) = Char
 
--(x::Char, y::Char) = Int(x) - Int(y)
--(x::Char, y::Integer) = Char(Int32(x) - Int32(y))
-+(x::Char, y::Integer) = Char(Int32(x) + Int32(y))
-+(x::Integer, y::Char) = y + x
+# fallbacks:
+isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
+==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
+hash(x::AbstractChar, h::UInt) = hash(Char(x), h)
+widen(::Type{T}) where {T<:AbstractChar} = T
+
+-(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
+-(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) - Int32(y))
++(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) + Int32(y))
++(x::Integer, y::AbstractChar) = y + x
 
-print(io::IO, c::Char) = (write(io, c); nothing)
+print(io::IO, c::AbstractChar) = (write(io, c); nothing)
 
 const hex_chars = UInt8['0':'9';'a':'z']
 
-function show(io::IO, c::Char)
+function show(io::IO, c::AbstractChar)
     if c <= '\\'
         b = c == '\0' ? 0x30 :
             c == '\a' ? 0x61 :
@@ -154,14 +203,14 @@ function show(io::IO, c::Char)
     return
 end
 
-function show(io::IO, ::MIME"text/plain", c::Char)
+function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
     show(io, c)
     if !ismalformed(c)
         print(io, ": ")
         if isoverlong(c)
             print(io, "[overlong] ")
             u = decode_overlong(c)
-            c = Char(u)
+            c = T(u)
         else
             u = UInt32(c)
         end

diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
@@ -209,7 +209,7 @@ is_valid_lvalue(x) = isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef)
 function is_valid_argument(x)
     if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
         (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) ||
-        isa(x, Number) || isa(x, AbstractString) || isa(x, Char) || isa(x, Tuple) ||
+        isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
         isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
         return true
     end

diff --git a/base/filesystem.jl b/base/filesystem.jl
@@ -170,6 +170,7 @@ function read(f::File, ::Type{Char})
     end
     return reinterpret(Char, c)
 end
+read(f::File, ::Type{T}) where {T<:AbstractChar} = T(read(f, Char)) # fallback
 
 function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt)
     check_open(f)

diff --git a/base/io.jl b/base/io.jl
@@ -229,7 +229,7 @@ read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
 read(io::AbstractPipe) = read(pipe_reader(io))
 readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
-readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
+readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io), arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
 readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io), target, keep, out)
@@ -303,7 +303,7 @@ read!(filename::AbstractString, a) = open(io->read!(io, a), filename)
     readuntil(filename::AbstractString, delim; keep::Bool = false)
 
 Read a string from an I/O stream or a file, up to the given delimiter.
-The delimiter can be a `UInt8`, `Char`, string, or vector.
+The delimiter can be a `UInt8`, `AbstractChar`, string, or vector.
 Keyword argument `keep` controls whether the delimiter is included in the result.
 The text is assumed to be encoded in UTF-8.
 
@@ -570,6 +570,7 @@ function write(io::IO, c::Char)
         n += 1
     end
 end
+write(io::IO, c::AbstractChar) = write(io, Char(c)) # fallback
 
 function write(io::IO, s::Symbol)
     pname = unsafe_convert(Ptr{UInt8}, s)
@@ -627,12 +628,13 @@ function read(io::IO, ::Type{Char})
     end
     return reinterpret(Char, c)
 end
+read(io::IO, ::Type{T}) where {T<:AbstractChar} = T(read(io, Char)) # fallback
 
 # readuntil_string is useful below since it has
 # an optimized method for s::IOStream
 readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))
 
-function readuntil(s::IO, delim::Char; keep::Bool=false)
+function readuntil(s::IO, delim::AbstractChar; keep::Bool=false)
     if delim ≤ '\x7f'
         return readuntil_string(s, delim % UInt8, keep)
     end
@@ -994,7 +996,7 @@ function skipchars(predicate, io::IO; linecomment=nothing)
 end
 
 """
-    countlines(io::IO; eol::Char = '\\n')
+    countlines(io::IO; eol::AbstractChar = '\\n')
 
 Read `io` until the end of the stream/file and count the number of lines. To specify a file
 pass the filename as the first argument. EOL markers other than `'\\n'` are supported by
@@ -1017,7 +1019,7 @@ julia> countlines(io, eol = '.')
 1
 ```
 """
-function countlines(io::IO; eol::Char='\n')
+function countlines(io::IO; eol::AbstractChar='\n')
     isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
     aeol = UInt8(eol)
     a = Vector{UInt8}(uninitialized, 8192)
@@ -1034,4 +1036,4 @@ function countlines(io::IO; eol::Char='\n')
     nl
 end
 
-countlines(f::AbstractString; eol::Char = '\n') = open(io->countlines(io, eol = eol), f)::Int
+countlines(f::AbstractString; eol::AbstractChar = '\n') = open(io->countlines(io, eol = eol), f)::Int
diff --git a/base/iterators.jl b/base/iterators.jl
@@ -90,7 +90,7 @@ first(r::Reverse) = last(r.itr) # and the last shall be first
 reverse(R::AbstractRange) = Base.reverse(R) # copying ranges is cheap
 reverse(G::Generator) = Generator(G.f, reverse(G.iter))
 reverse(r::Reverse) = r.itr
-reverse(x::Union{Number,Char}) = x
+reverse(x::Union{Number,AbstractChar}) = x
 reverse(p::Pair) = Base.reverse(p) # copying pairs is cheap
 
 start(r::Reverse{<:Tuple}) = length(r.itr)

diff --git a/base/parse.jl b/base/parse.jl
@@ -33,7 +33,7 @@ julia> parse(Complex{Float64}, "3.2e-1 + 4.5im")
 """
 parse(T::Type, str; base = Int)
 
-function parse(::Type{T}, c::Char; base::Integer = 36) where T<:Integer
+function parse(::Type{T}, c::AbstractChar; base::Integer = 36) where T<:Integer
     a::Int = (base <= 36 ? 10 : 36)
     2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
     d = '0' <= c <= '9' ? c-'0'    :

diff --git a/base/range.jl b/base/range.jl
@@ -917,7 +917,7 @@ in(x::Integer, r::AbstractUnitRange{<:Integer}) = (first(r) <= x) & (x <= last(r
 in(x::Real, r::AbstractRange{T}) where {T<:Integer} =
     isinteger(x) && !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
         (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)
-in(x::Char, r::AbstractRange{Char}) =
+in(x::AbstractChar, r::AbstractRange{<:AbstractChar}) =
     !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
         (mod(Int(x) - Int(first(r)), step(r)) == 0)
 

diff --git a/base/reduce.jl b/base/reduce.jl
@@ -259,7 +259,7 @@ reduce_empty(op, T) = _empty_reduce_error()
 reduce_empty(::typeof(+), T) = zero(T)
 reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
 reduce_empty(::typeof(*), T) = one(T)
-reduce_empty(::typeof(*), ::Type{Char}) = ""
+reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
@@ -307,7 +307,7 @@ different types than its inputs.
 """
 reduce_first(op, x) = x
 reduce_first(::typeof(+), x::Bool) = Int(x)
-reduce_first(::typeof(*), x::Char) = string(x)
+reduce_first(::typeof(*), x::AbstractChar) = string(x)
 
 reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
 reduce_first(::typeof(add_sum), x::SmallSigned)   = Int(x)

diff --git a/base/shell.jl b/base/shell.jl
@@ -197,7 +197,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...)
         # that any (reasonable) shell will definitely never consider them to be special
         have_single = false
         have_double = false
-        function isword(c::Char)
+        function isword(c::AbstractChar)
             if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z'
                 # word characters
             elseif c == '_' || c == '/' || c == '+' || c == '-'

diff --git a/base/show.jl b/base/show.jl
@@ -763,8 +763,8 @@ const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),
 
 ## AST decoding helpers ##
 
-is_id_start_char(c::Char) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
-is_id_char(c::Char) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
+is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
+is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
 function isidentifier(s::AbstractString)
     isempty(s) && return false
     c, rest = Iterators.peel(s)