Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate string methods of character predicates, e.g. isnumber("") #20342

Merged
merged 1 commit into from
Feb 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1849,4 +1849,10 @@ end)

@deprecate FloatRange{T}(start::T, step, len, den) Base.floatrange(T, start, step, len, den)

# The string methods of the character-class predicates are deprecated:
# `isfoo(s::AbstractString)` forwards to `all(isfoo, s)`, i.e. the `Char`
# predicate applied to every character of `s`.
for pred in (:isalnum, :isalpha, :iscntrl, :isdigit, :isnumber, :isgraph,
             :islower, :isprint, :ispunct, :isspace, :isupper, :isxdigit)
    @eval @deprecate ($pred)(s::AbstractString) all($pred, s)
end

# End deprecations scheduled for 0.6
2 changes: 1 addition & 1 deletion base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
end
# TODO: avoid this allocation
groupname = SubString(repl, groupstart, prevind(repl, i))
if isnumber(groupname)
if all(isnumber,groupname)
_write_capture(io, re, parse(Int, groupname))
else
group = PCRE.substring_number_from_name(re.regex, groupname)
Expand Down
10 changes: 5 additions & 5 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -280,20 +280,20 @@ isascii(s::AbstractString) = all(isascii, s)
promote_rule{S<:AbstractString,T<:AbstractString}(::Type{S}, ::Type{T}) = String

"""
isxdigit(c::Union{Char,AbstractString}) -> Bool
isxdigit(c::Char) -> Bool

Tests whether a character is a valid hexadecimal digit, or whether this is true for all elements of a string.
Tests whether a character is a valid hexadecimal digit. Note that this does not
include `x` (as in the standard `0x` prefix).

```jldoctest
julia> isxdigit("abc")
julia> isxdigit('a')
true

julia> isxdigit("0x9")
julia> isxdigit('x')
false
```
"""
function isxdigit(c::Char)
    # Decimal digits first, then the lower- and upper-case letters a-f.
    return ('0' <= c <= '9') || ('a' <= c <= 'f') || ('A' <= c <= 'F')
end
isxdigit(s::AbstractString) = all(isxdigit, s)

## uppercase, lowercase, and titlecase transformations ##

Expand Down
74 changes: 28 additions & 46 deletions base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,21 +219,21 @@ is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
## libc character class predicates ##

"""
islower(c::Union{Char,AbstractString}) -> Bool
islower(c::Char) -> Bool

Tests whether a character is a lowercase letter, or whether this is true for all elements of
a string. A character is classified as lowercase if it belongs to Unicode category Ll,
Tests whether a character is a lowercase letter.
A character is classified as lowercase if it belongs to Unicode category Ll,
Letter: Lowercase.
"""
function islower(c::Char)
    # Lowercase means exactly Unicode general category Ll.
    return category_code(c) == UTF8PROC_CATEGORY_LL
end

# true for Unicode upper and mixed case

"""
isupper(c::Union{Char,AbstractString}) -> Bool
isupper(c::Char) -> Bool

Tests whether a character is an uppercase letter, or whether this is true for all elements
of a string. A character is classified as uppercase if it belongs to Unicode category Lu,
Tests whether a character is an uppercase letter.
A character is classified as uppercase if it belongs to Unicode category Lu,
Letter: Uppercase, or Lt, Letter: Titlecase.
"""
function isupper(c::Char)
Expand All @@ -242,36 +242,35 @@ function isupper(c::Char)
end

"""
isdigit(c::Union{Char,AbstractString}) -> Bool
isdigit(c::Char) -> Bool

Tests whether a character is a numeric digit (0-9), or whether this is true for all elements
of a string.
Tests whether a character is a numeric digit (0-9).
"""
function isdigit(c::Char)
    # Only the ASCII decimal digits '0' through '9' qualify.
    return c >= '0' && c <= '9'
end

"""
isalpha(c::Union{Char,AbstractString}) -> Bool
isalpha(c::Char) -> Bool

Tests whether a character is alphabetic, or whether this is true for all elements of a
string. A character is classified as alphabetic if it belongs to the Unicode general
Tests whether a character is alphabetic.
A character is classified as alphabetic if it belongs to the Unicode general
category Letter, i.e. a character whose category code begins with 'L'.
"""
function isalpha(c::Char)
    # Accept any category code in the range LU through LO.
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_LO
end

"""
isnumber(c::Union{Char,AbstractString}) -> Bool
isnumber(c::Char) -> Bool

Tests whether a character is numeric, or whether this is true for all elements of a string.
Tests whether a character is numeric.
A character is classified as numeric if it belongs to the Unicode general category Number,
i.e. a character whose category code begins with 'N'.
"""
function isnumber(c::Char)
    # Accept any category code in the range ND through NO.
    code = category_code(c)
    return UTF8PROC_CATEGORY_ND <= code <= UTF8PROC_CATEGORY_NO
end

"""
isalnum(c::Union{Char,AbstractString}) -> Bool
isalnum(c::Char) -> Bool

Tests whether a character is alphanumeric, or whether this is true for all elements of a
string. A character is classified as alphabetic if it belongs to the Unicode general
Tests whether a character is alphanumeric.
A character is classified as alphanumeric if it belongs to the Unicode general
category Letter or Number, i.e. a character whose category code begins with 'L' or 'N'.
"""
function isalnum(c::Char)
Expand All @@ -283,67 +282,50 @@ end
# following C++ only control characters from the Latin-1 subset return true

"""
iscntrl(c::Union{Char,AbstractString}) -> Bool
iscntrl(c::Char) -> Bool

Tests whether a character is a control character, or whether this is true for all elements
of a string. Control characters are the non-printing characters of the Latin-1 subset of Unicode.
Tests whether a character is a control character.
Control characters are the non-printing characters of the Latin-1 subset of Unicode.
"""
function iscntrl(c::Char)
    # Everything up to U+001F, then U+007F through U+009F.
    return c <= '\u1f' || '\u7f' <= c <= '\u9f'
end

"""
ispunct(c::Union{Char,AbstractString}) -> Bool
ispunct(c::Char) -> Bool

Tests whether a character belongs to the Unicode general category Punctuation, i.e. a
character whose category code begins with 'P'. For strings, tests whether this is true for
all elements of the string.
character whose category code begins with 'P'.
"""
function ispunct(c::Char)
    # Accept any category code in the range PC through PO.
    code = category_code(c)
    return UTF8PROC_CATEGORY_PC <= code <= UTF8PROC_CATEGORY_PO
end

# \u85 is the Unicode Next Line (NEL) character

"""
isspace(c::Union{Char,AbstractString}) -> Bool
isspace(c::Char) -> Bool

Tests whether a character is any whitespace character. Includes ASCII characters '\\t',
'\\n', '\\v', '\\f', '\\r', and ' ', Latin-1 character U+0085, and characters in Unicode
category Zs. For strings, tests whether this is true for all elements of the string.
category Zs.
"""
@inline function isspace(c::Char)
    # Cheap ASCII / Latin-1 checks first; the category lookup is only reached
    # for characters at or above U+00A0.
    c == ' ' && return true
    '\t' <= c <= '\r' && return true
    c == '\u85' && return true
    return '\ua0' <= c && category_code(c) == UTF8PROC_CATEGORY_ZS
end

"""
isprint(c::Union{Char,AbstractString}) -> Bool
isprint(c::Char) -> Bool

Tests whether a character is printable, including spaces, but not a control character. For
strings, tests whether this is true for all elements of the string.
Tests whether a character is printable, including spaces, but not a control character.
"""
function isprint(c::Char)
    # Accept any category code in the range LU through ZS.
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_ZS
end

# true in principle if a printer would use ink

"""
isgraph(c::Union{Char,AbstractString}) -> Bool
isgraph(c::Char) -> Bool

Tests whether a character is printable, and not a space, or whether this is true for all
elements of a string. Any character that would cause a printer to use ink should be
Tests whether a character is printable, and not a space.
Any character that would cause a printer to use ink should be
classified with `isgraph(c)==true`.
"""
function isgraph(c::Char)
    # Accept any category code in the range LU through SO.
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_SO
end

# Generate the `AbstractString` method of each character-class predicate: the
# string satisfies the predicate when every one of its characters does, so an
# empty string yields `true`.
for pred in (:isalnum, :isalpha, :iscntrl, :isdigit, :isnumber, :isgraph,
             :islower, :isprint, :ispunct, :isspace, :isupper)
    @eval ($pred)(s::AbstractString) = all($pred, s)
end

############################################################################
# iterators for grapheme segmentation

Expand Down
56 changes: 28 additions & 28 deletions test/unicode/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,34 +187,34 @@ let

end

@test isspace(" \t \n \r ")==true
@test isgraph(" \t \n \r ")==false
@test isprint(" \t \n \r ")==false
@test isalpha(" \t \n \r ")==false
@test isnumber(" \t \n \r ")==false
@test ispunct(" \t \n \r ")==false

@test isspace("ΣβΣβ")==false
@test isalpha("ΣβΣβ")==true
@test isgraph("ΣβΣβ")==true
@test isprint("ΣβΣβ")==true
@test isupper("ΣβΣβ")==false
@test islower("ΣβΣβ")==false
@test isnumber("ΣβΣβ")==false
@test iscntrl("ΣβΣβ")==false
@test ispunct("ΣβΣβ")==false

@test isnumber("23435")==true
@test isdigit("23435")==true
@test isalnum("23435")==true
@test isalpha("23435")==false
@test iscntrl( string(Char(0x0080))) == true
@test ispunct( "‡؟჻") ==true

@test isxdigit('0') == true
@test isxdigit("0") == true
@test isxdigit("a") == true
@test isxdigit("g") == false
@test all(isspace," \t \n \r ")
@test !all(isgraph," \t \n \r ")
@test !all(isprint," \t \n \r ")
@test !all(isalpha," \t \n \r ")
@test !all(isnumber," \t \n \r ")
@test !all(ispunct," \t \n \r ")

@test !all(isspace,"ΣβΣβ")
@test all(isalpha,"ΣβΣβ")
@test all(isgraph,"ΣβΣβ")
@test all(isprint,"ΣβΣβ")
@test !all(isupper,"ΣβΣβ")
@test !all(islower,"ΣβΣβ")
@test !all(isnumber,"ΣβΣβ")
@test !all(iscntrl,"ΣβΣβ")
@test !all(ispunct,"ΣβΣβ")

@test all(isnumber,"23435")
@test all(isdigit,"23435")
@test all(isalnum,"23435")
@test !all(isalpha,"23435")
@test all(iscntrl,string(Char(0x0080)))
@test all(ispunct, "‡؟჻")

@test isxdigit('0')
@test isxdigit('a')
@test !isxdigit('x')
@test !isxdigit('g')

# check utf8proc handling of CN category constants
let c_ll = 'β', c_cn = '\u038B'
Expand Down