diff --git a/NEWS.md b/NEWS.md
index d5fb7f049c57e..905a77861d547 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -205,6 +205,11 @@ Library improvements
   * Mutating versions of `randperm` and `randcycle` have been added:
     `randperm!` and `randcycle!` ([#22723]).
 
+  * `islower`, `isupper` for strings have been un-deprecated in favor of a refined behavior:
+    e.g. `islower(s)` returns `true` if all letters in `s` are lowercase, and if there
+    is at least one letter in `s` (the old behavior was `all(islower, s)`); also, a new
+    function `istitle` is provided.
+
 Compiler/Runtime improvements
 -----------------------------
 
diff --git a/base/deprecated.jl b/base/deprecated.jl
index fa1bf8a1c14e9..e2ab56407a135 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -1273,7 +1273,7 @@ function (::Type{Matrix})()
 end
 
 for name in ("alnum", "alpha", "cntrl", "digit", "number", "graph",
-             "lower", "print", "punct", "space", "upper", "xdigit")
+             "print", "punct", "space", "xdigit")
     f = Symbol("is",name)
     @eval @deprecate ($f)(s::AbstractString) all($f, s)
 end
diff --git a/base/exports.jl b/base/exports.jl
index 96d860e7d6bb7..42410d13decc8 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -772,6 +772,7 @@ export
     ispunct,
     isspace,
     isupper,
+    istitle,
     isvalid,
     isxdigit,
     join,
diff --git a/base/strings/utf8proc.jl b/base/strings/utf8proc.jl
index 2cf9bfdb51d24..52c97b8f45213 100644
--- a/base/strings/utf8proc.jl
+++ b/base/strings/utf8proc.jl
@@ -9,7 +9,7 @@ export isgraphemebreak, category_code, category_abbrev, category_string
 
 # also exported by Base:
 export normalize_string, graphemes, is_assigned_char, charwidth, isvalid,
-    islower, isupper, isalpha, isdigit, isnumber, isalnum,
+    islower, isupper, istitle, isalpha, isdigit, isnumber, isalnum,
     iscntrl, ispunct, isspace, isprint, isgraph
 
 # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
@@ -265,7 +265,31 @@ julia> islower('❤')
 false
 ```
 """
-islower(c::Char) = (category_code(c) == UTF8PROC_CATEGORY_LL)
+islower(c::Char) = islower(category_code(c))
+islower(ccode::Int32) = ccode == UTF8PROC_CATEGORY_LL
+
+"""
+    islower(s::AbstractString) -> Bool
+
+Tests whether all letters in `s` are lowercase, and that
+`s` contains at least one letter.
+
+# Examples
+```jldoctest
+julia> islower("let's see...")
+true
+
+julia> islower("Julia")
+false
+
+julia> islower("12 ÷ 3 == 4")
+false
+
+julia> islower(lowercase(randstring())) # very unlikely to be false
+true
+```
+"""
+islower(s::AbstractString) = iscasegood(s, islower, isupper)
 
 # true for Unicode upper and mixed case
 
@@ -288,9 +312,91 @@ julia> isupper('❤')
 false
 ```
 """
-function isupper(c::Char)
-    ccode = category_code(c)
-    return ccode == UTF8PROC_CATEGORY_LU || ccode == UTF8PROC_CATEGORY_LT
+isupper(c::Char) = isupper(category_code(c))
+isupper(ccode::Int32) = ccode == UTF8PROC_CATEGORY_LU || ccode == UTF8PROC_CATEGORY_LT
+
+"""
+    isupper(s::AbstractString) -> Bool
+
+Tests whether all letters in `s` are uppercase, and that
+`s` contains at least one letter.
+
+# Examples
+```jldoctest
+julia> isupper("JULIACON 2017")
+true
+
+julia> isupper("Julia")
+false
+
+julia> isupper("12 ÷ 3 == 4")
+false
+
+julia> isupper(uppercase(randstring())) # very unlikely to be false
+true
+```
+"""
+isupper(s::AbstractString) = iscasegood(s, isupper, islower)
+
+# Shared implementation of `islower(::AbstractString)` and `isupper(::AbstractString)`.
+# `good` and `bad` are predicates on a character's category code: the result is `false`
+# as soon as one character satisfies `bad`, and otherwise `true` only if at least one
+# character satisfies `good` (so strings without any cased letter yield `false`).
+function iscasegood(s::AbstractString, good, bad)
+    onecased = false
+    for c in s
+        ccode = category_code(c)
+        bad(ccode) && return false
+        # skip the `good` test once a character of the wanted case has been seen
+        onecased || good(ccode) && (onecased = true)
+    end
+    return onecased
+end
+
+"""
+    istitle(s::AbstractString, strict=true) -> Bool
+
+Tests whether all words in `s` have their first letter in uppercase, and that
+`s` contains at least one cased letter; if `strict` is true, all remaining
+letters must be in lowercase. Any character that is neither lowercase nor
+uppercase ends the current word.
+
+# Examples
+```jldoctest
+julia> istitle("The Julia Programming Language")
+true
+
+julia> istitle("ISS - International Space Station")
+false
+
+julia> istitle("ISS - International Space Station", false)
+true
+
+julia> istitle(titlecase(randstring())) # very unlikely to be false
+true
+```
+"""
+function istitle(s::AbstractString, strict::Bool=true)
+    onecased = false
+    startword = true # whether the next cased letter would be the first of its word
+    for c in s
+        ccode = category_code(c)
+        if islower(ccode)
+            # a word must not start with a lowercase letter
+            startword && return false
+            onecased = true
+            startword = false
+        elseif isupper(ccode)
+            # an uppercase letter inside a word is only tolerated when not `strict`
+            strict && !startword && return false
+            onecased = true
+            startword = false
+        else
+            # a character that is neither lowercase nor uppercase ends the word
+            startword = true
+        end
+    end
+    return onecased
 end
 
 """
diff --git a/test/unicode/utf8proc.jl b/test/unicode/utf8proc.jl
index d06106a9abefc..f3eee8a31f09a 100644
--- a/test/unicode/utf8proc.jl
+++ b/test/unicode/utf8proc.jl
@@ -320,3 +320,20 @@ end
     @test eltype(g) == SubString{String}
     @test collect(g) == ["1","2","3","α","5"]
 end
+
+@testset "islower, isupper, istitle for strings" begin
+    for f in islower, isupper, istitle
+        @test !f("")
+        @test !f("; == :")
+    end
+    @test islower(" aa; b => ab")
+    @test isupper(" AA; B => AB")
+    @test istitle(" Aa; B => Ab")
+    @test istitle("The Julia Programming Language")
+    @test !istitle("ISS - International Space Station")
+    @test istitle("ISS - International Space Station", false)
+    str = randstring(['a':'z'; 'A':'Z'; '0':'1'; collect("; .-"^10);], 100)
+    @test str |> lowercase |> islower
+    @test str |> uppercase |> isupper
+    @test str |> titlecase |> istitle
+end