From de040a1a7d4c341bbc2c45f68f61129c8d9c99dd Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Sat, 3 Nov 2018 00:35:25 +0100 Subject: [PATCH 1/2] Make Regex behave like a scalar in broadcasting. --- NEWS.md | 1 + base/broadcast.jl | 1 + test/regex.jl | 3 +++ 3 files changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 351276fbb0b1c..851575c3d0702 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,7 @@ Language changes returns a `BigFloat` with precision equal to `precision(BigFloat)` ([#29127]). * Parser inputs ending with a comma are now consistently treated as incomplete. Previously they were sometimes parsed as tuples, depending on whitespace ([#28506]). + * `Regex` now behaves like a scalar when used in broadcasting ([#29913]). New library functions --------------------- diff --git a/base/broadcast.jl b/base/broadcast.jl index d3567818b10e4..b4f6b0caf6536 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -625,6 +625,7 @@ broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,R broadcastable(x::Ptr) = Ref(x) broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T) broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x +broadcastable(r::Regex) = Ref(r) # Default to collecting iterables — which will error for non-iterables broadcastable(x) = collect(x) broadcastable(::Union{AbstractDict, NamedTuple}) = throw(ArgumentError("broadcasting over dictionaries and `NamedTuple`s is reserved")) diff --git a/test/regex.jl b/test/regex.jl index bb665259f5b6c..34c7c0f8c86b3 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -61,3 +61,6 @@ end # 'a' flag to disable UCP @test match(r"\w+", "Düsseldorf").match == "Düsseldorf" @test match(r"\w+"a, "Düsseldorf").match == "D" + +# Regex behaves like a scalar in broadcasting +@test occursin.(r"Hello", ["Hello", "World"]) == [true, false] From b7d7bb95b4a57d265fca9e754d9797a894b654ea Mon Sep 17 00:00:00 2001 From: Fredrik Ekre Date: Sat, 3 Nov 2018 00:37:58 +0100 Subject: [PATCH 2/2] 
Make regex.jl tests a testset. --- test/regex.jl | 106 +++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/test/regex.jl b/test/regex.jl index 34c7c0f8c86b3..223a0f15efb3c 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -1,66 +1,68 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -function collect_eachmatch(re, str; overlap=false) - [m.match for m in collect(eachmatch(re, str, overlap = overlap))] -end +@testset "regex" begin + function collect_eachmatch(re, str; overlap=false) + [m.match for m in collect(eachmatch(re, str, overlap = overlap))] + end -@test collect_eachmatch(r"a?b?", "asbd") == ["a","","b","",""] == - collect_eachmatch(r"""a?b?""", "asbd") -@test collect_eachmatch(r"a?b?", "asbd", overlap=true) == ["a","","b","",""] -@test collect_eachmatch(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"] -@test collect_eachmatch(r".\s", "x \u2200 x \u2203 y") == ["x ", "∀ ", "x ", "∃ "] -@test collect_eachmatch(r"(\w+)(\s*)", "The dark side of the moon") == - ["The ", "dark ", "side ", "of ", "the ", "moon"] -@test collect_eachmatch(r"", "") == [""] -@test collect_eachmatch(r"", "", overlap=true) == [""] -@test collect_eachmatch(r"aa", "aaaa") == ["aa", "aa"] -@test collect_eachmatch(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"] -@test collect_eachmatch(r"", "aaa") == ["", "", "", ""] -@test collect_eachmatch(r"", "aaa", overlap=true) == ["", "", "", ""] -@test collect_eachmatch(r"GCG","GCGCG") == ["GCG"] -@test collect_eachmatch(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"] + @test collect_eachmatch(r"a?b?", "asbd") == ["a","","b","",""] == + collect_eachmatch(r"""a?b?""", "asbd") + @test collect_eachmatch(r"a?b?", "asbd", overlap=true) == ["a","","b","",""] + @test collect_eachmatch(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"] + @test collect_eachmatch(r".\s", "x \u2200 x \u2203 y") == ["x ", "∀ ", "x ", "∃ "] + @test 
collect_eachmatch(r"(\w+)(\s*)", "The dark side of the moon") == + ["The ", "dark ", "side ", "of ", "the ", "moon"] + @test collect_eachmatch(r"", "") == [""] + @test collect_eachmatch(r"", "", overlap=true) == [""] + @test collect_eachmatch(r"aa", "aaaa") == ["aa", "aa"] + @test collect_eachmatch(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"] + @test collect_eachmatch(r"", "aaa") == ["", "", "", ""] + @test collect_eachmatch(r"", "aaa", overlap=true) == ["", "", "", ""] + @test collect_eachmatch(r"GCG","GCGCG") == ["GCG"] + @test collect_eachmatch(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"] -# Issue 8278 -target = """71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12l
ia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36""" -pat = r"""([\d\.]+) ([\w.-]+) ([\w.-]+) (\[.+\]) "([^"\r\n]*|[^"\r\n\[]*\[.+\][^"]+|[^"\r\n]+.[^"]+)" (\d{3}) (\d+|-) ("(?:[^"]|\")+)"? ("(?:[^"]|\")+)"?""" -match(pat, target) + # Issue 8278 + target = """71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12lia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36""" + pat = r"""([\d\.]+) 
([\w.-]+) ([\w.-]+) (\[.+\]) "([^"\r\n]*|[^"\r\n\[]*\[.+\][^"]+|[^"\r\n]+.[^"]+)" (\d{3}) (\d+|-) ("(?:[^"]|\")+)"? ("(?:[^"]|\")+)"?""" + match(pat, target) -# issue #26829 -@test map(m -> m.match, eachmatch(r"^$|\S", "ö")) == ["ö"] + # issue #26829 + @test map(m -> m.match, eachmatch(r"^$|\S", "ö")) == ["ö"] -# issue #26199 -@test map(m -> m.match, eachmatch(r"(\p{L}+)", "Tú")) == ["Tú"] -@test map(m -> m.match, eachmatch(r"(\p{L}+)", "Tú lees.")) == ["Tú", "lees"] -@test map(m -> m.match, eachmatch(r"(\p{L}+)", "¿Cuál es tu pregunta?")) == ["Cuál", "es", "tu", "pregunta"] + # issue #26199 + @test map(m -> m.match, eachmatch(r"(\p{L}+)", "Tú")) == ["Tú"] + @test map(m -> m.match, eachmatch(r"(\p{L}+)", "Tú lees.")) == ["Tú", "lees"] + @test map(m -> m.match, eachmatch(r"(\p{L}+)", "¿Cuál es tu pregunta?")) == ["Cuál", "es", "tu", "pregunta"] -# Issue 9545 (32 bit) -buf = PipeBuffer() -show(buf, r"") -@test read(buf, String) == "r\"\"" + # Issue 9545 (32 bit) + buf = PipeBuffer() + show(buf, r"") + @test read(buf, String) == "r\"\"" -# see #10994, #11447: PCRE2 allows NUL chars in the pattern -@test occursin(Regex("^a\0b\$"), "a\0b") + # see #10994, #11447: PCRE2 allows NUL chars in the pattern + @test occursin(Regex("^a\0b\$"), "a\0b") -# regex match / search string must be a String -@test_throws ArgumentError match(r"test", GenericString("this is a test")) -@test_throws ArgumentError findfirst(r"test", GenericString("this is a test")) + # regex match / search string must be a String + @test_throws ArgumentError match(r"test", GenericString("this is a test")) + @test_throws ArgumentError findfirst(r"test", GenericString("this is a test")) -# Named subpatterns -let m = match(r"(?<a>.)(.)(?<b>.)", "xyz") - @test (m[:a], m[2], m["b"]) == ("x", "y", "z") - @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")" -end + # Named subpatterns + let m = match(r"(?<a>.)(.)(?<b>.)", "xyz") + @test (m[:a], m[2], m["b"]) == ("x", "y", "z") + @test sprint(show, m) == 
"RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")" + end -# Backcapture reference in substitution string -@test replace("abcde", r"(..)(?P<x>d)" => s"\g<x>xy\\\1") == "adxy\\bce" -@test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>") + # Backcapture reference in substitution string + @test replace("abcde", r"(..)(?P<x>d)" => s"\g<x>xy\\\1") == "adxy\\bce" + @test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>") -# Proper unicode handling -@test match(r"∀∀", "∀x∀∀∀").match == "∀∀" + # Proper unicode handling + @test match(r"∀∀", "∀x∀∀∀").match == "∀∀" -# 'a' flag to disable UCP -@test match(r"\w+", "Düsseldorf").match == "Düsseldorf" -@test match(r"\w+"a, "Düsseldorf").match == "D" + # 'a' flag to disable UCP + @test match(r"\w+", "Düsseldorf").match == "Düsseldorf" + @test match(r"\w+"a, "Düsseldorf").match == "D" -# Regex behaves like a scalar in broadcasting -@test occursin.(r"Hello", ["Hello", "World"]) == [true, false] + # Regex behaves like a scalar in broadcasting + @test occursin.(r"Hello", ["Hello", "World"]) == [true, false] +end