From 81e245c850b18d871dbbfc6eebf21d129030ae53 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 16 Nov 2017 17:43:17 -0500 Subject: [PATCH] fix #22926, raw string escaping rules that allow writing all strings (#24621) --- NEWS.md | 6 ++++++ base/strings/io.jl | 20 +++++++++++++++----- doc/src/manual/strings.md | 18 ++++++++++++++++-- src/julia-parser.scm | 26 ++++++++++++++++++++------ test/strings/io.jl | 16 ---------------- test/syntax.jl | 31 +++++++++++++++++++++++++++++-- 6 files changed, 86 insertions(+), 31 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1ac663ea1be88..ca0345653b530 100644 --- a/NEWS.md +++ b/NEWS.md @@ -130,6 +130,12 @@ Language changes * Like `_`, variable names consisting only of underscores can be assigned, but accessing their values is deprecated ([#24221]). + * Raw string literal escaping rules have been changed to make it possible to write all strings. + The rule is that backslashes escape both quotes and other backslashes, but only when a sequence + of backslashes precedes a quote character. Thus, 2n backslashes followed by a quote encodes n + backslashes and the end of the literal while 2n+1 backslashes followed by a quote encodes n + backslashes followed by a quote character ([#22926]). + Breaking changes ---------------- diff --git a/base/strings/io.jl b/base/strings/io.jl index 8db8699b58390..c43f191585be9 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -371,15 +371,25 @@ macro b_str(s); :(Vector{UInt8}($(unescape_string(s)))); end @raw_str -> String Create a raw string without interpolation and unescaping. -The exception is that quotation marks still must be escaped. +The exception is that quotation marks still must be escaped. Backslashes +escape both quotation marks and other backslashes, but only when a sequence +of backslashes precedes a quote character. Thus, 2n backslashes followed by +a quote encodes n backslashes and the end of the literal while 2n+1 backslashes +followed by a quote encodes n backslashes followed by a quote character. # Examples ```jldoctest -julia> raw"\\"" -"\\"" +julia> println(raw"\\ \$x") +\\ \$x -julia> raw""\" " ""\" -" \\" " +julia> println(raw"\\"") +" + +julia> println(raw"\\\\\\"") +\\" + +julia> println(raw"\\\\x \\\\\\"") +\\\\x \\" ``` """ macro raw_str(s); s; end diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 77f1e48c5f689..1bca4a4c7336e 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -920,5 +920,19 @@ non-standard string literals of the form `raw"..."`. Raw string literals create ordinary `String` objects which contain the enclosed contents exactly as entered with no interpolation or unescaping. This is useful for strings which contain code or markup in other languages which use `$` or `\` as special -characters. The exception is quotation marks that still must be -escaped, e.g. `raw"\""` is equivalent to `"\""`. +characters. + +The exception is that quotation marks still must be escaped, e.g. `raw"\""` is equivalent +to `"\""`. +To make it possible to express all strings, backslashes then also must be escaped, but +only when appearing right before a quote character: + +```jldoctest +julia> println(raw"\\ \\\"") +\\ \" +``` + +Notice that the first two backslashes appear verbatim in the output, since they do not +precede a quote character. +However, the next backslash character escapes the backslash that follows it, and the +last backslash escapes a quote, since these backslashes appear before a quote. diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 05868ce38f80a..d4bb95495dd26 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -2107,12 +2107,26 @@ (loop c b e 0)) ((eqv? c #\\) - (let ((nxch (not-eof-for delim (read-char p)))) - (if (or (not raw) - (not (or (eqv? nxch delim) #;(eqv? nxch #\\)))) - (write-char #\\ b)) - (write-char nxch b) - (loop (read-char p) b e 0))) + (if raw + (let backslashes ((count 1) + (nxch (not-eof-for delim (read-char p)))) + (cond ((eqv? nxch #\\) + (backslashes (+ 1 count) + (not-eof-for delim (read-char p)))) + ((eqv? nxch delim) + (io.write b (string.rep "\\" (div count 2))) + (if (odd? count) + (begin (write-char delim b) + (loop (read-char p) b e 0)) + (loop nxch b e 0))) + (else + (io.write b (string.rep "\\" count)) + (write-char nxch b) + (loop (read-char p) b e 0)))) + (let ((nxch (not-eof-for delim (read-char p)))) + (write-char #\\ b) + (write-char nxch b) + (loop (read-char p) b e 0)))) ((and (eqv? c #\$) (not raw)) (let ((ex (parse-interpolate s))) diff --git a/test/strings/io.jl b/test/strings/io.jl index 9c8a52307664f..a631a0ad6d9d1 100644 --- a/test/strings/io.jl +++ b/test/strings/io.jl @@ -183,23 +183,7 @@ end @test Base.unindent("\n\t\n \tfoo",4) == "\n \n foo" @test Base.unindent("\n\tfoo\tbar",4) == "\n foo bar" end -@testset "raw_str macro" begin - @test raw"$" == "\$" - @test raw"\n" == "\\n" - @test raw"\t" == "\\t" - s1 = raw""" - lorem ipsum\n - $x = 1$ - """ - - s2 = """ - lorem ipsum\\n - \$x = 1\$ - """ - - @test s1 == s2 -end # issue #22021, string realloc bug with join s22021 = String["\"\"\" non_max_suppression(boxes, scores, max_output_size; iou_threshold=nothing) diff --git a/test/syntax.jl b/test/syntax.jl index 89474605affb8..73425c298c6bc 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -739,10 +739,10 @@ end # Check that string and command literals are parsed to the appropriate macros @test :(x"s") == :(@x_str "s") @test :(x"s"flag) == :(@x_str "s" "flag") -@test :(x"s\"`\x\$\\") == :(@x_str "s\"`\\x\\\$\\\\") +@test :(x"s\"`\x\$\\") == :(@x_str "s\"`\\x\\\$\\") @test :(x`s`) == :(@x_cmd "s") @test :(x`s`flag) == :(@x_cmd "s" "flag") -@test :(x`s\`"\x\$\\`) == :(@x_cmd "s`\"\\x\\\$\\\\") +@test :(x`s\`"\x\$\\`) == :(@x_cmd "s`\"\\x\\\$\\") # Check multiline command literals @test :(@cmd "multiline\ncommand\n") == :``` @@ -1171,3 +1171,30 @@ end # recursively calling expand-forms. @test [(0,0)... 1] == [0 0 1] @test Float32[(0,0)... 1] == Float32[0 0 1] + +@testset "raw_str macro" begin + @test raw"$" == "\$" + @test raw"\n" == "\\n" + @test raw"\t" == "\\t" + + s1 = raw""" + lorem ipsum\n + $x = 1$ + """ + + s2 = """ + lorem ipsum\\n + \$x = 1\$ + """ + + @test s1 == s2 + + # issue #22926 + @test raw"\\" == "\\" + @test raw"\\\\" == "\\\\" + @test raw"\"" == "\"" + @test raw"\\\"" == "\\\"" + @test raw"\\x\\" == "\\\\x\\" + @test raw"x \\\" y" == "x \\\" y" + @test raw"x \\\ y" == "x \\\\\\ y" +end