Skip to content

Commit

Permalink
fix #22926, raw string escaping rules that allow writing all strings (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson authored Nov 16, 2017
1 parent 0115962 commit 81e245c
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 31 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ Language changes
* Like `_`, variable names consisting only of underscores can be assigned,
but accessing their values is deprecated ([#24221]).

* Raw string literal escaping rules have been changed to make it possible to write all strings.
The rule is that backslashes escape both quotes and other backslashes, but only when a sequence
of backslashes precedes a quote character. Thus, 2n backslashes followed by a quote encode n
backslashes and the end of the literal, while 2n+1 backslashes followed by a quote encode n
backslashes followed by a quote character ([#22926]).

Breaking changes
----------------

Expand Down
20 changes: 15 additions & 5 deletions base/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -371,15 +371,25 @@ macro b_str(s); :(Vector{UInt8}($(unescape_string(s)))); end
@raw_str -> String
Create a raw string without interpolation and unescaping.
The exception is that quotation marks still must be escaped.
The exception is that quotation marks still must be escaped. Backslashes
escape both quotation marks and other backslashes, but only when a sequence
of backslashes precedes a quote character. Thus, 2n backslashes followed by
a quote encodes n backslashes and the end of the literal while 2n+1 backslashes
followed by a quote encodes n backslashes followed by a quote character.
# Examples
```jldoctest
julia> raw"\\""
"\\""
julia> println(raw"\\ \$x")
\\ \$x
julia> raw""\" " ""\"
" \\" "
julia> println(raw"\\"")
"
julia> println(raw"\\\\\\"")
\\"
julia> println(raw"\\\\x \\\\\\"")
\\\\x \\"
```
"""
macro raw_str(s)
    # No processing happens here: the string argument is returned exactly as received.
    return s
end
Expand Down
18 changes: 16 additions & 2 deletions doc/src/manual/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -920,5 +920,19 @@ non-standard string literals of the form `raw"..."`. Raw string literals create
ordinary `String` objects which contain the enclosed contents exactly as
entered with no interpolation or unescaping. This is useful for strings which
contain code or markup in other languages which use `$` or `\` as special
characters. The exception is quotation marks that still must be
escaped, e.g. `raw"\""` is equivalent to `"\""`.
characters.

The exception is that quotation marks still must be escaped, e.g. `raw"\""` is equivalent
to `"\""`.
To make it possible to express all strings, backslashes must then also be escaped, but
only when they appear immediately before a quote character:

```jldoctest
julia> println(raw"\\ \\\"")
\\ \"
```

Notice that the first two backslashes appear verbatim in the output, since they do not
precede a quote character.
However, the next backslash character escapes the backslash that follows it, and the
last backslash escapes a quote, since these backslashes appear before a quote.
26 changes: 20 additions & 6 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2107,12 +2107,26 @@
(loop c b e 0))

((eqv? c #\\)
(let ((nxch (not-eof-for delim (read-char p))))
(if (or (not raw)
(not (or (eqv? nxch delim) #;(eqv? nxch #\\))))
(write-char #\\ b))
(write-char nxch b)
(loop (read-char p) b e 0)))
(if raw
(let backslashes ((count 1)
(nxch (not-eof-for delim (read-char p))))
(cond ((eqv? nxch #\\)
(backslashes (+ 1 count)
(not-eof-for delim (read-char p))))
((eqv? nxch delim)
(io.write b (string.rep "\\" (div count 2)))
(if (odd? count)
(begin (write-char delim b)
(loop (read-char p) b e 0))
(loop nxch b e 0)))
(else
(io.write b (string.rep "\\" count))
(write-char nxch b)
(loop (read-char p) b e 0))))
(let ((nxch (not-eof-for delim (read-char p))))
(write-char #\\ b)
(write-char nxch b)
(loop (read-char p) b e 0))))

((and (eqv? c #\$) (not raw))
(let ((ex (parse-interpolate s)))
Expand Down
16 changes: 0 additions & 16 deletions test/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -183,23 +183,7 @@ end
@test Base.unindent("\n\t\n \tfoo",4) == "\n \n foo"
@test Base.unindent("\n\tfoo\tbar",4) == "\n foo bar"
end
# Check that `raw"..."` literals keep `$` and backslash sequences verbatim,
# i.e. without interpolation or unescaping.
@testset "raw_str macro" begin
# Single-line literals: the right-hand sides spell the same text with
# ordinary string escaping.
@test raw"$" == "\$"
@test raw"\n" == "\\n"
@test raw"\t" == "\\t"

# Triple-quoted raw literal ...
s1 = raw"""
lorem ipsum\n
$x = 1$
"""

# ... and the equivalent ordinary triple-quoted literal with explicit escapes.
s2 = """
lorem ipsum\\n
\$x = 1\$
"""

@test s1 == s2
end
# issue #22021, string realloc bug with join
s22021 = String["\"\"\"
non_max_suppression(boxes, scores, max_output_size; iou_threshold=nothing)
Expand Down
31 changes: 29 additions & 2 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -739,10 +739,10 @@ end
# Check that string and command literals are parsed to the appropriate macros
@test :(x"s") == :(@x_str "s")
@test :(x"s"flag) == :(@x_str "s" "flag")
@test :(x"s\"`\x\$\\") == :(@x_str "s\"`\\x\\\$\\\\")
@test :(x"s\"`\x\$\\") == :(@x_str "s\"`\\x\\\$\\")
@test :(x`s`) == :(@x_cmd "s")
@test :(x`s`flag) == :(@x_cmd "s" "flag")
@test :(x`s\`"\x\$\\`) == :(@x_cmd "s`\"\\x\\\$\\\\")
@test :(x`s\`"\x\$\\`) == :(@x_cmd "s`\"\\x\\\$\\")

# Check multiline command literals
@test :(@cmd "multiline\ncommand\n") == :```
Expand Down Expand Up @@ -1171,3 +1171,30 @@ end
# recursively calling expand-forms.
@test [(0,0)... 1] == [0 0 1]
@test Float32[(0,0)... 1] == Float32[0 0 1]

# Exercise `raw"..."` literals: contents are taken verbatim (no interpolation,
# no unescaping), except that a run of backslashes immediately before a quote
# character is interpreted as escaping.
@testset "raw_str macro" begin
# `$` and backslash sequences are kept as written.
@test raw"$" == "\$"
@test raw"\n" == "\\n"
@test raw"\t" == "\\t"

# Triple-quoted raw literal ...
s1 = raw"""
lorem ipsum\n
$x = 1$
"""

# ... and the equivalent ordinary triple-quoted literal with explicit escapes.
s2 = """
lorem ipsum\\n
\$x = 1\$
"""

@test s1 == s2

# issue #22926
# 2n backslashes directly before the closing quote yield n backslashes.
@test raw"\\" == "\\"
@test raw"\\\\" == "\\\\"
# 2n+1 backslashes directly before a quote yield n backslashes plus a literal quote.
@test raw"\"" == "\""
@test raw"\\\"" == "\\\""
# Mixed cases: a run of backslashes is only collapsed when a quote follows it;
# runs followed by any other character are kept verbatim.
@test raw"\\x\\" == "\\\\x\\"
@test raw"x \\\" y" == "x \\\" y"
@test raw"x \\\ y" == "x \\\\\\ y"
end

0 comments on commit 81e245c

Please sign in to comment.