diff --git a/base/Base.jl b/base/Base.jl index 0595cd4674da6..63921e0926636 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -173,9 +173,9 @@ include("iobuffer.jl") # strings & printing include("intfuncs.jl") include("strings/strings.jl") +include("regex.jl") include("parse.jl") include("shell.jl") -include("regex.jl") include("show.jl") include("arrayshow.jl") include("methodshow.jl") diff --git a/base/cmd.jl b/base/cmd.jl index 4890af1c4c7cd..4496834d8fbc6 100644 --- a/base/cmd.jl +++ b/base/cmd.jl @@ -102,8 +102,10 @@ shell_escape(cmd::Cmd; special::AbstractString="") = shell_escape(cmd.exec..., special=special) shell_escape_posixly(cmd::Cmd) = shell_escape_posixly(cmd.exec...) -shell_escape_winsomely(cmd::Cmd) = - shell_escape_winsomely(cmd.exec...) +escape_microsoft_c_args(cmd::Cmd) = + escape_microsoft_c_args(cmd.exec...) +escape_microsoft_c_args(io::IO, cmd::Cmd) = + escape_microsoft_c_args(io::IO, cmd.exec...) function show(io::IO, cmd::Cmd) print_env = cmd.env !== nothing diff --git a/base/shell.jl b/base/shell.jl index 85243a3e40542..e4291253ec601 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -255,60 +255,101 @@ shell_escape_posixly(args::AbstractString...) = sprint(print_shell_escaped_posixly, args...) -function print_shell_escaped_winsomely(io::IO, args::AbstractString...) - first = true - for arg in args - first || write(io, ' ') - first = false - # Quote any arg that contains a whitespace (' ' or '\t') or a double quote mark '"'. - # It's also valid to quote an arg with just a whitespace, - # but the following may be 'safer', and both implementations are valid anyways. - quotes = any(c -> c in (' ', '\t', '"'), arg) || isempty(arg) - quotes && write(io, '"') - backslashes = 0 - for c in arg - if c == '\\' - backslashes += 1 +""" + shell_escape_wincmd(s::AbstractString) + shell_escape_wincmd(io::IO, s::AbstractString) + +The unexported `shell_escape_wincmd` function escapes Windows +`cmd.exe` shell meta characters. 
It escapes `()!^<>&|` by placing a +`^` in front. An `@` is only escaped at the start of the string. Pairs +of `"` characters and the strings they enclose are passed through +unescaped. Any remaining `"` is escaped with `^` to ensure that the +number of unescaped `"` characters in the result remains even. + +Since `cmd.exe` substitutes variable references (like `%USER%`) +_before_ processing the escape characters `^` and `"`, this function +makes no attempt to escape the percent sign (`%`). + +Input strings with ASCII control characters that cannot be escaped +(NUL, CR, LF) will cause an `ArgumentError` exception. + +With an I/O stream parameter `io`, the result will be written there, +rather than returned as a string. + +See also: [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref) + +# Example +```jldoctest +julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"") +"a^^\\"^o\\"^^u^\\"" +``` +""" +function shell_escape_wincmd(io::IO, s::AbstractString) + # https://stackoverflow.com/a/4095133/1990689 + occursin(r"[\r\n\0]", s) && + throw(ArgumentError("control character unsupported by CMD.EXE")) + i = 1 + len = ncodeunits(s) + if len > 0 && s[1] == '@' + write(io, '^') + end + while i <= len + c = s[i] + if c == '"' && (j = findnext('"', s, nextind(s,i))) !== nothing + write(io, SubString(s,i,j)) + i = j + else + if c in ('"', '(', ')', '!', '^', '<', '>', '&', '|') + write(io, '^', c) else - # escape all backslashes and the following double quote - c == '"' && (backslashes = backslashes * 2 + 1) - for j = 1:backslashes - # backslashes aren't special here - write(io, '\\') - end - backslashes = 0 write(io, c) end end - # escape all backslashes, letting the terminating double quote we add below to then be interpreted as a special char - quotes && (backslashes *= 2) - for j = 1:backslashes - write(io, '\\') - end - quotes && write(io, '"') + i = nextind(s,i) end - return nothing end - +shell_escape_wincmd(s::AbstractString) = sprint(shell_escape_wincmd, s; + 
sizehint = 2*sizeof(s)) """ - shell_escaped_winsomely(args::Union{Cmd,AbstractString...})::String - -Convert the collection of strings `args` into single string suitable for passing as the argument -string for a Windows command line. Windows passes the entire command line as a single string to -the application (unlike POSIX systems, where the list of arguments are passed separately). -Many Windows API applications (including julia.exe), use the conventions of the [Microsoft C -runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments) to -split that command line into a list of strings. This function implements the inverse of such a -C runtime command-line parser. It joins command-line arguments to be passed to a Windows console -application into a command line, escaping or quoting meta characters such as space, -double quotes and backslash where needed. This may be useful in concert with the `windows_verbatim` -flag to [`Cmd`](@ref) when constructing process pipelines. + escape_microsoft_c_args(args::Union{Cmd,AbstractString...}) + escape_microsoft_c_args(io::IO, args::Union{Cmd,AbstractString...}) -# Example -```jldoctest -julia> println(shell_escaped_winsomely("A B\\", "C")) -"A B\\" C +Convert a collection of string arguments into a string that can be +passed to many Windows command-line applications. + +Microsoft Windows passes the entire command line as a single string to +the application (unlike POSIX systems, where the shell splits the +command line into a list of arguments). Many Windows API applications +(including julia.exe) use the conventions of the [Microsoft C/C++ +runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments) +to split that command line into a list of strings. + +This function implements an inverse for a parser compatible with these rules. 
+It joins command-line arguments to be passed to a Windows +C/C++/Julia application into a command line, escaping or quoting the +meta characters space, TAB, double quote and backslash where needed. + +See also: [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref) """ -shell_escape_winsomely(args::AbstractString...) = - sprint(print_shell_escaped_winsomely, args..., sizehint=(sum(length, args)) + 3*length(args)) +function escape_microsoft_c_args(io::IO, args::AbstractString...) + # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES + first = true + for arg in args + if first + first = false + else + write(io, ' ') # separator + end + if isempty(arg) || occursin(r"[ \t\"]", arg) + # Julia raw strings happen to use the same escaping convention + # as the argv[] parser in Microsoft's C runtime library. + escape_raw_string(io, arg) + else + write(io, arg) + end + end +end +escape_microsoft_c_args(args::AbstractString...) = + sprint(escape_microsoft_c_args, args...; + sizehint = (sum(sizeof.(args)) + 3*length(args))) diff --git a/base/strings/io.jl b/base/strings/io.jl index 0afddcbdefb88..48a304fd967d0 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -523,6 +523,59 @@ julia> println(raw"\\\\x \\\\\\"") """ macro raw_str(s); s; end +""" + escape_raw_string(s::AbstractString) + escape_raw_string(io, s::AbstractString) + +Convert a string into a raw-string literal. This function counts in +input string `s` for any double-quote (`"`) character the number _n_ +of preceding backslash (`\\`) characters, and then increases there +the number of backslashes from _n_ to 2_n_+1 (even for _n_ = 0). It +also doubles any sequence of backslashes at the end of the string. +Finally, it returns the entire string enclosed with added double-quote +delimiters. + +This escaping convention is used in raw strings and other non-standard +string literals. 
(It also happens to be the escaping convention +expected by the Microsoft C/C++ compiler runtime when it parses a +command-line string into the argv[] array.) + +See also: [`escape_string`](@ref) +""" +function escape_raw_string(io, str::AbstractString) + write(io, '"') + escapes = 0 + for c in str + if c == '\\' + escapes += 1 + else + if c == '"' + # if one or more backslashes are followed by + # a double quote then escape all backslashes + # and the double quote + escapes = escapes * 2 + 1 + end + while escapes > 0 + write(io, '\\') + escapes -= 1 + end + escapes = 0 + write(io, c) + end + end + # also escape any trailing backslashes, + # so they do not affect the closing quote + while escapes > 0 + write(io, '\\') + write(io, '\\') + escapes -= 1 + end + write(io, '"') +end +escape_raw_string(str::AbstractString) = sprint(escape_raw_string, str; + sizehint = lastindex(str) + 2) + + ## multiline strings ## """ diff --git a/test/spawn.jl b/test/spawn.jl index 4e6eca4515c97..0b2c0dbf1bdc3 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -677,8 +677,7 @@ if Sys.iswindows() end -# shell escaping on Windows -@testset "shell_escape_winsomely" begin +@testset "shell escaping on Windows" begin # Note argument A can be parsed both as A or "A". # We do not test that the parsing satisfies either of these conditions. # In other words, tests may fail even for valid parsing. 
@@ -686,77 +685,101 @@ end # input : # output: "" - @test Base.shell_escape_winsomely("") == "\"\"" + @test Base.escape_microsoft_c_args("") == "\"\"" - @test Base.shell_escape_winsomely("A") == "A" + @test Base.escape_microsoft_c_args("A") == "A" - @test Base.shell_escape_winsomely(`A`) == "A" + @test Base.escape_microsoft_c_args(`A`) == "A" # input : hello world # output: "hello world" - @test Base.shell_escape_winsomely("hello world") == "\"hello world\"" + @test Base.escape_microsoft_c_args("hello world") == "\"hello world\"" # input : hello world # output: "hello world" - @test Base.shell_escape_winsomely("hello\tworld") == "\"hello\tworld\"" + @test Base.escape_microsoft_c_args("hello\tworld") == "\"hello\tworld\"" # input : hello"world # output: "hello\"world" (also valid) hello\"world - @test Base.shell_escape_winsomely("hello\"world") == "\"hello\\\"world\"" + @test Base.escape_microsoft_c_args("hello\"world") == "\"hello\\\"world\"" # input : hello""world # output: "hello\"\"world" (also valid) hello\"\"world - @test Base.shell_escape_winsomely("hello\"\"world") == "\"hello\\\"\\\"world\"" + @test Base.escape_microsoft_c_args("hello\"\"world") == "\"hello\\\"\\\"world\"" # input : hello\world # output: hello\world - @test Base.shell_escape_winsomely("hello\\world") == "hello\\world" + @test Base.escape_microsoft_c_args("hello\\world") == "hello\\world" # input : hello\\world # output: hello\\world - @test Base.shell_escape_winsomely("hello\\\\world") == "hello\\\\world" + @test Base.escape_microsoft_c_args("hello\\\\world") == "hello\\\\world" # input : hello\"world # output: "hello\"world" (also valid) hello\"world - @test Base.shell_escape_winsomely("hello\\\"world") == "\"hello\\\\\\\"world\"" + @test Base.escape_microsoft_c_args("hello\\\"world") == "\"hello\\\\\\\"world\"" # input : hello\\"world # output: "hello\\\\\"world" (also valid) hello\\\\\"world - @test Base.shell_escape_winsomely("hello\\\\\"world") == "\"hello\\\\\\\\\\\"world\"" + @test 
Base.escape_microsoft_c_args("hello\\\\\"world") == "\"hello\\\\\\\\\\\"world\"" # input : hello world\ # output: "hello world\\" - @test Base.shell_escape_winsomely("hello world\\") == "\"hello world\\\\\"" + @test Base.escape_microsoft_c_args("hello world\\") == "\"hello world\\\\\"" # input : A\B # output: A\B" - @test Base.shell_escape_winsomely("A\\B") == "A\\B" + @test Base.escape_microsoft_c_args("A\\B") == "A\\B" # input : [A\, B] # output: "A\ B" - @test Base.shell_escape_winsomely("A\\", "B") == "A\\ B" + @test Base.escape_microsoft_c_args("A\\", "B") == "A\\ B" # input : A"B # output: "A\"B" - @test Base.shell_escape_winsomely("A\"B") == "\"A\\\"B\"" + @test Base.escape_microsoft_c_args("A\"B") == "\"A\\\"B\"" # input : [A B\, C] # output: "A B\\" C - @test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C" + @test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C" # input : [A "B, C] # output: "A \"B" C - @test Base.shell_escape_winsomely("A \"B", "C") == "\"A \\\"B\" C" + @test Base.escape_microsoft_c_args("A \"B", "C") == "\"A \\\"B\" C" # input : [A B\, C] # output: "A B\\" C - @test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C" + @test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C" # input :[A\ B\, C] # output: "A\ B\\" C - @test Base.shell_escape_winsomely("A\\ B\\", "C") == "\"A\\ B\\\\\" C" + @test Base.escape_microsoft_c_args("A\\ B\\", "C") == "\"A\\ B\\\\\" C" # input : [A\ B\, C, D K] # output: "A\ B\\" C "D K" - @test Base.shell_escape_winsomely("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\"" + @test Base.escape_microsoft_c_args("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\"" + + # shell_escape_wincmd + @test Base.shell_escape_wincmd("") == "" + @test Base.shell_escape_wincmd("\"") == "^\"" + @test Base.shell_escape_wincmd("\"\"") == "\"\"" + @test Base.shell_escape_wincmd("\"\"\"") == "\"\"^\"" + @test Base.shell_escape_wincmd("\"\"\"\"") == "\"\"\"\"" + @test 
Base.shell_escape_wincmd("a^\"^o\"^u\"") == "a^^\"^o\"^^u^\"" + @test Base.shell_escape_wincmd("ä^\"^ö\"^ü\"") == "ä^^\"^ö\"^^ü^\"" + @test Base.shell_escape_wincmd("@@()!^<>&|\"") == "^@@^(^)^!^^^<^>^&^|^\"" + @test_throws ArgumentError Base.shell_escape_wincmd("\0") + @test_throws ArgumentError Base.shell_escape_wincmd("\r") + @test_throws ArgumentError Base.shell_escape_wincmd("\n") + + # combined tests of shell_escape_wincmd and escape_microsoft_c_args + @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args( + "julia", "-e", "println(ARGS)", raw"He said \"a^2+b^2=c^2\"!" )) == + "julia -e println^(ARGS^) \"He said \\\"a^^2+b^^2=c^^2\\\"!\"" + + ascii95 = String(range(' ',stop='~')); # all printable ASCII characters + args = ["ab ^` c", " \" ", "\"", ascii95, ascii95, + "\"\\\"\\", "", "|", "&&", ";"]; + @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;" + end diff --git a/test/strings/io.jl b/test/strings/io.jl index 2454323551f17..0b98eb552e4d2 100644 --- a/test/strings/io.jl +++ b/test/strings/io.jl @@ -148,6 +148,7 @@ @test "aaa \\g \n" == unescape_string(str, ['g']) @test "aaa \\g \\n" == unescape_string(str, ['g', 'n']) end + @test Base.escape_raw_string(raw"\"\\\"\\-\\") == "\"\\\"\\\\\\\"\\\\-\\\\\"" end @testset "join()" begin @test join([]) == join([],",") == ""