Skip to content

Commit

Permalink
Add string trimming (#, %, ##, %%) in variable expansion
Browse files Browse the repository at this point in the history
Review comments and edge cases

- the `${}` parser handles escapes, but needs to preserve them for `#`/`%`
  - but `\}` needs to be de-escaped
- reversing strings needs to handle escapes, e.g. `a\*c` -> `c\*a`
- build the regex with a scanner, not QuoteMeta+StringReplace
- add more complicated cases to the tests

Separate out + unit test helper functions

Add trim test to dockerfile_test

Signed-off-by: Tristan Stenner <ts@ppi.de>
  • Loading branch information
tstenner committed Oct 24, 2023
1 parent 49c7f9d commit 351fc8b
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 12 deletions.
14 changes: 8 additions & 6 deletions frontend/dockerfile/dockerfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,18 +247,20 @@ func testDefaultEnvWithArgs(t *testing.T, sb integration.Sandbox) {
f := getFrontend(t, sb)

dockerfile := []byte(`
FROM busybox AS build
ARG image=idlebox
FROM busy${image#idle} AS build
ARG my_arg
ENV my_arg "my_arg=${my_arg:-def_val}"
ENV my_trimmed_arg "${my_arg%%e*}"
COPY myscript.sh myscript.sh
RUN ./myscript.sh $my_arg
RUN ./myscript.sh $my_arg $my_trimmed_arg
FROM scratch
COPY --from=build /out /out
`)

script := []byte(`
#!/usr/bin/env sh
echo -n $my_arg $1 > /out
echo -n $my_arg $* > /out
`)

dir := integration.Tmpdir(
Expand All @@ -278,9 +280,9 @@ echo -n $my_arg $1 > /out
frontendAttrs map[string]string
expected string
}{
{"nil", nil, "my_arg=def_val my_arg=def_val"},
{"empty", map[string]string{"build-arg:my_arg": ""}, "my_arg=def_val my_arg=def_val"},
{"override", map[string]string{"build-arg:my_arg": "override"}, "my_arg=override my_arg=override"},
{"nil", nil, "my_arg=def_val my_arg=def_val my_arg=d"},
{"empty", map[string]string{"build-arg:my_arg": ""}, "my_arg=def_val my_arg=def_val my_arg=d"},
{"override", map[string]string{"build-arg:my_arg": "override"}, "my_arg=override my_arg=override my_arg=ov"},
} {
t.Run(x.name, func(t *testing.T) {
_, err = f.Solve(sb.Context(), c, client.SolveOpt{
Expand Down
37 changes: 35 additions & 2 deletions frontend/dockerfile/docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,17 +287,50 @@ modifiers as specified below:
will be that value. If `variable` is not set then `word` will be the result.
- `${variable:+word}` indicates that if `variable` is set then `word` will be
the result, otherwise the result is the empty string.
- `${variable#pattern}` removes the shortest match of `pattern` from `variable`,
seeking from the start of the string.

```bash
str=foobarbaz echo ${str#f*b} # arbaz
```

- `${variable##pattern}` removes the longest match of `pattern` from `variable`,
seeking from the start of the string.

```bash
str=foobarbaz echo ${str##f*b} # az
```

- `${variable%pattern}` removes the shortest match of `pattern` from `variable`,
seeking backwards from the end of the string.

```bash
string=foobarbaz echo ${string%b*} # foobar
```

- `${variable%%pattern}` removes the longest match of `pattern` from `variable`,
seeking backwards from the end of the string.

```bash
string=foobarbaz echo ${string%%b*} # foo
```

In all cases, `word` can be any string, including additional environment
variables.

Escaping is possible by adding a `\` before the variable: `\$foo` or `\${foo}`,
`pattern` is a glob pattern where `?` matches any single character
and `*` any number of characters (including zero). To match literal `?` and `*`,
use a backslash escape: `\?` and `\*`.

You can escape whole variable names by adding a `\` before the variable: `\$foo` or `\${foo}`,
for example, will translate to `$foo` and `${foo}` literals respectively.
Example (parsed representation is displayed after the `#`):
```dockerfile
FROM busybox
ARG IMAGE=busybox:latest
# FROM busybox:stable
FROM ${IMAGE%:*}:stable
ENV FOO=/bar
WORKDIR ${FOO} # WORKDIR /bar
ADD . $FOO # ADD . /bar
Expand Down
138 changes: 134 additions & 4 deletions frontend/dockerfile/shell/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package shell
import (
"bytes"
"fmt"
"regexp"
"strings"
"text/scanner"
"unicode"
Expand Down Expand Up @@ -100,7 +101,7 @@ type shellWord struct {
}

func (sw *shellWord) process(source string) (string, []string, error) {
word, words, err := sw.processStopOn(scanner.EOF)
word, words, err := sw.processStopOn(scanner.EOF, sw.rawEscapes)
if err != nil {
err = errors.Wrapf(err, "failed to process %q", source)
}
Expand Down Expand Up @@ -154,7 +155,7 @@ func (w *wordsStruct) getWords() []string {

// Process the word, starting at 'pos', and stop when we get to the
// end of the word or the 'stopChar' character
func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
func (sw *shellWord) processStopOn(stopChar rune, rawEscapes bool) (string, []string, error) {
var result bytes.Buffer
var words wordsStruct

Expand All @@ -166,6 +167,14 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
charFuncMapping['"'] = sw.processDoubleQuote
}

// temporarily set sw.rawEscapes if needed
if rawEscapes != sw.rawEscapes {
sw.rawEscapes = rawEscapes
defer func() {
sw.rawEscapes = !rawEscapes
}()
}

for sw.scanner.Peek() != scanner.EOF {
ch := sw.scanner.Peek()

Expand Down Expand Up @@ -351,8 +360,9 @@ func (sw *shellWord) processDollar() (string, error) {
ch = sw.scanner.Next()
chs += string(ch)
fallthrough
case '+', '-', '?':
word, _, err := sw.processStopOn('}')
case '+', '-', '?', '#', '%':
rawEscapes := ch == '#' || ch == '%'
word, _, err := sw.processStopOn('}', rawEscapes)
if err != nil {
if sw.scanner.Peek() == scanner.EOF {
return "", errors.New("syntax error: missing '}'")
Expand Down Expand Up @@ -394,6 +404,18 @@ func (sw *shellWord) processDollar() (string, error) {
return "", errors.Errorf("%s: %s", name, message)
}
return value, nil
case '%', '#':
// %/# matches the shortest pattern expansion, %%/## the longest
greedy := false
if word[0] == byte(ch) {
greedy = true
word = word[1:]
}

if ch == '%' {
return trimSuffix(word, value, greedy)
}
return trimPrefix(word, value, greedy)
default:
return "", errors.Errorf("unsupported modifier (%s) in substitution", chs)
}
Expand Down Expand Up @@ -472,3 +494,111 @@ func BuildEnvs(env []string) map[string]string {

return envs
}

// convertShellPatternToRegex translates a shell-style wildcard pattern into a
// compiled regular expression that is anchored at the start of the input.
//
// Translation rules:
//   - `*` becomes `.*` when greedy is set (longest match) and `.*?`
//     otherwise (shortest match)
//   - `?` becomes `.`, i.e. exactly one character
//   - `\*`, `\?` and `\\` stand for the literal character; every other escape
//     is rejected with an error, except `\}`, whose backslash is dropped
//     because it only protects the brace inside `${}`
//   - regular expression metacharacters are quoted so they match literally
//
// Bracket expressions (`[]`) from the POSIX pattern matching notation are not
// supported, see
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
//
// NOTE: the expression is compiled without the `s` flag, so the wildcards do
// not match newline characters.
func convertShellPatternToRegex(pattern string, greedy bool) (*regexp.Regexp, error) {
	var src scanner.Scanner
	src.Init(strings.NewReader(pattern))

	// a lazy quantifier yields the shortest match, a greedy one the longest
	star := ".*?"
	if greedy {
		star = ".*"
	}

	var expr strings.Builder
	expr.Grow(len(pattern) + 4)
	// only match at the beginning of the string
	expr.WriteByte('^')

	for r := src.Next(); r != scanner.EOF; r = src.Next() {
		switch r {
		case '*':
			expr.WriteString(star)
		case '?':
			expr.WriteByte('.')
		case '\\':
			if src.Peek() == '}' {
				// `\}` only exists to keep the brace from closing `${}`:
				// drop the backslash, the brace itself is quoted by the
				// next iteration
				continue
			}
			escaped := src.Next()
			if escaped != '*' && escaped != '?' && escaped != '\\' {
				return nil, errors.Errorf("invalid escape '\\%c'", escaped)
			}
			expr.WriteRune('\\')
			expr.WriteRune(escaped)
		case '[', ']', '{', '}', '.', '+', '(', ')', '|', '^', '$':
			// regex metacharacters; quoting the closing variants is
			// optional but done for consistency
			expr.WriteByte('\\')
			expr.WriteRune(r)
		default:
			expr.WriteRune(r)
		}
	}
	return regexp.Compile(expr.String())
}

// trimPrefix removes the leading part of value that matches the shell
// pattern word and returns the remainder. With greedy set the longest
// matching prefix is removed, otherwise the shortest one. If the pattern does
// not match at the start of value, value is returned unchanged.
//
// An error is returned when word is not a valid pattern; the underlying cause
// stays attached to the returned error.
func trimPrefix(word, value string, greedy bool) (string, error) {
	re, err := convertShellPatternToRegex(word, greedy)
	if err != nil {
		// Wrapf renders the same "<message>: <cause>" text as formatting the
		// cause with %s would, but keeps the cause itself available
		return "", errors.Wrapf(err, "invalid pattern (%s) in substitution", word)
	}

	loc := re.FindStringIndex(value)
	if loc == nil {
		// nothing to trim
		return value, nil
	}
	// the expression is anchored with ^, so everything up to the end of the
	// match is the prefix to drop
	return value[loc[1]:], nil
}

// reversePattern reverses a shell pattern rune by rune while keeping every
// backslash escape in front of the character it escapes, i.e. a\*c -> c\*a.
// A backslash that is the very last rune has nothing to escape and is moved
// like any ordinary character.
func reversePattern(pattern string) string {
	in := []rune(pattern)
	n := len(in)
	out := make([]rune, n)

	// out is filled from the back; w is the index of the last slot written
	w := n
	for r := 0; r < n; {
		if in[r] == '\\' && r+1 < n {
			// copy the backslash together with the escaped rune so the pair
			// keeps its order in the reversed pattern
			w -= 2
			out[w], out[w+1] = in[r], in[r+1]
			r += 2
			continue
		}
		w--
		out[w] = in[r]
		r++
	}
	return string(out)
}

// reverseString returns str with its runes in reverse order. The reversal
// works on runes rather than bytes, so multi-byte characters stay intact.
func reverseString(str string) string {
	runes := []rune(str)
	// swap the outermost pair and move both ends towards the middle
	for lo, hi := 0, len(runes)-1; lo < hi; lo, hi = lo+1, hi-1 {
		runes[lo], runes[hi] = runes[hi], runes[lo]
	}
	return string(runes)
}

func trimSuffix(pattern, word string, greedy bool) (string, error) {
// regular expressions can't handle finding the shortest rightmost
// string so we reverse both search space and pattern to convert it
// to a leftmost search in both cases
pattern = reversePattern(pattern)
word = reverseString(word)
str, err := trimPrefix(pattern, word, greedy)
if err != nil {
return "", err
}
return reverseString(str), nil
}
84 changes: 84 additions & 0 deletions frontend/dockerfile/shell/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,45 @@ import (
"github.com/stretchr/testify/require"
)

// TestConvertShellPatternToRegex checks the regular expression source that is
// generated for valid shell patterns, in greedy as well as non-greedy mode,
// and that malformed escapes are rejected in both modes.
func TestConvertShellPatternToRegex(t *testing.T) {
	cases := []struct {
		pattern  string
		greedy   bool
		expected string
	}{
		{"*", true, "^.*"},
		// non-greedy mode only changes how `*` is translated
		{"*", false, "^.*?"},
		{"?", true, "^."},
		{"?", false, "^."},
		{"\\*", true, "^\\*"},
		{"\\*", false, "^\\*"},
		{"(()[]{\\}^$.\\*\\?|\\\\", true, "^\\(\\(\\)\\[\\]\\{\\}\\^\\$\\.\\*\\?\\|\\\\"},
		{"a*b?c", true, "^a.*b.c"},
		{"a*b?c", false, "^a.*?b.c"},
	}
	for _, c := range cases {
		res, err := convertShellPatternToRegex(c.pattern, c.greedy)
		require.NoError(t, err, "pattern %q (greedy=%v)", c.pattern, c.greedy)
		require.Equal(t, c.expected, res.String(), "pattern %q (greedy=%v)", c.pattern, c.greedy)
	}

	// a lone trailing backslash and escapes of ordinary characters are errors
	invalid := []string{
		"\\", "\\x", "\\\\\\",
	}
	for _, pattern := range invalid {
		for _, greedy := range []bool{true, false} {
			_, err := convertShellPatternToRegex(pattern, greedy)
			require.Error(t, err, "pattern %q (greedy=%v)", pattern, greedy)
		}
	}
}

// TestReverseString verifies that strings are reversed rune by rune, so
// multi-byte characters survive the reversal.
func TestReverseString(t *testing.T) {
	for _, c := range []struct{ input, expected string }{
		{input: "54321", expected: "12345"},
		{input: "🖖🚀👽", expected: "👽🚀🖖"},
	} {
		require.Equal(t, c.expected, reverseString(c.input))
	}
}

// TestReversePattern verifies that patterns are reversed while each backslash
// escape stays in front of the character it escapes.
func TestReversePattern(t *testing.T) {
	type testCase struct {
		pattern  string
		expected string
	}
	for _, c := range []testCase{
		{pattern: "a\\*c", expected: "c\\*a"},
		{pattern: "\\\\\\ab", expected: "b\\a\\\\"},
		{pattern: "ab\\", expected: "\\ba"},
		{pattern: "👽\\🚀🖖", expected: "🖖\\🚀👽"},
		{pattern: "\\\\b", expected: "b\\\\"},
	} {
		actual := reversePattern(c.pattern)
		require.Equal(t, c.expected, actual)
	}
}

func TestShellParserMandatoryEnvVars(t *testing.T) {
var newWord string
var err error
Expand Down Expand Up @@ -358,6 +397,51 @@ func TestProcessWithMatches(t *testing.T) {
},
expectedErr: true,
},
{
// special characters in regular expressions
// } needs to be escaped so it doesn't match the
// closing brace of ${}
input: "${FOO#()[]{\\}^$.\\*\\?|\\\\}",
envs: map[string]string{"FOO": "()[]{}^$.*?|\\x"},
expected: "x",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO%%\\**}",
envs: map[string]string{"FOO": "xx**"},
expected: "xx",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO#*x*y}",
envs: map[string]string{"FOO": "xxyy"},
expected: "y",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO##*x}",
envs: map[string]string{"FOO": "xxyy"},
expected: "yy",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO#?\\?}",
envs: map[string]string{"FOO": "???y"},
expected: "?y",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${ABC:-.}${FOO%x}${ABC:-.}",
envs: map[string]string{"FOO": "xxyy"},
expected: ".xxyy.",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO%%\\**\\*}",
envs: map[string]string{"FOO": "a***yy*"},
expected: "a",
matches: map[string]struct{}{"FOO": {}},
},
}

for _, c := range tc {
Expand Down

0 comments on commit 351fc8b

Please sign in to comment.