From 02c7aa4ffb8360783720abc4f22d3963155e6110 Mon Sep 17 00:00:00 2001 From: Jesse Suen Date: Mon, 19 Aug 2019 18:02:41 -0700 Subject: [PATCH] Fix issue saving outputs which overlap paths with inputs --- Gopkg.lock | 4 +- .../artifact-input-output-samedir.yaml | 101 ++++++++++++++++++ util/archive/archive.go | 8 +- workflow/common/util.go | 11 +- workflow/common/util_test.go | 26 +++++ workflow/executor/executor.go | 17 ++- workflow/executor/executor_test.go | 62 ++++++++++- workflow/validate/validate.go | 4 +- 8 files changed, 221 insertions(+), 12 deletions(-) create mode 100644 test/e2e/functional/artifact-input-output-samedir.yaml create mode 100644 workflow/common/util_test.go diff --git a/Gopkg.lock b/Gopkg.lock index 63c40153caaa..ee51680fedda 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -57,7 +57,7 @@ [[projects]] branch = "master" - digest = "1:69825fad444fa2678b4ede69e55f84c0c611e13d3cf6731022b78a583547509f" + digest = "1:992caf139336e7efda99ca252fe6f26d3b588ab7e4cd041534f7a4501009748e" name = "github.com/argoproj/pkg" packages = [ "cli", @@ -75,7 +75,7 @@ "time", ] pruneopts = "" - revision = "719976ae138a36de1f81e94a956e79a7e3a5309c" + revision = "1032539fc7f1d4c64630a4c66abcd565a65fcb64" [[projects]] digest = "1:ac2a05be7167c495fe8aaf8aaf62ecf81e78d2180ecb04e16778dc6c185c96a5" diff --git a/test/e2e/functional/artifact-input-output-samedir.yaml b/test/e2e/functional/artifact-input-output-samedir.yaml new file mode 100644 index 000000000000..292dd59fd494 --- /dev/null +++ b/test/e2e/functional/artifact-input-output-samedir.yaml @@ -0,0 +1,101 @@ +# Tests ability to capture output directories and files when it overlaps with inputs +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: artifact-input-output-samedir- +spec: + entrypoint: artifact-input-output-samedir + templates: + - name: artifact-input-output-samedir + steps: + - - name: generate + template: generate + - - name: collect + template: collect + arguments: + artifacts: + - name: samedir + from: "{{steps.generate.outputs.artifacts.dir}}" + - name: hello + from: "{{steps.generate.outputs.artifacts.hello}}" + - - name: verify + template: verify + arguments: + artifacts: + - name: outer + from: "{{steps.collect.outputs.artifacts.outer}}" + - name: inner + from: "{{steps.collect.outputs.artifacts.inner}}" + - name: hello + from: "{{steps.collect.outputs.artifacts.hello}}" + - name: coincidental-prefix + from: "{{steps.collect.outputs.artifacts.coincidental-prefix}}" + + # generate a folder structure with directories + - name: generate + script: + image: docker/whalesay:latest + command: [sh, -xe] + source: | + sleep 1 + mkdir -p /outer/inner + cowsay outer | tee /outer/outer.txt + cowsay inner | tee /outer/inner/inner.txt + cowsay hello | tee /hello.txt + outputs: + artifacts: + - name: dir + path: /outer + - name: hello + path: /hello.txt + + # test collection of artifacts where: + # 1) input and output mapped to same directory + # 2) collecting an output directory of a subdir + # 3) output happens to have same prefix of an input, but is unrelated + - name: collect + inputs: + artifacts: + - name: samedir + path: /outer + - name: hello + path: /hello.txt + container: + image: docker/whalesay:latest + command: [sh, -c] + args: [" + sleep 1 && + cowsay coincidental-prefix | tee /hello.txt-COINCIDENCAL.txt + "] + outputs: + artifacts: + - name: outer + path: /outer + - name: inner + path: /outer/inner + - name: hello + path: /hello.txt + - name: coincidental-prefix + path: /hello.txt-COINCIDENCAL.txt + + # verifies collection was successful + - name: verify + inputs: + artifacts: + - name: outer + path: /outer + - name: inner + path: /inner + - name: hello + path: /hello.txt + - name: coincidental-prefix + path: /coincidental-prefix.txt + script: + image: alpine:3.8 + command: [sh, -xe] + source: | + cat /outer/outer.txt + cat /outer/inner/inner.txt + cat /inner/inner.txt + cat /hello.txt + cat /coincidental-prefix.txt diff --git a/util/archive/archive.go b/util/archive/archive.go index 400b12667fb8..5903e549a80d 100644 --- a/util/archive/archive.go +++ b/util/archive/archive.go @@ -49,7 +49,8 @@ func TarGzToWriter(sourcePath string, w io.Writer) error { func tarDir(sourcePath string, tw *tar.Writer) error { baseName := filepath.Base(sourcePath) - return filepath.Walk(sourcePath, func(fpath string, info os.FileInfo, err error) error { + count := 0 + err := filepath.Walk(sourcePath, func(fpath string, info os.FileInfo, err error) error { if err != nil { return errors.InternalWrapError(err) } @@ -59,7 +60,8 @@ func tarDir(sourcePath string, tw *tar.Writer) error { return errors.InternalWrapError(err) } nameInArchive = filepath.Join(baseName, nameInArchive) - log.Infof("writing %s", nameInArchive) + log.Debugf("writing %s", nameInArchive) + count++ var header *tar.Header if (info.Mode() & os.ModeSymlink) != 0 { @@ -102,6 +104,8 @@ func tarDir(sourcePath string, tw *tar.Writer) error { } return nil }) + log.Infof("archived %d files/dirs in %s", count, sourcePath) + return err } func tarFile(sourcePath string, tw *tar.Writer) error { diff --git a/workflow/common/util.go b/workflow/common/util.go index dd008e959829..10c1990ffd4c 100644 --- a/workflow/common/util.go +++ b/workflow/common/util.go @@ -34,13 +34,18 @@ import ( // user specified volumeMounts in the template, and returns the deepest volumeMount // (if any). A return value of nil indicates the path is not under any volumeMount. func FindOverlappingVolume(tmpl *wfv1.Template, path string) *apiv1.VolumeMount { - if tmpl.Container == nil { + var volMounts []apiv1.VolumeMount + if tmpl.Container != nil { + volMounts = tmpl.Container.VolumeMounts + } else if tmpl.Script != nil { + volMounts = tmpl.Script.VolumeMounts + } else { return nil } var volMnt *apiv1.VolumeMount deepestLen := 0 - for _, mnt := range tmpl.Container.VolumeMounts { - if !strings.HasPrefix(path, mnt.MountPath) { + for _, mnt := range volMounts { + if path != mnt.MountPath && !strings.HasPrefix(path, mnt.MountPath+"/") { continue } if len(mnt.MountPath) > deepestLen { diff --git a/workflow/common/util_test.go b/workflow/common/util_test.go new file mode 100644 index 000000000000..2f5658501c02 --- /dev/null +++ b/workflow/common/util_test.go @@ -0,0 +1,26 @@ +package common + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + + wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" +) + +// TestFindOverlappingVolume tests logic of TestFindOverlappingVolume +func TestFindOverlappingVolume(t *testing.T) { + volMnt := corev1.VolumeMount{ + Name: "workdir", + MountPath: "/user-mount", + } + templateWithVolMount := &wfv1.Template{ + Container: &corev1.Container{ + VolumeMounts: []corev1.VolumeMount{volMnt}, + }, + } + assert.Equal(t, &volMnt, FindOverlappingVolume(templateWithVolMount, "/user-mount")) + assert.Equal(t, &volMnt, FindOverlappingVolume(templateWithVolMount, "/user-mount/subdir")) + assert.Nil(t, FindOverlappingVolume(templateWithVolMount, "/user-mount-coincidental-prefix")) +} diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go index d2b8fa4748b8..6b81a82a126e 100644 --- a/workflow/executor/executor.go +++ b/workflow/executor/executor.go @@ -386,8 +386,23 @@ func (we *WorkflowExecutor) stageArchiveFile(mainCtrID string, art *wfv1.Artifac return fileName, localArtPath, nil } +// isBaseImagePath checks if the given artifact path resides in the base image layer of the container +// versus a shared volume mount between the wait and main container func (we *WorkflowExecutor) isBaseImagePath(path string) bool { - return common.FindOverlappingVolume(&we.Template, path) == nil + // first check if path overlaps with a user-specified volumeMount + if common.FindOverlappingVolume(&we.Template, path) != nil { + return false + } + // next check if path overlaps with a shared input-artifact emptyDir mounted by argo + for _, inArt := range we.Template.Inputs.Artifacts { + if path == inArt.Path { + return false + } + if strings.HasPrefix(path, inArt.Path+"/") { + return false + } + } + return true } // SaveParameters will save the content in the specified file path as output parameter value diff --git a/workflow/executor/executor_test.go b/workflow/executor/executor_test.go index b6c7350c6265..8d68e72e8cbd 100644 --- a/workflow/executor/executor_test.go +++ b/workflow/executor/executor_test.go @@ -3,10 +3,12 @@ package executor import ( "testing" - wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" - "github.com/argoproj/argo/workflow/executor/mocks" "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/fake" + + wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo/workflow/executor/mocks" ) const ( @@ -46,3 +48,59 @@ func TestSaveParameters(t *testing.T) { assert.Nil(t, err) assert.Equal(t, *we.Template.Outputs.Parameters[0].Value, "has a newline") } + +// TestIsBaseImagePath tests logic of isBaseImagePath which determines if a path is coming from a +// base image layer versus a shared volumeMount. +func TestIsBaseImagePath(t *testing.T) { + templateWithSameDir := wfv1.Template{ + Inputs: wfv1.Inputs{ + Artifacts: []wfv1.Artifact{ + { + Name: "samedir", + Path: "/samedir", + }, + }, + }, + Container: &corev1.Container{}, + Outputs: wfv1.Outputs{ + Artifacts: []wfv1.Artifact{ + { + Name: "samedir", + Path: "/samedir", + }, + }, + }, + } + + we := WorkflowExecutor{ + Template: templateWithSameDir, + } + // 1. unrelated dir/file should be captured from base image layer + assert.True(t, we.isBaseImagePath("/foo")) + + // 2. when input and output directory is same, it should be captured from shared emptyDir + assert.False(t, we.isBaseImagePath("/samedir")) + + // 3. when output is a sub path of input dir, it should be captured from shared emptyDir + we.Template.Outputs.Artifacts[0].Path = "/samedir/inner" + assert.False(t, we.isBaseImagePath("/samedir/inner")) + + // 4. when output happens to overlap with input (in name only), it should be captured from base image layer + we.Template.Inputs.Artifacts[0].Path = "/hello.txt" + we.Template.Outputs.Artifacts[0].Path = "/hello.txt-COINCIDENCE" + assert.True(t, we.isBaseImagePath("/hello.txt-COINCIDENCE")) + + // 5. when output is under a user specified volumeMount, it should be captured from shared mount + we.Template.Inputs.Artifacts = nil + we.Template.Container.VolumeMounts = []corev1.VolumeMount{ + { + Name: "workdir", + MountPath: "/user-mount", + }, + } + we.Template.Outputs.Artifacts[0].Path = "/user-mount/some-path" + assert.False(t, we.isBaseImagePath("/user-mount")) + assert.False(t, we.isBaseImagePath("/user-mount/some-path")) + assert.False(t, we.isBaseImagePath("/user-mount/some-path/foo")) + assert.True(t, we.isBaseImagePath("/user-mount-coincidence")) +} diff --git a/workflow/validate/validate.go b/workflow/validate/validate.go index ea4cd5f83b8b..dbdb998053eb 100644 --- a/workflow/validate/validate.go +++ b/workflow/validate/validate.go @@ -705,7 +705,7 @@ func validateOutputs(scope map[string]interface{}, tmpl *wfv1.Template) error { return nil } -// validateBaseImageOutputs detects if the template contains an output from +// validateBaseImageOutputs detects if the template contains an valid output from base image layer func (ctx *templateValidationCtx) validateBaseImageOutputs(tmpl *wfv1.Template) error { switch ctx.ContainerRuntimeExecutor { case "", common.ContainerRuntimeExecutorDocker: @@ -725,7 +725,7 @@ func (ctx *templateValidationCtx) validateBaseImageOutputs(tmpl *wfv1.Template) } if tmpl.Script != nil { - for _, volMnt := range tmpl.Container.VolumeMounts { + for _, volMnt := range tmpl.Script.VolumeMounts { if strings.HasPrefix(volMnt.MountPath, out.Path+"/") { return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.artifacts.%s: %s", tmpl.Name, out.Name, errMsg) }