From 71f1d34851df3f9fd8deca0844c73343da1c36b6 Mon Sep 17 00:00:00 2001 From: Ian Kerins Date: Mon, 31 Aug 2020 17:51:54 -0400 Subject: [PATCH] Stop caching COPY layers Cached COPY layers are expensive in that they both need to be retrieved over the network and occupy space in the layer cache. They are unnecessary in that we already have all resources needed to execute the COPY locally, and doing so is a trivial file-system operation. This is in contrast to RUN layers, which can do arbitrary and unbounded work. The end result is that cached COPY commands were more expensive when cached, not less. Remove them. Resolves #1357 --- README.md | 2 +- pkg/commands/commands.go | 2 + pkg/commands/copy.go | 81 ------------------- pkg/commands/copy_test.go | 154 ------------------------------------- pkg/executor/build.go | 2 - pkg/executor/build_test.go | 138 +++++---------------------------- 6 files changed, 23 insertions(+), 356 deletions(-) diff --git a/README.md b/README.md index ea65a735cf..b9dfbd2edc 100644 --- a/README.md +++ b/README.md @@ -378,7 +378,7 @@ as a remote image destination: ### Caching #### Caching Layers -kaniko can cache layers created by `RUN` and `COPY` commands in a remote repository. +kaniko can cache layers created by `RUN` commands in a remote repository. Before executing a command, kaniko checks the cache for the layer. If it exists, kaniko will pull and extract the cached layer instead of executing the command. If not, kaniko will execute the command and then push the newly created layer to the cache. diff --git a/pkg/commands/commands.go b/pkg/commands/commands.go index 44139093fe..2864113ee2 100644 --- a/pkg/commands/commands.go +++ b/pkg/commands/commands.go @@ -51,6 +51,8 @@ type DockerCommand interface { RequiresUnpackedFS() bool + // Whether the output layer of this command should be cached in and + // retrieved from the layer cache. ShouldCacheOutput() bool // ShouldDetectDeletedFiles returns true if the command could delete files. diff --git a/pkg/commands/copy.go b/pkg/commands/copy.go index 72696649f8..7de8c639f8 100644 --- a/pkg/commands/copy.go +++ b/pkg/commands/copy.go @@ -17,7 +17,6 @@ limitations under the License. package commands import ( - "fmt" "os" "path/filepath" "strings" @@ -142,90 +141,10 @@ func (c *CopyCommand) RequiresUnpackedFS() bool { return true } -func (c *CopyCommand) ShouldCacheOutput() bool { - return true -} - -// CacheCommand returns true since this command should be cached -func (c *CopyCommand) CacheCommand(img v1.Image) DockerCommand { - - return &CachingCopyCommand{ - img: img, - cmd: c.cmd, - buildcontext: c.buildcontext, - extractFn: util.ExtractFile, - } -} - func (c *CopyCommand) From() string { return c.cmd.From } -type CachingCopyCommand struct { - BaseCommand - caching - img v1.Image - extractedFiles []string - cmd *instructions.CopyCommand - buildcontext string - extractFn util.ExtractFunction -} - -func (cr *CachingCopyCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error { - logrus.Infof("Found cached layer, extracting to filesystem") - var err error - - if cr.img == nil { - return errors.New(fmt.Sprintf("cached command image is nil %v", cr.String())) - } - - layers, err := cr.img.Layers() - if err != nil { - return errors.Wrapf(err, "retrieve image layers") - } - - if len(layers) != 1 { - return errors.New(fmt.Sprintf("expected %d layers but got %d", 1, len(layers))) - } - - cr.layer = layers[0] - cr.extractedFiles, err = util.GetFSFromLayers(kConfig.RootDir, layers, util.ExtractFunc(cr.extractFn), util.IncludeWhiteout()) - - logrus.Debugf("extractedFiles: %s", cr.extractedFiles) - if err != nil { - return errors.Wrap(err, "extracting fs from image") - } - - return nil -} - -func (cr *CachingCopyCommand) FilesUsedFromContext(config *v1.Config, buildArgs *dockerfile.BuildArgs) ([]string, error) { - return copyCmdFilesUsedFromContext(config, buildArgs, cr.cmd, cr.buildcontext) -} - -func (cr *CachingCopyCommand) FilesToSnapshot() []string { - f := cr.extractedFiles - logrus.Debugf("%d files extracted by caching copy command", len(f)) - logrus.Tracef("Extracted files: %s", f) - - return f -} - -func (cr *CachingCopyCommand) MetadataOnly() bool { - return false -} - -func (cr *CachingCopyCommand) String() string { - if cr.cmd == nil { - return "nil command" - } - return cr.cmd.String() -} - -func (cr *CachingCopyCommand) From() string { - return cr.cmd.From -} - func resolveIfSymlink(destPath string) (string, error) { if !filepath.IsAbs(destPath) { return "", errors.New("dest path must be abs") diff --git a/pkg/commands/copy_test.go b/pkg/commands/copy_test.go index 09d60c5c54..280ed0ec2d 100755 --- a/pkg/commands/copy_test.go +++ b/pkg/commands/copy_test.go @@ -16,7 +16,6 @@ limitations under the License. package commands import ( - "archive/tar" "fmt" "io" "io/ioutil" @@ -236,159 +235,6 @@ func Test_resolveIfSymlink(t *testing.T) { } } -func Test_CachingCopyCommand_ExecuteCommand(t *testing.T) { - tempDir := setupTestTemp() - - tarContent, err := prepareTarFixture([]string{"foo.txt"}) - if err != nil { - t.Errorf("couldn't prepare tar fixture %v", err) - } - - config := &v1.Config{} - buildArgs := &dockerfile.BuildArgs{} - - type testCase struct { - desctiption string - expectLayer bool - expectErr bool - count *int - expectedCount int - command *CachingCopyCommand - extractedFiles []string - contextFiles []string - } - testCases := []testCase{ - func() testCase { - err = ioutil.WriteFile(filepath.Join(tempDir, "foo.txt"), []byte("meow"), 0644) - if err != nil { - t.Errorf("couldn't write tempfile %v", err) - t.FailNow() - } - - c := &CachingCopyCommand{ - img: fakeImage{ - ImageLayers: []v1.Layer{ - fakeLayer{TarContent: tarContent}, - }, - }, - buildcontext: tempDir, - cmd: &instructions.CopyCommand{ - SourcesAndDest: []string{ - "foo.txt", "foo.txt", - }, - }, - } - count := 0 - tc := testCase{ - desctiption: "with valid image and valid layer", - count: &count, - expectedCount: 1, - expectLayer: true, - extractedFiles: []string{"/foo.txt"}, - contextFiles: []string{"foo.txt"}, - } - c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error { - *tc.count++ - return nil - } - tc.command = c - return tc - }(), - func() testCase { - c := &CachingCopyCommand{} - tc := testCase{ - desctiption: "with no image", - expectErr: true, - } - c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error { - return nil - } - tc.command = c - return tc - }(), - func() testCase { - c := &CachingCopyCommand{ - img: fakeImage{}, - } - c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error { - return nil - } - return testCase{ - desctiption: "with image containing no layers", - expectErr: true, - command: c, - } - }(), - func() testCase { - c := &CachingCopyCommand{ - img: fakeImage{ - ImageLayers: []v1.Layer{ - fakeLayer{}, - }, - }, - } - c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error { - return nil - } - tc := testCase{ - desctiption: "with image one layer which has no tar content", - expectErr: false, // this one probably should fail but doesn't because of how ExecuteCommand and util.GetFSFromLayers are implemented - cvgw- 2019-11-25 - expectLayer: true, - } - tc.command = c - return tc - }(), - } - - for _, tc := range testCases { - t.Run(tc.desctiption, func(t *testing.T) { - c := tc.command - err := c.ExecuteCommand(config, buildArgs) - if !tc.expectErr && err != nil { - t.Errorf("Expected err to be nil but was %v", err) - } else if tc.expectErr && err == nil { - t.Error("Expected err but was nil") - } - - if tc.count != nil { - if *tc.count != tc.expectedCount { - t.Errorf("Expected extractFn to be called %v times but was called %v times", tc.expectedCount, *tc.count) - } - for _, file := range tc.extractedFiles { - match := false - cFiles := c.FilesToSnapshot() - for _, cFile := range cFiles { - if file == cFile { - match = true - break - } - } - if !match { - t.Errorf("Expected extracted files to include %v but did not %v", file, cFiles) - } - } - - cmdFiles, err := c.FilesUsedFromContext( - config, buildArgs, - ) - if err != nil { - t.Errorf("failed to get files used from context from command %v", err) - } - - if len(cmdFiles) != len(tc.contextFiles) { - t.Errorf("expected files used from context to equal %v but was %v", tc.contextFiles, cmdFiles) - } - } - - if c.layer == nil && tc.expectLayer { - t.Error("expected the command to have a layer set but instead was nil") - } else if c.layer != nil && !tc.expectLayer { - t.Error("expected the command to have no layer set but instead found a layer") - } - }) - } -} - func TestCopyCommand_ExecuteCommand_Extended(t *testing.T) { setupDirs := func(t *testing.T) (string, string) { testDir, err := ioutil.TempDir("", "") diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 38d6471a5d..17f7753dec 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -185,8 +185,6 @@ func (s *stageBuilder) populateCompositeKey(command fmt.Stringer, files []string switch v := command.(type) { case *commands.CopyCommand: compositeKey = s.populateCopyCmdCompositeKey(command, v.From(), compositeKey) - case *commands.CachingCopyCommand: - compositeKey = s.populateCopyCmdCompositeKey(command, v.From(), compositeKey) } srcCtx := s.opts.SrcContext diff --git a/pkg/executor/build_test.go b/pkg/executor/build_test.go index 00631cccab..cfb655138b 100644 --- a/pkg/executor/build_test.go +++ b/pkg/executor/build_test.go @@ -794,89 +794,6 @@ func Test_stageBuilder_build(t *testing.T) { retrieve: true, }, }, - func() testcase { - dir, filenames := tempDirAndFile(t) - filename := filenames[0] - filepath := filepath.Join(dir, filename) - - tarContent := generateTar(t, dir, filename) - - ch := NewCompositeCache("", "") - ch.AddPath(filepath, "") - - hash, err := ch.Hash() - if err != nil { - t.Errorf("couldn't create hash %v", err) - } - copyCommandCacheKey := hash - return testcase{ - description: "copy command cache enabled and key in cache", - opts: &config.KanikoOptions{Cache: true}, - layerCache: &fakeLayerCache{ - retrieve: true, - img: fakeImage{ - ImageLayers: []v1.Layer{ - fakeLayer{ - TarContent: tarContent, - }, - }, - }, - }, - rootDir: dir, - expectedCacheKeys: []string{copyCommandCacheKey}, - // CachingCopyCommand is not pushed to the cache - pushedCacheKeys: []string{}, - commands: getCommands(dir, []instructions.Command{ - &instructions.CopyCommand{ - SourcesAndDest: []string{ - filename, "foo.txt", - }, - }, - }), - fileName: filename, - } - }(), - func() testcase { - dir, filenames := tempDirAndFile(t) - filename := filenames[0] - tarContent := []byte{} - destDir, err := ioutil.TempDir("", "baz") - if err != nil { - t.Errorf("could not create temp dir %v", err) - } - filePath := filepath.Join(dir, filename) - ch := NewCompositeCache("", "") - ch.AddPath(filePath, "") - - hash, err := ch.Hash() - if err != nil { - t.Errorf("couldn't create hash %v", err) - } - return testcase{ - description: "copy command cache enabled and key is not in cache", - opts: &config.KanikoOptions{Cache: true}, - config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}}, - layerCache: &fakeLayerCache{}, - image: fakeImage{ - ImageLayers: []v1.Layer{ - fakeLayer{ - TarContent: tarContent, - }, - }, - }, - rootDir: dir, - expectedCacheKeys: []string{hash}, - pushedCacheKeys: []string{hash}, - commands: getCommands(dir, []instructions.Command{ - &instructions.CopyCommand{ - SourcesAndDest: []string{ - filename, "foo.txt", - }, - }, - }), - fileName: filename, - } - }(), func() testcase { dir, filenames := tempDirAndFile(t) filename := filenames[0] @@ -887,8 +804,6 @@ func Test_stageBuilder_build(t *testing.T) { t.Errorf("could not create temp dir %v", err) } - filePath := filepath.Join(dir, filename) - ch := NewCompositeCache("", fmt.Sprintf("RUN foobar")) hash1, err := ch.Hash() @@ -896,17 +811,6 @@ func Test_stageBuilder_build(t *testing.T) { t.Errorf("couldn't create hash %v", err) } - ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename)) - ch.AddPath(filePath, "") - - hash2, err := ch.Hash() - if err != nil { - t.Errorf("couldn't create hash %v", err) - } - ch = NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename)) - ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename)) - ch.AddPath(filePath, "") - image := fakeImage{ ImageLayers: []v1.Layer{ fakeLayer{ @@ -940,7 +844,7 @@ COPY %s bar.txt cmds := stage.Commands return testcase{ - description: "cached run command followed by uncached copy command result in consistent read and write hashes", + description: "cached run command followed by copy command results in consistent read and write hashes", opts: &config.KanikoOptions{Cache: true}, rootDir: dir, config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}}, @@ -950,9 +854,8 @@ COPY %s bar.txt }, image: image, // hash1 is the read cachekey for the first layer - // hash2 is the read cachekey for the second layer - expectedCacheKeys: []string{hash1, hash2}, - pushedCacheKeys: []string{hash2}, + expectedCacheKeys: []string{hash1}, + pushedCacheKeys: []string{}, commands: getCommands(dir, cmds), } }(), @@ -960,28 +863,30 @@ COPY %s bar.txt dir, filenames := tempDirAndFile(t) filename := filenames[0] tarContent := generateTar(t, filename) + destDir, err := ioutil.TempDir("", "baz") if err != nil { t.Errorf("could not create temp dir %v", err) } + filePath := filepath.Join(dir, filename) - ch := NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename)) + + ch := NewCompositeCache("", fmt.Sprintf("COPY %s bar.txt", filename)) ch.AddPath(filePath, "") - hash1, err := ch.Hash() + // copy hash + _, err = ch.Hash() if err != nil { t.Errorf("couldn't create hash %v", err) } - ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename)) - ch.AddPath(filePath, "") - hash2, err := ch.Hash() + ch.AddKey(fmt.Sprintf("RUN foobar")) + + // run hash + runHash, err := ch.Hash() if err != nil { t.Errorf("couldn't create hash %v", err) } - ch = NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename)) - ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename)) - ch.AddPath(filePath, "") image := fakeImage{ ImageLayers: []v1.Layer{ @@ -993,9 +898,9 @@ COPY %s bar.txt dockerFile := fmt.Sprintf(` FROM ubuntu:16.04 -COPY %s foo.txt COPY %s bar.txt -`, filename, filename) +RUN foobar +`, filename) f, _ := ioutil.TempFile("", "") ioutil.WriteFile(f.Name(), []byte(dockerFile), 0755) opts := &config.KanikoOptions{ @@ -1012,24 +917,21 @@ COPY %s bar.txt t.Errorf("Failed to parse stages to Kaniko Stages: %s", err) } _ = ResolveCrossStageInstructions(kanikoStages) - stage := kanikoStages[0] cmds := stage.Commands return testcase{ - description: "cached copy command followed by uncached copy command result in consistent read and write hashes", + description: "copy command followed by cached run command results in consistent read and write hashes", opts: &config.KanikoOptions{Cache: true}, rootDir: dir, config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}}, layerCache: &fakeLayerCache{ - keySequence: []string{hash1}, + keySequence: []string{runHash}, img: image, }, - image: image, - // hash1 is the read cachekey for the first layer - // hash2 is the read cachekey for the second layer - expectedCacheKeys: []string{hash1, hash2}, - pushedCacheKeys: []string{hash2}, + image: image, + expectedCacheKeys: []string{runHash}, + pushedCacheKeys: []string{}, commands: getCommands(dir, cmds), } }(),