From 2dce6f709724e063ccbcab2edde12bc1857f0e17 Mon Sep 17 00:00:00 2001 From: Jon Johnson Date: Thu, 3 Oct 2019 14:08:36 -0700 Subject: [PATCH] Add option to filter layers from tarball Continuation of this PR: https://github.com/google/go-containerregistry/pull/209 This should be considered a relatively advanced option, but folks who know what they are doing can use it to reduce the amount of data that needs to be encoded in the tarball for the daemon to load it. The ultimate use case of this option will be from daemon.Write, which currently uses the docker load interface to pull the image into the daemon. However, this currently reuploads (and redownloads) the base image on each write in contexts like ko. If we can determine the set of layers that already exist in the daemon, we can elide these from the tarball to dramatically improve performance. Related: #205 --- go.mod | 2 +- go.sum | 2 ++ pkg/v1/tarball/options.go | 55 +++++++++++++++++++++++++++++ pkg/v1/tarball/write.go | 36 +++++++++++++------ pkg/v1/tarball/write_test.go | 67 ++++++++++++++++++++++++++++++++++++ 5 files changed, 150 insertions(+), 12 deletions(-) create mode 100644 pkg/v1/tarball/options.go diff --git a/go.mod b/go.mod index 8c2053d3c..24b4a632a 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( golang.org/x/sync v0.0.0-20190423024810-112230192c58 golang.org/x/text v0.3.2 // indirect golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect - golang.org/x/tools v0.0.0-20191001184121-329c8d646ebe // indirect + golang.org/x/tools v0.0.0-20191003162220-c56b4b191e2d // indirect google.golang.org/appengine v1.1.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gotest.tools v2.2.0+incompatible // indirect diff --git a/go.sum b/go.sum index e1bf4279a..3d94963d6 100644 --- a/go.sum +++ b/go.sum @@ -163,6 +163,8 @@ golang.org/x/tools v0.0.0-20190926165942-a8d5d34286bd h1:L7bTtbmMojUZYEAt0OrTU0Z golang.org/x/tools v0.0.0-20190926165942-a8d5d34286bd/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191001184121-329c8d646ebe h1:hFr8KcN0dM0/dqbUW0KZYN+YXJeZBpBWIG9ZkMuX1vQ= golang.org/x/tools v0.0.0-20191001184121-329c8d646ebe/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191003162220-c56b4b191e2d h1:xzbfnqkhzcf2qHVgbBaXlaj+JlFOxT6P/oe2+yVuwBE= +golang.org/x/tools v0.0.0-20191003162220-c56b4b191e2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0 h1:igQkv0AAhEIvTEpD5LIpAfav2eeVO9HBTjvKHVJPRSs= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= diff --git a/pkg/v1/tarball/options.go b/pkg/v1/tarball/options.go new file mode 100644 index 000000000..63d9c4e0d --- /dev/null +++ b/pkg/v1/tarball/options.go @@ -0,0 +1,55 @@ +// Copyright 2019 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tarball + +import ( + v1 "github.com/google/go-containerregistry/pkg/v1" +) + +// Option is a functional option for tarball operations. +type Option func(*options) error + +// LayerFilter defines a function for filtering layers. +// True - indicates the layer should be kept, +// False - indicates the layer should be excluded. 
+type LayerFilter func(v1.Layer) (bool, error) + +type options struct { + filter LayerFilter +} + +func makeOptions(opts ...Option) (*options, error) { + o := &options{ + filter: func(v1.Layer) (bool, error) { + return true, nil + }, + } + + for _, option := range opts { + if err := option(o); err != nil { + return nil, err + } + } + + return o, nil +} + +// WithLayerFilter allows omitting layers when writing a tarball. +func WithLayerFilter(lf LayerFilter) Option { + return func(o *options) error { + o.filter = lf + return nil + } +} diff --git a/pkg/v1/tarball/write.go b/pkg/v1/tarball/write.go index 13a4a655e..fbab8ac14 100644 --- a/pkg/v1/tarball/write.go +++ b/pkg/v1/tarball/write.go @@ -29,41 +29,41 @@ import ( // WriteToFile writes in the compressed format to a tarball, on disk. // This is just syntactic sugar wrapping tarball.Write with a new file. -func WriteToFile(p string, ref name.Reference, img v1.Image) error { +func WriteToFile(p string, ref name.Reference, img v1.Image, opt ...Option) error { w, err := os.Create(p) if err != nil { return err } defer w.Close() - return Write(ref, img, w) + return Write(ref, img, w, opt...) } // MultiWriteToFile writes in the compressed format to a tarball, on disk. // This is just syntactic sugar wrapping tarball.MultiWrite with a new file. -func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image) error { +func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image, opt ...Option) error { refToImage := make(map[name.Reference]v1.Image, len(tagToImage)) for i, d := range tagToImage { refToImage[i] = d } - return MultiRefWriteToFile(p, refToImage) + return MultiRefWriteToFile(p, refToImage, opt...) } // MultiRefWriteToFile writes in the compressed format to a tarball, on disk. // This is just syntactic sugar wrapping tarball.MultiRefWrite with a new file. 
-func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image) error { +func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image, opt ...Option) error { w, err := os.Create(p) if err != nil { return err } defer w.Close() - return MultiRefWrite(refToImage, w) + return MultiRefWrite(refToImage, w, opt...) } // Write is a wrapper to write a single image and tag to a tarball. -func Write(ref name.Reference, img v1.Image, w io.Writer) error { - return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w) +func Write(ref name.Reference, img v1.Image, w io.Writer, opt ...Option) error { + return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w, opt...) } // MultiWrite writes the contents of each image to the provided reader, in the compressed format. @@ -71,12 +71,12 @@ func Write(ref name.Reference, img v1.Image, w io.Writer) error { // One manifest.json file at the top level containing information about several images. // One file for each layer, named after the layer's SHA. // One file for the config blob, named after its SHA. -func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer) error { +func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer, opt ...Option) error { refToImage := make(map[name.Reference]v1.Image, len(tagToImage)) for i, d := range tagToImage { refToImage[i] = d } - return MultiRefWrite(refToImage, w) + return MultiRefWrite(refToImage, w, opt...) } // MultiRefWrite writes the contents of each image to the provided reader, in the compressed format. @@ -84,7 +84,12 @@ func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer) error { // One manifest.json file at the top level containing information about several images. // One file for each layer, named after the layer's SHA. // One file for the config blob, named after its SHA. 
-func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer) error { +func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer, opt ...Option) error { + o, err := makeOptions(opt...) + if err != nil { + return err + } + tf := tar.NewWriter(w) defer tf.Close() @@ -144,6 +149,15 @@ func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer) error { // https://www.gnu.org/software/gzip/manual/html_node/Overview.html layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex) + // We filter late because the length of layerFiles must match the diff_ids + // in config file. It is ok if the file doesn't exist when the daemon + // already has a given layer, since it won't try to read it. + if keep, err := o.filter(l); err != nil { + return err + } else if !keep { + continue + } + r, err := l.Compressed() if err != nil { return err diff --git a/pkg/v1/tarball/write_test.go b/pkg/v1/tarball/write_test.go index c6c69ea8d..5fc4560b1 100644 --- a/pkg/v1/tarball/write_test.go +++ b/pkg/v1/tarball/write_test.go @@ -15,8 +15,11 @@ package tarball_test import ( + "archive/tar" + "io" "io/ioutil" "os" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -350,3 +353,67 @@ func getDiffIDs(t *testing.T, layers []v1.Layer) []v1.Hash { return diffIDs } + +func TestFilteredWrite(t *testing.T) { + // Make a tempfile for tarball writes. 
+ fp, err := ioutil.TempFile("", "") + if err != nil { + t.Fatalf("Error creating temp file.") + } + t.Log(fp.Name()) + defer fp.Close() + defer os.Remove(fp.Name()) + + // Make a random image + randImage, err := random.Image(256, 8) + if err != nil { + t.Fatalf("Error creating random image.") + } + tag, err := name.NewTag("gcr.io/foo/bar:latest", name.StrictValidation) + if err != nil { + t.Fatalf("Error creating test tag.") + } + + layers, err := randImage.Layers() + if err != nil { + t.Fatalf("Layers() = %v", err) + } + rld, err := layers[0].Digest() + if err != nil { + t.Fatalf("Digest() = %v", err) + } + + lf := func(l v1.Layer) (bool, error) { + // Filter the first layer in the image. + if ld, err := l.Digest(); err != nil { + return false, err + } else { + return ld != rld, nil + } + } + + if err := tarball.WriteToFile(fp.Name(), tag, randImage, tarball.WithLayerFilter(lf)); err != nil { + t.Fatalf("Unexpected error writing tarball: %v", err) + } + + f, err := os.Open(fp.Name()) + if err != nil { + t.Fatalf("os.Open() = %v", err) + } + defer f.Close() + + tarReader := tar.NewReader(f) + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("scanning tarfile: %v", err) + } + + if strings.Contains(header.Name, rld.Hex) { + t.Errorf("Saw file %v in tarball, want %v elided.", header.Name, rld) + } + } +}