From b7753ad1f0b7656d644920dd938ca4ee85db75b6 Mon Sep 17 00:00:00 2001 From: aptenodytes-forsteri Date: Wed, 6 Oct 2021 13:59:31 -0400 Subject: [PATCH] Cache loaded images for performance improvements. Locally on ubuntu 18.04, the join_layers step takes greater than 30 seconds for the container_bundle_with_install_pkgs target without this change, and ~5 seconds with this change. With the previous implementation, join_layers was passing the same set of images to reader.parts for each call to ReadImage. The reader, created fresh for each call to ReadImage, would then load these same images again. This results in a fixed cost for every image you add to the bundle. For large bundles, this can be very costly (on the order of minutes). Updated other callers of compat.ReadImages to use this new API of creating a reader first, then calling reader.ReadImage. --- container/go/cmd/digester/digester.go | 3 +- container/go/cmd/flattener/flattener.go | 3 +- container/go/cmd/join_layers/join_layers.go | 16 ++--- container/go/cmd/pusher/pusher.go | 3 +- container/go/pkg/compat/reader.go | 68 ++++++++++++--------- tests/container/BUILD | 29 +++++++++ 6 files changed, 84 insertions(+), 38 deletions(-) diff --git a/container/go/cmd/digester/digester.go b/container/go/cmd/digester/digester.go index f0a6f2782..559559063 100644 --- a/container/go/cmd/digester/digester.go +++ b/container/go/cmd/digester/digester.go @@ -49,7 +49,8 @@ func main() { if err != nil { log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err) } - img, err := compat.ReadImage(imgParts) + r := compat.Reader{Parts: imgParts} + img, err := r.ReadImage() if err != nil { log.Fatalf("Error reading image: %v", err) } diff --git a/container/go/cmd/flattener/flattener.go b/container/go/cmd/flattener/flattener.go index c2e0594d9..c4f96bca1 100644 --- a/container/go/cmd/flattener/flattener.go +++ b/container/go/cmd/flattener/flattener.go @@ -52,7 +52,8 @@ func main() { if err != nil { log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err) } - img, err := compat.ReadImage(imgParts) + r := compat.Reader{Parts: imgParts} + img, err := r.ReadImage() if err != nil { log.Fatalf("Error reading image: %v", err) } diff --git a/container/go/cmd/join_layers/join_layers.go b/container/go/cmd/join_layers/join_layers.go index faab26335..8e8296075 100644 --- a/container/go/cmd/join_layers/join_layers.go +++ b/container/go/cmd/join_layers/join_layers.go @@ -85,17 +85,19 @@ func writeOutput(outputTarball string, tarballFormat string, tagToConfigs, tagTo if err != nil { return errors.Wrap(err, "unable to load images from the given tarballs") } + parts := compat.ImageParts{ + Images: images, + Layers: layerParts, + } + r := compat.Reader{Parts: parts} for tag, configFile := range tagToConfigs { // Manifest file may not have been specified and this is ok as it's // only required if the base images has foreign layers. manifestFile := tagToBaseManifests[tag] - parts := compat.ImageParts{ - Config: configFile, - BaseManifest: manifestFile, - Images: images, - Layers: layerParts, - } - img, err := compat.ReadImage(parts) + r.Parts.Config = configFile + r.Parts.BaseManifest = manifestFile + + img, err := r.ReadImage() if err != nil { return errors.Wrapf(err, "unable to load image %v corresponding to config %s", tag, configFile) } diff --git a/container/go/cmd/pusher/pusher.go b/container/go/cmd/pusher/pusher.go index 4fac73ccb..b54497270 100644 --- a/container/go/cmd/pusher/pusher.go +++ b/container/go/cmd/pusher/pusher.go @@ -99,7 +99,8 @@ func main() { if err != nil { log.Fatalf("Unable to determine parts of the image from the specified arguments: %v", err) } - img, err := compat.ReadImage(imgParts) + r := compat.Reader{Parts: imgParts} + img, err := r.ReadImage() if err != nil { log.Fatalf("Error reading image: %v", err) } diff --git a/container/go/pkg/compat/reader.go b/container/go/pkg/compat/reader.go index bdbe3f125..d42b7145d 100644 --- a/container/go/pkg/compat/reader.go +++ b/container/go/pkg/compat/reader.go @@ -117,11 +117,11 @@ func ImagePartsFromArgs(config, baseManifest, imgTarball string, layers []string return result, nil } -// reader maintains the state necessary to build a legacyImage object from an +// Reader maintains the state necessary to build a legacyImage object from an // ImageParts object. -type reader struct { +type Reader struct { // parts is the ImageParts being loaded. - parts ImageParts + Parts ImageParts // baseManifest is the manifest of the very first base image in the chain // of images being loaded. baseManifest *v1.Manifest @@ -130,32 +130,35 @@ type reader struct { // layerLookup is a map from the diffID of a layer to the layer // itself. layerLookup map[v1.Hash]v1.Layer + // loadedImageCache is a cache of all images that have been loaded into memory, + // to prevent costly reloads. + loadedImageCache map[v1.Hash]bool } // loadMetadata loads the image metadata for the image parts in the given // reader. -func (r *reader) loadMetadata() error { - cf, err := os.Open(r.parts.Config) +func (r *Reader) loadMetadata() error { + cf, err := os.Open(r.Parts.Config) if err != nil { - return errors.Wrapf(err, "unable to open image config file %s", r.parts.Config) + return errors.Wrapf(err, "unable to open image config file %s", r.Parts.Config) } c, err := v1.ParseConfigFile(cf) if err != nil { - return errors.Wrapf(err, "unable to parse image config from %s", r.parts.Config) + return errors.Wrapf(err, "unable to parse image config from %s", r.Parts.Config) } r.config = c - if r.parts.BaseManifest == "" { + if r.Parts.BaseManifest == "" { // Base manifest is optional. It's only needed for images whose base // manifests have foreign layers. return nil } - mf, err := os.Open(r.parts.BaseManifest) + mf, err := os.Open(r.Parts.BaseManifest) if err != nil { - return errors.Wrapf(err, "unable to open base image manifest file %s", r.parts.BaseManifest) + return errors.Wrapf(err, "unable to open base image manifest file %s", r.Parts.BaseManifest) } m, err := v1.ParseManifest(mf) if err != nil { - return errors.Wrapf(err, "unable to parse base image manifest from %s", r.parts.BaseManifest) + return errors.Wrapf(err, "unable to parse base image manifest from %s", r.Parts.BaseManifest) } r.baseManifest = m return nil @@ -209,7 +212,7 @@ func (l *foreignLayer) MediaType() (types.MediaType, error) { // loadForeignLayers loads the foreign layers from the base manifest in the // given reader into the layer lookup. -func (r *reader) loadForeignLayers() error { +func (r *Reader) loadForeignLayers() error { if r.baseManifest == nil { // No base manifest so no foreign layers to load. return nil @@ -237,8 +240,12 @@ func (r *reader) loadForeignLayers() error { // loadImages loads the layers from the given images into the layers lookup // in the given reader. -func (r *reader) loadImages(images []v1.Image) error { +func (r *Reader) loadImages(images []v1.Image) error { for _, img := range images { + digest, _ := img.Digest() + if r.loadedImageCache[digest] { + continue + } layers, err := img.Layers() if err != nil { return errors.Wrap(err, "unable to get the layers in image") @@ -250,6 +257,7 @@ func (r *reader) loadImages(images []v1.Image) error { } r.layerLookup[diffID] = l } + r.loadedImageCache[digest] = true } return nil } @@ -257,24 +265,24 @@ func (r *reader) loadImages(images []v1.Image) error { // loadImgTarball loads the layers from the image tarball in the parts section // of the given reader if one was specified into the layers lookup in the given // reader. -func (r *reader) loadImgTarball() error { - if r.parts.ImageTarball == "" { +func (r *Reader) loadImgTarball() error { + if r.Parts.ImageTarball == "" { return nil } - img, err := tarball.ImageFromPath(r.parts.ImageTarball, nil) + img, err := tarball.ImageFromPath(r.Parts.ImageTarball, nil) if err != nil { - return errors.Wrapf(err, "unable to load image from tarball %s", r.parts.ImageTarball) + return errors.Wrapf(err, "unable to load image from tarball %s", r.Parts.ImageTarball) } if err := r.loadImages([]v1.Image{img}); err != nil { - return errors.Wrapf(err, "unable to load the layers from image loaded from tarball %s", r.parts.ImageTarball) + return errors.Wrapf(err, "unable to load the layers from image loaded from tarball %s", r.Parts.ImageTarball) } return nil } // loadLayers loads layers specified as parts in the ImageParts section in the // given reader. -func (r *reader) loadLayers() error { - for _, l := range r.parts.Layers { +func (r *Reader) loadLayers() error { + for _, l := range r.Parts.Layers { layer, err := l.V1Layer() if err != nil { return errors.Wrap(err, "unable to build a v1.Layer from the specified parts") @@ -289,23 +297,27 @@ func (r *reader) loadLayers() error { } // ReadImage loads a v1.Image from the given ImageParts -func ReadImage(parts ImageParts) (v1.Image, error) { +func (r *Reader) ReadImage() (v1.Image, error) { // Special case: if we only have a tarball, we can instantiate the image // directly from that. Otherwise, we'll process the image layers // individually as specified in the config. - if parts.ImageTarball != "" && parts.Config == "" { - return tarball.ImageFromPath(parts.ImageTarball, nil) + if r.Parts.ImageTarball != "" && r.Parts.Config == "" { + return tarball.ImageFromPath(r.Parts.ImageTarball, nil) } - r := reader{parts: parts} - r.layerLookup = make(map[v1.Hash]v1.Layer) + if r.layerLookup == nil { + r.layerLookup = make(map[v1.Hash]v1.Layer) + } + if r.loadedImageCache == nil { + r.loadedImageCache = make(map[v1.Hash]bool) + } if err := r.loadMetadata(); err != nil { return nil, errors.Wrap(err, "unable to load image metadata") } if err := r.loadForeignLayers(); err != nil { return nil, errors.Wrap(err, "unable to load foreign layers specified in the base manifest") } - if err := r.loadImages(r.parts.Images); err != nil { + if err := r.loadImages(r.Parts.Images); err != nil { return nil, errors.Wrap(err, "unable to load layers from the images in the given image parts") } if err := r.loadImgTarball(); err != nil { @@ -318,12 +330,12 @@ func ReadImage(parts ImageParts) (v1.Image, error) { for _, diffID := range r.config.RootFS.DiffIDs { layer, ok := r.layerLookup[diffID] if !ok { - return nil, errors.Errorf("unable to locate layer with diffID %v as indicated in image config %s", diffID, parts.Config) + return nil, errors.Errorf("unable to locate layer with diffID %v as indicated in image config %s", diffID, r.Parts.Config) } layers = append(layers, layer) } img := &legacyImage{ - configPath: parts.Config, + configPath: r.Parts.Config, layers: layers, } if err := img.init(); err != nil { diff --git a/tests/container/BUILD b/tests/container/BUILD index 5758a58d3..c65929846 100644 --- a/tests/container/BUILD +++ b/tests/container/BUILD @@ -17,6 +17,7 @@ load( "@bazel_tools//tools/build_rules:test_rules.bzl", "file_test", ) +load("@rules_python//python:defs.bzl", "py_test") load("//container:bundle.bzl", "container_bundle") load( "//container:container.bzl", @@ -969,3 +970,31 @@ py_test( srcs_version = "PY3", deps = ["@containerregistry"], ) + +container_image( + name = "test_install_pkgs", + base = "//tests/docker/package_managers:test_install_pkgs.tar", +) + +container_image( + name = "test_install_git_for_reproducibility_1", + base = "//tests/docker/package_managers:install_git_for_reproducibility_1.tar", +) + +# Test a container bundle with several images and install_pkgs. +# This should not tax join_layers unnecessarily. +# Run bazel build @io_bazel_rules_docker//tests/container:container_bundle_with_install_pkgs.tar to test. +# Prior to adding a caching step to compat/reader.go, join_layers.go +# would take substantially longer for this target. +container_bundle( + name = "container_bundle_with_install_pkgs", + images = { + "install_pkgs_1:latest": ":test_install_pkgs", + "install_pkgs_2:latest": ":test_install_git_for_reproducibility_1", + "localhost:5000/image0:latest": "//testdata:base_with_entrypoint", + "localhost:5000/image1:latest": "//testdata:link_with_files_base", + "localhost:5000/image2:latest": "//testdata:with_double_env", + "localhost:5000/image3:latest": "//testdata:with_label", + "localhost:5000/image4:latest": "//testdata:with_double_label", + }, +)