Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For gzip operations, use a new Go helper by default instead of which(gzip) #1613

Merged
merged 11 commits into from
Sep 8, 2020
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,8 @@ docker_toolchain_configure(
# Should be set explcitly for remote execution.
docker_path="<enter absolute path to the docker binary (in the remote exec env) here>",
# OPTIONAL: Path to the gzip binary.
# Either gzip_path or gzip_target should be set explcitly for remote execution.
gzip_path="<enter absolute path to the gzip binary (in the remote exec env) here>",
# OPTIONAL: Bazel target for the gzip tool.
# Either gzip_path or gzip_target should be set explcitly for remote execution.
gzip_target="<enter absolute path (i.e., must start with repo name @...//:...) to an executable gzip target>",
# OPTIONAL: Path to the xz binary.
# Should be set explcitly for remote execution.
Expand Down
1 change: 0 additions & 1 deletion container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ bzl_library(
"//skylib:filetype",
"//skylib:label",
"//skylib:path",
"//skylib:zip",
"@bazel_tools//tools/build_defs/hash:hash.bzl",
],
)
Expand Down
30 changes: 30 additions & 0 deletions container/go/cmd/zipper/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2020 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ###
# Build file for zipper binary.

load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

go_binary(
name = "zipper",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)

go_library(
name = "go_default_library",
srcs = ["zipper.go"],
importpath = "github.com/bazelbuild/rules_docker",
visibility = ["//visibility:private"],
)
89 changes: 89 additions & 0 deletions container/go/cmd/zipper/zipper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright 2020 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
////////////////////////////////////////////////
// This package performs gzip operations.

package main

import (
"compress/gzip"
"flag"
"io"
"log"
"os"
)

var (
src = flag.String("src", "", "The source location of the file to zip/unzip.")
dst = flag.String("dst", "", "The destination location of the file, after zip/unzip.")
decompress = flag.Bool("decompress", false, "If true, perform gunzip. If false, gzip.")
fast = flag.Bool("fast", false, "If true, use the fastest compression level.")
)

func main() {
flag.Parse()

if *src == "" {
log.Fatalln("Required option -src was not specified.")
}

if *dst == "" {
log.Fatalln("Required option -dst was not specified.")
}

var copy_from io.Reader
var copy_to io.Writer

f, err := os.Open(*src)
if err != nil {
log.Fatalf("Unable to read input file: %v", err)
}
t, err := os.Create(*dst)
if err != nil {
log.Fatalf("Unable to create output file: %v", err)
}

if *decompress {
zr, err := gzip.NewReader(f)
if err != nil {
log.Fatalf("Unable to read: %v", err)
}
defer func() {
if err := zr.Close(); err != nil {
log.Fatalf("Unable to close gzip reader: %v", err)
}
}()
copy_from = zr
copy_to = t
} else {
level := gzip.DefaultCompression
if *fast {
level = gzip.BestSpeed
}
zw, err := gzip.NewWriterLevel(t, level)
if err != nil {
log.Fatalf("Unable to create gzip writer: %v", err)
}
defer func() {
if err := zw.Close(); err != nil {
log.Fatalf("Unable to close gzip writer: %v", err)
}
}()
copy_from = f
copy_to = zw
}
if _, err := io.Copy(copy_to, copy_from); err != nil {
log.Fatalf("Unable to perform the gzip operation from %q to %q: %v", *src, *dst, err)
}
}
3 changes: 2 additions & 1 deletion container/import.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ load(
"//skylib:zip.bzl",
_gunzip = "gunzip",
_gzip = "gzip",
_zip_tools = "tools",
)

def _is_filetype(filename, extensions):
Expand Down Expand Up @@ -178,7 +179,7 @@ container_import = rule(
mandatory = False,
),
"repository": attr.string(default = "bazel"),
}, _hash_tools, _layer_tools),
}, _hash_tools, _layer_tools, _zip_tools),
executable = True,
outputs = {
"out": "%{name}.tar",
Expand Down
3 changes: 2 additions & 1 deletion container/layer.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ load(
load(
"//skylib:zip.bzl",
_gzip = "gzip",
_zip_tools = "tools",
)

_DEFAULT_MTIME = -1
Expand Down Expand Up @@ -288,7 +289,7 @@ _layer_attrs = dicts.add({
"portable_mtime": attr.bool(default = False),
"symlinks": attr.string_dict(),
"tars": attr.label_list(allow_files = tar_filetype),
}, _hash_tools, _layer_tools)
}, _hash_tools, _layer_tools, _zip_tools)

_layer_outputs = {
"layer": "%{name}-layer.tar",
Expand Down
117 changes: 81 additions & 36 deletions skylib/zip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,67 @@
# limitations under the License.
"""Functions for producing the gzip of an artifact."""

def _gzip_path(toolchain_info):
"""Resolve the user-supplied gzip path, if any.

Args:
toolchain_info: The DockerToolchainInfo

Returns:
Path to gzip, or empty string.
"""
if toolchain_info.gzip_target:
return toolchain_info.gzip_target.files_to_run.executable.path
else:
return toolchain_info.gzip_path

def _gzip(ctx, artifact, out, decompress, options, mnemonic):
"""A helper that calls either the compiled zipper, or the gzip tool.

Args:
ctx: The context
artifact: The artifact to zip/unzip
out: The output file.
decompress: Whether to decompress (True) or compress (False)
options: str list, Command-line options.
mnemonic: A one-word description of the action
"""
toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info
gzip_path = _gzip_path(toolchain_info)

if not gzip_path:
# The user did not specify a gzip tool; use the Go helper provided with rules_docker.
ctx.actions.run(
executable = ctx.executable._zipper,
arguments = ["-src", artifact.path, "-dst", out.path] + (
["-decompress"] if decompress else []
) + (options or []),
inputs = [artifact],
outputs = [out],
mnemonic = mnemonic,
tools = ctx.attr._zipper[DefaultInfo].default_runfiles.files,
)
else:
# Call the gzip path or target supplied by the user.
input_manifests = []
tools = []
if toolchain_info.gzip_target:
tools, _, input_manifests = ctx.resolve_command(tools = [toolchain_info.gzip_target])

opt_str = " ".join([repr(o) for o in (options or [])])
command = "%s -d %s < %s > %s" if decompress else "%s -n %s < %s > %s"
command = command % (gzip_path, opt_str, artifact.path, out.path)

ctx.actions.run_shell(
command = command,
input_manifests = input_manifests,
inputs = [artifact],
outputs = [out],
use_default_shell_env = True,
mnemonic = mnemonic,
tools = tools,
)

def gzip(ctx, artifact, options = None):
"""Create an action to compute the gzipped artifact.

Expand All @@ -25,26 +86,13 @@ def gzip(ctx, artifact, options = None):
the gzipped artifact.
"""
out = ctx.actions.declare_file(artifact.basename + ".gz")
toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info
input_manifests = []
tools = []
gzip_path = toolchain_info.gzip_path
if toolchain_info.gzip_target:
gzip_path = toolchain_info.gzip_target.files_to_run.executable.path
tools, _, input_manifests = ctx.resolve_command(tools = [toolchain_info.gzip_target])
elif toolchain_info.gzip_path == "":
fail("gzip could not be found. Make sure it is in the path or set it " +
"explicitly in the docker_toolchain_configure")

opt_str = " ".join([repr(o) for o in (options or [])])
ctx.actions.run_shell(
command = "%s -n %s < %s > %s" % (gzip_path, opt_str, artifact.path, out.path),
input_manifests = input_manifests,
inputs = [artifact],
outputs = [out],
use_default_shell_env = True,
_gzip(
ctx = ctx,
artifact = artifact,
out = out,
decompress = False,
options = options,
mnemonic = "GZIP",
tools = tools,
)
return out

Expand All @@ -59,23 +107,20 @@ def gunzip(ctx, artifact):
the gunzipped artifact.
"""
out = ctx.actions.declare_file(artifact.basename + ".nogz")
toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info
input_manifests = []
tools = []
gzip_path = toolchain_info.gzip_path
if toolchain_info.gzip_target:
gzip_path = toolchain_info.gzip_target.files_to_run.executable.path
tools, _, input_manifests = ctx.resolve_command(tools = [toolchain_info.gzip_target])
elif toolchain_info.gzip_path == "":
fail("gzip could not be found. Make sure it is in the path or set it " +
"explicitly in the docker_toolchain_configure")
ctx.actions.run_shell(
command = "%s -d < %s > %s" % (gzip_path, artifact.path, out.path),
input_manifests = input_manifests,
inputs = [artifact],
outputs = [out],
use_default_shell_env = True,
_gzip(
ctx = ctx,
artifact = artifact,
out = out,
decompress = True,
options = None,
mnemonic = "GUNZIP",
tools = tools,
)
return out

tools = {
"_zipper": attr.label(
default = Label("//container/go/cmd/zipper"),
cfg = "host",
executable = True,
),
}
10 changes: 0 additions & 10 deletions testing/default_toolchain/WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,13 @@ local_repository(
path = "replace/this/path",
)

load(
"//:local_tool.bzl",
"local_tool",
)

local_tool(
name = "gzip",
)

load(
"@io_bazel_rules_docker//toolchains/docker:toolchain.bzl",
docker_toolchain_configure = "toolchain_configure",
)

docker_toolchain_configure(
name = "docker_config",
gzip_target = "@gzip//:gzip",
)

load(
Expand Down
10 changes: 5 additions & 5 deletions tests/container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,13 @@ container_push(
# BEGIN_DO_NOT_IMPORT
file_test(
name = "test_digest_output1",
content = "sha256:18e27a73e3faabc819f903fc8e603de2bfd49813e231c2e8ce98b533577450e2",
content = "sha256:4ec78686f1edd4f30cca6222f17ace5a2b9f8db9708727c3a140c954af0a534d",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm curious why the digest changed given we're still using gzip in the Go tool. Are we specifying a different compression level than before? In any case, I expect you'll need to update a bunch of digests here (https://github.com/bazelbuild/rules_docker/blob/master/container/image_test.py) as well in case we can't get the digests to match.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried digging into this a little. None of the compression levels in compress/gzip produce a digest that matches the gzip tool. There may be subtle differences in how gzip and compress/gzip package the header or something?

//container/image_test.py passes; I suspect it's because we never verify the digest of gzipped files, only manifests.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried digging into this a little. None of the compression levels in compress/gzip produce a digest that matches the gzip tool. There may be subtle differences in how gzip and compress/gzip package the header or something?

SGTM. Thanks for looking into it.

//container/image_test.py passes; I suspect it's because we never verify the digest of gzipped files, only manifests.

This is weird. I believe manifest transitively includes the digests the compressed layers. I don't remember if the manifest contents directly list the layer digests or the manifest which includes the digest of the image config which in turn includes the digests of the compressed layers. Note that some images only include uncompressed layers. However, I think most of our images include compressed layers. I'm a little worried that the current implementation isn't actually calling the Go tool to compress the layers for the images used by this test which may be undesirable for your use case :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see comment below: #1613 (comment)

file = ":deb_image_with_dpkgs.digest",
)

file_test(
name = "test_digest_output2",
content = "sha256:8da7a9421c68be888336492184d54b42db91a0b1f2096d85f4eaf3a25b7f234e",
content = "sha256:cf3bc4bfee0707fc15ee50b27af6dcb0fe6c0585a6ba89870736ac8f72c702e9",
file = ":null_cmd_and_entrypoint_none.digest",
)
# END_DO_NOT_IMPORT
Expand Down Expand Up @@ -585,7 +585,7 @@ container_push(
# BEGIN_DO_NOT_IMPORT
file_test(
name = "test_push_digest_output",
content = "sha256:0149d475d8e25903446311d3da4a4aa54f348103b84c43b6ad3351484fdaff67",
content = "sha256:ea10d597d15d62e3a33ea378f75b08d9afc332f685495900724b857a8b9d2d8a",
file = ":push_test.digest",
)

Expand All @@ -610,13 +610,13 @@ file_test(

file_test(
name = "alpine_custom_attr_digest_test",
content = "sha256:6a7bd5d833271be520664af1391e399b2fe10ae7647ee091ee24e2e2774d38d6",
content = "sha256:2f3bf2d061a1bab130cdd98f10b73dfceb08bfbf94a610c62f20c787073e5178",
file = ":new_push_test_legacy_from_container_img.digest",
)

file_test(
name = "new_alpine_linux_armv6_image_tar_digest_test",
content = "sha256:7b2554a0a34e7ba53f5e70f4b1ed1f8515d9f9d34b0dd75e33fe8fc6384a04c5",
content = "sha256:249804db68a05bb88cc353d4aff090b084d8b8312665638d2f8d7bdaeaa423b3",
file = ":new_alpine_linux_armv6_image_tar.digest",
)

Expand Down
Loading