Skip to content

Commit

Permalink
refactor: move all manifest parsing and validation to manifest.bzl
Browse files Browse the repository at this point in the history
Clean up resolve.bzl and package_index.bzl by moving all of the manifest
functionality to a separate manifest.bzl file where we now do all of the
work to generate the lock: manifest parsing, validation and the package
index and resolution. IMHO this is how it should be because the lock is
the "frozen state" of the manifest.

* _parse() parses the YAML

* _from_dict validates the manifest dict and does the rest of the
  changes that we need to produce a manifest struct

* add extra validation for e.g. duplicated architectures

* manifest.lock is the only method meant to be used from the outside and
  it encapsulates all of the other parts, calling _parse, _from_dict and
  _lock (package index and resolution), to produce the lock file.

* move get_dupes to util.bzl

* refactor the "source" struct into the new manifest where we can now
  centralize a lot of the structure and logic spread across multiple
  parts of the code.

* remove yq_toolchain_prefix since it's always "yq" and, looking at GH
  code search, this seems to be a copy-paste leftover from rules_js (or
  the other way around)... the code is always the same and it never
  receives a string different from "yq".
  • Loading branch information
jjmaestro committed Sep 19, 2024
1 parent 95da3ce commit 866ea51
Show file tree
Hide file tree
Showing 11 changed files with 277 additions and 129 deletions.
1 change: 1 addition & 0 deletions apt/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ bzl_library(
"//apt/private:deb_import",
"//apt/private:index",
"//apt/private:lockfile",
"//apt/private:manifest",
"//apt/private:resolve",
],
)
6 changes: 3 additions & 3 deletions apt/extensions.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
load("//apt/private:deb_import.bzl", "deb_import")
load("//apt/private:index.bzl", "deb_package_index")
load("//apt/private:lockfile.bzl", "lockfile")
load("//apt/private:resolve.bzl", "deb_resolve", "internal_resolve")
load("//apt/private:manifest.bzl", "manifest")
load("//apt/private:resolve.bzl", "deb_resolve")

def _distroless_extension(module_ctx):
root_direct_deps = []
Expand All @@ -13,9 +14,8 @@ def _distroless_extension(module_ctx):
for install in mod.tags.install:
lockf = None
if not install.lock:
lockf = internal_resolve(
lockf = manifest.lock(
module_ctx,
"yq",
install.manifest,
install.resolve_transitive,
)
Expand Down
15 changes: 13 additions & 2 deletions apt/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ bzl_library(
visibility = ["//apt:__subpackages__"],
deps = [
":lockfile",
"@bazel_tools//tools/build_defs/repo:http.bzl",
"@bazel_tools//tools/build_defs/repo:utils.bzl",
],
)

Expand All @@ -39,6 +37,18 @@ bzl_library(
deps = [":util"],
)

# Starlark library target for manifest.bzl. The deps below correspond to the
# load() statements in that file (lockfile, package_index, util, repo_utils).
bzl_library(
name = "manifest",
srcs = ["manifest.bzl"],
visibility = ["//apt:__subpackages__"],
deps = [
":lockfile",
":package_index",
":util",
"@aspect_bazel_lib//lib:repo_utils",
],
)

bzl_library(
name = "package_index",
srcs = ["package_index.bzl"],
Expand All @@ -52,6 +62,7 @@ bzl_library(
visibility = ["//apt:__subpackages__"],
deps = [
":lockfile",
":manifest",
":package_index",
"@aspect_bazel_lib//lib:repo_utils",
],
Expand Down
126 changes: 126 additions & 0 deletions apt/private/manifest.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"manifest"

load("@aspect_bazel_lib//lib:repo_utils.bzl", "repo_utils")
load(":lockfile.bzl", "lockfile")
load(":package_index.bzl", "package_index")
load(":util.bzl", "util")

def _parse(rctx, manifest_label):
    """Parses the YAML manifest into a dict by converting it to JSON with yq.

    Args:
        rctx: context used to resolve file paths and to execute yq.
        manifest_label: label of the YAML manifest file.

    Returns:
        The decoded manifest. An empty dict is returned when yq prints
        nothing or prints `null` (i.e. the manifest has no content).
    """
    host_yq = Label("@yq_{}//:yq{}".format(
        repo_utils.platform(rctx),
        ".exe" if repo_utils.is_windows(rctx) else "",
    ))

    yq_args = [
        str(rctx.path(host_yq)),
        str(rctx.path(manifest_label)),
        "-o=json",
    ]

    result = rctx.execute(yq_args)
    if result.return_code:
        err = "failed to parse manifest - '{}' exited with {}: "
        err += "\nSTDOUT:\n{}\nSTDERR:\n{}"
        fail(err.format(
            " ".join(yq_args),
            result.return_code,
            result.stdout,
            result.stderr,
        ))

    # Compare the stripped output: any whitespace (e.g. a trailing newline)
    # around `null` would otherwise defeat the check and decode to None
    # instead of the empty dict that the validation step can report on.
    stdout = result.stdout.strip()

    return json.decode(stdout if stdout and stdout != "null" else "{}")

def _source(src):
    """Builds the struct that describes a single package index.

    Args:
        src: dict with the keys `url`, `dist`, `comp` and `arch`. The dict is
            only read, never modified.

    Returns:
        A struct with:
          * arch: the architecture taken from `src`.
          * base_url: `src["url"]` without trailing slashes.
          * index: name of the index file ("Packages").
          * index_full(ext): index file name with `.ext` appended when `ext`
            is truthy.
          * output: relative path `{dist}/{comp}/{arch}/Packages`.
          * output_full(ext): `output` with `.ext` appended when `ext` is
            truthy.
          * index_path: `dists/{dist}/{comp}/binary-{arch}`.
          * index_url(ext): `base_url`, `index_path` and `index_full(ext)`
            joined with "/".
    """
    _ext = lambda name, ext: "%s%s" % (name, (".%s" % ext) if ext else "")

    # Trailing slashes are dropped so that the joined URL never contains "//"
    # (some HTTP servers mishandle such paths). The result is kept in a local
    # instead of being written back into `src`, so the caller's dict is left
    # untouched and the lambdas below do not depend on later changes to it.
    base_url = src["url"].rstrip("/")

    index = "Packages"

    index_path = "dists/{dist}/{comp}/binary-{arch}".format(**src)
    output = "{dist}/{comp}/{arch}/{index}".format(index = index, **src)

    return struct(
        arch = src["arch"],
        base_url = base_url,
        index = index,
        index_full = lambda ext: _ext(index, ext),
        output = output,
        output_full = lambda ext: _ext(output, ext),
        index_path = index_path,
        index_url = lambda ext: "/".join((base_url, index_path, _ext(index, ext))),
    )

def _from_dict(manifest, manifest_label):
    """Validates a parsed manifest dict and converts it into a manifest struct.

    Args:
        manifest: dict decoded from the manifest file. It is updated in
            place: `label` is added and `sources` is replaced by the expanded
            list described below.
        manifest_label: label of the manifest file; stored as `label` and
            used in error messages.

    Returns:
        A struct built from the manifest keys, where `sources` holds one
        `_source` struct per combination of architecture, source entry and
        channel component.
    """
    manifest["label"] = manifest_label

    # `.get` is used for the checks below so that a missing key (e.g. an empty
    # manifest) is reported through these messages instead of a bare key
    # lookup failure.
    version = manifest.get("version")
    if version != 1:
        err = "Unsupported manifest version: {}. Please use `version: 1`"
        fail(err.format(version))

    for key in ("sources", "archs", "packages"):
        if type(manifest.get(key)) != "list":
            fail("`{}` should be an array".format(key))

    for key in ("archs", "packages"):
        dupes = util.get_dupes(manifest[key])
        if dupes:
            err = "Duplicate {}: {}. Please remove them from manifest {}"
            fail(err.format(key, dupes, manifest["label"]))

    sources = []

    for arch in manifest["archs"]:
        for src in manifest["sources"]:
            # A channel is the distribution followed by one or more
            # space-separated components.
            channel = src["channel"].split(" ", 1)
            if len(channel) != 2:
                err = "Invalid channel '{}': expected `<dist> <component> [<component>...]` in manifest {}"
                fail(err.format(src["channel"], manifest["label"]))

            dist, components = channel

            for comp in components.split(" "):
                # Work on a copy so the source entries of the manifest are not
                # rewritten on every iteration.
                sources.append(_source(dict(
                    src,
                    dist = dist,
                    comp = comp,
                    arch = arch,
                )))

    manifest["sources"] = sources

    return struct(**manifest)

def _lock(rctx, manifest, include_transitive):
    """Resolves every manifest package for every architecture into a lock.

    Args:
        rctx: context used to build the package index, to create the lock and
            to report progress.
        manifest: manifest struct; `archs` and `packages` are read here and
            the whole struct is handed to `package_index.new`.
        include_transitive: forwarded as-is to the index's `resolve_all`.

    Returns:
        The lock created by `lockfile.empty`, with every resolved package and
        its dependencies added per architecture.
    """
    index = package_index.new(rctx, manifest)
    lock = lockfile.empty(rctx)

    for arch in manifest.archs:
        for requested in manifest.packages:
            constraint = package_index.parse_depends(requested).pop()

            rctx.report_progress("Resolving %s" % requested)
            query = {
                "arch": arch,
                "name": constraint["name"],
                "version": constraint["version"],
                "include_transitive": include_transitive,
            }
            root, deps = index.resolve_all(**query)

            if not root:
                fail("Unable to locate package `%s`" % requested)

            # The requested package goes in first, then each dependency along
            # with the edge that ties it to the requested package.
            lock.add_package(root, arch)

            for dep in deps:
                lock.add_package(dep, arch)
                lock.add_package_dependency(root, dep, arch)

    return lock

def _lock_from_label(rctx, manifest_label, include_transitive):
    """Parses and validates the manifest at `manifest_label`, then locks it."""
    parsed = _parse(rctx, manifest_label)
    validated = _from_dict(parsed, manifest_label)

    return _lock(rctx, validated, include_transitive)

# Public entry point of this file: `manifest.lock(...)`.
manifest = struct(
    lock = _lock_from_label,
    # NOTE: these are exposed here for testing purposes, DO NOT USE OTHERWISE
    _source = _source,
    _from_dict = _from_dict,
    _lock = _lock,
)
56 changes: 16 additions & 40 deletions apt/private/package_index.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

load(":version.bzl", version_lib = "version")

def _fetch_package_index(rctx, url, dist, comp, arch):
def _fetch_package_index(rctx, source):
# See https://linux.die.net/man/1/xz and https://linux.die.net/man/1/gzip
# --keep -> keep the original file (Bazel might be still committing the output to the cache)
# --force -> overwrite the output if it exists
Expand All @@ -15,24 +15,8 @@ def _fetch_package_index(rctx, url, dist, comp, arch):
failed_attempts = []

for ext, cmd in supported_extensions.items():
index = "Packages"
index_full = "{}.{}".format(index, ext)

output = "{dist}/{comp}/{arch}/{index}".format(
dist = dist,
comp = comp,
arch = arch,
index = index,
)
output_full = "{}.{}".format(output, ext)

index_url = "{url}/dists/{dist}/{comp}/binary-{arch}/{index_full}".format(
url = url,
dist = dist,
comp = comp,
arch = arch,
index_full = index_full,
)
index_url = source.index_url(ext)
output_full = source.output_full(ext)

download = rctx.download(
url = index_url,
Expand Down Expand Up @@ -70,7 +54,7 @@ def _fetch_package_index(rctx, url, dist, comp, arch):

fail("Failed to fetch packages index:\n" + "\n".join(attempt_messages))

return rctx.read(output)
return rctx.read(source.output)

def _package_set(packages, keys, package):
for key in keys[:-1]:
Expand All @@ -79,7 +63,7 @@ def _package_set(packages, keys, package):
packages = packages[key]
packages[keys[-1]] = package

def _parse_package_index(packages, contents, arch, root):
def _parse_package_index(packages, contents, source):
last_key = ""
pkg = {}
for group in contents.split("\n\n"):
Expand Down Expand Up @@ -108,10 +92,10 @@ def _parse_package_index(packages, contents, arch, root):
pkg[key] = value

if len(pkg.keys()) != 0:
pkg["Root"] = root
pkg["Root"] = source.base_url
_package_set(
packages,
keys = (arch, pkg["Package"], pkg["Version"]),
keys = (source.arch, pkg["Package"], pkg["Version"]),
package = pkg,
)
last_key = ""
Expand All @@ -125,25 +109,17 @@ def _package_get(packages, arch, name, version = None):

return versions.get(version, None)

def _index(rctx, sources, archs):
def _index(rctx, manifest):
packages = {}

for arch in archs:
for (url, dist, comp) in sources:
# We assume that `url` does not contain a trailing forward slash when passing to
# functions below. If one is present, remove it. Some HTTP servers do not handle
# redirects properly when a path contains "//"
# (ie. https://mymirror.com/ubuntu//dists/noble/stable/... may return a 404
# on misconfigured HTTP servers)
url = url.rstrip("/")

index = "{}/{} for {}".format(dist, comp, arch)
for source in manifest.sources:
index = "%s/%s" % (source.index_path, source.index)

rctx.report_progress("Fetching package index: %s" % index)
output = _fetch_package_index(rctx, url, dist, comp, arch)
rctx.report_progress("Fetching package index: %s" % index)
output = _fetch_package_index(rctx, source)

rctx.report_progress("Parsing package index: %s" % index)
_parse_package_index(packages, output, arch, url)
rctx.report_progress("Parsing package index: %s" % index)
_parse_package_index(packages, output, source)

return struct(
packages = packages,
Expand Down Expand Up @@ -294,8 +270,8 @@ def _resolve_all(index, arch, name, version, include_transitive):

return root_package, dependencies

def _new(rctx, sources, archs):
index = _index(rctx, sources, archs)
def _new(rctx, manifest):
index = _index(rctx, manifest)

return struct(
resolve_all = lambda **kwargs: _resolve_all(index, **kwargs),
Expand Down
Loading

0 comments on commit 866ea51

Please sign in to comment.