From 7e977eee4179b894606e98114e455dca49b95a4f Mon Sep 17 00:00:00 2001 From: Ed Santiago Date: Thu, 23 May 2024 14:58:26 -0600 Subject: [PATCH] Create a local registry ...to minimize hiccups. RUN-2091 in Jira. Network registries are too unreliable; they cause too many flakes in CI. Here we set up a registry running on each VM, prepopulated with all container images used in podman and buildah tests. Related PRs: https://github.com/containers/podman/pull/22726 https://github.com/containers/buildah/pull/5584 Once those merge, podman and buildah CI tests will fetch images from this local registry. Signed-off-by: Ed Santiago --- IMG_SFX | 2 +- cache_images/debian_setup.sh | 4 + cache_images/fedora_setup.sh | 2 + cache_images/local-cache-registry | 341 ++++++++++++++++++++++++++++++ lib.sh | 10 + 5 files changed, 358 insertions(+), 1 deletion(-) create mode 100755 cache_images/local-cache-registry diff --git a/IMG_SFX b/IMG_SFX index eeade580..9d1c9e02 100644 --- a/IMG_SFX +++ b/IMG_SFX @@ -1 +1 @@ -20240708t135624z-f40f39d13 +20240708t152000z-f40f39d13 diff --git a/cache_images/debian_setup.sh b/cache_images/debian_setup.sh index cb9250c5..4e69ce65 100644 --- a/cache_images/debian_setup.sh +++ b/cache_images/debian_setup.sh @@ -57,6 +57,10 @@ fi nm_ignore_cni +if ! ((CONTAINER)); then + initialize_local_cache_registry +fi + finalize echo "SUCCESS!" diff --git a/cache_images/fedora_setup.sh b/cache_images/fedora_setup.sh index a405a7ad..3ff1ea6d 100644 --- a/cache_images/fedora_setup.sh +++ b/cache_images/fedora_setup.sh @@ -49,6 +49,8 @@ if ! ((CONTAINER)); then else msg "Enabling cgroup management from containers" ooe.sh $SUDO setsebool -P container_manage_cgroup true + + initialize_local_cache_registry fi fi diff --git a/cache_images/local-cache-registry b/cache_images/local-cache-registry new file mode 100755 index 00000000..f4643a73 --- /dev/null +++ b/cache_images/local-cache-registry @@ -0,0 +1,341 @@ +#! 
/bin/bash +# +# local-cache-registry - set up and manage a local registry with cached images +# +# Used in containers CI, to reduce exposure to registry flakes. +# +# We start with the docker registry image. Pull it, extract the registry +# binary and config, tweak the config, and create a systemd unit file that +# will start the registry at boot. +# +# We also populate that registry with a (hardcoded) list of container +# images used in CI tests. That way a CI VM comes up already ready, +# and CI tests do not need to do remote pulls. The image list is +# hardcoded right here in this script file, in the automation_images +# repo. See below for reasons. +# +ME=$(basename $0) + +############################################################################### +# BEGIN defaults + +# FQIN of registry image. From this image, we extract the registry to run. +PODMAN_REGISTRY_IMAGE=quay.io/libpod/registry:2.8.2 + +# Fixed path to registry setup. This is the directory used by the registry. +PODMAN_REGISTRY_WORKDIR=/var/cache/local-registry + +# Fixed port on which registry listens. This is hardcoded and must be +# shared knowledge among all CI repos that use this registry. +REGISTRY_PORT=60333 + +# Podman binary to run +PODMAN=${PODMAN:-/usr/bin/podman} + +# Temporary directories for podman, so we don't clobber any system files. +# Wipe them upon script exit. +PODMAN_TMPROOT=$(mktemp -d --tmpdir $ME.XXXXXXX) +trap 'status=$?; rm -rf $PODMAN_TMPROOT && exit $status' 0 + +# Images to cache. Default prefix is "quay.io/libpod/" +# +# It seems evil to hardcode this list as part of the script itself +# instead of a separate file or resource but there's a good reason: +# keeping code and data together in one place makes it possible for +# a podman (and some day other repo?) developer to run a single +# command, contrib/cirrus/get-local-registry-script, which will +# fetch this script and allow the dev to run it to start a local +# registry on their system. 
+# +# As of 2024-07-02 this list includes podman and buildah images +# +# FIXME: periodically run this to look for no-longer-needed images: +# +# for i in $(sed -ne '/IMAGELIST=/,/^[^ ]/p' &2 + exit 1 +} + +# END help messages +############################################################################### +# BEGIN option processing + +while getopts "i:P:hv" opt; do + case "$opt" in + i) PODMAN_REGISTRY_IMAGE=$OPTARG ;; + P) REGISTRY_PORT=$OPTARG ;; + h) echo "$usage"; exit 0;; + v) verbose=1 ;; + \?) echo "Run '$ME -h' for help" >&2; exit 1;; + esac +done +shift $((OPTIND-1)) + +# END option processing +############################################################################### +# BEGIN helper functions + +function podman() { + ${PODMAN} --root ${PODMAN_TMPROOT}/root \ + --runroot ${PODMAN_TMPROOT}/runroot \ + --tmpdir ${PODMAN_TMPROOT}/tmp \ + "$@" +} + +############### +# must_pass # Run a command quietly; abort with error on failure +############### +function must_pass() { + local log=${PODMAN_TMPROOT}/log + + # Test the command inside 'if': under 'set -e' a bare failing command would exit before the log is shown + if ! "$@" &> "$log"; then + echo "$ME: Command failed: $*" >&2 + cat "$log" >&2 + + # If we ever get here, it's a given that the registry is not running. + exit 1 + fi +} + +################### +# wait_for_port # Returns once port is available on localhost +################### +function wait_for_port() { + local port=$1 # Numeric port + + local host=127.0.0.1 + local _timeout=5 + + # Wait + while [ $_timeout -gt 0 ]; do + { exec {unused_fd}<> /dev/tcp/$host/$port; } &>/dev/null && return + sleep 1 + _timeout=$(( $_timeout - 1 )) + done + + die "Timed out waiting for port $port" +} + +################# +# cache_image # (singular) fetch one remote image +################# +function cache_image() { + local img=$1 + + # Almost all our images are under libpod; no need to repeat that part + if ! 
expr "$img" : "^\(.*\)/" >/dev/null; then + img="libpod/$img" + fi + + # Helper variables stay local so they do not leak into the rest of the script + local registry retry + + # Almost all our images are from quay.io, but "domain.tld" prefix overrides + registry=$(expr "$img" : "^\([^/.]\+\.[^/]\+\)/" || true) + if [[ -n "$registry" ]]; then + img=$(expr "$img" : "[^/]\+/\(.*\)") + else + registry=quay.io + fi + + echo + echo "...caching: $registry / $img" + + # FIXME: inspect, and only pull if missing? + + # Four attempts; sleep 30s, 60s, 90s after a failure (final value 0 = no sleep before giving up) + for retry in 1 2 3 0;do + skopeo --registries-conf /dev/null \ + copy --all --dest-tls-verify=false \ + docker://$registry/$img \ + docker://127.0.0.1:${REGISTRY_PORT}/$img \ + && return + + sleep $((retry * 30)) + done + + die "Too many retries; unable to cache $registry/$img" +} + +################## +# cache_images # (plural) fetch all remote images +################## +function cache_images() { + local img + for img in "${IMAGELIST[@]}"; do + cache_image "$img" + done +} + +# END helper functions +############################################################################### +# BEGIN action processing + +################### +# do_initialize # Start, then cache images +################### +# +# Intended to be run only from automation_images repo, or by developer +# on local workstation. This should never be run from podman/buildah/etc +# because it defeats the entire purpose of the cache -- a dead registry +# will cause this to fail. +# +function do_initialize() { + # This action can only be run as root + if [[ "$(id -u)" != "0" ]]; then + die "this script must be run as root" + fi + + # For the next few commands, die on any error + set -e + + mkdir -p ${PODMAN_REGISTRY_WORKDIR} + + # Copy of this script + if ! 
[[ $0 =~ ${PODMAN_REGISTRY_WORKDIR} ]]; then + rm -f ${PODMAN_REGISTRY_WORKDIR}/$ME + cp $0 ${PODMAN_REGISTRY_WORKDIR}/$ME + fi + + # Give it three tries, to compensate for flakes + podman pull ${PODMAN_REGISTRY_IMAGE} &>/dev/null || + podman pull ${PODMAN_REGISTRY_IMAGE} &>/dev/null || + must_pass podman pull ${PODMAN_REGISTRY_IMAGE} + + # Mount the registry image... + registry_root=$(podman image mount ${PODMAN_REGISTRY_IMAGE}) + + # ...copy the registry binary into our own bin... + cp ${registry_root}/bin/registry /usr/bin/docker-registry + + # ...and copy the config, making a few adjustments to it. + sed -e "s;/var/lib/registry;${PODMAN_REGISTRY_WORKDIR};" \ + -e "s;:5000;127.0.0.1:${REGISTRY_PORT};" \ + < ${registry_root}/etc/docker/registry/config.yml \ + > /etc/local-registry.yml + podman image umount -a + + # Create a systemd unit file. Enable it (so it starts at boot) + # and also start it --now. + cat > /etc/systemd/system/$ME.service <