From 02da51a8b2b732e7ad98d2a1d433f8aea372a471 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Wed, 4 Sep 2024 15:46:14 -0400 Subject: [PATCH 1/3] tests/upgrade: just reboot if any configuration changes were needed I've seen nodes get into a stuck state if we made configuration changes to the node in order to workaround the various starting state issues over time. Let's just do a whole instance reboot if we made any configuration changes to the instance to guarantee we don't end up in a situation where the node is waiting forever to upgrade. --- tests/kola/upgrade/extended/test.sh | 46 ++++++++++++++++------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/tests/kola/upgrade/extended/test.sh b/tests/kola/upgrade/extended/test.sh index d09173ac6..488d107c6 100755 --- a/tests/kola/upgrade/extended/test.sh +++ b/tests/kola/upgrade/extended/test.sh @@ -53,12 +53,12 @@ set -eux -o pipefail . /etc/os-release # for $VERSION_ID -need_zincati_restart='false' +need_restart='false' # delete the disabling of updates that was done by the test framework if [ -f /etc/zincati/config.d/90-disable-auto-updates.toml ]; then rm -f /etc/zincati/config.d/90-disable-auto-updates.toml - need_zincati_restart='true' + need_restart='true' fi # Early `next` releases before [1] had auto-updates disabled too. Let's @@ -66,7 +66,7 @@ fi # [1] https://github.com/coreos/fedora-coreos-config/commit/99eab318998441760cca224544fc713651f7a16d if [ -f /etc/zincati/config.d/90-disable-on-non-production-stream.toml ]; then rm -f /etc/zincati/config.d/90-disable-on-non-production-stream.toml - need_zincati_restart='true' + need_restart='true' fi get_booted_deployment_json() { @@ -107,35 +107,41 @@ grab-gpg-keys() { # https://github.com/coreos/fedora-coreos-tracker/issues/749 max_version=${target_version:0:2} # i.e. 36, 37, 38, etc.. 
for ver in $(seq $VERSION_ID $max_version); do - test -e "/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${ver}-primary" && continue - curl -L "https://src.fedoraproject.org/rpms/fedora-repos/raw/rawhide/f/RPM-GPG-KEY-fedora-${ver}-primary" | \ - sudo tee "/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${ver}-primary" - sudo chcon -v --reference="/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${VERSION_ID}-primary" "/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${ver}-primary" + file="/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${ver}-primary" + if [ ! -e $file ]; then + need_restart='true' + curl -L "https://src.fedoraproject.org/rpms/fedora-repos/raw/rawhide/f/RPM-GPG-KEY-fedora-${ver}-primary" | \ + sudo tee $file + sudo chcon -v --reference="/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-${VERSION_ID}-primary" $file + fi done - # restart Zincati in case the process had been kicked off earlier - # than this script ran. - need_zincati_restart='true' } fix-update-url() { # We switched to non stg URL in zincati v0.0.10 [1]. For older clients # we need to update the runtime configuration of zincati to get past the problem. # [1] https://github.com/coreos/zincati/commit/1d73801ccd015cdce89f082cb1eeb9b4b8335760 - cat <<'EOF' > /run/zincati/config.d/50-fedora-coreos-cincinnati.toml + file='/etc/zincati/config.d/50-fedora-coreos-cincinnati.toml' + if [ ! -e $file ]; then + need_restart='true' + cat > $file <<'EOF' [cincinnati] base_url= "https://updates.coreos.fedoraproject.org" EOF - need_zincati_restart='true' + fi } fix-allow-downgrade() { # Older FCOS will complain about an upgrade target being 'chronologically older than current' # This is documented in https://github.com/coreos/fedora-coreos-tracker/issues/481 # We can workaround the problem via a config dropin: - cat <<'EOF' > /run/zincati/config.d/99-fedora-coreos-allow-downgrade.toml + file='/etc/zincati/config.d/99-fedora-coreos-allow-downgrade.toml' + if [ ! 
-e $file ]; then + need_restart='true' + cat > $file <<'EOF' updates.allow_downgrade = true EOF - need_zincati_restart='true' + fi } ok "Reached version: $version" @@ -195,14 +201,14 @@ esac if vereq $version $last_release; then systemctl stop zincati rpm-ostree rebase "fedora-compose:fedora/$(arch)/coreos/${target_stream}" $target_version - /tmp/autopkgtest-reboot reboot # execute the reboot + /tmp/autopkgtest-reboot $version # execute the reboot sleep infinity fi -# Restart Zincati if configuration was changed -if [ "${need_zincati_restart}" == "true" ]; then - rpm-ostree cancel # in case anything was already in progress - systemctl restart zincati +# Restart if configuration was changed +if [ "${need_restart}" == "true" ]; then + /tmp/autopkgtest-reboot setup + sleep infinity fi # Watch the Zincati logs to see if it got a lead on a new update. @@ -219,7 +225,7 @@ set -o pipefail # OK update has been initiated, prepare for reboot and loop to show # status of zincati and rpm-ostreed -/tmp/autopkgtest-reboot-prepare reboot +/tmp/autopkgtest-reboot-prepare $version while true; do sleep 20 systemctl status rpm-ostreed zincati --lines=0 From c9c4341ecbf0b184f7596378d3faa00203d2ce9b Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Wed, 4 Sep 2024 21:00:43 -0400 Subject: [PATCH 2/3] tests/upgrade: add workaround for cgroups v1 origins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Fedora 41 now apparently cgroups v1 is no longer supported at all: ``` [ 2.070050] Run /init as init process [!!!!!!] Refusing to run under cgroup v1, SY… specified on kernel command line. ``` So when upgrading from older systems that defaulted to cgroups v1 we need to just go ahead and move them over to cgroups v2 so they'll be able to update fully to the latest FCOS releases being built today. 
--- tests/kola/upgrade/extended/test.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/kola/upgrade/extended/test.sh b/tests/kola/upgrade/extended/test.sh index 488d107c6..988acaaee 100755 --- a/tests/kola/upgrade/extended/test.sh +++ b/tests/kola/upgrade/extended/test.sh @@ -144,6 +144,16 @@ EOF fi } +move-to-cgroups-v2() { + # When upgrading to latest F41+ the system won't even boot on cgroups v1 + if grep -q unified_cgroup_hierarchy /proc/cmdline; then + systemctl stop zincati + rpm-ostree cancel + rpm-ostree kargs --delete=systemd.unified_cgroup_hierarchy + need_restart='true' + fi +} + ok "Reached version: $version" # Are we all the way at the desired target version? @@ -179,16 +189,19 @@ fi case "$stream" in 'next') verlt $version '35.20211119.1.0' && grab-gpg-keys + verlt $version '34.20210413.1.0' && move-to-cgroups-v2 verlt $version '32.20200517.1.0' && fix-allow-downgrade verlt $version '32.20200505.1.0' && fix-update-url ;; 'testing') verlt $version '35.20211119.2.0' && grab-gpg-keys + verlt $version '34.20210529.2.0' && move-to-cgroups-v2 verlt $version '31.20200517.2.0' && fix-allow-downgrade verlt $version '31.20200505.2.0' && fix-update-url ;; 'stable') verlt $version '35.20211119.3.0' && grab-gpg-keys + verlt $version '34.20210529.3.0' && move-to-cgroups-v2 verlt $version '31.20200517.3.0' && fix-allow-downgrade verlt $version '31.20200505.3.0' && fix-update-url ;; From c3b127ed0b14c2adf08d27d98bc97c9e635c41cc Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Wed, 4 Sep 2024 21:05:30 -0400 Subject: [PATCH 3/3] Update cgroups warning with new information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like F41+ systems will simply refuse to boot with a message like: ``` [ 2.070050] Run /init as init process [!!!!!!] Refusing to run under cgroup v1, SY… specified on kernel command line. 
``` --- .../15fcos/usr/libexec/coreos-check-cgroups-version | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/overlay.d/15fcos/usr/libexec/coreos-check-cgroups-version b/overlay.d/15fcos/usr/libexec/coreos-check-cgroups-version index 5e0ec8b1b..64a9793e0 100755 --- a/overlay.d/15fcos/usr/libexec/coreos-check-cgroups-version +++ b/overlay.d/15fcos/usr/libexec/coreos-check-cgroups-version @@ -12,11 +12,10 @@ motd_path=/run/motd.d/30_cgroupsv1_warning.motd cat << EOF > "${motd_path}" ${warn} ########################################################################## -WARNING: This system is using cgroups v1. For increased reliability -it is strongly recommended to migrate this system and your workloads -to use cgroups v2. A future version of Podman will also drop support -for cgroups v1. For instructions on how to adjust kernel arguments -to use cgroups v2, see: +WARNING: This system is using cgroups v1. In Fedora 41 this system will +no longer continue to boot. It is strongly recommended to migrate this +system and your workloads to use cgroups v2. For instructions on how to +adjust kernel arguments to use cgroups v2, see: https://docs.fedoraproject.org/en-US/fedora-coreos/kernel-args/ To disable this warning, use: