From 2d46d0537361c44e349004634523cdc11b933b0f Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Tue, 17 Oct 2023 14:52:59 -0400 Subject: [PATCH 1/2] Implement bare-metal Mac M1 podman-machine testing Setup and execute podman machine testing on bare-metal M1 Macs using a pool of shared and semi-persistent hosts. Automated and manual processes outside this repository are responsible for providing and maintaining all hosts. Ref. https://github.com/containers/automation/tree/main/mac_pw_pool Update the `localmachine` make target to standardize execution across platforms. Update/simplify podman-machine e2e README to reflect current reality. Warning: This CI setup and supporting infrastructure was developed in favor of expediency vs reliability and stability. There are many possible failure-modes (known and unknown) which may lead to undefined test behaviors. Future work may address some of these as they are encountered or discovered. [NO NEW TESTS NEEDED] Signed-off-by: Chris Evich --- .cirrus.yml | 85 ++++++++++++++++++++++++++++------- Makefile | 4 +- contrib/cirrus/mac_cleanup.sh | 26 +++++++++++ contrib/cirrus/mac_env.sh | 26 +++++++++++ contrib/cirrus/mac_setup.sh | 33 ++++++++++++++ pkg/machine/e2e/README.md | 80 ++++++++++++++++----------------- 6 files changed, 194 insertions(+), 60 deletions(-) create mode 100755 contrib/cirrus/mac_cleanup.sh create mode 100755 contrib/cirrus/mac_env.sh create mode 100755 contrib/cirrus/mac_setup.sh diff --git a/.cirrus.yml b/.cirrus.yml index c0ba7841e831..24fe708cd582 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -446,19 +446,25 @@ osx_alt_build_task: only_if: *no_rhel_release depends_on: - build - env: - DISTRO_NV: macos-ventura - VM_IMAGE_NAME: ghcr.io/cirruslabs/${DISTRO_NV}-base:latest - CTR_FQIN: notused - # OSX platform variation prevents this being included in alt_build_task - TEST_FLAVOR: "altbuild" - ALT_NAME: 'OSX Cross' - osx_instance: - image: $VM_IMAGE_NAME - setup_script: - - brew install go - - brew install 
go-md2man - - go version + persistent_worker: &mac_pw + labels: + os: darwin + arch: arm64 + env: &mac_env + CIRRUS_SHELL: "/bin/bash" # sh is the default + CIRRUS_WORKING_DIR: "$HOME/ci/task-${CIRRUS_TASK_ID}" # Isolation: $HOME will be set to "ci" dir. + # Prevent cache-pollution from one task to the next. + GOPATH: "$CIRRUS_WORKING_DIR/.go" + GOCACHE: "$CIRRUS_WORKING_DIR/.go/cache" + GOENV: "$CIRRUS_WORKING_DIR/.go/support" + GOSRC: "$HOME/ci/task-${CIRRUS_TASK_ID}" + # This host is/was shared with potentially many other CI tasks. + # The previous task may have been canceled or aborted. + prep_script: &mac_cleanup "contrib/cirrus/mac_cleanup.sh" + basic_build_script: + - make .install.ginkgo + - make podman-remote + - make podman-mac-helper build_amd64_script: - make podman-remote-release-darwin_amd64.zip build_arm64_script: @@ -467,11 +473,13 @@ osx_alt_build_task: - cd contrib/pkginstaller - make ARCH=amd64 NO_CODESIGN=1 pkginstaller - make ARCH=aarch64 NO_CODESIGN=1 pkginstaller - # This task cannot make use of the shared repo.tbz artifact and must - # produce a new repo.tbz artifact for consumption by 'artifacts' task. + # Produce a new repo.tbz artifact for consumption by dependent tasks. repo_prep_script: *repo_prep repo_artifacts: *repo_artifacts - always: *runner_stats + # This host is/was shared with potentially many other CI tasks. + # Ensure nothing is left running while waiting for the next task. + always: + task_cleanup_script: *mac_cleanup # Build freebsd release natively on a FreeBSD VM. 
@@ -793,6 +801,50 @@ podman_machine_windows_task: main_script: ".\\repo\\contrib\\cirrus\\win-podman-machine-main.ps1" +podman_machine_mac_task: + name: *std_name_fmt + alias: podman_machine_mac + only_if: *no_rhel_release + depends_on: + - osx_alt_build + - local_integration_test + - remote_integration_test + - container_integration_test + - rootless_integration_test + persistent_worker: *mac_pw + env: + <<: *mac_env + # Consumed by podman-machine ginkgo tests + CONTAINERS_MACHINE_PROVIDER: "applehv" + # TODO: Should not require a special image, for now it does. + # Simply remove the line below when a mac image is GA. + MACHINE_IMAGE: "https://fedorapeople.org/groups/podman/testing/applehv/arm64/fedora-coreos-38.20230925.dev.0-applehv.aarch64.raw.gz" + # Values necessary to populate std_name_fmt alias + TEST_FLAVOR: "machine-mac" + DISTRO_NV: "darwin" + PRIV_NAME: "rootless" # intended use-case + clone_script: # artifacts from osx_alt_build_task + - mkdir -p $CIRRUS_WORKING_DIR + - cd $CIRRUS_WORKING_DIR + - $ARTCURL/OSX%20Cross/repo/repo.tbz + - tar xjf repo.tbz + # This host is/was shared with potentially many other CI tasks. + # The previous task may have been canceled or aborted. + prep_script: *mac_cleanup + setup_script: "contrib/cirrus/mac_setup.sh" + env_script: "contrib/cirrus/mac_env.sh" + # TODO: Timeout bumped b/c initial image download (~5min) and VM + # resize (~2min) causes test-timeout (90s default). Should + # tests deal with this internally? + smoke_test_script: + - MACHINE_TEST_TIMEOUT=500 make localmachine FOCUS_FILE="basic_test.go" + test_script: + - make localmachine + # This host is/was shared with potentially many other CI tasks. + # Ensure nothing is left running while waiting for the next task. + always: + task_cleanup_script: *mac_cleanup + # Always run subsequent to integration tests. While parallelism is lost # with runtime, debugging system-test failures can be more challenging # for some golang developers. 
Otherwise the following tasks run across @@ -1062,6 +1114,7 @@ success_task: # TODO: issue #20548; These tests are new and mostly fail. # Ignore status until tests, scripts, and/or environment stabalize. # - podman_machine_windows + - podman_machine_mac - local_system_test - local_system_test_aarch64 - remote_system_test diff --git a/Makefile b/Makefile index 8af2a19e9eac..76753b88805a 100644 --- a/Makefile +++ b/Makefile @@ -624,8 +624,8 @@ localintegration: test-binaries ginkgo remoteintegration: test-binaries ginkgo-remote .PHONY: localmachine -localmachine: test-binaries .install.ginkgo - $(MAKE) ginkgo-run GINKGO_PARALLEL=n GINKGOWHAT=pkg/machine/e2e/. HACK= +localmachine: + $(MAKE) ginkgo-run GINKGO_PARALLEL=n TAGS="$(REMOTETAGS)" GINKGO_FLAKE_ATTEMPTS=0 FOCUS_FILE=$(FOCUS_FILE) GINKGOWHAT=pkg/machine/e2e/. HACK= .PHONY: localsystem localsystem: diff --git a/contrib/cirrus/mac_cleanup.sh b/contrib/cirrus/mac_cleanup.sh new file mode 100755 index 000000000000..37e8081b8f4f --- /dev/null +++ b/contrib/cirrus/mac_cleanup.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# This script is intended to be called by Cirrus-CI on a Mac M1 persistent worker. +# It performs a best-effort attempt at cleaning up from one task execution to the next. +# Since it runs both before and after tasks, it must exit cleanly if there was a cleanup +# failure (i.e. file or directory not found). + +# Help anybody debugging side-effects, since failures are ignored (by necessity). +set +e -x + +# These are the main processes which could leak out of testing. +killall podman vfkit gvproxy make go ginkgo + +# This is defined as $TMPDIR during setup. Name must be kept +# "short" as sockets may reside here. Darwin suffers from +# the same limited socket-pathname character-length restriction +# as Linux. +rm -rf /private/tmp/ci/* /private/tmp/ci/.??* + +# Don't clobber the $CIRRUS_WORKING_DIR for this (running) task. 
+# shellcheck disable=SC2154 +find "${ORIGINAL_HOME:-$HOME}/ci" -mindepth 1 -maxdepth 1 \ + -not -name "*task-${CIRRUS_TASK_ID}*" -prune -exec rm -rf '{}' + + +# Bash scripts exit with the status of the last command. +true diff --git a/contrib/cirrus/mac_env.sh b/contrib/cirrus/mac_env.sh new file mode 100755 index 000000000000..35431de5ff88 --- /dev/null +++ b/contrib/cirrus/mac_env.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -euo pipefail + +# This script is intended to be called by Cirrus-CI on a Mac M1 persistent worker. +# It runs /after/ `mac_setup.sh` to help developers debug any environment +# related issues. It must not make any actual changes to the environment. + +# Many variables can affect operations, make them all known to assist debugging. +echo "Selection of current env. vars:" +for env_var_name in $(awk 'BEGIN{for(v in ENVIRON) print v}' | grep -Eiv '(^PATH$)|(^BASH_FUNC)|(^_.*)' | sort) +do + echo " ${env_var_name}=${!env_var_name}" +done + +# The latest toolchain is always installed when instances are created. Make it known +# what version that actually is. +go version + +# Golang is sensitive to a collection of key variables. Make them known to assist +# with any debugging. N/B: Most filepath values should point somewhere under $HOME/ci/ +go env + +# The latest version is installed system-wide when instances are created. Make the +# current version known. +vfkit --version diff --git a/contrib/cirrus/mac_setup.sh b/contrib/cirrus/mac_setup.sh new file mode 100755 index 000000000000..3ab9163baf23 --- /dev/null +++ b/contrib/cirrus/mac_setup.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# This script is intended to be called by Cirrus-CI on a Mac M1 persistent worker. +# It runs after the preparatory `mac_cleanup.sh` to perform all the user-level +# environment setup required to execute testing. It assumes whatever system-wide +# setup is required, has already happened and was successful. 
+ +set -euo pipefail + +# The otherwise standard `/etc/ci_environment` file cannot be used in this +# context, because the system is shared for multiple tasks. Instead, persist +# env. vars required during /subsequent/ testing steps via a "magic" Cirrus-CI +# mechanism. These cannot be set in the task YAML because they would interfere +# with repo. cloning and task preparation. +# Ref: +# https://cirrus-ci.org/guide/tips-and-tricks/#setting-environment-variables-from-scripts + +# Tests expect to call compiled binaries first, make sure they're found first. +# shellcheck disable=SC2154 +echo "PATH=$CIRRUS_WORKING_DIR/bin/darwin:$PATH" >> $CIRRUS_ENV + +# Post-task cleanup needs to know the actual user home directory +# shellcheck disable=SC2154 +echo "ORIGINAL_HOME=$HOME" >> $CIRRUS_ENV + +# Help isolate CI-operations from system-operations and simplify task cleanup. +# shellcheck disable=SC2154 +echo "HOME=$HOME/ci" >> $CIRRUS_ENV +# shellcheck disable=SC2154 +echo "TMPDIR=/private/tmp/ci" >> $CIRRUS_ENV + +# Removed completely during cleanup. +mkdir -p /private/tmp/ci diff --git a/pkg/machine/e2e/README.md b/pkg/machine/e2e/README.md index 7b0637a1d436..5a1e324a20c9 100644 --- a/pkg/machine/e2e/README.md +++ b/pkg/machine/e2e/README.md @@ -1,42 +1,38 @@ -# Working README for running the machine tests - -Note: you must not have any machines defined before running tests -## Linux - -### QEMU - -`make localmachine` - -## Microsoft Windows - -### HyperV - -1. Open a powershell as admin -1. $env:CONTAINERS_MACHINE_PROVIDER="hyperv" -1. `./winmake localmachine` - -Note: To run specific test files, add the test files to the end of the winmake command: - -`./winmake localmachine "basic_test.go start_test.go"` - -### WSL -1. Open a powershell as a regular user -1. Build and copy win-sshproxy into bin/ -1. 
`./winmake localmachine` - -Note: To run specific test files, add the test files to the end of the winmake command: - -`./winmake localmachine "basic_test.go start_test.go"` - -## MacOS - -### Apple Hypervisor - -1. `make podman-remote` -1. `make .install.ginkgo` -1. `export TMPDIR=/Users/` -1. `export CONTAINERS_MACHINE_PROVIDER="applehv"` -1. `export MACHINE_IMAGE="https://fedorapeople.org/groups/podman/testing/applehv/arm64/fedora-coreos-38.20230925.dev.0-applehv.aarch64.raw.gz"` -1. `./test/tools/build/ginkgo -vv --tags "remote exclude_graphdriver_btrfs btrfs_noversion exclude_graphdriver_devicemapper containers_image_openpgp remote" -timeout=90m --trace --no-color pkg/machine/e2e/.` - -Note: Add `--focus-file "basic_test.go" ` to only run basic test +# Working README for running the machine tests + +Note: you must not have any machines defined before running tests +## Linux + +### QEMU + +`make localmachine` + +## Microsoft Windows + +### HyperV + +1. Open a powershell as admin +1. $env:CONTAINERS_MACHINE_PROVIDER="hyperv" +1. `./winmake localmachine` + +Note: To run specific test files, add the test files to the end of the winmake command: + +`./winmake localmachine "basic_test.go start_test.go"` + +### WSL +1. Open a powershell as a regular user +1. Build and copy win-sshproxy into bin/ +1. `./winmake localmachine` + +Note: To run specific test files, add the test files to the end of the winmake command: + +`./winmake localmachine "basic_test.go start_test.go"` + +## MacOS + +### Apple Hypervisor + +1. `make podman-remote` +1. `export CONTAINERS_MACHINE_PROVIDER="applehv"` +1. `export MACHINE_IMAGE="https://fedorapeople.org/groups/podman/testing/applehv/arm64/fedora-coreos-38.20230925.dev.0-applehv.aarch64.raw.gz"` +1. 
`make localmachine` (Add `FOCUS_FILE=basic_test.go` to only run basic test) From f1dc126bf6c3a33d72e3a49e81630fd1ce5c73f5 Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Thu, 30 Nov 2023 11:00:10 -0500 Subject: [PATCH 2/2] Do not aggregate failing mac test status Issue Ref: #20853 Allow the tests to fail, but don't block merging PRs. This commit should be reverted when #20853 is resolved. Signed-off-by: Chris Evich --- .cirrus.yml | 3 ++- contrib/cirrus/cirrus_yaml_test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 24fe708cd582..f22e0bc34bf9 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1114,7 +1114,8 @@ success_task: # TODO: issue #20548; These tests are new and mostly fail. # Ignore status until tests, scripts, and/or environment stabalize. # - podman_machine_windows - - podman_machine_mac + # TODO: Issue #20853; Tests mostly fail then timeout after an hour. + # - podman_machine_mac - local_system_test - local_system_test_aarch64 - remote_system_test diff --git a/contrib/cirrus/cirrus_yaml_test.py b/contrib/cirrus/cirrus_yaml_test.py index 196b745a2dec..b21a3fd792fd 100755 --- a/contrib/cirrus/cirrus_yaml_test.py +++ b/contrib/cirrus/cirrus_yaml_test.py @@ -27,7 +27,7 @@ class TestDependsOn(TestCaseBase): ALL_TASK_NAMES = None SUCCESS_DEPS_EXCLUDE = set(['success', 'bench_stuff', 'artifacts', - 'release', 'release_test', 'podman_machine_windows']) + 'release', 'release_test', 'podman_machine_windows', 'podman_machine_mac']) def setUp(self): super().setUp()