From 9130f64cfd7cead86adb7ff0ebcf8792891b4f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Mon, 22 Jan 2024 11:46:54 -0800 Subject: [PATCH] kata-containers: adapt for msft-main sources * Cleans up runtime make flags: * Set DEFVIRTIOFSDAEMON to the Rust binary path. * The following were redundant as we were setting the default: * DEFSHAREDFS (now DEFSHAREDFS_CLH_VIRTIOFS) * DEFVIRTIOFSCACHESIZE * DEFSANDBOXCGROUPONLY * DEFSTATICRESOURCEMGMT_CLH * The following were referring to macros that do NOT expand to a value: * QEMUPATH * MACHINETYPE * FEATURE_SELINUX does not exist in the Kata source code. * DEFENABLEANNOTATIONS should not be set to ".*". * Removes all patches. See https://github.com/microsoft/kata-containers/pull/122#issuecomment-1847896264 for a discussion. Since then, the memory patch was included in msft-main and patch 0004 is included in microsoft/kata-containers#154. * Adds BuildRequires for devmapper code paths in msft-main. * Requires the new Rust virtiofsd 1.8.0. * Ensures sed doesn't break symlinks. For configuration.toml, sed would convert that file from a symlink to a regular file, so it'd become out of sync with configuration-clh.toml. rootfs.sh isn't a symlink, but the flag was added there as well for good measure. 
--- ...emd-kernel-cmdline-params-for-initrd.patch | 25 -- ...llow-10-seconds-for-VM-creation-star.patch | 28 -- ...sbuilder-Add-support-for-CBL-Mariner.patch | 122 -------- ...-9671-Wait-for-a-possibly-slow-Guest.patch | 29 -- ...-Merged-PR-9805-Add-support-for-MSHV.patch | 27 -- ...-Fix-enable_debug-for-hypervisor.clh.patch | 28 -- ...56-shim-avoid-memory-hotplug-timeout.patch | 28 -- ...t-for-variables-that-are-not-mutated.patch | 54 ---- SPECS/kata-containers/kata-containers.spec | 35 +-- ...untime-reduce-uvm-high-mem-footprint.patch | 283 ------------------ 10 files changed, 13 insertions(+), 646 deletions(-) delete mode 100644 SPECS/kata-containers/0001-Append-systemd-kernel-cmdline-params-for-initrd.patch delete mode 100644 SPECS/kata-containers/0001-Merged-PR-9607-Allow-10-seconds-for-VM-creation-star.patch delete mode 100644 SPECS/kata-containers/0001-osbuilder-Add-support-for-CBL-Mariner.patch delete mode 100644 SPECS/kata-containers/0002-Merged-PR-9671-Wait-for-a-possibly-slow-Guest.patch delete mode 100644 SPECS/kata-containers/0003-Merged-PR-9805-Add-support-for-MSHV.patch delete mode 100644 SPECS/kata-containers/0004-Merged-PR-9806-Fix-enable_debug-for-hypervisor.clh.patch delete mode 100644 SPECS/kata-containers/0005-Merged-PR-9956-shim-avoid-memory-hotplug-timeout.patch delete mode 100644 SPECS/kata-containers/drop-mut-for-variables-that-are-not-mutated.patch delete mode 100644 SPECS/kata-containers/runtime-reduce-uvm-high-mem-footprint.patch diff --git a/SPECS/kata-containers/0001-Append-systemd-kernel-cmdline-params-for-initrd.patch b/SPECS/kata-containers/0001-Append-systemd-kernel-cmdline-params-for-initrd.patch deleted file mode 100644 index 8744c2c5e96..00000000000 --- a/SPECS/kata-containers/0001-Append-systemd-kernel-cmdline-params-for-initrd.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 0503cd61a56ed09de60981fedecc226df3845860 Mon Sep 17 00:00:00 2001 -From: dallasd1 -Date: Wed, 26 Jul 2023 08:40:44 -0700 -Subject: [PATCH] Append systemd kernel 
cmdline params for initrd - ---- - src/runtime/pkg/katautils/create.go | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go -index 67ea03dcf..2c829a691 100644 ---- a/src/runtime/pkg/katautils/create.go -+++ b/src/runtime/pkg/katautils/create.go -@@ -57,7 +57,7 @@ func getKernelParams(needSystemd, trace bool) []vc.Param { - } - - func needSystemd(config vc.HypervisorConfig) bool { -- return config.ImagePath != "" -+ return config.ImagePath != "" || config.InitrdPath != "" - } - - // HandleFactory set the factory --- -2.17.1 - diff --git a/SPECS/kata-containers/0001-Merged-PR-9607-Allow-10-seconds-for-VM-creation-star.patch b/SPECS/kata-containers/0001-Merged-PR-9607-Allow-10-seconds-for-VM-creation-star.patch deleted file mode 100644 index 58c3ef06405..00000000000 --- a/SPECS/kata-containers/0001-Merged-PR-9607-Allow-10-seconds-for-VM-creation-star.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 590604dca0f6a0636933be21fc6a490c0f17af34 Mon Sep 17 00:00:00 2001 -From: Daniel Mihai -Date: Tue, 16 Aug 2022 17:01:12 +0000 -Subject: [PATCH 2/3] Merged PR 9607: Allow 10 seconds for VM creation + start - -Allow 10 seconds for VM creation + start ---- - src/runtime/virtcontainers/clh.go | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go -index 71bd931..444d9de 100644 ---- a/src/runtime/virtcontainers/clh.go -+++ b/src/runtime/virtcontainers/clh.go -@@ -688,7 +688,9 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { - } - clh.state.PID = pid - -- ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) -+ // FIXME - for now allow more than one second to create and start the VM. 
-+ //ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) -+ ctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - - if err := clh.bootVM(ctx); err != nil { --- -2.25.1 - diff --git a/SPECS/kata-containers/0001-osbuilder-Add-support-for-CBL-Mariner.patch b/SPECS/kata-containers/0001-osbuilder-Add-support-for-CBL-Mariner.patch deleted file mode 100644 index d7d8b128c83..00000000000 --- a/SPECS/kata-containers/0001-osbuilder-Add-support-for-CBL-Mariner.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 36198274dcb4332f1acd445d2a80854232b1d236 Mon Sep 17 00:00:00 2001 -From: Dallas Delaney -Date: Thu, 26 Jan 2023 14:58:55 -0800 -Subject: [PATCH] osbuilder: Add support for CBL-Mariner - -Add osbuilder support to build a rootfs and image -based on the CBL-Mariner Linux distro - -Fixes: #6462 - -Signed-off-by: Dallas Delaney ---- - tools/osbuilder/README.md | 14 +++++----- - .../rootfs-builder/cbl-mariner/Dockerfile.in | 15 +++++++++++ - .../rootfs-builder/cbl-mariner/config.sh | 10 +++++++ - .../rootfs-builder/cbl-mariner/rootfs_lib.sh | 26 +++++++++++++++++++ - 4 files changed, 58 insertions(+), 7 deletions(-) - create mode 100644 tools/osbuilder/rootfs-builder/cbl-mariner/Dockerfile.in - create mode 100644 tools/osbuilder/rootfs-builder/cbl-mariner/config.sh - create mode 100644 tools/osbuilder/rootfs-builder/cbl-mariner/rootfs_lib.sh - -diff --git a/tools/osbuilder/README.md b/tools/osbuilder/README.md -index 343d2bf60..9415de74e 100644 ---- a/tools/osbuilder/README.md -+++ b/tools/osbuilder/README.md -@@ -80,7 +80,7 @@ filesystem components to generate an initrd. - 3. When generating an image, the initrd is extracted to obtain the base rootfs for - the image. - --Ubuntu is the default distro for building the rootfs, to use a different one, you can set `DISTRO=alpine|clearlinux|debian|ubuntu`. 
-+Ubuntu is the default distro for building the rootfs, to use a different one, you can set `DISTRO=alpine|clearlinux|debian|ubuntu|cbl-mariner`. - For example `make USE_DOCKER=true DISTRO=alpine rootfs` will make an Alpine rootfs using Docker. - - ### Rootfs creation -@@ -209,9 +209,9 @@ of the the osbuilder distributions. - > Note: this table is not relevant for the dracut build method, since it supports - any Linux distribution and architecture where dracut is available. - --| |Alpine |CentOS Stream |Clear Linux |Debian/Ubuntu | --|-- |-- |-- |-- |-- | --|**ARM64** |:heavy_check_mark:|:heavy_check_mark:| | | --|**PPC64le**| |:heavy_check_mark:| |:heavy_check_mark:| --|**s390x** | |:heavy_check_mark:| |:heavy_check_mark:| --|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| -+| |Alpine |CentOS Stream |Clear Linux |Debian/Ubuntu |CBL-Mariner | -+|-- |-- |-- |-- |-- |-- | -+|**ARM64** |:heavy_check_mark:|:heavy_check_mark:| | | | -+|**PPC64le**| |:heavy_check_mark:| |:heavy_check_mark:| | -+|**s390x** | |:heavy_check_mark:| |:heavy_check_mark:| | -+|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| -diff --git a/tools/osbuilder/rootfs-builder/cbl-mariner/Dockerfile.in b/tools/osbuilder/rootfs-builder/cbl-mariner/Dockerfile.in -new file mode 100644 -index 000000000..6fa29807d ---- /dev/null -+++ b/tools/osbuilder/rootfs-builder/cbl-mariner/Dockerfile.in -@@ -0,0 +1,15 @@ -+# Copyright (c) 2023 Microsoft Corporation -+# -+# SPDX-License-Identifier: Apache-2.0 -+ -+ARG IMAGE_REGISTRY=mcr.microsoft.com -+FROM ${IMAGE_REGISTRY}/cbl-mariner/base/core:@OS_VERSION@ -+ -+RUN tdnf -y install \ -+ ca-certificates \ -+ build-essential \ -+ dnf \ -+ git \ -+ tar -+ -+@INSTALL_RUST@ -diff --git a/tools/osbuilder/rootfs-builder/cbl-mariner/config.sh b/tools/osbuilder/rootfs-builder/cbl-mariner/config.sh -new file mode 100644 -index 000000000..694124acd ---- /dev/null -+++ 
b/tools/osbuilder/rootfs-builder/cbl-mariner/config.sh -@@ -0,0 +1,10 @@ -+# Copyright (c) 2023 Microsoft Corporation -+# -+# SPDX-License-Identifier: Apache-2.0 -+ -+OS_NAME=cbl-mariner -+OS_VERSION=${OS_VERSION:-2.0} -+LIBC="gnu" -+PACKAGES="core-packages-base-image ca-certificates" -+[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd" -+[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp" -diff --git a/tools/osbuilder/rootfs-builder/cbl-mariner/rootfs_lib.sh b/tools/osbuilder/rootfs-builder/cbl-mariner/rootfs_lib.sh -new file mode 100644 -index 000000000..0288d4d77 ---- /dev/null -+++ b/tools/osbuilder/rootfs-builder/cbl-mariner/rootfs_lib.sh -@@ -0,0 +1,26 @@ -+# Copyright (c) 2023 Microsoft Corporation -+# -+# SPDX-License-Identifier: Apache-2.0 -+ -+build_rootfs() -+{ -+ # Mandatory -+ local ROOTFS_DIR="$1" -+ -+ [ -z "$ROOTFS_DIR" ] && die "need rootfs" -+ -+ # In case of support EXTRA packages, use it to allow -+ # users add more packages to the base rootfs -+ local EXTRA_PKGS=${EXTRA_PKGS:-""} -+ -+ check_root -+ mkdir -p "${ROOTFS_DIR}" -+ PKG_MANAGER="tdnf" -+ -+ DNF="${PKG_MANAGER} -y --installroot=${ROOTFS_DIR} --noplugins --releasever=${OS_VERSION}" -+ -+ info "install packages for rootfs" -+ $DNF install ${EXTRA_PKGS} ${PACKAGES} -+ -+ rm -rf ${ROOTFS_DIR}/usr/share/{bash-completion,cracklib,doc,info,locale,man,misc,pixmaps,terminfo,zoneinfo,zsh} -+} --- -2.33.8 - diff --git a/SPECS/kata-containers/0002-Merged-PR-9671-Wait-for-a-possibly-slow-Guest.patch b/SPECS/kata-containers/0002-Merged-PR-9671-Wait-for-a-possibly-slow-Guest.patch deleted file mode 100644 index aeac808d058..00000000000 --- a/SPECS/kata-containers/0002-Merged-PR-9671-Wait-for-a-possibly-slow-Guest.patch +++ /dev/null @@ -1,29 +0,0 @@ -From ec322fec7e9c132c4caa0a93175320cb0d8fba73 Mon Sep 17 00:00:00 2001 -From: Daniel Mihai -Date: Mon, 22 Aug 2022 22:02:31 +0000 -Subject: [PATCH 3/3] Merged PR 9671: Wait for a possibly slow Guest - -Wait for a possibly slow Guest - -On some Host VMs it takes 
longer than 30 seconds to connect to -the Agent - e.g., if enable_debug is enabled for [hypervisor.clh]. ---- - src/runtime/config/configuration-clh.toml.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in -index f09c095f..0ce7a98d 100644 ---- a/src/runtime/config/configuration-clh.toml.in -+++ b/src/runtime/config/configuration-clh.toml.in -@@ -289,7 +289,7 @@ block_device_driver = "virtio-blk" - - # Agent connection dialing timeout value in seconds - # (default: 30) --#dial_timeout = 30 -+dial_timeout = 60 - - [runtime] - # If enabled, the runtime will log additional debug messages to the --- -2.17.1 - diff --git a/SPECS/kata-containers/0003-Merged-PR-9805-Add-support-for-MSHV.patch b/SPECS/kata-containers/0003-Merged-PR-9805-Add-support-for-MSHV.patch deleted file mode 100644 index b682c2b8d1f..00000000000 --- a/SPECS/kata-containers/0003-Merged-PR-9805-Add-support-for-MSHV.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 67e4b4ceaefea83a1e5c77a7760fa1f9b37589f4 Mon Sep 17 00:00:00 2001 -From: Daniel Mihai -Date: Thu, 1 Sep 2022 15:07:16 +0000 -Subject: [PATCH 09/10] Merged PR 9805: Add support for MSHV - -Cloud Hypervisor is able to use either /dev/mshv or /dev/kvm. ---- - src/runtime/pkg/resourcecontrol/cgroups.go | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/runtime/pkg/resourcecontrol/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go -index 4210392d..d4608458 100644 ---- a/src/runtime/pkg/resourcecontrol/cgroups.go -+++ b/src/runtime/pkg/resourcecontrol/cgroups.go -@@ -64,7 +64,8 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { - // In order to run Virtual Machines and create virtqueues, hypervisors - // need access to certain character devices in the host, like kvm and vhost-net. 
- hypervisorDevices := []string{ -- "/dev/kvm", // To run virtual machines -+ "/dev/kvm", // To run virtual machines using KVM -+ "/dev/mshv", // To run virtual machines using MSHV - "/dev/vhost-net", // To create virtqueues - "/dev/vfio/vfio", // To access VFIO devices - "/dev/vhost-vsock", // To interact with vsock if --- -2.17.1 - diff --git a/SPECS/kata-containers/0004-Merged-PR-9806-Fix-enable_debug-for-hypervisor.clh.patch b/SPECS/kata-containers/0004-Merged-PR-9806-Fix-enable_debug-for-hypervisor.clh.patch deleted file mode 100644 index 6843b704163..00000000000 --- a/SPECS/kata-containers/0004-Merged-PR-9806-Fix-enable_debug-for-hypervisor.clh.patch +++ /dev/null @@ -1,28 +0,0 @@ -From c844e8011f0726e2a371115c209d4c3d63273b3b Mon Sep 17 00:00:00 2001 -From: Daniel Mihai -Date: Thu, 1 Sep 2022 15:54:16 +0000 -Subject: [PATCH 10/10] Merged PR 9806: Fix enable_debug for [hypervisor.clh] - -Fix error when using enable_debug = true in configuration.toml: - -level=error msg="Error create pseudo tty" -error="open /dev/ptmx: operation not permitted" ---- - src/runtime/pkg/resourcecontrol/cgroups.go | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/runtime/pkg/resourcecontrol/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go -index d4608458..f674e97a 100644 ---- a/src/runtime/pkg/resourcecontrol/cgroups.go -+++ b/src/runtime/pkg/resourcecontrol/cgroups.go -@@ -57,6 +57,7 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { - "/dev/zero", - "/dev/urandom", - "/dev/console", -+ "/dev/ptmx", - } - - // Processes running in a device-cgroup are constrained, they have acccess --- -2.17.1 - diff --git a/SPECS/kata-containers/0005-Merged-PR-9956-shim-avoid-memory-hotplug-timeout.patch b/SPECS/kata-containers/0005-Merged-PR-9956-shim-avoid-memory-hotplug-timeout.patch deleted file mode 100644 index de9230c9e7d..00000000000 --- a/SPECS/kata-containers/0005-Merged-PR-9956-shim-avoid-memory-hotplug-timeout.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 
7fab743a43e4f2063d560161753f2b6390c7add6 Mon Sep 17 00:00:00 2001 -From: Dan Mihai -Date: Thu, 15 Sep 2022 20:50:12 +0000 -Subject: [PATCH] Merged PR 9956: shim: avoid memory hotplug timeout - -Wait up to 10 seconds for cloud-hypervisor memory hotplug. ---- - src/runtime/virtcontainers/clh.go | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go -index 118e1b4d..f18b6c6f 100644 ---- a/src/runtime/virtcontainers/clh.go -+++ b/src/runtime/virtcontainers/clh.go -@@ -918,7 +918,9 @@ func (clh *cloudHypervisor) ResizeMemory(ctx context.Context, reqMemMB uint32, m - } - - cl := clh.client() -- ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) -+ // FIXME: memory hotplug sometimes takes longer than 1 second. -+ // ctx, cancelResize := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) -+ ctx, cancelResize := context.WithTimeout(ctx, 10*time.Second) - defer cancelResize() - - resize := *chclient.NewVmResize() --- -2.17.1 - diff --git a/SPECS/kata-containers/drop-mut-for-variables-that-are-not-mutated.patch b/SPECS/kata-containers/drop-mut-for-variables-that-are-not-mutated.patch deleted file mode 100644 index 6eddcfdc68f..00000000000 --- a/SPECS/kata-containers/drop-mut-for-variables-that-are-not-mutated.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 19a8a137b1c5fd9248896bd5f63638acfc9aff8c Mon Sep 17 00:00:00 2001 -From: Muhammad Falak R Wani -Date: Thu, 14 Sep 2023 14:56:17 +0530 -Subject: [PATCH 1/2] kata-types: drop mut for variables that are not mutated - -Signed-off-by: Muhammad Falak R Wani ---- - src/libs/kata-types/src/annotations/mod.rs | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs -index c8d6312..d6c51c1 100644 ---- a/src/libs/kata-types/src/annotations/mod.rs -+++ b/src/libs/kata-types/src/annotations/mod.rs -@@ -462,8 
+462,8 @@ impl Annotation { - let u32_err = io::Error::new(io::ErrorKind::InvalidData, "parse u32 error".to_string()); - let u64_err = io::Error::new(io::ErrorKind::InvalidData, "parse u64 error".to_string()); - let i32_err = io::Error::new(io::ErrorKind::InvalidData, "parse i32 error".to_string()); -- let mut hv = config.hypervisor.get_mut(hypervisor_name).unwrap(); -- let mut ag = config.agent.get_mut(agent_name).unwrap(); -+ let hv = config.hypervisor.get_mut(hypervisor_name).unwrap(); -+ let ag = config.agent.get_mut(agent_name).unwrap(); - for (key, value) in &self.annotations { - if hv.security_info.is_annotation_enabled(key) { - match key.as_str() { --- -2.40.1 - -From 7ec3b121c3891f4e4de643bcbef3287d7f564d7f Mon Sep 17 00:00:00 2001 -From: Muhammad Falak R Wani -Date: Thu, 14 Sep 2023 15:31:16 +0530 -Subject: [PATCH 2/2] agent: drop mut from variable which is not mutated - -Signed-off-by: Muhammad Falak R Wani ---- - src/agent/src/signal.rs | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/agent/src/signal.rs b/src/agent/src/signal.rs -index 79dea3b..8ec6556 100644 ---- a/src/agent/src/signal.rs -+++ b/src/agent/src/signal.rs -@@ -57,7 +57,7 @@ async fn handle_sigchild(logger: Logger, sandbox: Arc>) -> Result - continue; - } - -- let mut p = process.unwrap(); -+ let p = process.unwrap(); - - let ret: i32 = match wait_status { - WaitStatus::Exited(_, c) => c, --- -2.40.1 - diff --git a/SPECS/kata-containers/kata-containers.spec b/SPECS/kata-containers/kata-containers.spec index 8ac721cfc0f..19a084d05db 100644 --- a/SPECS/kata-containers/kata-containers.spec +++ b/SPECS/kata-containers/kata-containers.spec @@ -21,21 +21,16 @@ %global kataclhdir /usr/share/cloud-hypervisor %global katainitrddir /var/cache/kata-containers/osbuilder-images/kernel-uvm -%global runtime_make_vars QEMUPATH=%{qemupath} \\\ - KERNELTYPE="compressed" \\\ +# DEFAULT_HYPERVISOR: makes configuration.toml link to configuration-clh.toml. 
+%global runtime_make_vars KERNELTYPE="compressed" \\\ KERNELPARAMS="systemd.legacy_systemd_cgroup_controller=yes systemd.unified_cgroup_hierarchy=0" \\\ - DEFSHAREDFS="virtio-fs" \\\ - DEFVIRTIOFSDAEMON=%{_libexecdir}/"virtiofsd" \\\ - DEFVIRTIOFSCACHESIZE=0 \\\ - DEFSANDBOXCGROUPONLY=false \\\ + DEFVIRTIOFSDAEMON=%{_libexecdir}/"virtiofsd-rs" \\\ + DEFSTATICRESOURCEMGMT_CLH=true \\\ DEFSTATICSANDBOXWORKLOADMEM=1792 \\\ DEFMEMSZ=256 \\\ SKIP_GO_VERSION_CHECK=y \\\ - MACHINETYPE=%{machinetype} \\\ DESTDIR=%{buildroot} \\\ PREFIX=/usr \\\ - FEATURE_SELINUX="yes" \\\ - DEFENABLEANNOTATIONS=['\\\".*\\\"'] \\\ DEFAULT_HYPERVISOR=cloud-hypervisor %global agent_make_vars LIBC=gnu \\\ @@ -52,15 +47,6 @@ Source0: https://github.com/%{name}/%{name}/archive/refs/tags/%{version}. Source1: https://github.com/%{name}/%{name}/releases/download/%{version}/%{name}-%{version}-vendor.tar.gz Source2: 50-kata Source3: mariner-build-uvm.sh -Patch0: 0001-Merged-PR-9607-Allow-10-seconds-for-VM-creation-star.patch -Patch1: 0002-Merged-PR-9671-Wait-for-a-possibly-slow-Guest.patch -Patch2: 0003-Merged-PR-9805-Add-support-for-MSHV.patch -Patch3: 0004-Merged-PR-9806-Fix-enable_debug-for-hypervisor.clh.patch -Patch4: 0005-Merged-PR-9956-shim-avoid-memory-hotplug-timeout.patch -Patch5: runtime-reduce-uvm-high-mem-footprint.patch -Patch6: drop-mut-for-variables-that-are-not-mutated.patch -Patch7: 0001-osbuilder-Add-support-for-CBL-Mariner.patch -Patch8: 0001-Append-systemd-kernel-cmdline-params-for-initrd.patch BuildRequires: golang BuildRequires: git-core @@ -76,11 +62,15 @@ BuildRequires: kernel BuildRequires: busybox BuildRequires: cargo BuildRequires: rust +BuildRequires: device-mapper-devel +BuildRequires: clang Requires: busybox Requires: kernel Requires: libseccomp -Requires: qemu-virtiofsd +# Must match the version specified by the `assets.virtiofsd.version` field in +# %{SOURCE0}/versions.yaml. 
+Requires: virtiofsd = 1.8.0 Conflicts: kata-agent Conflicts: kata-ksm-throttler @@ -143,12 +133,13 @@ install -m 0755 -D -t %{buildroot}%{katauvmdir} %{SOURCE3} install -m 0644 -D -t %{buildroot}%{katauvmdir} VERSION install -m 0644 -D -t %{buildroot}%{katauvmdir} versions.yaml install -D -m 0644 ci/install_yq.sh %{buildroot}%{katauvmdir}/ci/install_yq.sh -sed -i 's#distro_config_dir="${script_dir}/${distro}#distro_config_dir="${script_dir}/cbl-mariner#g' tools/osbuilder/rootfs-builder/rootfs.sh +sed --follow-symlinks -i 's#distro_config_dir="${script_dir}/${distro}#distro_config_dir="${script_dir}/cbl-mariner#g' tools/osbuilder/rootfs-builder/rootfs.sh pushd src/runtime %make_install %{runtime_make_vars} -sed -i -e "s|image = .*$|initrd = \"%{katainitrddir}/kata-containers-initrd.img\"|" %{buildroot}%{kataconfigdir}/configuration.toml -sed -i -e "s|kernel = .*$|kernel = \"%{kataclhdir}/vmlinux.bin\"|" %{buildroot}%{kataconfigdir}/configuration.toml +# Ensure sed doesn't replace the configuration.toml symlink by a regular file. 
+sed --follow-symlinks -i -e "s|image = .*$|initrd = \"%{katainitrddir}/kata-containers-initrd.img\"|" %{buildroot}%{kataconfigdir}/configuration.toml +sed --follow-symlinks -i -e "s|kernel = .*$|kernel = \"%{kataclhdir}/vmlinux.bin\"|" %{buildroot}%{kataconfigdir}/configuration.toml popd pushd src/agent diff --git a/SPECS/kata-containers/runtime-reduce-uvm-high-mem-footprint.patch b/SPECS/kata-containers/runtime-reduce-uvm-high-mem-footprint.patch deleted file mode 100644 index 7175f085b22..00000000000 --- a/SPECS/kata-containers/runtime-reduce-uvm-high-mem-footprint.patch +++ /dev/null @@ -1,283 +0,0 @@ -From ff6c016a20f95580e7d1f06e3787c0675675807f Mon Sep 17 00:00:00 2001 -From: Manuel Huber -Date: Wed, 22 Mar 2023 17:12:09 +0000 -Subject: [PATCH] Merged PR 12983: Commit d5ed88f3: Fix 43668151: Resolve high - UVM memory footprint - -Bug: https://microsoft.visualstudio.com/OS/_workitems/edit/43668151 - -Rationale: This is a temporary solution for optimizing memory usage for the -current mechanism of requesting resources through pod Limit annotations: -- if no Limits are specified and hence WorkloadMemMB is 0, set - a default value 'StaticWorkloadDefaultMem' to allocate a default amount - of memory for use for containers in the sandbox in addition to the base memory -- if Limits are specified, the base memory and the sum of Limits are - allocated. 
The end user needs to be aware of the minimum memory - requirements for their pods, otherwise the pod will be stuck in the - ContainerCreating state - -Testing: Manual testing, creating pods with Limits and without limits, and with two containers where each container has a limit, tested with integration in a SPEC file where the config variables were set via environment variables via the make command ---- - src/runtime/Makefile | 8 ++++- - src/runtime/config/configuration-clh.toml.in | 17 +++++---- - src/runtime/config/configuration-fc.toml.in | 5 +++ - src/runtime/config/configuration-qemu.toml.in | 7 +++- - src/runtime/pkg/katautils/config.go | 36 ++++++++++--------- - src/runtime/pkg/oci/utils.go | 11 ++++++ - src/runtime/virtcontainers/hypervisor.go | 2 +- - src/runtime/virtcontainers/sandbox.go | 3 ++ - 8 files changed, 63 insertions(+), 26 deletions(-) - -diff --git a/src/runtime/Makefile b/src/runtime/Makefile -index 99dde7e..1fbac61 100644 ---- a/src/runtime/Makefile -+++ b/src/runtime/Makefile -@@ -158,7 +158,7 @@ DEFVCPUS := 1 - # Default maximum number of vCPUs - DEFMAXVCPUS := 0 - # Default memory size in MiB --DEFMEMSZ := 2048 -+DEFMEMSZ ?= 2048 - # Default memory slots - # Cases to consider : - # - nvdimm rootfs image -@@ -225,6 +225,9 @@ DEFSANDBOXCGROUPONLY ?= false - - DEFSTATICRESOURCEMGMT ?= false - -+# Default memory for use for workloads within the sandbox if no specific workload memory value is requested -+DEFSTATICSANDBOXWORKLOADMEM ?= 2048 -+ - DEFBINDMOUNTS := [] - - SED = sed -@@ -292,6 +295,7 @@ ifneq (,$(CLHCMD)) - # CLH-specific options (all should be suffixed by "_CLH") - # currently, huge pages are required for virtiofsd support - DEFNETWORKMODEL_CLH := tcfilter -+ DEFSTATICRESOURCEMGMT_CLH = true - KERNELTYPE_CLH = uncompressed - KERNEL_NAME_CLH = $(call MAKE_KERNEL_NAME,$(KERNELTYPE_CLH)) - KERNELPATH_CLH = $(KERNELDIR)/$(KERNEL_NAME_CLH) -@@ -501,7 +505,9 @@ USER_VARS += DEFENTROPYSOURCE - USER_VARS += DEFVALIDENTROPYSOURCES - 
USER_VARS += DEFSANDBOXCGROUPONLY - USER_VARS += DEFSTATICRESOURCEMGMT -+USER_VARS += DEFSTATICRESOURCEMGMT_CLH - USER_VARS += DEFSTATICRESOURCEMGMT_FC -+USER_VARS += DEFSTATICSANDBOXWORKLOADMEM - USER_VARS += DEFBINDMOUNTS - USER_VARS += DEFVFIOMODE - USER_VARS += BUILDFLAGS -diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in -index df7cc7a..d9e4864 100644 ---- a/src/runtime/config/configuration-clh.toml.in -+++ b/src/runtime/config/configuration-clh.toml.in -@@ -31,7 +31,7 @@ rootfs_type=@DEFROOTFSTYPE@ - # - # Known limitations: - # * Does not work by design: --# - CPU Hotplug -+# - CPU Hotplug - # - Memory Hotplug - # - NVDIMM devices - # -@@ -206,9 +206,9 @@ block_device_driver = "virtio-blk" - # and we strongly advise users to refer the Cloud Hypervisor official - # documentation for a better understanding of its internals: - # https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md --# -+# - # Bandwidth rate limiter options --# -+# - # net_rate_limiter_bw_max_rate controls network I/O bandwidth (size in bits/sec - # for SB/VM). - # The same value is used for inbound and outbound bandwidth. -@@ -242,9 +242,9 @@ block_device_driver = "virtio-blk" - # and we strongly advise users to refer the Cloud Hypervisor official - # documentation for a better understanding of its internals: - # https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/docs/io_throttling.md --# -+# - # Bandwidth rate limiter options --# -+# - # disk_rate_limiter_bw_max_rate controls disk I/O bandwidth (size in bits/sec - # for SB/VM). - # The same value is used for inbound and outbound bandwidth. -@@ -380,7 +380,12 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ - # - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O - # does not yet support sandbox sizing annotations. 
- # - When running single containers using a tool like ctr, container sizing information will be available. --static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ -+static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_CLH@ -+ -+# If set, the runtime will use the value as the default workload memory in MB for the sandbox when no workload memory request is passed -+# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper -+# default amount of memory available within the sandbox. -+static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ - - # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. - # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. -diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in -index 10dc177..6dfe5ce 100644 ---- a/src/runtime/config/configuration-fc.toml.in -+++ b/src/runtime/config/configuration-fc.toml.in -@@ -358,6 +358,11 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ - # - When running single containers using a tool like ctr, container sizing information will be available. - static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_FC@ - -+# If set, the runtime will use the value as the default workload memory in MB for the sandbox when no workload memory request is passed -+# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper -+# default amount of memory available within the sandbox. -+static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ -+ - # If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will - # be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. 
- disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@ -diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in -index 4fb5a8b..cb29ca5 100644 ---- a/src/runtime/config/configuration-qemu.toml.in -+++ b/src/runtime/config/configuration-qemu.toml.in -@@ -33,7 +33,7 @@ rootfs_type=@DEFROOTFSTYPE@ - # - # Known limitations: - # * Does not work by design: --# - CPU Hotplug -+# - CPU Hotplug - # - Memory Hotplug - # - NVDIMM devices - # -@@ -622,6 +622,11 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@ - # - When running single containers using a tool like ctr, container sizing information will be available. - static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@ - -+# If set, the runtime will use the value as the default workload memory in MB for the sandbox when no workload memory request is passed -+# down to the shim via the OCI when static sandbox resource management is enabled. With this, we ensure that workloads have a proper -+# default amount of memory available within the sandbox. -+static_sandbox_default_workload_mem=@DEFSTATICSANDBOXWORKLOADMEM@ -+ - # If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path. - # This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory. 
- # If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts` -diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go -index 997d073..866db0b 100644 ---- a/src/runtime/pkg/katautils/config.go -+++ b/src/runtime/pkg/katautils/config.go -@@ -161,23 +161,24 @@ type hypervisor struct { - } - - type runtime struct { -- InterNetworkModel string `toml:"internetworking_model"` -- JaegerEndpoint string `toml:"jaeger_endpoint"` -- JaegerUser string `toml:"jaeger_user"` -- JaegerPassword string `toml:"jaeger_password"` -- VfioMode string `toml:"vfio_mode"` -- GuestSeLinuxLabel string `toml:"guest_selinux_label"` -- SandboxBindMounts []string `toml:"sandbox_bind_mounts"` -- Experimental []string `toml:"experimental"` -- Tracing bool `toml:"enable_tracing"` -- DisableNewNetNs bool `toml:"disable_new_netns"` -- DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` -- EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` -- Debug bool `toml:"enable_debug"` -- SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` -- StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` -- EnablePprof bool `toml:"enable_pprof"` -- DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` -+ InterNetworkModel string `toml:"internetworking_model"` -+ JaegerEndpoint string `toml:"jaeger_endpoint"` -+ JaegerUser string `toml:"jaeger_user"` -+ JaegerPassword string `toml:"jaeger_password"` -+ VfioMode string `toml:"vfio_mode"` -+ GuestSeLinuxLabel string `toml:"guest_selinux_label"` -+ SandboxBindMounts []string `toml:"sandbox_bind_mounts"` -+ Experimental []string `toml:"experimental"` -+ Tracing bool `toml:"enable_tracing"` -+ DisableNewNetNs bool `toml:"disable_new_netns"` -+ DisableGuestSeccomp bool `toml:"disable_guest_seccomp"` -+ EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"` -+ Debug bool `toml:"enable_debug"` -+ SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"` -+ 
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"` -+ EnablePprof bool `toml:"enable_pprof"` -+ DisableGuestEmptyDir bool `toml:"disable_guest_empty_dir"` -+ StaticSandboxWorkloadDefaultMem uint32 `toml:"static_sandbox_default_workload_mem"` - } - - type agent struct { -@@ -1372,6 +1373,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat - config.EnableVCPUsPinning = tomlConf.Runtime.EnableVCPUsPinning - config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel - config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt -+ config.StaticSandboxWorkloadDefaultMem = tomlConf.Runtime.StaticSandboxWorkloadDefaultMem - config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly - config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs - config.EnablePprof = tomlConf.Runtime.EnablePprof -diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go -index d2d713f..436a0d2 100644 ---- a/src/runtime/pkg/oci/utils.go -+++ b/src/runtime/pkg/oci/utils.go -@@ -143,6 +143,9 @@ type RuntimeConfig struct { - // any later resource updates. - StaticSandboxResourceMgmt bool - -+ // Memory to allocate for workloads within the sandbox when workload memory is unspecified -+ StaticSandboxWorkloadDefaultMem uint32 -+ - // Determines if create a netns for hypervisor process - DisableNewNetNs bool - -@@ -952,6 +955,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st - - StaticResourceMgmt: runtime.StaticSandboxResourceMgmt, - -+ StaticWorkloadDefaultMem: runtime.StaticSandboxWorkloadDefaultMem, -+ - ShmSize: shmSize, - - VfioMode: runtime.VfioMode, -@@ -976,6 +981,12 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st - // with the base number of CPU/memory (which is equal to the default CPU/memory specified for the runtime - // configuration or annotations) as well as any specified workload resources. 
- if sandboxConfig.StaticResourceMgmt { -+ // If no Limits are set in pod config, use StaticWorkloadDefaultMem to ensure the containers generally -+ // have a reasonable amount of memory available -+ if sandboxConfig.SandboxResources.WorkloadMemMB == 0 { -+ sandboxConfig.SandboxResources.WorkloadMemMB = sandboxConfig.StaticWorkloadDefaultMem -+ } -+ - sandboxConfig.SandboxResources.BaseCPUs = sandboxConfig.HypervisorConfig.NumVCPUs - sandboxConfig.SandboxResources.BaseMemMB = sandboxConfig.HypervisorConfig.MemorySize - -diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go -index dee5fec..0d86807 100644 ---- a/src/runtime/virtcontainers/hypervisor.go -+++ b/src/runtime/virtcontainers/hypervisor.go -@@ -74,7 +74,7 @@ const ( - vSockLogsPort = 1025 - - // MinHypervisorMemory is the minimum memory required for a VM. -- MinHypervisorMemory = 256 -+ MinHypervisorMemory = 64 - - defaultMsize9p = 8192 - -diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go -index 523c072..bb36af0 100644 ---- a/src/runtime/virtcontainers/sandbox.go -+++ b/src/runtime/virtcontainers/sandbox.go -@@ -160,6 +160,9 @@ type SandboxConfig struct { - - HypervisorConfig HypervisorConfig - -+ StaticWorkloadDefaultMem uint32 -+ -+ // Memory to allocate for workloads within the sandbox when workload memory is unspecified - ShmSize uint64 - - SandboxResources SandboxResourceSizing --- -2.25.1 -