From 8c39dd56e5ad4f2ba03b1067cd49c5cef44272d0 Mon Sep 17 00:00:00 2001 From: Sayan Chowdhury Date: Wed, 26 Apr 2023 16:48:12 +0530 Subject: [PATCH 1/2] misc: Add tests to the test installing of NVIDIA on instances Signed-off-by: Sayan Chowdhury --- kola/tests/misc/nvidia.go | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 kola/tests/misc/nvidia.go diff --git a/kola/tests/misc/nvidia.go b/kola/tests/misc/nvidia.go new file mode 100644 index 000000000..a78c3b3de --- /dev/null +++ b/kola/tests/misc/nvidia.go @@ -0,0 +1,52 @@ +package misc + +import ( + "bytes" + "fmt" + "time" + + "github.com/coreos/pkg/capnslog" + "github.com/flatcar/mantle/kola" + "github.com/flatcar/mantle/kola/cluster" + "github.com/flatcar/mantle/kola/register" + "github.com/flatcar/mantle/util" +) + +const ( + CmdTimeout = time.Second * 300 +) + +var plog = capnslog.NewPackageLogger("github.com/flatcar/mantle", "kola/tests/misc") + +func init() { + register.Register(&register.Test{ + Name: "cl.misc.nvidia", + Run: verifyNvidiaInstallation, + ClusterSize: 1, + Distros: []string{"cl"}, + // This test is to test the NVIDIA installation, limited to AZURE for now + Platforms: []string{"azure"}, + Architectures: []string{"amd64"}, + Flags: []register.Flag{register.NoEnableSelinux}, + }) +} + +func verifyNvidiaInstallation(c cluster.TestCluster) { + if kola.AzureOptions.Size != "Standard_NC6s_v3" { + c.Skip("skipping due to wrong instance size") + } + m := c.Machines()[0] + + nvidiaStatusRetry := func() error { + out, err := c.SSH(m, "systemctl is-active nvidia.service") + if !bytes.Contains(out, []byte("inactive")) { + return fmt.Errorf("nvidia.service: %q: %v", out, err) + } + return nil + } + + if err := util.Retry(40, 15*time.Second, nvidiaStatusRetry); err != nil { + c.Fatal(err) + } + c.AssertCmdOutputContains(m, "/opt/bin/nvidia-smi", "Tesla") +} From 4cd37d76c8f9a474b47bf97d7f59dda38ec24f30 Mon Sep 17 00:00:00 2001 From: Sayan Chowdhury Date: Fri, 16
Jun 2023 13:17:28 +0530 Subject: [PATCH 2/2] CHANGELOG: Add changelog for adding nvidia tests Signed-off-by: Sayan Chowdhury --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7418253b..f3c59bc2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - `DefaultUser` parameter when registering a test to use a user different from `core` ([#424](https://github.com/flatcar/mantle/pull/424)) - `systemd.sysext.custom-oem` for testing the activation of the OEM sysext image ([#423](https://github.com/flatcar/mantle/pull/423)) - Kubernetes 1.27 tests ([#441](https://github.com/flatcar/mantle/pull/441)) +- Add tests for testing the installation/integrity of the NVIDIA drivers ([#433](https://github.com/flatcar/mantle/pull/433)) ### Changed @@ -67,7 +68,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Ignition v3 support and tests ([#301](https://github.com/flatcar-linux/mantle/pull/301), [#311](https://github.com/flatcar-linux/mantle/pull/311)) - Butane config support ([#318](https://github.com/flatcar-linux/mantle/pull/318)) - GCP: support testing with GVNIC ([#322](https://github.com/flatcar-linux/mantle/pull/322)) -- `networkd` Ignition translation test ([#344](https://github.com/flatcar-linux/mantle/pull/334)) +- `networkd` Ignition translation test ([#344](https://github.com/flatcar-linux/mantle/pull/334)) - kola test `cl.misc.falco` that tests falco kmod building ([#339](https://github.com/flatcar-linux/mantle/pull/339)) - Kubernetes test for release 1.24.1 ([#337](https://github.com/flatcar-linux/mantle/pull/337)) - Added storage abstraction for Equinix Metal tests (SSH can be used in addition of Google Cloud Storage) ([#340](https://github.com/flatcar-linux/mantle/pull/340))