diff --git a/.drone.jsonnet b/.drone.jsonnet
index 47d6c52a3f..c3d99136c5 100644
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@@ -458,6 +458,7 @@ local integration_extensions = Step('e2e-extensions', target='e2e-qemu', privile
   QEMU_MEMORY_WORKERS: '4096',
   WITH_CONFIG_PATCH_WORKER: '@_out/extensions-patch.json',
   IMAGE_REGISTRY: local_registry,
+  QEMU_EXTRA_DISKS: '1',
   EXTRA_TEST_ARGS: '-talos.extensions.testtype=qemu',
 });
 local integration_cilium = Step('e2e-cilium', target='e2e-qemu', privileged=true, depends_on=[load_artifacts], environment={
@@ -718,7 +719,7 @@ local e2e_pipelines = [
   Pipeline('e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp]) + e2e_trigger(['e2e-gcp']),
 
   // cron pipelines, triggered on schedule events
-  Pipeline('cron-e2e-aws', default_pipeline_steps + [capi_docker, e2e_capi, e2e_aws], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
+  Pipeline('cron-e2e-aws', default_pipeline_steps + [e2e_aws_prepare, tf_apply, e2e_aws_tf_apply_post, e2e_aws, tf_destroy], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-e2e-gcp', default_pipeline_steps + [capi_docker, e2e_capi, e2e_gcp], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
 ];
 
diff --git a/Makefile b/Makefile
index 92047b5bae..1f26ef5a4b 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ CLOUD_IMAGES_EXTRA_ARGS ?= ""
 
 ARTIFACTS := _out
 TOOLS ?= ghcr.io/siderolabs/tools:v1.5.0
-PKGS ?= v1.6.0-alpha.0-5-g7717b7e
+PKGS ?= v1.6.0-alpha.0-7-g2e1c0b9
 EXTRAS ?= v1.5.0
 # renovate: datasource=github-tags depName=golang/go
 GO_VERSION ?= 1.20
diff --git a/hack/test/extensions/extension-patch-filter.jq b/hack/test/extensions/extension-patch-filter.jq
index 0761cd1f10..e500c0fac3 100644
--- a/hack/test/extensions/extension-patch-filter.jq
+++ b/hack/test/extensions/extension-patch-filter.jq
@@ -29,6 +29,9 @@
       {
         "name": "ax88796b"
       },
+      {
+        "name": "btrfs"
+      },
       {
         "name": "cdc_ether"
       },
diff --git a/internal/integration/api/common.go b/internal/integration/api/common.go
index e33bef484c..334d9b89e8 100644
--- a/internal/integration/api/common.go
+++ b/internal/integration/api/common.go
@@ -8,16 +8,12 @@ package api
 
 import (
 	"bufio"
-	"bytes"
 	"context"
 	"strings"
 	"time"
 
-	"github.com/siderolabs/go-retry/retry"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/tools/remotecommand"
-	"k8s.io/kubectl/pkg/scheme"
 
 	"github.com/siderolabs/talos/internal/integration/base"
 	"github.com/siderolabs/talos/pkg/machinery/client"
@@ -132,67 +128,15 @@ file locks (-x) unlimited
 	suite.Require().NoError(err)
 
 	// wait for the pod to be ready
-	suite.Require().NoError(retry.Constant(8*time.Minute, retry.WithUnits(time.Second*10)).Retry(
-		func() error {
-			pod, podErr := suite.Clientset.CoreV1().Pods("default").Get(suite.ctx, "defaults-test", metav1.GetOptions{})
-			if podErr != nil {
-				return retry.ExpectedErrorf("error getting pod: %s", podErr)
-			}
-
-			if pod.Status.Phase != corev1.PodRunning {
-				return retry.ExpectedErrorf("pod is not running yet: %s", pod.Status.Phase)
-			}
-
-			return nil
-		},
-	))
+	suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, 10*time.Minute, "default", "defaults-test"))
 
-	stdout, stderr, err := suite.executeRemoteCommand("default", "defaults-test", "ulimit -c -d -e -f -l -m -n -q -r -s -t -v -x")
+	stdout, stderr, err := suite.ExecuteCommandInPod(suite.ctx, "default", "defaults-test", "ulimit -c -d -e -f -l -m -n -q -r -s -t -v -x")
 	suite.Require().NoError(err)
 
 	suite.Require().Equal("", stderr)
 	suite.Require().Equal(strings.TrimPrefix(expectedUlimit, "\n"), stdout)
 }
 
-func (suite *CommonSuite) executeRemoteCommand(namespace, podName, command string) (string, string, error) {
-	cmd := []string{
-		"/bin/sh",
-		"-c",
-		command,
-	}
-	req := suite.Clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
-		Namespace(namespace).SubResource("exec")
-	option := &corev1.PodExecOptions{
-		Command: cmd,
-		Stdin:   false,
-		Stdout:  true,
-		Stderr:  true,
-		TTY:     false,
-	}
-
-	req.VersionedParams(
-		option,
-		scheme.ParameterCodec,
-	)
-
-	exec, err := remotecommand.NewSPDYExecutor(suite.RestConfig, "POST", req.URL())
-	if err != nil {
-		return "", "", err
-	}
-
-	var stdout, stderr bytes.Buffer
-
-	err = exec.StreamWithContext(suite.ctx, remotecommand.StreamOptions{
-		Stdout: &stdout,
-		Stderr: &stderr,
-	})
-	if err != nil {
-		return "", "", err
-	}
-
-	return stdout.String(), stderr.String(), nil
-}
-
 func init() {
 	allSuites = append(allSuites, &CommonSuite{})
 }
diff --git a/internal/integration/api/extensions.go b/internal/integration/api/extensions.go
index c6552b2073..3f6aaa1a01 100644
--- a/internal/integration/api/extensions.go
+++ b/internal/integration/api/extensions.go
@@ -23,10 +23,10 @@ import (
 	"github.com/cosi-project/runtime/pkg/resource"
 	"github.com/cosi-project/runtime/pkg/safe"
 	"github.com/cosi-project/runtime/pkg/state"
-	"github.com/siderolabs/go-retry/retry"
 	corev1 "k8s.io/api/core/v1"
 	nodev1 "k8s.io/api/node/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/pointer"
 
 	"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
 	"github.com/siderolabs/talos/internal/integration/base"
@@ -35,7 +35,6 @@ import (
 	"github.com/siderolabs/talos/pkg/machinery/config/machine"
 	"github.com/siderolabs/talos/pkg/machinery/constants"
 	"github.com/siderolabs/talos/pkg/machinery/resources/network"
-	"github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
 )
 
 // ExtensionsSuite verifies Talos is securebooted.
@@ -129,6 +128,7 @@ func (suite *ExtensionsSuite) TestExtensionsExpectedModules() {
 		"asix": "asix.ko",
 		"ax88179_178a": "ax88179_178a.ko",
 		"ax88796b": "ax88796b.ko",
+		"btrfs": "btrfs.ko",
 		"cdc_ether": "cdc_ether.ko",
 		"cdc_mbim": "cdc_mbim.ko",
 		"cdc_ncm": "cdc_ncm.ko",
@@ -195,34 +195,23 @@ func (suite *ExtensionsSuite) TestExtensionsExpectedModules() {
 	}
 }
 
-// TestExtensionsExpectedServices verifies expected services are running.
-func (suite *ExtensionsSuite) TestExtensionsExpectedServices() {
-	expectedServices := []string{
-		"ext-hello-world",
-		"ext-iscsid",
-		"ext-nut-client",
-		"ext-qemu-guest-agent",
-		"ext-tgtd",
+// TestExtensionsISCSI verifies expected services are running.
+func (suite *ExtensionsSuite) TestExtensionsISCSI() {
+	expectedServices := map[string]string{
+		"ext-iscsid": "Running",
+		"ext-tgtd":   "Running",
 	}
 
-	// Tailscale service keeps on restarting unless authed, so this test is disabled for now.
-	if ok := os.Getenv("TALOS_INTEGRATION_RUN_TAILSCALE"); ok != "" {
-		expectedServices = append(expectedServices, "ext-tailscale")
-	}
+	suite.testServicesRunning(expectedServices)
+}
 
-	switch ExtensionsTestType(suite.ExtensionsTestType) {
-	case ExtensionsTestTypeNone:
-	case ExtensionsTestTypeQEMU:
-	case ExtensionsTestTypeNvidia:
-		expectedServices = []string{"ext-nvidia-persistenced"}
-	case ExtensionsTestTypeNvidiaFabricManager:
-		expectedServices = []string{
-			"ext-nvidia-persistenced",
-			"ext-nvidia-fabricmanager",
-		}
+// TestExtensionsNutClient verifies nut client is working.
+func (suite *ExtensionsSuite) TestExtensionsNutClient() {
+	if suite.ExtensionsTestType != string(ExtensionsTestTypeQEMU) {
+		suite.T().Skip("skipping as qemu extensions test are not enabled")
 	}
 
-	suite.testServicesRunning(expectedServices)
+	suite.testServicesRunning(map[string]string{"ext-nut-client": "Running"})
 }
 
 // TestExtensionsQEMUGuestAgent verifies qemu guest agent is working.
@@ -231,6 +220,8 @@ func (suite *ExtensionsSuite) TestExtensionsQEMUGuestAgent() {
 		suite.T().Skip("skipping as qemu extensions test are not enabled")
 	}
 
+	suite.testServicesRunning(map[string]string{"ext-qemu-guest-agent": "Running"})
+
 	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
 	ctx := client.WithNode(suite.ctx, node)
 
@@ -242,9 +233,6 @@ func (suite *ExtensionsSuite) TestExtensionsQEMUGuestAgent() {
 	)
 	suite.Require().NoError(err)
 
-	bootID, err := suite.ReadBootID(ctx)
-	suite.Require().NoError(err)
-
 	clusterStatePath, err := suite.Cluster.StatePath()
 	suite.Require().NoError(err)
 
@@ -253,10 +241,14 @@ func (suite *ExtensionsSuite) TestExtensionsQEMUGuestAgent() {
 
 	defer conn.Close() //nolint:errcheck
 
-	_, err = conn.Write([]byte(`{"execute":"guest-shutdown", "arguments": {"mode": "reboot"}}`))
-	suite.Require().NoError(err)
+	// now we want to reboot the node using the guest agent
+	suite.AssertRebooted(
+		suite.ctx, node, func(nodeCtx context.Context) error {
+			_, err = conn.Write([]byte(`{"execute":"guest-shutdown", "arguments": {"mode": "reboot"}}`))
 
-	suite.AssertBootIDChanged(ctx, bootID, node, time.Minute*5)
+			return err
+		}, 5*time.Minute,
+	)
 }
 
 // TestExtensionsTailscale verifies tailscale is working.
@@ -270,6 +262,8 @@ func (suite *ExtensionsSuite) TestExtensionsTailscale() {
 		suite.T().Skip("skipping as tailscale integration tests are not enabled")
 	}
 
+	suite.testServicesRunning(map[string]string{"ext-tailscale": "Running"})
+
 	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
 	ctx := client.WithNode(suite.ctx, node)
 
@@ -292,6 +286,10 @@ func (suite *ExtensionsSuite) TestExtensionsHelloWorldService() {
 
 	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
 
+	suite.testServicesRunning(map[string]string{
+		"ext-hello-world": "Running",
+	})
+
 	url := url.URL{
 		Scheme: "http",
 		Host:   node,
@@ -342,38 +340,148 @@ func (suite *ExtensionsSuite) TestExtensionsGvisor() {
 	suite.Require().NoError(err)
 
 	// wait for the pod to be ready
-	suite.Require().NoError(retry.Constant(4*time.Minute, retry.WithUnits(time.Second*10)).Retry(
-		func() error {
-			pod, err := suite.Clientset.CoreV1().Pods("default").Get(suite.ctx, "nginx-gvisor", metav1.GetOptions{})
-			if err != nil {
-				return retry.ExpectedErrorf("error getting pod: %s", err)
-			}
+	suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, 5*time.Minute, "default", "nginx-gvisor"))
+}
+
+// TestExtensionsZFS verifies zfs is working, udev rules work and the pool is mounted on reboot.
+func (suite *ExtensionsSuite) TestExtensionsZFS() {
+	if suite.ExtensionsTestType != string(ExtensionsTestTypeQEMU) {
+		suite.T().Skip("skipping as qemu extensions test are not enabled")
+	}
+
+	suite.testServicesRunning(map[string]string{"ext-zpool-importer": "Finished"})
+
+	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
+	ctx := client.WithNode(suite.ctx, node)
+
+	var zfsPoolExists bool
+
+	userDisks, err := suite.UserDisks(suite.ctx, node, 4)
+	suite.Require().NoError(err)
+
+	suite.Require().NotEmpty(userDisks, "expected at least one user disk with size greater than 4GB to be available")
 
-			if pod.Status.Phase != corev1.PodRunning {
-				return retry.ExpectedErrorf("pod is not running yet: %s", pod.Status.Phase)
+	resp, err := suite.Client.LS(ctx, &machineapi.ListRequest{
+		Root: fmt.Sprintf("/dev/%s1", userDisks[0]),
+	})
+	suite.Require().NoError(err)
+
+	if _, err = resp.Recv(); err == nil {
+		zfsPoolExists = true
+	}
+
+	if !zfsPoolExists {
+		_, err = suite.Clientset.CoreV1().Pods("kube-system").Create(suite.ctx, &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "zpool-create",
+			},
+			Spec: corev1.PodSpec{
+				Containers: []corev1.Container{
+					{
+						Name:  "zpool-create",
+						Image: "alpine",
+						Command: []string{
+							"tail",
+							"-f",
+							"/dev/null",
+						},
+						SecurityContext: &corev1.SecurityContext{
+							Privileged: pointer.Bool(true),
+						},
+					},
+				},
+				HostNetwork: true,
+				HostPID:     true,
+			},
+		}, metav1.CreateOptions{})
+		defer suite.Clientset.CoreV1().Pods("kube-system").Delete(suite.ctx, "zpool-create", metav1.DeleteOptions{}) //nolint:errcheck
+
+		suite.Require().NoError(err)
+
+		// wait for the pod to be ready
+		suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, 5*time.Minute, "kube-system", "zpool-create"))
+
+		stdout, stderr, err := suite.ExecuteCommandInPod(
+			suite.ctx,
+			"kube-system",
+			"zpool-create",
+			fmt.Sprintf("nsenter --mount=/proc/1/ns/mnt -- zpool create -m /var/tank tank %s", userDisks[0]),
+		)
+		suite.Require().NoError(err)
+
+		suite.Require().Equal("", stderr)
+		suite.Require().Equal("", stdout)
+
+		stdout, stderr, err = suite.ExecuteCommandInPod(
+			suite.ctx,
+			"kube-system",
+			"zpool-create",
+			"nsenter --mount=/proc/1/ns/mnt -- zfs create -V 1gb tank/vol",
+		)
+		suite.Require().NoError(err)
+
+		suite.Require().Equal("", stderr)
+		suite.Require().Equal("", stdout)
+	}
+
+	checkZFSPoolMounted := func() bool {
+		mountsResp, err := suite.Client.Mounts(ctx)
+		suite.Require().NoError(err)
+
+		for _, msg := range mountsResp.Messages {
+			for _, stats := range msg.Stats {
+				if stats.MountedOn == "/var/tank" {
+					return true
+				}
 			}
+		}
+
+		return false
+	}
+
+	checkZFSVolumePathPopulatedByUdev := func() {
+		// this is the path that udev will populate, which is a symlink to the actual device
+		path := "/dev/zvol/tank/vol"
+
+		stream, err := suite.Client.LS(ctx, &machineapi.ListRequest{
+			Root: path,
+		})
+
+		suite.Require().NoError(err)
+
+		suite.Require().NoError(helpers.ReadGRPCStream(stream, func(info *machineapi.FileInfo, node string, multipleNodes bool) error {
+			suite.Require().Equal("/dev/zd0", info.Name, "expected %s to exist", path)
 
 			return nil
-		},
-	))
+		}))
+	}
+
+	suite.Require().True(checkZFSPoolMounted())
+	checkZFSVolumePathPopulatedByUdev()
+
+	// now we want to reboot the node and make sure the pool is still mounted
+	suite.AssertRebooted(
+		suite.ctx, node, func(nodeCtx context.Context) error {
+			return base.IgnoreGRPCUnavailable(suite.Client.Reboot(nodeCtx))
+		}, 5*time.Minute,
+	)
+
+	suite.Require().True(checkZFSPoolMounted())
+	checkZFSVolumePathPopulatedByUdev()
 }
 
-func (suite *ExtensionsSuite) testServicesRunning(services []string) {
+func (suite *ExtensionsSuite) testServicesRunning(serviceStatus map[string]string) {
 	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
 	ctx := client.WithNode(suite.ctx, node)
 
-	items, err := safe.StateListAll[*v1alpha1.Service](ctx, suite.Client.COSI)
-	suite.Require().NoError(err)
+	for svc, state := range serviceStatus {
+		resp, err := suite.Client.ServiceInfo(ctx, svc)
+		suite.Require().NoError(err)
+		suite.Require().NotNil(resp, "expected service %s to be registered", svc)
 
-	for _, expected := range services {
-		svc, found := items.Find(func(s *v1alpha1.Service) bool {
-			return s.Metadata().ID() == expected
-		})
-		if !found {
-			suite.T().Fatalf("expected %s to be registered", expected)
+		for _, svcInfo := range resp {
+			suite.Require().Equal(state, svcInfo.Service.State, "expected service %s to have state %s", svc, state)
 		}
-
-		suite.Require().True(svc.TypedSpec().Running, "expected %s to be running", expected)
 	}
 }
diff --git a/internal/integration/base/api.go b/internal/integration/base/api.go
index 094ddfdaa1..c044c0ad78 100644
--- a/internal/integration/base/api.go
+++ b/internal/integration/base/api.go
@@ -454,6 +454,32 @@ func (apiSuite *APISuite) ReadConfigFromNode(nodeCtx context.Context) (config.Pr
 	return provider, nil
 }
 
+// UserDisks returns list of user disks on the node with size greater than sizeGreaterThanGB.
+func (apiSuite *APISuite) UserDisks(ctx context.Context, node string, sizeGreaterThanGB int) ([]string, error) {
+	nodeCtx := client.WithNodes(ctx, node)
+
+	resp, err := apiSuite.Client.Disks(nodeCtx)
+	if err != nil {
+		return nil, err
+	}
+
+	var disks []string
+
+	for _, msg := range resp.Messages {
+		for _, disk := range msg.Disks {
+			if disk.SystemDisk {
+				continue
+			}
+
+			if disk.Size > uint64(sizeGreaterThanGB)*1024*1024*1024 {
+				disks = append(disks, disk.DeviceName)
+			}
+		}
+	}
+
+	return disks, nil
+}
+
 // TearDownSuite closes Talos API client.
 func (apiSuite *APISuite) TearDownSuite() {
 	if apiSuite.Client != nil {
diff --git a/internal/integration/base/k8s.go b/internal/integration/base/k8s.go
index 14cb56b958..c3664592b9 100644
--- a/internal/integration/base/k8s.go
+++ b/internal/integration/base/k8s.go
@@ -7,6 +7,7 @@
 package base
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"time"
@@ -24,6 +25,8 @@ import (
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/clientcmd"
 	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
+	"k8s.io/client-go/tools/remotecommand"
+	"k8s.io/kubectl/pkg/scheme"
 
 	taloskubernetes "github.com/siderolabs/talos/pkg/kubernetes"
 )
@@ -173,3 +176,61 @@ func (k8sSuite *K8sSuite) WaitForEventExists(ctx context.Context, ns string, che
 		return nil
 	})
 }
+
+// WaitForPodToBeRunning waits for the pod with the given namespace and name to be running.
+func (k8sSuite *K8sSuite) WaitForPodToBeRunning(ctx context.Context, timeout time.Duration, namespace, podName string) error {
+	return retry.Constant(timeout, retry.WithUnits(time.Second*10)).Retry(
+		func() error {
+			pod, err := k8sSuite.Clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
+			if err != nil {
+				return retry.ExpectedErrorf("error getting pod: %s", err)
+			}
+
+			if pod.Status.Phase != corev1.PodRunning {
+				return retry.ExpectedErrorf("pod is not running yet: %s", pod.Status.Phase)
+			}
+
+			return nil
+		},
+	)
+}
+
+// ExecuteCommandInPod executes the given command in the pod with the given namespace and name.
+func (k8sSuite *K8sSuite) ExecuteCommandInPod(ctx context.Context, namespace, podName, command string) (string, string, error) {
+	cmd := []string{
+		"/bin/sh",
+		"-c",
+		command,
+	}
+	req := k8sSuite.Clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
+		Namespace(namespace).SubResource("exec")
+	option := &corev1.PodExecOptions{
+		Command: cmd,
+		Stdin:   false,
+		Stdout:  true,
+		Stderr:  true,
+		TTY:     false,
+	}
+
+	req.VersionedParams(
+		option,
+		scheme.ParameterCodec,
+	)
+
+	exec, err := remotecommand.NewSPDYExecutor(k8sSuite.RestConfig, "POST", req.URL())
+	if err != nil {
+		return "", "", err
+	}
+
+	var stdout, stderr bytes.Buffer
+
+	err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
+		Stdout: &stdout,
+		Stderr: &stderr,
+	})
+	if err != nil {
+		return "", "", err
+	}
+
+	return stdout.String(), stderr.String(), nil
+}
diff --git a/pkg/machinery/gendata/data/pkgs b/pkg/machinery/gendata/data/pkgs
index 38c2df17d0..d50c8893e0 100644
--- a/pkg/machinery/gendata/data/pkgs
+++ b/pkg/machinery/gendata/data/pkgs
@@ -1 +1 @@
-v1.6.0-alpha.0-5-g7717b7e
\ No newline at end of file
+v1.6.0-alpha.0-7-g2e1c0b9
\ No newline at end of file