Skip to content

Commit

Permalink
test: run infra integration tests against Talemu provider
Browse files Browse the repository at this point in the history
Talemu-based tests now set up a `MachineRequestSet` with 30 machines
before the tests run and tear it down afterwards.

New test blocks validate the creation and deletion of links and machine requests.

Fixes: #366

Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
  • Loading branch information
Unix4ever committed Sep 12, 2024
1 parent f83cf3b commit 81e08eb
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 35 deletions.
153 changes: 153 additions & 0 deletions cmd/integration-test/pkg/tests/infra.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright (c) 2024 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

package tests

import (
"context"
"fmt"
"slices"
"testing"
"time"

"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-retry/retry"
"github.com/stretchr/testify/require"

"github.com/siderolabs/omni/client/pkg/client"
"github.com/siderolabs/omni/client/pkg/omni/resources"
"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
"github.com/siderolabs/omni/client/pkg/omni/resources/siderolink"
)

// AssertMachinesShouldBeProvisioned creates a machine request set and waits until all requests are fulfilled.
//
// The returned test performs the following steps, all sharing one 60 second timeout derived from testCtx:
//   - destroys the machine request set named machineRequestSetName if it already exists and creates a new one
//     for infraProvider, talosVersion and machineCount;
//   - waits until the number of MachineRequestStatus resources labeled with the set name equals machineCount;
//   - waits until every machine request has exactly one MachineStatus labeled with it, and that MachineStatus
//     has its hardware information populated.
//
//nolint:gocognit
func AssertMachinesShouldBeProvisioned(testCtx context.Context, client *client.Client, machineCount int, machineRequestSetName,
	talosVersion, infraProvider string,
) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		st := client.Omni().State()

		machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, st, machineRequestSetName)

		// A missing set is the normal case; any other lookup error fails the test.
		if !state.IsNotFoundError(err) {
			require.NoError(t, err)
		}

		// Remove a leftover set from a previous run so the Create below does not conflict.
		if machineRequestSet != nil {
			rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, st, []string{machineRequestSetName})
		}

		machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName)

		machineRequestSet.TypedSpec().Value.Extensions = []string{
			"siderolabs/" + HelloWorldServiceExtensionName,
		}

		machineRequestSet.TypedSpec().Value.ProviderId = infraProvider
		machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion
		machineRequestSet.TypedSpec().Value.MachineCount = int32(machineCount)

		require.NoError(t, st.Create(ctx, machineRequestSet))

		// Named requestStatuses (not resources) to avoid shadowing the imported resources package.
		var requestStatuses safe.List[*infra.MachineRequestStatus]

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			list, listErr := safe.ReaderListAll[*infra.MachineRequestStatus](ctx, st,
				state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
			)
			if listErr != nil {
				return listErr
			}

			if list.Len() != machineCount {
				return retry.ExpectedErrorf("provision machine count is %d, expected %d", list.Len(), machineCount)
			}

			requestStatuses = list

			return nil
		})

		require.NoError(t, err)

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			machines, listErr := safe.ReaderListAll[*omni.MachineStatus](ctx, st)
			if listErr != nil {
				return listErr
			}

			if machines.Len() < machineCount {
				// Report the number of machines actually seen, which is what the condition checks.
				return retry.ExpectedErrorf("links count is %d, expected at least %d", machines.Len(), machineCount)
			}

			for r := range requestStatuses.All() {
				requestedMachines := machines.FilterLabelQuery(resource.LabelEqual(omni.LabelMachineRequest, r.Metadata().ID()))

				if requestedMachines.Len() == 0 {
					return retry.ExpectedErrorf("machine request %q doesn't have the related link", r.Metadata().ID())
				}

				// Not an expected error: duplicates will not resolve by waiting, so stop retrying.
				if requestedMachines.Len() != 1 {
					return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID())
				}

				m := requestedMachines.Get(0)
				if m.TypedSpec().Value.Hardware == nil {
					return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID())
				}
			}

			return nil
		})

		require.NoError(t, err)
	}
}

// AssertMachinesShouldBeDeprovisioned removes the machine request set and checks that all related links were deleted.
//
// The IDs of the machine requests labeled with the set, and of the links labeled with those
// requests, are collected before the set is destroyed so that their absence can be asserted afterwards.
// The whole check runs under a 60 second timeout derived from testCtx.
func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client, machineRequestSetName string) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		// Snapshot the machine requests owned by the set while it still exists.
		machineRequestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](
			ctx, t, client.Omni().State(),
			state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
		)

		allLinks, err := safe.ReaderListAll[*siderolink.Link](ctx, client.Omni().State())
		require.NoError(t, err)

		// Keep only the links whose machine request label points at one of the set's requests.
		ownedLinkIDs := make([]string, 0, len(machineRequestIDs))

		for link := range allLinks.All() {
			requestID, labeled := link.Metadata().Labels().Get(omni.LabelMachineRequest)
			if labeled && slices.Contains(machineRequestIDs, requestID) {
				ownedLinkIDs = append(ownedLinkIDs, link.Metadata().ID())
			}
		}

		rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})

		for _, requestID := range machineRequestIDs {
			rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), requestID)
		}

		for _, linkID := range ownedLinkIDs {
			rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), linkID)
		}
	}
}
73 changes: 49 additions & 24 deletions cmd/integration-test/pkg/tests/tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package tests

import (
"context"
"errors"
"fmt"
"log"
"net/http"
Expand Down Expand Up @@ -54,9 +53,10 @@ type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error
type Options struct {
RunTestPattern string

CleanupLinks bool
RunStatsCheck bool
ExpectedMachines int
CleanupLinks bool
RunStatsCheck bool
ExpectedMachines int
ProvisionMachines int

RestartAMachineFunc RestartAMachineFunc
WipeAMachineFunc WipeAMachineFunc
Expand All @@ -68,6 +68,7 @@ type Options struct {
AnotherTalosVersion string
AnotherKubernetesVersion string
OmnictlPath string
InfraProvider string
}

// Run the integration tests.
Expand Down Expand Up @@ -1144,8 +1145,6 @@ Test flow of cluster creation and scaling using cluster templates.`,
var re *regexp.Regexp

if options.RunTestPattern != "" {
var err error

if re, err = regexp.Compile(options.RunTestPattern); err != nil {
log.Printf("run test pattern parse error: %s", err)

Expand Down Expand Up @@ -1180,33 +1179,47 @@ Test flow of cluster creation and scaling using cluster templates.`,
}
}

preRunTests := []testing.InternalTest{}

if options.ProvisionMachines != 0 {
preRunTests = append(preRunTests, testing.InternalTest{
Name: "AssertMachinesShouldBeProvisioned",
F: AssertMachinesShouldBeProvisioned(ctx, rootClient, options.ProvisionMachines, "main", options.MachineOptions.TalosVersion, options.InfraProvider),
})
}

if len(preRunTests) > 0 {
if err = runTests(preRunTests); err != nil {
return err
}
}

machineSemaphore := semaphore.NewWeighted(int64(options.ExpectedMachines))

exitCode := testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
makeTests(ctx, testsToRun, machineSemaphore),
nil,
nil,
nil,
).Run()
if err = runTests(makeTests(ctx, testsToRun, machineSemaphore)); err != nil {
return err
}

postRunTests := []testing.InternalTest{}

extraTests := []testing.InternalTest{}
if options.ProvisionMachines != 0 {
postRunTests = append(postRunTests, testing.InternalTest{
Name: "AssertMachinesShouldBeDeprovisioned",
F: AssertMachinesShouldBeDeprovisioned(ctx, rootClient, "main"),
})
}

if options.RunStatsCheck {
extraTests = append(extraTests, testing.InternalTest{
postRunTests = append(postRunTests, testing.InternalTest{
Name: "AssertStatsLimits",
F: AssertStatsLimits(ctx),
})
}

if len(extraTests) > 0 && exitCode == 0 {
exitCode = testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
extraTests,
nil,
nil,
nil,
).Run()
if len(postRunTests) > 0 {
if err = runTests(postRunTests); err != nil {
return err
}
}

if options.CleanupLinks {
Expand All @@ -1215,8 +1228,20 @@ Test flow of cluster creation and scaling using cluster templates.`,
}
}

return nil
}

func runTests(testsToRun []testing.InternalTest) error {
exitCode := testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
testsToRun,
nil,
nil,
nil,
).Run()

if exitCode != 0 {
return errors.New("test failed")
return fmt.Errorf("test failed")
}

return nil
Expand Down
20 changes: 13 additions & 7 deletions cmd/integration-test/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ var rootCmd = &cobra.Command{
testOptions := tests.Options{
RunTestPattern: rootCmdFlags.runTestPattern,

ExpectedMachines: rootCmdFlags.expectedMachines,
CleanupLinks: rootCmdFlags.cleanupLinks,
RunStatsCheck: rootCmdFlags.runStatsCheck,
ExpectedMachines: rootCmdFlags.expectedMachines,
CleanupLinks: rootCmdFlags.cleanupLinks,
RunStatsCheck: rootCmdFlags.runStatsCheck,
ProvisionMachines: rootCmdFlags.provisionMachinesCount,

MachineOptions: rootCmdFlags.machineOptions,
AnotherTalosVersion: rootCmdFlags.anotherTalosVersion,
AnotherKubernetesVersion: rootCmdFlags.anotherKubernetesVersion,
OmnictlPath: rootCmdFlags.omnictlPath,
InfraProvider: rootCmdFlags.infraProvider,
}

if rootCmdFlags.restartAMachineScript != "" {
Expand Down Expand Up @@ -116,11 +118,13 @@ func execCmd(ctx context.Context, parsedScript []string, args ...string) error {
var rootCmdFlags struct {
endpoint string
runTestPattern string
infraProvider string

expectedMachines int
parallel int64
cleanupLinks bool
runStatsCheck bool
provisionMachinesCount int
expectedMachines int
parallel int64
cleanupLinks bool
runStatsCheck bool

testsTimeout time.Duration

Expand Down Expand Up @@ -158,6 +162,8 @@ func init() {
rootCmd.Flags().DurationVarP(&rootCmdFlags.testsTimeout, "timeout", "t", time.Hour, "tests global timeout")
rootCmd.Flags().BoolVar(&rootCmdFlags.cleanupLinks, "cleanup-links", false, "remove all links after the tests are complete")
rootCmd.Flags().BoolVar(&rootCmdFlags.runStatsCheck, "run-stats-check", false, "runs stats check after the test is complete")
rootCmd.Flags().IntVar(&rootCmdFlags.provisionMachinesCount, "provision-machines", 0, "provisions machines through the infrastructure provider")
rootCmd.Flags().StringVar(&rootCmdFlags.infraProvider, "infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines")
}

// withContext wraps with CLI context.
Expand Down
6 changes: 2 additions & 4 deletions hack/test/integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,15 @@ KERNEL_ARGS="siderolink.api=grpc://$LOCAL_IP:8090?jointoken=${JOIN_TOKEN} talos.
if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
PROMETHEUS_CONTAINER=$(docker run --network host -p "9090:9090" -v "$(pwd)/hack/compose/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" -it --rm -d prom/prometheus)

TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu:latest --kernel-args="${KERNEL_ARGS}" --machines=30)

sleep 10
TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu-infra-provider:latest --create-service-account --omni-api-endpoint=https://$LOCAL_IP:8099)

SSL_CERT_DIR=hack/certs:/etc/ssl/certs \
${ARTIFACTS}/integration-test-linux-amd64 \
--endpoint https://my-instance.localhost:8099 \
--talos-version=${TALOS_VERSION} \
--omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \
--expected-machines=30 \
--cleanup-links \
--provision-machines=30 \
--run-stats-check \
-t 4m \
-p 10 \
Expand Down

0 comments on commit 81e08eb

Please sign in to comment.