Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Soak Test for CNI. #2915

Merged
merged 2 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions scripts/run-soak-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Runs the amazon-vpc-cni soak test: the ginkgo specs in the cni.test binary
# under test/build that match the "SOAK_TEST" focus.

# Abort the script as soon as any command exits with a non-zero status.
set -e

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Directory the cni.test ginkgo binary is executed from.
GINKGO_TEST_BUILD="$SCRIPT_DIR/../test/build"
# TEST_IMAGE_REGISTRY is the registry in test-infra-* accounts where e2e test images are stored.
# A value already present in the environment takes precedence over the default below.
TEST_IMAGE_REGISTRY=${TEST_IMAGE_REGISTRY:-"617930562442.dkr.ecr.us-west-2.amazonaws.com"}

# If $ENDPOINT is set (as it is for beta clusters), then $ENDPOINT_OPTION,
# defined in lib/cluster.sh, adds --eks-endpoint=$ENDPOINT to the ginkgo
# test command.

source "$SCRIPT_DIR"/lib/cluster.sh
source "$SCRIPT_DIR"/lib/canary.sh

# run_ginkgo_test runs the cni.test ginkgo binary restricted to a single focus.
#
# Arguments:
#   $1 - focus expression handed to ginkgo's --focus flag.
# Reads from the environment / earlier script state:
#   EXTRA_GINKGO_FLAGS, GINKGO_TEST_BUILD, KUBE_CONFIG_PATH, CLUSTER_NAME,
#   REGION, VPC_ID, TEST_IMAGE_REGISTRY, ENDPOINT_OPTION.
# Returns the exit status of the ginkgo invocation.
function run_ginkgo_test() {
  local focus=$1
  echo "Running ginkgo tests with focus: $focus"

  # EXTRA_GINKGO_FLAGS and ENDPOINT_OPTION are deliberately left unquoted: they
  # hold zero or more flags and must word-split (and disappear when empty).
  # Every single-valued expansion is quoted so that paths or values containing
  # spaces or glob characters reach ginkgo as exactly one argument.
  (CGO_ENABLED=0 ginkgo $EXTRA_GINKGO_FLAGS --no-color --focus="$focus" -v --timeout 3h --fail-on-pending "$GINKGO_TEST_BUILD/cni.test" -- \
    --cluster-kubeconfig="$KUBE_CONFIG_PATH" \
    --cluster-name="$CLUSTER_NAME" \
    --aws-region="$REGION" \
    --aws-vpc-id="$VPC_ID" \
    --ng-name-label-key="kubernetes.io/os" \
    --ng-name-label-val="linux" \
    --test-image-registry="$TEST_IMAGE_REGISTRY" \
    --publish-cw-metrics=true \
    $ENDPOINT_OPTION)
}

# Load the cluster settings used by run_ginkgo_test.
# NOTE(review): load_cluster_details is not defined in this script; presumably it
# comes from one of the sourced lib/*.sh files — confirm which variables it sets.
load_cluster_details

# Run only the specs whose description matches the SOAK_TEST focus.
run_ginkgo_test "SOAK_TEST"

# Reached only when the run above succeeded, because of `set -e`.
# $SECONDS is bash's counter of seconds elapsed since the shell started.
echo "all tests ran successfully in $(($SECONDS / 60)) minutes and $(($SECONDS % 60)) seconds"
199 changes: 199 additions & 0 deletions test/integration/cni/soak_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package cni

import (
"fmt"
"strconv"
"time"

"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
"github.com/aws/amazon-vpc-cni-k8s/test/integration/common"
"github.com/aws/aws-sdk-go/service/ec2"

"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
v1 "k8s.io/api/apps/v1"
coreV1 "k8s.io/api/core/v1"
)

// Ensures Pods are launched on both Primary and Secondary Network Interfaces on two nodes,
// and verifies network connectivity across the pods launched on these interfaces.

// The whole test takes about 1 hour: it repeatedly launches pods on the primary and secondary
// interfaces, runs connectivity tests, deletes the pods, and repeats the process.

var _ = Describe("SOAK Test pod networking", Ordered, func() {

// Shared state for the soak specs. The setup nodes assign these values and the
// specs and teardown nodes read them.
var (
	// Result of the most recent framework call; asserted right after each call.
	err error

	// Security-group rule parameters, assigned in BeforeAll and reused in AfterAll.
	protocol   string
	serverPort int

	// Command and arguments for the netcat server container.
	serverListenCmd     []string
	serverListenCmdArgs []string

	// Builders for the tester commands: one for the expected-success case and
	// one for the negative case.
	testConnectionCommandFunc       func(serverPod coreV1.Pod, port int) []string
	testFailedConnectionCommandFunc func(serverPod coreV1.Pod, port int) []string

	// Output the tester is expected to produce on a successful connection.
	testerExpectedStdOut string
	testerExpectedStdErr string

	// Server deployments pinned to each node, and their pods grouped by ENI type.
	primaryNodeDeployment             *v1.Deployment
	secondaryNodeDeployment           *v1.Deployment
	interfaceToPodListOnPrimaryNode   common.InterfaceTypeToPodList
	interfaceToPodListOnSecondaryNode common.InterfaceTypeToPodList

	// Soak tuning: number of generated specs and the pause at the end of each one.
	timesToRunTheTest   = 12
	waitDuringInMinutes = 5 * time.Minute
)

BeforeAll(func() {
fmt.Println("Starting SOAK test")

protocol = ec2.ProtocolTcp
serverPort = 2273

By("Authorize Security Group Ingress on EC2 instance.")
err = f.CloudServices.EC2().
AuthorizeSecurityGroupIngress(instanceSecurityGroupID, protocol, serverPort, serverPort, "0.0.0.0/0", false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is instanceSecurityGroupID defined?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nvm, found it

Expect(err).ToNot(HaveOccurred())

By("Authorize Security Group Egress on EC2 instance.")
err = f.CloudServices.EC2().
AuthorizeSecurityGroupEgress(instanceSecurityGroupID, protocol, serverPort, serverPort, "0.0.0.0/0")
Expect(err).ToNot(HaveOccurred())
})

// AfterAll runs once after the ordered container and removes the security
// group rules added in BeforeAll.
//
// Both revocations are attempted before either result is asserted. Asserting
// after the first call would abort this node on failure and skip the second
// revoke, leaving a rule open to 0.0.0.0/0 behind.
AfterAll(func() {
	fmt.Println("Cleaning SOAK test")

	By("Revoke Security Group Ingress.")
	ingressErr := f.CloudServices.EC2().
		RevokeSecurityGroupIngress(instanceSecurityGroupID, protocol, serverPort, serverPort, "0.0.0.0/0", false)

	By("Revoke Security Group Egress.")
	egressErr := f.CloudServices.EC2().
		RevokeSecurityGroupEgress(instanceSecurityGroupID, protocol, serverPort, serverPort, "0.0.0.0/0")

	// The annotations identify which revoke failed in the failure report.
	Expect(ingressErr).ToNot(HaveOccurred(), "revoking security group ingress rule")
	Expect(egressErr).ToNot(HaveOccurred(), "revoking security group egress rule")

	By("SOAK test completed")
})

Context("[SOAK_TEST] Establish TCP connection from tester to server on both Primary and Secondary ENI", func() {
// BeforeEach prepares one soak iteration: it defines the tester commands,
// starts a netcat server deployment on each node, and records which pods
// landed on the primary and the secondary ENI of each node.
BeforeEach(func() {
	// Tester command for the positive case. netcat writes its verbose output
	// to stderr rather than stdout; "-v" enables verbose output and "-w5"
	// times out after 5 seconds.
	testConnectionCommandFunc = func(receiverPod coreV1.Pod, port int) []string {
		targetPort := strconv.Itoa(port)
		return []string{"nc", "-v", "-w5", receiverPod.Status.PodIP, targetPort}
	}

	// Tester command for the negative case: same invocation against the next
	// port up, so a failure here confirms the positive case is meaningful.
	testFailedConnectionCommandFunc = func(receiverPod coreV1.Pod, port int) []string {
		wrongPort := strconv.Itoa(port + 1)
		return []string{"nc", "-v", "-w5", receiverPod.Status.PodIP, wrongPort}
	}

	// Server side: "-l" is listen mode and "-k" keeps the server up instead
	// of closing after each connection.
	serverListenCmd = []string{"nc"}
	serverListenCmdArgs = []string{"-k", "-l", strconv.Itoa(serverPort)}

	serverContainer := manifest.
		NewNetCatAlpineContainer(f.Options.TestImageRegistry).
		Command(serverListenCmd).
		Args(serverListenCmdArgs).
		Build()

	By("Creating Pods on Primary and Secondary ENI on Primary and Secondary Node")

	// Twice the per-interface IP count so pods spill onto a secondary ENI too.
	primarySpec := manifest.
		NewDefaultDeploymentBuilder().
		Container(serverContainer).
		Replicas(maxIPPerInterface*2).
		NodeName(primaryNode.Name).
		PodLabel("node", "primary").
		Name("primary-node-server").
		Build()

	primaryNodeDeployment, err = f.K8sResourceManagers.
		DeploymentManager().
		CreateAndWaitTillDeploymentIsReady(primarySpec, utils.DefaultDeploymentReadyTimeout)
	Expect(err).NotTo(HaveOccurred())

	interfaceToPodListOnPrimaryNode =
		common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, "node", "primary", f)

	// Each interface type needs at least two pods on each node so every
	// placement combination can be exercised.
	Expect(len(interfaceToPodListOnPrimaryNode.PodsOnPrimaryENI)).
		To(BeNumerically(">", 1))
	Expect(len(interfaceToPodListOnPrimaryNode.PodsOnSecondaryENI)).
		To(BeNumerically(">", 1))

	// Same layout on the secondary node.
	secondarySpec := manifest.
		NewDefaultDeploymentBuilder().
		Container(serverContainer).
		Replicas(maxIPPerInterface*2).
		NodeName(secondaryNode.Name).
		PodLabel("node", "secondary").
		Name("secondary-node-server").
		Build()

	secondaryNodeDeployment, err = f.K8sResourceManagers.
		DeploymentManager().
		CreateAndWaitTillDeploymentIsReady(secondarySpec, utils.DefaultDeploymentReadyTimeout)
	Expect(err).NotTo(HaveOccurred())

	interfaceToPodListOnSecondaryNode =
		common.GetPodsOnPrimaryAndSecondaryInterface(secondaryNode, "node", "secondary", f)

	Expect(len(interfaceToPodListOnSecondaryNode.PodsOnPrimaryENI)).
		To(BeNumerically(">", 1))
	Expect(len(interfaceToPodListOnSecondaryNode.PodsOnSecondaryENI)).
		To(BeNumerically(">", 1))
})

// AfterEach removes the server deployments created for the iteration, primary
// node first, and asserts each deletion before moving on to the next.
AfterEach(func() {
	By("TearDown Pods")

	deployments := []*v1.Deployment{primaryNodeDeployment, secondaryNodeDeployment}
	for _, deployment := range deployments {
		err = f.K8sResourceManagers.DeploymentManager().
			DeleteAndWaitTillDeploymentIsDeleted(deployment)
		Expect(err).NotTo(HaveOccurred())
	}
})

// Generate timesToRunTheTest specs. Each spec checks connectivity for the pods
// created by BeforeEach, checks that the negative case fails, and then sleeps
// for waitDuringInMinutes before the spec ends.
for i := 0; i < timesToRunTheTest; i++ {
	// Number the spec so a failure report identifies the iteration. The text
	// is built here, while the spec tree is being constructed, so the loop
	// variable is never read from inside the spec body.
	specText := fmt.Sprintf(
		"assert connectivity across nodes and across interface types (iteration %d of %d)",
		i+1, timesToRunTheTest)

	It(specText, func() {
		// On success netcat reports on stderr and leaves stdout empty.
		testerExpectedStdErr = "succeeded!"
		testerExpectedStdOut = ""

		CheckConnectivityForMultiplePodPlacement(
			interfaceToPodListOnPrimaryNode, interfaceToPodListOnSecondaryNode,
			serverPort, testerExpectedStdOut, testerExpectedStdErr, testConnectionCommandFunc)

		By("verifying connection fails for unreachable port")

		VerifyConnectivityFailsForNegativeCase(interfaceToPodListOnPrimaryNode.PodsOnPrimaryENI[0],
			interfaceToPodListOnPrimaryNode.PodsOnPrimaryENI[1], serverPort,
			testFailedConnectionCommandFunc)

		// Pause before the spec ends; this is the soak interval per iteration.
		time.Sleep(waitDuringInMinutes)
	})
}
})
})
Loading