diff --git a/.gitignore b/.gitignore index 27e2f7832..18d44ceb8 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ *.so *.dylib +# Avoid checking in keys +*.pem + # Test binary, built with `go test -c` *.test diff --git a/test/e2e/framework/kubernetes/get-logs.go b/test/e2e/framework/kubernetes/get-logs.go index 7cd728af8..80f9c83d8 100644 --- a/test/e2e/framework/kubernetes/get-logs.go +++ b/test/e2e/framework/kubernetes/get-logs.go @@ -12,9 +12,16 @@ import ( "k8s.io/client-go/tools/clientcmd" ) -func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) { +type GetPodLogs struct { + KubeConfigFilePath string + Namespace string + LabelSelector string +} + +func (p *GetPodLogs) Run() error { + fmt.Printf("printing pod logs for namespace: %s, labelselector: %s\n", p.Namespace, p.LabelSelector) // Load the kubeconfig file to get the configuration to access the cluster - config, err := clientcmd.BuildConfigFromFlags("", kubeconfigpath) + config, err := clientcmd.BuildConfigFromFlags("", p.KubeConfigFilePath) if err != nil { log.Printf("error building kubeconfig: %s\n", err) } @@ -25,8 +32,14 @@ func PrintPodLogs(kubeconfigpath, namespace, labelSelector string) { log.Printf("error creating clientset: %s\n", err) } + PrintPodLogs(context.Background(), clientset, p.Namespace, p.LabelSelector) + + return nil +} + +func PrintPodLogs(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) { // List all the pods in the namespace - pods, err := clientset.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{ + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ LabelSelector: labelSelector, }) if err != nil { diff --git a/test/e2e/framework/kubernetes/install-retina-helm.go b/test/e2e/framework/kubernetes/install-retina-helm.go index 82f10745d..e5507b7ef 100644 --- a/test/e2e/framework/kubernetes/install-retina-helm.go +++ b/test/e2e/framework/kubernetes/install-retina-helm.go @@ -1,6 +1,7 @@ package kubernetes import ( + "context" "fmt" "log" "os" @@ -11,10 +12,12 @@ import ( "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/cli" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" ) const ( - createTimeout = 240 * time.Second // windpws is slow + createTimeout = 20 * time.Minute // windows is slow deleteTimeout = 60 * time.Second ) @@ -32,6 +35,8 @@ type InstallHelmChart struct { } func (i *InstallHelmChart) Run() error { + ctx, cancel := context.WithTimeout(context.Background(), createTimeout) + defer cancel() settings := cli.New() settings.KubeConfig = i.KubeConfigFilePath actionConfig := new(action.Configuration) @@ -97,7 +102,7 @@ func (i *InstallHelmChart) Run() error { client.WaitForJobs = true // install the chart here - rel, err := client.Run(chart, chart.Values) + rel, err := client.RunWithContext(ctx, chart, chart.Values) if err != nil { return fmt.Errorf("failed to install chart: %w", err) } @@ -106,6 +111,23 @@ func (i *InstallHelmChart) Run() error { // this will confirm the values set during installation log.Printf("chart values: %v\n", rel.Config) + // ensure all pods are running, since helm doesn't care about windows + config, err := clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath) + if err != nil { + return fmt.Errorf("error building kubeconfig: %w", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + labelSelector := "k8s-app=retina" + err = WaitForPodReady(ctx, clientset, "kube-system", labelSelector) + if err != nil { + return fmt.Errorf("error waiting for retina pods to be ready: %w", err) + } + return nil } diff --git a/test/e2e/framework/kubernetes/no-crashes.go b/test/e2e/framework/kubernetes/no-crashes.go index b598dd64c..a5d5ec03b 100644 --- a/test/e2e/framework/kubernetes/no-crashes.go +++ b/test/e2e/framework/kubernetes/no-crashes.go @@ -4,19 +4,19 @@ import ( "context" "fmt" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/fields" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" ) -type NoCrashes struct { +var ErrPodCrashed = fmt.Errorf("pod has crashes") + +type EnsureStableCluster struct { LabelSelector string PodNamespace string KubeConfigFilePath string } -func (n *NoCrashes) Run() error { +func (n *EnsureStableCluster) Run() error { config, err := clientcmd.BuildConfigFromFlags("", n.KubeConfigFilePath) if err != nil { return fmt.Errorf("error building kubeconfig: %w", err) @@ -27,32 +27,17 @@ func (n *NoCrashes) Run() error { return fmt.Errorf("error creating Kubernetes client: %w", err) } - fieldSelector := fields.Everything() - - pods, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{ - LabelSelector: n.LabelSelector, - FieldSelector: fieldSelector.String(), - }) + err = WaitForPodReady(context.TODO(), clientset, n.PodNamespace, n.LabelSelector) if err != nil { - return fmt.Errorf("error listing pods: %w", err) + return fmt.Errorf("error waiting for retina pods to be ready: %w", err) } - - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - if status.RestartCount > 0 { - PrintPodLogs(n.KubeConfigFilePath, pod.Namespace, pod.Name) - return fmt.Errorf("Pod %s has %d restarts", pod.Name, status) - } - } - } - return nil } -func (n *NoCrashes) Prevalidate() error { +func (n *EnsureStableCluster) Prevalidate() error { return nil } -func (n *NoCrashes) Stop() error { +func (n *EnsureStableCluster) Stop() error { return nil } diff --git a/test/e2e/framework/kubernetes/wait-pod-ready.go b/test/e2e/framework/kubernetes/wait-pod-ready.go index 208fe140d..c53a7def0 100644 --- a/test/e2e/framework/kubernetes/wait-pod-ready.go +++ b/test/e2e/framework/kubernetes/wait-pod-ready.go @@ -20,7 +20,11 @@ const ( func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error { podReadyMap := make(map[string]bool) + printIterator := 0 conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) { + defer func() { + printIterator++ + }() var podList *corev1.PodList podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) if err != nil { @@ -40,11 +44,21 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names return false, fmt.Errorf("error getting Pod: %w", err) } + for istatus := range pod.Status.ContainerStatuses { + status := &pod.Status.ContainerStatuses[istatus] + if status.RestartCount > 0 { + return false, fmt.Errorf("pod %s has %d restarts: status: %+v: %w", pod.Name, status.RestartCount, status, ErrPodCrashed) + } + } + // Check the Pod phase if pod.Status.Phase != corev1.PodRunning { - log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name) + if printIterator%5 == 0 { + log.Printf("pod \"%s\" is not in Running state yet. Waiting...\n", pod.Name) + } return false, nil } + if !podReadyMap[pod.Name] { log.Printf("pod \"%s\" is in Running state\n", pod.Name) podReadyMap[pod.Name] = true @@ -56,6 +70,7 @@ func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, names err := wait.PollUntilContextCancel(ctx, RetryIntervalPodsReady, true, conditionFunc) if err != nil { + PrintPodLogs(ctx, clientset, namespace, labelSelector) return fmt.Errorf("error waiting for pods in namespace \"%s\" with label \"%s\" to be in Running state: %w", namespace, labelSelector, err) } return nil diff --git a/test/e2e/jobs/jobs.go b/test/e2e/jobs/jobs.go index 49ed20c43..83579256b 100644 --- a/test/e2e/jobs/jobs.go +++ b/test/e2e/jobs/jobs.go @@ -11,35 +11,46 @@ import ( tcp "github.com/microsoft/retina/test/e2e/scenarios/tcp" ) -func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string) *types.Job { +func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string, createInfra bool) *types.Job { job := types.NewJob("Create e2e test infrastructure") - job.AddStep(&azure.CreateResourceGroup{ - SubscriptionID: subID, - ResourceGroupName: clusterName, - Location: location, - }, nil) - - job.AddStep(&azure.CreateVNet{ - VnetName: "testvnet", - VnetAddressSpace: "10.0.0.0/9", - }, nil) - - job.AddStep(&azure.CreateSubnet{ - SubnetName: "testsubnet", - SubnetAddressSpace: "10.0.0.0/12", - }, nil) - - job.AddStep(&azure.CreateNPMCluster{ - ClusterName: clusterName, - PodCidr: "10.128.0.0/9", - DNSServiceIP: "192.168.0.10", - ServiceCidr: "192.168.0.0/28", - }, nil) - - job.AddStep(&azure.GetAKSKubeConfig{ - KubeConfigFilePath: kubeConfigFilePath, - }, nil) + if createInfra { + job.AddStep(&azure.CreateResourceGroup{ + SubscriptionID: subID, + ResourceGroupName: clusterName, + Location: location, + }, nil) + + job.AddStep(&azure.CreateVNet{ + VnetName: "testvnet", + VnetAddressSpace: "10.0.0.0/9", + }, nil) + + job.AddStep(&azure.CreateSubnet{ + SubnetName: "testsubnet", + SubnetAddressSpace: "10.0.0.0/12", + }, nil) + + job.AddStep(&azure.CreateNPMCluster{ + ClusterName: clusterName, + PodCidr: "10.128.0.0/9", + DNSServiceIP: "192.168.0.10", + ServiceCidr: "192.168.0.0/28", + }, nil) + + job.AddStep(&azure.GetAKSKubeConfig{ + KubeConfigFilePath: kubeConfigFilePath, + }, nil) + + } else { + job.AddStep(&azure.GetAKSKubeConfig{ + KubeConfigFilePath: "./test.pem", + ClusterName: clusterName, + SubscriptionID: subID, + ResourceGroupName: clusterName, + Location: location, + }, nil) + } job.AddStep(&generic.LoadFlags{ TagEnv: generic.DefaultTagEnv, @@ -122,7 +133,7 @@ func InstallAndTestRetinaBasicMetrics(kubeConfigFilePath, chartPath string) *typ job.AddScenario(dns.ValidateBasicDNSMetrics(scenario.name, scenario.req, scenario.resp)) } - job.AddStep(&kubernetes.NoCrashes{ + job.AddStep(&kubernetes.EnsureStableCluster{ PodNamespace: "kube-system", LabelSelector: "k8s-app=retina", }, nil) @@ -189,7 +200,7 @@ func UpgradeAndTestRetinaAdvancedMetrics(kubeConfigFilePath, chartPath, valuesFi job.AddScenario(latency.ValidateLatencyMetric()) - job.AddStep(&kubernetes.NoCrashes{ + job.AddStep(&kubernetes.EnsureStableCluster{ PodNamespace: "kube-system", LabelSelector: "k8s-app=retina", }, nil) diff --git a/test/e2e/retina_e2e_test.go b/test/e2e/retina_e2e_test.go index f804eef49..e69dab3e6 100644 --- a/test/e2e/retina_e2e_test.go +++ b/test/e2e/retina_e2e_test.go @@ -2,6 +2,7 @@ package retina import ( "crypto/rand" + "flag" "math/big" "os" "os/user" @@ -16,14 +17,23 @@ import ( "github.com/stretchr/testify/require" ) -var locations = []string{"eastus2", "centralus", "southcentralus", "uksouth", "centralindia", "westus2"} +var ( + locations = []string{"eastus2", "centralus", "southcentralus", "uksouth", "centralindia", "westus2"} + createInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing") + deleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing") +) // TestE2ERetina tests all e2e scenarios for retina func TestE2ERetina(t *testing.T) { curuser, err := user.Current() require.NoError(t, err) + flag.Parse() - clusterName := curuser.Username + common.NetObsRGtag + strconv.FormatInt(time.Now().Unix(), 10) + clusterName := os.Getenv("CLUSTER_NAME") + if clusterName == "" { + clusterName = curuser.Username + common.NetObsRGtag + strconv.FormatInt(time.Now().Unix(), 10) + t.Logf("CLUSTER_NAME is not set, generating a random cluster name: %s", clusterName) + } subID := os.Getenv("AZURE_SUBSCRIPTION_ID") require.NotEmpty(t, subID) @@ -49,7 +59,7 @@ func TestE2ERetina(t *testing.T) { kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem") // CreateTestInfra - createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, clusterName, location, kubeConfigFilePath)) + createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, clusterName, location, kubeConfigFilePath, *createInfra)) createTestInfra.Run() // Hacky way to ensure that the test infra is deleted even if the test panics @@ -57,7 +67,9 @@ func TestE2ERetina(t *testing.T) { if r := recover(); r != nil { t.Logf("Recovered in TestE2ERetina, %v", r) } - _ = jobs.DeleteTestInfra(subID, clusterName, location).Run() + if *deleteInfra { + _ = jobs.DeleteTestInfra(subID, clusterName, location).Run() + } }() // Install and test Retina basic metrics