From 0197a0e557694272c241efb933bce55bcc8f6ba9 Mon Sep 17 00:00:00 2001
From: mabotao <1397247577@qq.com>
Date: Tue, 13 Apr 2021 17:52:04 +0800
Subject: [PATCH] add E2E of failover test

Signed-off-by: mabotao <1397247577@qq.com>
---
 test/e2e/failover_test.go | 313 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 313 insertions(+)
 create mode 100644 test/e2e/failover_test.go

diff --git a/test/e2e/failover_test.go b/test/e2e/failover_test.go
new file mode 100644
index 000000000000..b2629df386bb
--- /dev/null
+++ b/test/e2e/failover_test.go
@@ -0,0 +1,313 @@
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/onsi/ginkgo"
+	"github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/rand"
+	"k8s.io/apimachinery/pkg/util/wait"
+	kubeclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
+	workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
+	"github.com/karmada-io/karmada/pkg/util"
+	"github.com/karmada-io/karmada/pkg/util/names"
+	"github.com/karmada-io/karmada/test/helper"
+)
+
+// failover testing verifies that workloads on a failed member cluster are rescheduled to other available clusters.
+var _ = ginkgo.Describe("failover testing", func() {
+	ginkgo.Context("Deployment propagation testing", func() {
+		var groupMatchedClusters []*clusterv1alpha1.Cluster
+		var falseClusters []*clusterv1alpha1.Cluster
+		policyNamespace := testNamespace
+		policyName := deploymentNamePrefix + rand.String(RandomStrLength)
+		deploymentNamespace := testNamespace
+		deploymentName := policyName
+		deployment := helper.NewDeployment(deploymentNamespace, deploymentName)
+		originalAPIEndpointList := make(map[string]string)
+		maxGroups := 1
+		minGroups := 1
+		numOfFailedClusters := 1
+
+		// targetClusterNames records the cluster names in the resource binding.
+		var targetClusterNames []string
+
+		// Set MaxGroups=MinGroups=1; clusters are selected by the label location=CHN.
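+		// For reference, the policy built below is roughly equivalent to the
+		// following manifest. This is a sketch: the exact shape depends on
+		// helper.NewPolicyWithGroupsDeployment and on the clusterLabels value
+		// defined by the e2e suite.
+		//
+		//	apiVersion: policy.karmada.io/v1alpha1
+		//	kind: PropagationPolicy
+		//	spec:
+		//	  resourceSelectors:
+		//	    - apiVersion: apps/v1
+		//	      kind: Deployment
+		//	      name: <deploymentName>
+		//	  placement:
+		//	    clusterAffinity:
+		//	      labelSelector:
+		//	        matchLabels: <clusterLabels> # e.g. location: CHN
+		//	    spreadConstraints:
+		//	      - spreadByField: cluster
+		//	        maxGroups: 1
+		//	        minGroups: 1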
+		policy := helper.NewPolicyWithGroupsDeployment(policyNamespace, policyName, deployment, maxGroups, minGroups, clusterLabels)
+
+		ginkgo.BeforeEach(func() {
+			ginkgo.By(fmt.Sprintf("creating policy(%s/%s)", policyNamespace, policyName), func() {
+				_, err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Create(context.TODO(), policy, metav1.CreateOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+		})
+
+		ginkgo.AfterEach(func() {
+			ginkgo.By(fmt.Sprintf("removing policy(%s/%s)", policyNamespace, policyName), func() {
+				err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Delete(context.TODO(), policyName, metav1.DeleteOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+		})
+
+		ginkgo.It("deployment failover testing", func() {
+			clusters, err := fetchClusters(karmadaClient)
+			gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+			ginkgo.By(fmt.Sprintf("creating deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
+				fmt.Printf("MaxGroups = %v, MinGroups = %v\n", maxGroups, minGroups)
+				_, err := kubeClient.AppsV1().Deployments(testNamespace).Create(context.TODO(), deployment, metav1.CreateOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+
+			ginkgo.By("collect the original API endpoint of each cluster", func() {
+				for _, cluster := range clusters {
+					var err error
+					originalAPIEndpointList, err = collectAPIEndpoints(controlPlaneClient, cluster.Name, originalAPIEndpointList)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+					fmt.Printf("Original API endpoint of cluster %s is (%s)\n", cluster.Name, originalAPIEndpointList[cluster.Name])
+				}
+			})
+
+			ginkgo.By("check if the deployment is present on the scheduled clusters", func() {
+				var err error
+				targetClusterNames, err = allBindingClusters(deployment, minGroups)
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+				for _, targetClusterName := range targetClusterNames {
+					clusterClient := getClusterClient(targetClusterName)
+					gomega.Expect(clusterClient).ShouldNot(gomega.BeNil())
+
+					klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName)
+					err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
+						_, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{})
+						if err != nil {
+							if errors.IsNotFound(err) {
+								return false, nil
+							}
+							return false, err
+						}
+						targetCluster, err := util.GetCluster(controlPlaneClient, targetClusterName)
+						if err != nil {
+							return false, err
+						}
+						groupMatchedClusters = append(groupMatchedClusters, targetCluster)
+						fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName)
+						return true, nil
+					})
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+				}
+
+				fmt.Printf("Successfully scheduled to %d clusters\n", len(groupMatchedClusters))
+				gomega.Expect(len(groupMatchedClusters)).Should(gomega.Equal(minGroups))
+			})
+
+			ginkgo.By("set the condition statuses of a fixed number of matched clusters to false", func() {
+				remaining := numOfFailedClusters
+				for _, cluster := range groupMatchedClusters {
+					if remaining <= 0 {
+						break
+					}
+					remaining--
+
+					err := setWrongAPIEndpoint(controlPlaneClient, cluster.Name)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+					falseClusters = append(falseClusters, cluster)
+					// TODO: wait until the Ready condition of the cluster actually turns
+					// false; the cluster is only marked as failed after several
+					// consecutive probe timeouts. See the sketch below.
+				}
+				for _, falseCluster := range falseClusters {
+					fmt.Printf("cluster %s has been set to a wrong API endpoint\n", falseCluster.Name)
+				}
+			})
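+			// A sketch of the wait the TODO above refers to (a hypothetical helper,
+			// not wired into the test yet): poll the Cluster object until its Ready
+			// condition turns false, so the control plane has observed the failure
+			// before rescheduling is checked. It assumes the Ready condition type is
+			// exposed as clusterv1alpha1.ClusterConditionReady.
+			//
+			//	func waitForClusterNotReady(c client.Client, clusterName string) error {
+			//		return wait.Poll(pollInterval, pollTimeout, func() (bool, error) {
+			//			clusterObj := &clusterv1alpha1.Cluster{}
+			//			if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
+			//				return false, err
+			//			}
+			//			for _, condition := range clusterObj.Status.Conditions {
+			//				if condition.Type == clusterv1alpha1.ClusterConditionReady {
+			//					return condition.Status == metav1.ConditionFalse, nil
+			//				}
+			//			}
+			//			return false, nil
+			//		})
+			//	}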
ginkgo.By("check whether deployments of failed clusters are rescheduled to other available clusters", func() { + totalNum := 0 + + // Since labels are added to all clusters, clusters are used here instead of written as clusters which have label. + if numOfFailedClusters > (len(clusters) - len(groupMatchedClusters)) { + fmt.Printf("there are not enough candidate clusters for rescheduling") + } else { + targetClusterNames, err := allBindingClusters(deployment, minGroups) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + + for _, targetClusterName := range targetClusterNames { + clusterClient := getClusterClient(targetClusterName) + gomega.Expect(clusterClient).ShouldNot(gomega.BeNil()) + + klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName) + err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) { + _, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName) + return true, nil + }) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + totalNum++ + } + } + fmt.Printf("reschedule in %d target clusters\n", totalNum) + gomega.Expect(totalNum == minGroups).ShouldNot(gomega.BeFalse()) + }) + + ginkgo.By("recover not ready clusters", func() { + for _, falseCluster := range falseClusters { + fmt.Printf("cluster %s is waiting for recovering\n", falseCluster.Name) + originalAPIEndpoint := originalAPIEndpointList[falseCluster.Name] + _ = recoverAPIEndpoint(controlPlaneClient, falseCluster.Name, originalAPIEndpoint) + err := delFalseClusterDeployment(deployment, falseCluster.Name) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }) + + ginkgo.By(fmt.Sprintf("removing deployment(%s/%s)", deploymentNamespace, deploymentName), func() { + err := kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deploymentName, metav1.DeleteOptions{}) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + }) + + ginkgo.By("check if the rescheduled deployment has been deleted from member clusters", func() { + for _, targetClusterName := range targetClusterNames { + fmt.Printf("tag4, %s\n", targetClusterNames) + clusterClient := getClusterClient(targetClusterName) + gomega.Expect(clusterClient).ShouldNot(gomega.BeNil()) + + klog.Infof("Waiting for deployment(%s/%s) disappear on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName) + err = wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + _, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return true, nil + } + return false, err + } + return false, nil + }) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }) + }) + }) +}) + +// collect API endpoint of each cluster, store them in map format, cluster.Name as key and APIEndpoint as value +func collectAPIEndpoints(c client.Client, clusterName string, originalAPIEndpointList map[string]string) (map[string]string, error) { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + return false, 
err + } + originalAPIEndpointList[clusterName] = clusterObj.Spec.APIEndpoint + return true, nil + }) + return originalAPIEndpointList, err +} + +// Set wrong API endpoint +func setWrongAPIEndpoint(c client.Client, clusterName string) error { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + // set the APIEndpoint of matched cluster to a wrong value + wrongAPIEndpoint := "https://172.19.1.3:6443" + clusterObj.Spec.APIEndpoint = wrongAPIEndpoint + if err := c.Update(context.TODO(), clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + return true, nil + }) + return err +} + +// Recover API endpoint of the false cluster +func recoverAPIEndpoint(c client.Client, clusterName string, originalAPIEndpoint string) error { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + return false, err + } + clusterObj.Spec.APIEndpoint = originalAPIEndpoint + if err := c.Update(context.TODO(), clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + fmt.Printf("recovered API endpoint is %s\n", clusterObj.Spec.APIEndpoint) + return true, nil + }) + return err +} + +// get the target cluster names from binding information +func allBindingClusters(deployment *appsv1.Deployment, minGroups int) (targetClusterNames []string, err error) { + targetClusterNames = nil + bindingName := names.GenerateBindingName(deployment.Kind, deployment.Name) + fmt.Printf("deploy kind is %s, name is %s\n", deployment.Kind, deployment.Name) + binding := &workv1alpha1.ResourceBinding{} + + fmt.Printf("collect the target clusters in resource binding\n") + err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) { + err = controlPlaneClient.Get(context.TODO(), client.ObjectKey{Namespace: deployment.Namespace, Name: bindingName}, binding) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + return true, nil + }) + if err != nil { + return nil, err + } + for _, cluster := range binding.Spec.Clusters { + targetClusterNames = append(targetClusterNames, cluster.Name) + } + fmt.Printf("target clusters in resource binding are %s\n", targetClusterNames) + if len(targetClusterNames) == minGroups { + return targetClusterNames, nil + } + fmt.Printf("wrong scheduling result\n") + return nil, nil +} + +// delete deployment of false cluster +func delFalseClusterDeployment(deployment *appsv1.Deployment, clusterName string) error { + // change kubeConfig to the config of false cluster, the redefined kubeConfig will not affect the global corresponding information + homeDir := os.Getenv("HOME") + kubeConfigPath := fmt.Sprintf("%s/.kube/%s.config", homeDir, clusterName) + kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + fmt.Printf("kubeconfig fail") + return err + } + kubeClient, err := kubeclient.NewForConfig(kubeConfig) + if err != nil { + fmt.Printf("kubeClient fail") + return err + } + + err = kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deployment.Name, metav1.DeleteOptions{}) + if err != nil { + 
fmt.Printf("deleting deployment of cluster %s fail\n", clusterName) + return err + } + fmt.Printf("deployment in false cluster %s has been deleted\n", clusterName) + return nil +}