From 0197a0e557694272c241efb933bce55bcc8f6ba9 Mon Sep 17 00:00:00 2001
From: mabotao <1397247577@qq.com>
Date: Tue, 13 Apr 2021 17:52:04 +0800
Subject: [PATCH] add E2E of failover test

Signed-off-by: mabotao <1397247577@qq.com>
---
 test/e2e/failover_test.go | 313 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 313 insertions(+)
 create mode 100644 test/e2e/failover_test.go

diff --git a/test/e2e/failover_test.go b/test/e2e/failover_test.go
new file mode 100644
index 000000000000..b2629df386bb
--- /dev/null
+++ b/test/e2e/failover_test.go
@@ -0,0 +1,313 @@
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/onsi/ginkgo"
+	"github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/rand"
+	"k8s.io/apimachinery/pkg/util/wait"
+	kubeclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
+	workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
+	"github.com/karmada-io/karmada/pkg/util"
+	"github.com/karmada-io/karmada/pkg/util/names"
+	"github.com/karmada-io/karmada/test/helper"
+)
+
+// failover testing verifies that workloads on a failed member cluster are rescheduled to other available clusters.
+var _ = ginkgo.Describe("failover testing", func() {
+	ginkgo.Context("Deployment propagation testing", func() {
+		var groupMatchedClusters []*clusterv1alpha1.Cluster
+		var falseClusters []*clusterv1alpha1.Cluster
+		policyNamespace := testNamespace
+		policyName := deploymentNamePrefix + rand.String(RandomStrLength)
+		deploymentNamespace := testNamespace
+		deploymentName := policyName
+		deployment := helper.NewDeployment(deploymentNamespace, deploymentName)
+		originalAPIEndpointList := make(map[string]string)
+		maxGroups := 1
+		minGroups := 1
+		numOfFailedClusters := 1
+
+		// targetClusterNames records the cluster names in the resource binding.
+		var targetClusterNames []string
+
+		// Set MaxGroups=MinGroups=1; clusters are selected by the label location=CHN.
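+		// For reference, the policy built below is roughly equivalent to the
+		// following manifest. This is a sketch: the exact shape depends on
+		// helper.NewPolicyWithGroupsDeployment and on the clusterLabels value
+		// defined by the e2e suite.
+		//
+		//	apiVersion: policy.karmada.io/v1alpha1
+		//	kind: PropagationPolicy
+		//	spec:
+		//	  resourceSelectors:
+		//	    - apiVersion: apps/v1
+		//	      kind: Deployment
+		//	      name: <deploymentName>
+		//	  placement:
+		//	    clusterAffinity:
+		//	      labelSelector:
+		//	        matchLabels: <clusterLabels> # e.g. location: CHN
+		//	    spreadConstraints:
+		//	      - spreadByField: cluster
+		//	        maxGroups: 1
+		//	        minGroups: 1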
+		policy := helper.NewPolicyWithGroupsDeployment(policyNamespace, policyName, deployment, maxGroups, minGroups, clusterLabels)
+
+		ginkgo.BeforeEach(func() {
+			ginkgo.By(fmt.Sprintf("creating policy(%s/%s)", policyNamespace, policyName), func() {
+				_, err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Create(context.TODO(), policy, metav1.CreateOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+		})
+
+		ginkgo.AfterEach(func() {
+			ginkgo.By(fmt.Sprintf("removing policy(%s/%s)", policyNamespace, policyName), func() {
+				err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Delete(context.TODO(), policyName, metav1.DeleteOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+		})
+
+		ginkgo.It("deployment failover testing", func() {
+			clusters, err := fetchClusters(karmadaClient)
+			gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+			ginkgo.By(fmt.Sprintf("creating deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
+				fmt.Printf("MaxGroups = %v, MinGroups = %v\n", maxGroups, minGroups)
+				_, err := kubeClient.AppsV1().Deployments(testNamespace).Create(context.TODO(), deployment, metav1.CreateOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			})
+
+			ginkgo.By("collect the original API endpoint of each cluster", func() {
+				for _, cluster := range clusters {
+					var err error
+					originalAPIEndpointList, err = collectAPIEndpoints(controlPlaneClient, cluster.Name, originalAPIEndpointList)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+					fmt.Printf("Original API endpoint of cluster %s is (%s)\n", cluster.Name, originalAPIEndpointList[cluster.Name])
+				}
+			})
+
+			ginkgo.By("check if the deployment is present on the scheduled clusters", func() {
+				var err error
+				targetClusterNames, err = allBindingClusters(deployment, minGroups)
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+				for _, targetClusterName := range targetClusterNames {
+					clusterClient := getClusterClient(targetClusterName)
+					gomega.Expect(clusterClient).ShouldNot(gomega.BeNil())
+
+					klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName)
+					err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
+						_, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{})
+						if err != nil {
+							if errors.IsNotFound(err) {
+								return false, nil
+							}
+							return false, err
+						}
+						targetCluster, err := util.GetCluster(controlPlaneClient, targetClusterName)
+						if err != nil {
+							return false, err
+						}
+						groupMatchedClusters = append(groupMatchedClusters, targetCluster)
+						fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName)
+						return true, nil
+					})
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+				}
+
+				fmt.Printf("Successfully scheduled to %d clusters\n", len(groupMatchedClusters))
+				gomega.Expect(len(groupMatchedClusters)).Should(gomega.Equal(minGroups))
+			})
+
+			ginkgo.By("set the condition statuses of a fixed number of matched clusters to false", func() {
+				remaining := numOfFailedClusters
+				for _, cluster := range groupMatchedClusters {
+					if remaining <= 0 {
+						break
+					}
+					remaining--
+
+					err := setWrongAPIEndpoint(controlPlaneClient, cluster.Name)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+
+					falseClusters = append(falseClusters, cluster)
+					// TODO: wait until the Ready condition of the cluster actually turns
+					// false; the cluster is only marked as failed after several
+					// consecutive probe timeouts. See the sketch below.
+				}
+				for _, falseCluster := range falseClusters {
+					fmt.Printf("cluster %s has been set to a wrong API endpoint\n", falseCluster.Name)
+				}
+			})
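+			// A sketch of the wait the TODO above refers to (a hypothetical helper,
+			// not wired into the test yet): poll the Cluster object until its Ready
+			// condition turns false, so the control plane has observed the failure
+			// before rescheduling is checked. It assumes the Ready condition type is
+			// exposed as clusterv1alpha1.ClusterConditionReady.
+			//
+			//	func waitForClusterNotReady(c client.Client, clusterName string) error {
+			//		return wait.Poll(pollInterval, pollTimeout, func() (bool, error) {
+			//			clusterObj := &clusterv1alpha1.Cluster{}
+			//			if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
+			//				return false, err
+			//			}
+			//			for _, condition := range clusterObj.Status.Conditions {
+			//				if condition.Type == clusterv1alpha1.ClusterConditionReady {
+			//					return condition.Status == metav1.ConditionFalse, nil
+			//				}
+			//			}
+			//			return false, nil
+			//		})
+			//	}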
ginkgo.By("check whether deployments of failed clusters are rescheduled to other available clusters", func() { + totalNum := 0 + + // Since labels are added to all clusters, clusters are used here instead of written as clusters which have label. + if numOfFailedClusters > (len(clusters) - len(groupMatchedClusters)) { + fmt.Printf("there are not enough candidate clusters for rescheduling") + } else { + targetClusterNames, err := allBindingClusters(deployment, minGroups) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + + for _, targetClusterName := range targetClusterNames { + clusterClient := getClusterClient(targetClusterName) + gomega.Expect(clusterClient).ShouldNot(gomega.BeNil()) + + klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName) + err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) { + _, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName) + return true, nil + }) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + totalNum++ + } + } + fmt.Printf("reschedule in %d target clusters\n", totalNum) + gomega.Expect(totalNum == minGroups).ShouldNot(gomega.BeFalse()) + }) + + ginkgo.By("recover not ready clusters", func() { + for _, falseCluster := range falseClusters { + fmt.Printf("cluster %s is waiting for recovering\n", falseCluster.Name) + originalAPIEndpoint := originalAPIEndpointList[falseCluster.Name] + _ = recoverAPIEndpoint(controlPlaneClient, falseCluster.Name, originalAPIEndpoint) + err := delFalseClusterDeployment(deployment, falseCluster.Name) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }) + + ginkgo.By(fmt.Sprintf("removing deployment(%s/%s)", deploymentNamespace, deploymentName), func() { + err := kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deploymentName, metav1.DeleteOptions{}) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + }) + + ginkgo.By("check if the rescheduled deployment has been deleted from member clusters", func() { + for _, targetClusterName := range targetClusterNames { + fmt.Printf("tag4, %s\n", targetClusterNames) + clusterClient := getClusterClient(targetClusterName) + gomega.Expect(clusterClient).ShouldNot(gomega.BeNil()) + + klog.Infof("Waiting for deployment(%s/%s) disappear on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName) + err = wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + _, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return true, nil + } + return false, err + } + return false, nil + }) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + }) + }) + }) +}) + +// collect API endpoint of each cluster, store them in map format, cluster.Name as key and APIEndpoint as value +func collectAPIEndpoints(c client.Client, clusterName string, originalAPIEndpointList map[string]string) (map[string]string, error) { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + return false, 
err + } + originalAPIEndpointList[clusterName] = clusterObj.Spec.APIEndpoint + return true, nil + }) + return originalAPIEndpointList, err +} + +// Set wrong API endpoint +func setWrongAPIEndpoint(c client.Client, clusterName string) error { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + // set the APIEndpoint of matched cluster to a wrong value + wrongAPIEndpoint := "https://172.19.1.3:6443" + clusterObj.Spec.APIEndpoint = wrongAPIEndpoint + if err := c.Update(context.TODO(), clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + return true, nil + }) + return err +} + +// Recover API endpoint of the false cluster +func recoverAPIEndpoint(c client.Client, clusterName string, originalAPIEndpoint string) error { + err := wait.Poll(2*time.Second, 10*time.Second, func() (done bool, err error) { + clusterObj := &clusterv1alpha1.Cluster{} + if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil { + return false, err + } + clusterObj.Spec.APIEndpoint = originalAPIEndpoint + if err := c.Update(context.TODO(), clusterObj); err != nil { + if errors.IsConflict(err) { + return false, nil + } + return false, err + } + fmt.Printf("recovered API endpoint is %s\n", clusterObj.Spec.APIEndpoint) + return true, nil + }) + return err +} + +// get the target cluster names from binding information +func allBindingClusters(deployment *appsv1.Deployment, minGroups int) (targetClusterNames []string, err error) { + targetClusterNames = nil + bindingName := names.GenerateBindingName(deployment.Kind, deployment.Name) + fmt.Printf("deploy kind is %s, name is %s\n", deployment.Kind, deployment.Name) + binding := &workv1alpha1.ResourceBinding{} + + fmt.Printf("collect the target clusters in resource binding\n") + err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) { + err = controlPlaneClient.Get(context.TODO(), client.ObjectKey{Namespace: deployment.Namespace, Name: bindingName}, binding) + if err != nil { + if errors.IsNotFound(err) { + return false, nil + } + return false, err + } + return true, nil + }) + if err != nil { + return nil, err + } + for _, cluster := range binding.Spec.Clusters { + targetClusterNames = append(targetClusterNames, cluster.Name) + } + fmt.Printf("target clusters in resource binding are %s\n", targetClusterNames) + if len(targetClusterNames) == minGroups { + return targetClusterNames, nil + } + fmt.Printf("wrong scheduling result\n") + return nil, nil +} + +// delete deployment of false cluster +func delFalseClusterDeployment(deployment *appsv1.Deployment, clusterName string) error { + // change kubeConfig to the config of false cluster, the redefined kubeConfig will not affect the global corresponding information + homeDir := os.Getenv("HOME") + kubeConfigPath := fmt.Sprintf("%s/.kube/%s.config", homeDir, clusterName) + kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + fmt.Printf("kubeconfig fail") + return err + } + kubeClient, err := kubeclient.NewForConfig(kubeConfig) + if err != nil { + fmt.Printf("kubeClient fail") + return err + } + + err = kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deployment.Name, metav1.DeleteOptions{}) + if err != nil { + 
fmt.Printf("deleting deployment of cluster %s fail\n", clusterName) + return err + } + fmt.Printf("deployment in false cluster %s has been deleted\n", clusterName) + return nil +}