From fecadf910650345378bf2143545fcaf448d43c6e Mon Sep 17 00:00:00 2001
From: Pritesh Lahoti
Date: Thu, 19 Dec 2024 00:24:23 +0530
Subject: [PATCH] test: fix flaky test due to timeout

The e2e test times out, most likely because the large number of PVs
provisioned across tests exhausts the available disk space. This PR
restricts the PV size across all tests to 1Gi (as opposed to the
default 100Gi).
---
 .../e2e/install/cockroachdb_helm_e2e_test.go | 168 ++++++++----------
 1 file changed, 78 insertions(+), 90 deletions(-)

diff --git a/tests/e2e/install/cockroachdb_helm_e2e_test.go b/tests/e2e/install/cockroachdb_helm_e2e_test.go
index 434bde75..a8dbaa6a 100644
--- a/tests/e2e/install/cockroachdb_helm_e2e_test.go
+++ b/tests/e2e/install/cockroachdb_helm_e2e_test.go
@@ -59,31 +59,17 @@ func TestCockroachDbHelmInstall(t *testing.T) {
 	// Setup the args. For this test, we will set the following input values:
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
+		SetValues: getHelmValues(map[string]string{
 			"conf.cluster-name":                        "test",
 			"init.provisioning.enabled":                "true",
 			"init.provisioning.databases[0].name":      testDBName,
 			"init.provisioning.databases[0].owners[0]": "root",
-			"storage.persistentVolume.size":            "1Gi",
-		},
+		}),
 	}
 
 	// Deploy the cockroachdb helm chart and checks installation should succeed.
-	err := helm.InstallE(t, options, helmChartPath, releaseName)
-	require.NoError(t, err)
-
-	//... and make sure to delete the helm release at the end of the test.
-	defer func() {
-		helm.Delete(t, options, releaseName, true)
-
-		danglingSecrets := []string{crdbCluster.CaSecret, crdbCluster.ClientSecret, crdbCluster.NodeSecret}
-
-		for i := range danglingSecrets {
-			_, err = k8s.GetSecretE(t, kubectlOptions, danglingSecrets[i])
-			require.Equal(t, true, kube.IsNotFound(err))
-			t.Logf("Secret %s deleted by helm uninstall", danglingSecrets[i])
-		}
-	}()
+	helm.Install(t, options, helmChartPath, releaseName)
+	defer cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
 	// Print the debug logs in case of test failure.
 	defer func() {
@@ -149,28 +135,18 @@ func TestCockroachDbHelmInstallWithCAProvided(t *testing.T) {
 	// Setup the args. For this test, we will set the following input values:
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
+		SetValues: getHelmValues(map[string]string{
 			"tls.certs.selfSigner.caProvided": "true",
 			"tls.certs.selfSigner.caSecret":   customCASecret,
-			"storage.persistentVolume.size":   "1Gi",
-		},
+		}),
 	}
 
 	// Deploy the cockroachdb helm chart and checks installation should succeed.
-	err = helm.InstallE(t, options, helmChartPath, releaseName)
-	require.NoError(t, err)
+	helm.Install(t, options, helmChartPath, releaseName)
 
 	//... and make sure to delete the helm release at the end of the test.
 	defer func() {
-		helm.Delete(t, options, releaseName, true)
-
-		danglingSecrets := []string{crdbCluster.ClientSecret, crdbCluster.NodeSecret}
-
-		for i := range danglingSecrets {
-			_, err = k8s.GetSecretE(t, kubectlOptions, danglingSecrets[i])
-			require.Equal(t, true, kube.IsNotFound(err))
-			t.Logf("Secret %s deleted by helm uninstall", danglingSecrets[i])
-		}
+		cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
 		// custom user CA certificate secret should not be deleted by pre-delete job
 		_, err = k8s.GetSecretE(t, kubectlOptions, crdbCluster.CaSecret)
@@ -277,21 +253,18 @@ func TestCockroachDbHelmMigration(t *testing.T) {
 	// Setup the args
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
-			"tls.certs.provided":             "true",
-			"tls.certs.selfSigner.enabled":   "false",
-			"tls.certs.clientRootSecret":     crdbCluster.ClientSecret,
-			"tls.certs.nodeSecret":           crdbCluster.NodeSecret,
-			"storage.persistentVolume.size":  "1Gi",
-		},
+		SetValues: getHelmValues(map[string]string{
+			"tls.certs.provided":           "true",
+			"tls.certs.selfSigner.enabled": "false",
+			"tls.certs.clientRootSecret":   crdbCluster.ClientSecret,
+			"tls.certs.nodeSecret":         crdbCluster.NodeSecret,
+		}),
 	}
 
 	// Deploy the cockroachdb helm chart and checks installation should succeed.
-	err = helm.InstallE(t, options, helmChartPath, releaseName)
-	require.NoError(t, err)
+	helm.Install(t, options, helmChartPath, releaseName)
+	defer cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
-	//... and make sure to delete the helm release at the end of the test.
-	defer helm.Delete(t, options, releaseName, true)
 	// Print the debug logs in case of test failure.
 	defer func() {
 		if t.Failed() {
@@ -313,10 +286,9 @@ func TestCockroachDbHelmMigration(t *testing.T) {
 	// Default method is self-signer so no need to set explicitly
 	options = &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
-			"storage.persistentVolume.size":   "1Gi",
+		SetValues: getHelmValues(map[string]string{
 			"statefulset.updateStrategy.type": "OnDelete",
-		},
+		}),
 		ExtraArgs: map[string][]string{
 			"upgrade": []string{
 				"--timeout=20m",
@@ -371,19 +343,14 @@ func TestCockroachDbWithInsecureMode(t *testing.T) {
 	// Setup the args. For this test, we will set the following input values:
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
+		SetValues: getHelmValues(map[string]string{
 			"tls.enabled": "false",
-		},
+		}),
 	}
 
 	// Deploy the cockroachdb helm chart and checks installation should succeed.
-	err := helm.InstallE(t, options, helmChartPath, releaseName)
-	require.NoError(t, err)
-
-	//... and make sure to delete the helm release at the end of the test.
-	defer func() {
-		helm.Delete(t, options, releaseName, true)
-	}()
+	helm.Install(t, options, helmChartPath, releaseName)
+	defer cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
 	// Print the debug logs in case of test failure.
 	defer func() {
@@ -466,23 +433,18 @@ spec:
 	// Setup the args. For this test, we will set the following input values:
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
-		SetValues: map[string]string{
+		SetValues: getHelmValues(map[string]string{
 			"tls.enabled":                      "true",
 			"tls.certs.selfSigner.enabled":     "false",
 			"tls.certs.certManager":            "true",
 			"tls.certs.certManagerIssuer.kind": "Issuer",
 			"tls.certs.certManagerIssuer.name": "cockroachdb",
-		},
+		}),
 	}
 
 	// Deploy the cockroachdb helm chart and checks installation should succeed.
-	err = helm.InstallE(t, options, helmChartPath, releaseName)
-	require.NoError(t, err)
-
-	//... and make sure to delete the helm release at the end of the test.
-	defer func() {
-		helm.Delete(t, options, releaseName, true)
-	}()
+	helm.Install(t, options, helmChartPath, releaseName)
+	defer cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
 	// Print the debug logs in case of test failure.
 	defer func() {
@@ -525,7 +487,6 @@ func testWALFailoverExistingCluster(t *testing.T, additionalValues map[string]st
 	namespaceName := "cockroach" + strings.ToLower(random.UniqueId())
 	numReplicas := 3
 	kubectlOptions := k8s.NewKubectlOptions("", "", namespaceName)
-	var err error
 
 	crdbCluster := testutil.CockroachCluster{
 		Cfg: cfg,
@@ -549,12 +510,11 @@ func testWALFailoverExistingCluster(t *testing.T, additionalValues map[string]st
 	}()
 
 	// Configure options for the initial deployment.
-	initialValues := map[string]string{
-		"conf.cluster-name":             "test",
-		"conf.store.enabled":            "true",
-		"storage.persistentVolume.size": "1Gi",
-		"statefulset.replicas":          strconv.Itoa(numReplicas),
-	}
+	initialValues := getHelmValues(map[string]string{
+		"conf.cluster-name":    "test",
+		"conf.store.enabled":   "true",
+		"statefulset.replicas": strconv.Itoa(numReplicas),
+	})
 	options := &helm.Options{
 		KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
 		SetValues:      initialValues,
 	}
 
 	// Deploy the helm chart and confirm the installation is successful.
 	helm.Install(t, options, helmChartPath, releaseName)
-
-	defer func() {
-		err = helm.DeleteE(t, options, releaseName, true)
-		// Ignore the error if the operation timed out.
-		if err == nil || !strings.Contains(err.Error(), "timed out") {
-			require.NoError(t, err)
-		}
-
-		danglingSecrets := []string{
-			crdbCluster.CaSecret,
-			crdbCluster.ClientSecret,
-			crdbCluster.NodeSecret,
-		}
-		for i := range danglingSecrets {
-			_, err = k8s.GetSecretE(t, kubectlOptions, danglingSecrets[i])
-			require.Equal(t, true, kube.IsNotFound(err))
-			t.Logf("Secret %s deleted by helm uninstall", danglingSecrets[i])
-		}
-	}()
+	defer cleanupResources(t, crdbCluster, releaseName, kubectlOptions, options)
 
 	// Wait for the service endpoint to be available.
 	serviceName := fmt.Sprintf("%s-cockroachdb-public", releaseName)
@@ -639,3 +581,49 @@
 		testutil.RequireClusterToBeReadyEventuallyTimeout(t, crdbCluster, 600*time.Second)
 	}
 }
+
+func getHelmValues(inputValues map[string]string) map[string]string {
+	defaultValues := map[string]string{
+		// Override the persistent storage size to 1Gi so that we do not run out of space.
+ "storage.persistentVolume.size": "1Gi", + } + + for k, v := range defaultValues { + inputValues[k] = v + } + + return inputValues +} + +func cleanupResources( + t *testing.T, + crdbCluster testutil.CockroachCluster, + releaseName string, + kubectlOptions *k8s.KubectlOptions, + options *helm.Options, +) { + err := helm.DeleteE(t, options, releaseName, true) + // Ignore the error if the operation timed out. + if err == nil || !strings.Contains(err.Error(), "timed out") { + require.NoError(t, err) + } else { + t.Logf("Error while deleting helm release: %v", err) + } + + danglingSecrets := []string{ + crdbCluster.CaSecret, + crdbCluster.ClientSecret, + crdbCluster.NodeSecret, + } + for i := range danglingSecrets { + _, err = k8s.GetSecretE(t, kubectlOptions, danglingSecrets[i]) + require.Equal(t, true, kube.IsNotFound(err)) + t.Logf("Secret %s deleted by helm uninstall", danglingSecrets[i]) + } + + // Delete all PVs to free up space for other tests in a best-effort manner. + err = k8s.RunKubectlE(t, kubectlOptions, "delete", "pv", "--all", "--wait=false") + if err != nil { + t.Logf("Error while deleting PVs: %v", err) + } +}