diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 76fdae162d16..680e42a75fcb 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -193,7 +193,7 @@ jobs: strategy: fail-fast: false matrix: - dtest: [basics, bootstraptoken, cacerts, etcd, lazypull, skew, upgrade] + dtest: [basics, bootstraptoken, cacerts, etcd, lazypull, skew, snapshotrestore, upgrade] env: BRANCH_NAME: ${{ needs.build-go-tests.outputs.branch_name }} steps: @@ -222,6 +222,8 @@ jobs: cd ./tests/docker/${{ matrix.dtest }} if [ ${{ matrix.dtest }} = "upgrade" ] || [ ${{ matrix.dtest }} = "skew" ]; then ./${{ matrix.dtest }}.test -k3sImage=$K3S_IMAGE -branch=$BRANCH_NAME + elif [ ${{ matrix.dtest }} = "snapshotrestore" ]; then + ./${{ matrix.dtest }}.test -ci else ./${{ matrix.dtest }}.test -k3sImage=$K3S_IMAGE fi \ No newline at end of file diff --git a/tests/docker/snapshotrestore/snapshotrestore_test.go b/tests/docker/snapshotrestore/snapshotrestore_test.go new file mode 100644 index 000000000000..555ff482b032 --- /dev/null +++ b/tests/docker/snapshotrestore/snapshotrestore_test.go @@ -0,0 +1,209 @@ +package snapshotrestore + +import ( + "flag" + "fmt" + "strings" + "testing" + + tester "github.com/k3s-io/k3s/tests/docker" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/utils/set" +) + +var k3sImage = flag.String("k3sImage", "rancher/systemd-node", "The image used to provision containers") +var serverCount = flag.Int("serverCount", 3, "number of server nodes") +var agentCount = flag.Int("agentCount", 1, "number of agent nodes") +var ci = flag.Bool("ci", false, "running on CI") +var config *tester.TestConfig +var snapshotname string + +func Test_DockerSnapshotRestore(t *testing.T) { + RegisterFailHandler(Fail) + flag.Parse() + suiteConfig, reporterConfig := GinkgoConfiguration() + RunSpecs(t, "SnapshotRestore Test Suite", suiteConfig, reporterConfig) +} + +var _ = Describe("Verify snapshots and cluster restores work", Ordered, func() { + Context("Setup Cluster", func() { + It("should provision servers and agents", func() { + var err error + config, err = tester.NewTestConfig(*k3sImage) + Expect(err).NotTo(HaveOccurred()) + Expect(config.ProvisionServers(*serverCount)).To(Succeed()) + Expect(config.ProvisionAgents(*agentCount)).To(Succeed()) + Eventually(func() error { + return tester.CheckDefaultDeployments(config.KubeconfigFile) + }, "60s", "5s").Should(Succeed()) + Eventually(func() error { + return tester.NodesReady(config.KubeconfigFile) + }, "40s", "5s").Should(Succeed()) + }) + }) + Context("Cluster creates snapshots and workloads:", func() { + It("Verifies test workload before snapshot is created", func() { + res, err := config.DeployWorkload("clusterip.yaml") + Expect(err).NotTo(HaveOccurred(), "Cluster IP manifest not deployed: "+res) + + Eventually(func(g Gomega) { + cmd := "kubectl get pods -o=name -l k8s-app=nginx-app-clusterip --field-selector=status.phase=Running --kubeconfig=" + config.KubeconfigFile + res, err := tester.RunCommand(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(res).Should((ContainSubstring("test-clusterip")), "failed cmd: %q result: %s", cmd, res) + }, "240s", "5s").Should(Succeed()) + }) + + It("Verifies Snapshot is created", func() { + Eventually(func(g Gomega) { + _, err := config.Servers[0].RunCmdOnNode("k3s etcd-snapshot save") + g.Expect(err).NotTo(HaveOccurred()) + cmd := "ls /var/lib/rancher/k3s/server/db/snapshots/" + snapshotname, err = config.Servers[0].RunCmdOnNode(cmd) + 
g.Expect(err).NotTo(HaveOccurred()) + fmt.Println("Snapshot Name", snapshotname) + g.Expect(snapshotname).Should(ContainSubstring("on-demand-server-0")) + }, "240s", "10s").Should(Succeed()) + }) + + It("Verifies another test workload after snapshot is created", func() { + res, err := config.DeployWorkload("nodeport.yaml") + Expect(err).NotTo(HaveOccurred(), "NodePort manifest not deployed: "+res) + Eventually(func(g Gomega) { + cmd := "kubectl get pods -o=name -l k8s-app=nginx-app-nodeport --field-selector=status.phase=Running --kubeconfig=" + config.KubeconfigFile + res, err := tester.RunCommand(cmd) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(res).Should(ContainSubstring("test-nodeport"), "nodeport pod was not created") + }, "240s", "5s").Should(Succeed()) + }) + + }) + + Context("Cluster restores from snapshot", func() { + It("Restores the snapshot", func() { + //Stop k3s on all servers + for _, server := range config.Servers { + cmd := "systemctl stop k3s" + Expect(server.RunCmdOnNode(cmd)).Error().NotTo(HaveOccurred()) + if server != config.Servers[0] { + cmd = "k3s-killall.sh" + Expect(server.RunCmdOnNode(cmd)).Error().NotTo(HaveOccurred()) + } + } + //Restores from snapshot on server-0 + cmd := "k3s server --cluster-init --cluster-reset --cluster-reset-restore-path=/var/lib/rancher/k3s/server/db/snapshots/" + snapshotname + res, err := config.Servers[0].RunCmdOnNode(cmd) + Expect(err).NotTo(HaveOccurred()) + Expect(res).Should(ContainSubstring("Managed etcd cluster membership has been reset, restart without --cluster-reset flag now")) + + cmd = "systemctl start k3s" + Expect(config.Servers[0].RunCmdOnNode(cmd)).Error().NotTo(HaveOccurred()) + + }) + + It("Checks that other servers are not ready", func() { + By("Fetching node status") + var readyNodeNames []string + var notReadyNodeNames []string + Eventually(func(g Gomega) { + readyNodeNames = []string{config.Servers[0].Name} + for _, agent := range config.Agents { + readyNodeNames = append(readyNodeNames, agent.Name) + } + for _, server := range config.Servers[1:] { + notReadyNodeNames = append(notReadyNodeNames, server.Name) + } + g.Expect(CheckNodeStatus(config.KubeconfigFile, readyNodeNames, notReadyNodeNames)).To(Succeed()) + }, "240s", "5s").Should(Succeed()) + }) + + It("Rejoins other servers to cluster", func() { + // We must remove the db directory on the other servers before restarting k3s + // otherwise the nodes may join the old cluster + for _, server := range config.Servers[1:] { + cmd := "rm -rf /var/lib/rancher/k3s/server/db" + Expect(server.RunCmdOnNode(cmd)).Error().NotTo(HaveOccurred()) + } + + for _, server := range config.Servers[1:] { + cmd := "systemctl start k3s" + Expect(server.RunCmdOnNode(cmd)).Error().NotTo(HaveOccurred()) + } + }) + + It("Checks that all nodes and pods are ready", func() { + By("Fetching node status") + Eventually(func() error { + return tester.NodesReady(config.KubeconfigFile) + }, "60s", "5s").Should(Succeed()) + + By("Fetching Pods status") + Eventually(func(g Gomega) { + pods, err := tester.ParsePods(config.KubeconfigFile) + g.Expect(err).NotTo(HaveOccurred()) + for _, pod := range pods { + if strings.Contains(pod.Name, "helm-install") { + g.Expect(string(pod.Status.Phase)).Should(Equal("Succeeded"), pod.Name) + } else { + g.Expect(string(pod.Status.Phase)).Should(Equal("Running"), pod.Name) + } + } + }, "120s", "5s").Should(Succeed()) + }) + + It("Verifies that workload1 exists and workload2 does not", func() { + cmd := "kubectl get pods --kubeconfig=" + config.KubeconfigFile 
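+			// Only the clusterip workload, deployed before the snapshot was taken, should
+			// still be present; the nodeport workload was deployed after the snapshot and
+			// should be gone once the cluster is rolled back to the restored state.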
+ res, err := tester.RunCommand(cmd) + Expect(err).NotTo(HaveOccurred()) + Expect(res).Should(ContainSubstring("test-clusterip")) + Expect(res).ShouldNot(ContainSubstring("test-nodeport")) + }) + }) +}) + +var failed bool +var _ = AfterEach(func() { + failed = failed || CurrentSpecReport().Failed() +}) + +var _ = AfterSuite(func() { + if *ci || (config != nil && !failed) { + Expect(config.Cleanup()).To(Succeed()) + } +}) + +// Checks if nodes match the expected status +// We use kubectl directly, because getting a NotReady node status from the API is not easy +func CheckNodeStatus(kubeconfigFile string, readyNodes, notReadyNodes []string) error { + readyNodesSet := set.New(readyNodes...) + notReadyNodesSet := set.New(notReadyNodes...) + foundReadyNodes := make(set.Set[string], 0) + foundNotReadyNodes := make(set.Set[string], 0) + + cmd := "kubectl get nodes --no-headers --kubeconfig=" + kubeconfigFile + res, err := tester.RunCommand(cmd) + if err != nil { + return err + } + // extract the node status from the 2nd column of kubectl output + for _, line := range strings.Split(res, "\n") { + if strings.Contains(line, "k3s-test") { + // Line for some reason needs to be split twice + split := strings.Fields(line) + status := strings.TrimSpace(split[1]) + if status == "NotReady" { + foundNotReadyNodes.Insert(split[0]) + } else if status == "Ready" { + foundReadyNodes.Insert(split[0]) + } + } + } + if !foundReadyNodes.Equal(readyNodesSet) { + return fmt.Errorf("expected ready nodes %v, found %v", readyNodesSet, foundReadyNodes) + } + if !foundNotReadyNodes.Equal(notReadyNodesSet) { + return fmt.Errorf("expected not ready nodes %v, found %v", notReadyNodesSet, foundNotReadyNodes) + } + return nil +} diff --git a/tests/e2e/snapshotrestore/Vagrantfile b/tests/e2e/snapshotrestore/Vagrantfile deleted file mode 100644 index 6e9cac5f9613..000000000000 --- a/tests/e2e/snapshotrestore/Vagrantfile +++ /dev/null @@ -1,106 +0,0 @@ -ENV['VAGRANT_NO_PARALLEL'] = 'no' -NODE_ROLES = (ENV['E2E_NODE_ROLES'] || - ["server-0", "server-1", "server-2", "agent-0", "agent-1"]) -NODE_BOXES = (ENV['E2E_NODE_BOXES'] || - ['bento/ubuntu-24.04', 'bento/ubuntu-24.04', 'bento/ubuntu-24.04', 'bento/ubuntu-24.04', 'bento/ubuntu-24.04']) -GITHUB_BRANCH = (ENV['E2E_GITHUB_BRANCH'] || "master") -RELEASE_VERSION = (ENV['E2E_RELEASE_VERSION'] || "") -GOCOVER = (ENV['E2E_GOCOVER'] || "") -NODE_CPUS = (ENV['E2E_NODE_CPUS'] || 2).to_i -NODE_MEMORY = (ENV['E2E_NODE_MEMORY'] || 2048).to_i -# Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks -NETWORK_PREFIX = "10.10.10" -install_type = "" - -def provision(vm, role, role_num, node_num) - vm.box = NODE_BOXES[node_num] - vm.hostname = role - # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 - node_ip = "#{NETWORK_PREFIX}.#{100+node_num}" - vm.network "private_network", ip: node_ip, netmask: "255.255.255.0" - - scripts_location = Dir.exist?("./scripts") ? "./scripts" : "../scripts" - vagrant_defaults = File.exist?("./vagrantdefaults.rb") ? 
"./vagrantdefaults.rb" : "../vagrantdefaults.rb" - load vagrant_defaults - - defaultOSConfigure(vm) - addCoverageDir(vm, role, GOCOVER) - install_type = getInstallType(vm, RELEASE_VERSION, GITHUB_BRANCH) - - vm.provision "shell", inline: "ping -c 2 k3s.io" - - if role.include?("server") && role_num == 0 - vm.provision 'k3s-primary-server', type: 'k3s', run: 'once' do |k3s| - k3s.args = "server " - k3s.config = <<~YAML - token: vagrant - cluster-init: true - node-external-ip: #{NETWORK_PREFIX}.100 - flannel-iface: eth1 - tls-san: #{NETWORK_PREFIX}.100.nip.io - YAML - k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}] - k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 - end - - elsif role.include?("server") && role_num != 0 - vm.provision 'k3s-secondary-server', type: 'k3s', run: 'once' do |k3s| - k3s.args = "server" - k3s.config = <<~YAML - server: "https://#{NETWORK_PREFIX}.100:6443" - token: vagrant - node-external-ip: #{node_ip} - flannel-iface: eth1 - YAML - k3s.env = %W[K3S_KUBECONFIG_MODE=0644 K3S_TOKEN=vagrant #{install_type}] - k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 - end - end - - if role.include?("agent") - vm.provision 'k3s-agent', type: 'k3s', run: 'once' do |k3s| - k3s.args = "agent" - k3s.config = <<~YAML - server: "https://#{NETWORK_PREFIX}.100:6443" - token: vagrant - node-external-ip: #{node_ip} - flannel-iface: eth1 - YAML - k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}] - k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 - end - end - if vm.box.to_s.include?("microos") - vm.provision 'k3s-reload', type: 'reload', run: 'once' - end - # This step does not run by default and is designed to be called by higher level tools -end - -Vagrant.configure("2") do |config| - config.vagrant.plugins = ["vagrant-k3s", "vagrant-reload"] - # Default provider is libvirt, virtualbox is only provided as a backup - config.vm.provider "libvirt" do |v| - v.cpus = NODE_CPUS - v.memory = NODE_MEMORY - # We replicate the default prefix, but add a timestamp to enable parallel runs and cleanup of old VMs - v.default_prefix = File.basename(Dir.getwd) + "_" + Time.now.to_i.to_s + "_" - end - config.vm.provider "virtualbox" do |v| - v.cpus = NODE_CPUS - v.memory = NODE_MEMORY - end - - if NODE_ROLES.kind_of?(String) - NODE_ROLES = NODE_ROLES.split(" ", -1) - end - if NODE_BOXES.kind_of?(String) - NODE_BOXES = NODE_BOXES.split(" ", -1) - end - - NODE_ROLES.each_with_index do |role, i| - role_num = role.split("-", -1).pop.to_i - config.vm.define role do |node| - provision(node.vm, role, role_num, i) - end - end -end diff --git a/tests/e2e/snapshotrestore/snapshotrestore_test.go b/tests/e2e/snapshotrestore/snapshotrestore_test.go deleted file mode 100644 index 0f2fb8b9bd26..000000000000 --- a/tests/e2e/snapshotrestore/snapshotrestore_test.go +++ /dev/null @@ -1,318 +0,0 @@ -package snapshotrestore - -import ( - "flag" - "fmt" - "os" - "strings" - "testing" - "time" - - "github.com/k3s-io/k3s/tests/e2e" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" -) - -// Valid nodeOS: -// bento/ubuntu-24.04, opensuse/Leap-15.6.x86_64 -// eurolinux-vagrant/rocky-8, eurolinux-vagrant/rocky-9, - -var nodeOS = flag.String("nodeOS", "bento/ubuntu-24.04", "VM operating system") -var serverCount = flag.Int("serverCount", 3, "number of server nodes") -var agentCount = flag.Int("agentCount", 1, "number of agent nodes") -var hardened = flag.Bool("hardened", false, "true or false") -var ci = flag.Bool("ci", false, "running on CI") -var local = flag.Bool("local", false, "deploy a locally built K3s binary") - -// Environment Variables Info: -// E2E_RELEASE_VERSION=v1.23.1+k3s2 (default: latest commit from master) - -func Test_E2ESnapshotRestore(t *testing.T) { - RegisterFailHandler(Fail) - flag.Parse() - suiteConfig, reporterConfig := GinkgoConfiguration() - RunSpecs(t, "SnapshotRestore Test Suite", suiteConfig, reporterConfig) -} - -var ( - kubeConfigFile string - serverNodeNames []string - agentNodeNames []string - snapshotname string -) - -var _ = ReportAfterEach(e2e.GenReport) - -var _ = Describe("Verify snapshots and cluster restores work", Ordered, func() { - Context("Cluster creates snapshots and workloads:", func() { - It("Starts up with no issues", func() { - var err error - if *local { - serverNodeNames, agentNodeNames, err = e2e.CreateLocalCluster(*nodeOS, *serverCount, *agentCount) - } else { - serverNodeNames, agentNodeNames, err = e2e.CreateCluster(*nodeOS, *serverCount, *agentCount) - } - Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog(err)) - fmt.Println("CLUSTER CONFIG") - fmt.Println("OS:", *nodeOS) - fmt.Println("Server Nodes:", serverNodeNames) - fmt.Println("Agent Nodes:", agentNodeNames) - kubeConfigFile, err = e2e.GenKubeConfigFile(serverNodeNames[0]) - Expect(err).NotTo(HaveOccurred()) - }) - - It("Checks Node and Pod Status", func() { - fmt.Printf("\nFetching node status\n") - Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - g.Expect(node.Status).Should(Equal("Ready")) - } - }, "620s", "5s").Should(Succeed()) - _, _ = e2e.ParseNodes(kubeConfigFile, true) - - fmt.Printf("\nFetching Pods status\n") - Eventually(func(g Gomega) { - pods, err := e2e.ParsePods(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, pod := range pods { - if strings.Contains(pod.Name, "helm-install") { - g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) - } else { - g.Expect(pod.Status).Should(Equal("Running"), pod.Name) - } - } - }, "620s", "5s").Should(Succeed()) - _, _ = e2e.ParsePods(kubeConfigFile, true) - }) - - It("Verifies test workload before snapshot is created", func() { - res, err := e2e.DeployWorkload("clusterip.yaml", kubeConfigFile, *hardened) - Expect(err).NotTo(HaveOccurred(), "Cluster IP manifest not deployed: "+res) - - Eventually(func(g Gomega) { - cmd := "kubectl get pods -o=name -l k8s-app=nginx-app-clusterip --field-selector=status.phase=Running --kubeconfig=" + kubeConfigFile - res, err := e2e.RunCommand(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(res).Should((ContainSubstring("test-clusterip")), "failed cmd: %q result: %s", cmd, res) - }, "240s", "5s").Should(Succeed()) - }) - - It("Verifies Snapshot is created", func() { - Eventually(func(g Gomega) { - cmd := "k3s etcd-snapshot save" - _, err := e2e.RunCmdOnNode(cmd, "server-0") - g.Expect(err).NotTo(HaveOccurred()) - cmd = "ls /var/lib/rancher/k3s/server/db/snapshots/" - snapshotname, err = e2e.RunCmdOnNode(cmd, 
"server-0") - g.Expect(err).NotTo(HaveOccurred()) - fmt.Println("Snapshot Name", snapshotname) - g.Expect(snapshotname).Should(ContainSubstring("on-demand-server-0")) - }, "420s", "10s").Should(Succeed()) - }) - - It("Verifies another test workload after snapshot is created", func() { - _, err := e2e.DeployWorkload("nodeport.yaml", kubeConfigFile, *hardened) - Expect(err).NotTo(HaveOccurred(), "NodePort manifest not deployed") - Eventually(func(g Gomega) { - cmd := "kubectl get pods -o=name -l k8s-app=nginx-app-nodeport --field-selector=status.phase=Running --kubeconfig=" + kubeConfigFile - res, err := e2e.RunCommand(cmd) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(res).Should(ContainSubstring("test-nodeport"), "nodeport pod was not created") - }, "240s", "5s").Should(Succeed()) - }) - - }) - - Context("Cluster is reset normally", func() { - It("Resets the cluster", func() { - for _, nodeName := range serverNodeNames { - cmd := "systemctl stop k3s" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - if nodeName != serverNodeNames[0] { - cmd = "k3s-killall.sh" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - } - } - - cmd := "k3s server --cluster-reset" - res, err := e2e.RunCmdOnNode(cmd, serverNodeNames[0]) - Expect(err).NotTo(HaveOccurred()) - Expect(res).Should(ContainSubstring("Managed etcd cluster membership has been reset, restart without --cluster-reset flag now")) - - cmd = "systemctl start k3s" - Expect(e2e.RunCmdOnNode(cmd, serverNodeNames[0])).Error().NotTo(HaveOccurred()) - }) - - It("Checks that other servers are not ready", func() { - fmt.Printf("\nFetching node status\n") - Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - if strings.Contains(node.Name, serverNodeNames[0]) || strings.Contains(node.Name, "agent-") { - g.Expect(node.Status).Should(Equal("Ready")) - } else { - g.Expect(node.Status).Should(Equal("NotReady")) - } - } - }, "240s", "5s").Should(Succeed()) - _, _ = e2e.ParseNodes(kubeConfigFile, true) - }) - - It("Rejoins other servers to cluster", func() { - // We must remove the db directory on the other servers before restarting k3s - // otherwise the nodes may join the old cluster - for _, nodeName := range serverNodeNames[1:] { - cmd := "rm -rf /var/lib/rancher/k3s/server/db" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - } - - for _, nodeName := range serverNodeNames[1:] { - cmd := "systemctl start k3s" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - time.Sleep(20 * time.Second) //Stagger the restarts for etcd leaners - } - }) - - It("Checks that all nodes and pods are ready", func() { - Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - nodeJournal, _ := e2e.GetJournalLogs(node.Name) - g.Expect(node.Status).Should(Equal("Ready"), nodeJournal) - } - }, "420s", "5s").Should(Succeed()) - - _, _ = e2e.ParseNodes(kubeConfigFile, true) - - fmt.Printf("\nFetching Pods status\n") - Eventually(func(g Gomega) { - pods, err := e2e.ParsePods(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, pod := range pods { - if strings.Contains(pod.Name, "helm-install") { - g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) - } else { - g.Expect(pod.Status).Should(Equal("Running"), pod.Name) - } - } - }, "420s", "5s").Should(Succeed()) - }) - 
It("Verifies that workload1 and workload1 exist", func() { - cmd := "kubectl get pods --kubeconfig=" + kubeConfigFile - res, err := e2e.RunCommand(cmd) - Expect(err).NotTo(HaveOccurred()) - Expect(res).Should(ContainSubstring("test-clusterip")) - Expect(res).Should(ContainSubstring("test-nodeport")) - }) - - }) - - Context("Cluster restores from snapshot", func() { - It("Restores the snapshot", func() { - //Stop k3s on all nodes - for _, nodeName := range serverNodeNames { - cmd := "systemctl stop k3s" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - if nodeName != serverNodeNames[0] { - cmd = "k3s-killall.sh" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - } - } - //Restores from snapshot on server-0 - cmd := "k3s server --cluster-init --cluster-reset --cluster-reset-restore-path=/var/lib/rancher/k3s/server/db/snapshots/" + snapshotname - res, err := e2e.RunCmdOnNode(cmd, serverNodeNames[0]) - Expect(err).NotTo(HaveOccurred()) - Expect(res).Should(ContainSubstring("Managed etcd cluster membership has been reset, restart without --cluster-reset flag now")) - - cmd = "systemctl start k3s" - Expect(e2e.RunCmdOnNode(cmd, serverNodeNames[0])).Error().NotTo(HaveOccurred()) - - }) - - It("Checks that other servers are not ready", func() { - fmt.Printf("\nFetching node status\n") - Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - if strings.Contains(node.Name, serverNodeNames[0]) || strings.Contains(node.Name, "agent-") { - g.Expect(node.Status).Should(Equal("Ready")) - } else { - g.Expect(node.Status).Should(Equal("NotReady")) - } - } - }, "240s", "5s").Should(Succeed()) - _, _ = e2e.ParseNodes(kubeConfigFile, true) - }) - - It("Rejoins other servers to cluster", func() { - // We must remove the db directory on the other servers before restarting k3s - // otherwise the nodes may join the old cluster - for _, nodeName := range serverNodeNames[1:] { - cmd := "rm -rf /var/lib/rancher/k3s/server/db" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - } - - for _, nodeName := range serverNodeNames[1:] { - cmd := "systemctl start k3s" - Expect(e2e.RunCmdOnNode(cmd, nodeName)).Error().NotTo(HaveOccurred()) - } - }) - - It("Checks that all nodes and pods are ready", func() { - //Verifies node is up and pods running - fmt.Printf("\nFetching node status\n") - Eventually(func(g Gomega) { - nodes, err := e2e.ParseNodes(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, node := range nodes { - g.Expect(node.Status).Should(Equal("Ready")) - } - }, "420s", "5s").Should(Succeed()) - _, _ = e2e.ParseNodes(kubeConfigFile, true) - - fmt.Printf("\nFetching Pods status\n") - Eventually(func(g Gomega) { - pods, err := e2e.ParsePods(kubeConfigFile, false) - g.Expect(err).NotTo(HaveOccurred()) - for _, pod := range pods { - if strings.Contains(pod.Name, "helm-install") { - g.Expect(pod.Status).Should(Equal("Completed"), pod.Name) - } else { - g.Expect(pod.Status).Should(Equal("Running"), pod.Name) - } - } - }, "620s", "5s").Should(Succeed()) - _, _ = e2e.ParsePods(kubeConfigFile, true) - }) - - It("Verifies that workload1 exists and workload2 does not", func() { - cmd := "kubectl get pods --kubeconfig=" + kubeConfigFile - res, err := e2e.RunCommand(cmd) - Expect(err).NotTo(HaveOccurred()) - Expect(res).Should(ContainSubstring("test-clusterip")) - Expect(res).ShouldNot(ContainSubstring("test-nodeport")) - }) - }) -}) - -var 
failed bool -var _ = AfterEach(func() { - failed = failed || CurrentSpecReport().Failed() -}) - -var _ = AfterSuite(func() { - if failed { - AddReportEntry("journald-logs", e2e.TailJournalLogs(1000, append(serverNodeNames, agentNodeNames...))) - } else { - Expect(e2e.GetCoverageReport(append(serverNodeNames, agentNodeNames...))).To(Succeed()) - } - if !failed || *ci { - Expect(e2e.DestroyCluster()).To(Succeed()) - Expect(os.Remove(kubeConfigFile)).To(Succeed()) - } -})
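A local usage sketch for the new docker-based suite, assuming the binary is compiled with the stock go test -c and that the flag defaults declared at the top of snapshotrestore_test.go are acceptable (in CI the .test binaries come from the separate build-go-tests job):

    cd tests/docker/snapshotrestore
    go test -c .                  # produces ./snapshotrestore.test, the binary the workflow step executes
    ./snapshotrestore.test -ci    # same flag the e2e.yaml step passes; -ci makes AfterSuite tear the cluster down even if specs fail
    # optional flags defined in the test: -k3sImage=<image> -serverCount=N -agentCount=N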