PWX-30726: Handle review comments
- Added integration tests for extender and health-monitor.
- Simulated a StorageDown state by using the Portworx driver's Pool Maintenance operation (condensed sketch below).
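
Both new tests share the same shape: schedule an app, put the pool on the node running it into maintenance so the driver reports StorageDown, check whether the pod is (or is not) rescheduled, then take the pool out of maintenance. The Go sketch below condenses that flow; the poolDriver interface, the poolMaintenanceFlow helper, and their parameters are hypothetical stand-ins for the scheduler and volume-driver plumbing the real tests in test/integration_test use, not code from this commit.

package sketch

import (
    "fmt"
    "time"
)

// poolDriver is a hypothetical, trimmed-down stand-in for the volume driver
// used by the integration tests; only the calls the new tests rely on are listed.
type poolDriver interface {
    EnterPoolMaintenance(nodeName string) error
    ExitPoolMaintenance(nodeName string) error
    WaitDriverUpOnNode(nodeName string, timeout time.Duration) error
}

// poolMaintenanceFlow condenses the shared flow of poolMaintenanceTest and
// poolMaintenanceHealthTest: enter pool maintenance on the node running the
// app, wait for the driver to report StorageDown, inspect where the app's pod
// runs, then restore the node.
//
// wantSameNode is true for the health-monitor test (a StorageDown node must not
// have its pods evicted) and false for the extender test (deleted pods must not
// be scheduled back onto the node in maintenance).
func poolMaintenanceFlow(d poolDriver, scheduledNode string, settle time.Duration,
    nodeRunningApp func() (string, error), wantSameNode bool) error {

    if err := d.EnterPoolMaintenance(scheduledNode); err != nil {
        return fmt.Errorf("entering pool maintenance on %s: %w", scheduledNode, err)
    }

    // Give the driver time to report the StorageDown state; the real tests
    // sleep for 5 minutes because the stork node timeout is 4 minutes.
    time.Sleep(settle)

    node, err := nodeRunningApp()
    if err != nil {
        return err
    }
    if wantSameNode && node != scheduledNode {
        return fmt.Errorf("pod moved off %s, but StorageDown should not trigger eviction", scheduledNode)
    }
    if !wantSameNode && node == scheduledNode {
        return fmt.Errorf("pod was scheduled back onto %s, which is still in pool maintenance", scheduledNode)
    }

    // Restore the pool and wait for the driver to come back up on the node.
    if err := d.ExitPoolMaintenance(scheduledNode); err != nil {
        return err
    }
    return d.WaitDriverUpOnNode(scheduledNode, 10*time.Minute)
}

In the actual tests this logic is inlined with require assertions rather than returned errors.
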
adityadani committed May 8, 2023
1 parent 7646e4c commit 2982478
Showing 6 changed files with 134 additions and 11 deletions.
14 changes: 5 additions & 9 deletions pkg/extender/extender_test.go
@@ -365,12 +365,6 @@ func pxCSIExtPodNoDriverTest(t *testing.T) {
nodes.Items = append(nodes.Items, *newNode("node1", "node1", "192.168.0.1", "rack1", "a", "us-east-1"))
nodes.Items = append(nodes.Items, *newNode("node2", "node2", "192.168.0.2", "rack1", "a", "us-east-1"))
nodes.Items = append(nodes.Items, *newNode("node3", "node3", "192.168.0.3", "rack1", "a", "us-east-1"))
nodes.Items = append(nodes.Items, *newNode("node4", "node4", "192.168.0.4", "rack1", "a", "us-east-1"))

if err := driver.UpdateNodeStatus(3, volume.NodeDegraded); err != nil {
t.Fatalf("Error setting node status to StorageDown: %v", err)
}

filterResponse, err := sendFilterRequest(pod, nodes)
if err != nil {
t.Fatalf("Error sending filter request: %v", err)
@@ -400,13 +394,13 @@ func pxCSIExtPodDriverTest(t *testing.T) {
nodes.Items = append(nodes.Items, *newNode("node5", "node5", "192.168.0.5", "rack1", "", ""))
nodes.Items = append(nodes.Items, *newNode("node6", "node6", "192.168.0.6", "rack1", "", ""))

if err := driver.CreateCluster(5, nodes); err != nil {
if err := driver.CreateCluster(6, nodes); err != nil {
t.Fatalf("Error creating cluster: %v", err)
}
pod := newPod("px-csi-ext-foo", nil)

if err := driver.UpdateNodeStatus(5, volume.NodeDegraded); err != nil {
t.Fatalf("Error setting node status to StorageDown: %v", err)
t.Fatalf("Error setting node status to Degraded: %v", err)
}

filterResponse, err := sendFilterRequest(pod, nodes)
@@ -415,6 +409,8 @@ func pxCSIExtPodDriverTest(t *testing.T) {
}
verifyFilterResponse(t, nodes, []int{0, 1, 2, 3, 4}, filterResponse)

// Remove the degraded node from the list
nodes.Items = nodes.Items[:5]
prioritizeResponse, err := sendPrioritizeRequest(pod, nodes)
if err != nil {
t.Fatalf("Error sending prioritize request: %v", err)
@@ -480,7 +476,7 @@ func pxCSIExtPodOfflinePxNodesTest(t *testing.T) {
pod := newPod("px-csi-ext-foo", nil)

if err := driver.UpdateNodeStatus(2, volume.NodeOffline); err != nil {
t.Fatalf("Error setting node status to StorageDown: %v", err)
t.Fatalf("Error setting node status to Offline: %v", err)
}

filterResponse, err := sendFilterRequest(pod, nodes)
2 changes: 1 addition & 1 deletion pkg/monitor/monitor.go
@@ -235,7 +235,7 @@ func (m *Monitor) cleanupDriverNodePods(node *volume.NodeInfo) {
if err != nil {
return false, nil
}
if n.Status != volume.NodeOnline {
if n.Status == volume.NodeOffline || n.Status == volume.NodeDegraded {
log.Infof("Volume driver on node %v (%v) is still offline (%v)", node.Hostname, node.StorageID, n.RawStatus)
return false, nil
}
2 changes: 1 addition & 1 deletion pkg/monitor/monitor_test.go
@@ -316,7 +316,7 @@ func testStorageDownNode(t *testing.T) {
require.NoError(t, err, "failed to create pod")

err = driver.UpdateNodeStatus(node2Index, volume.NodeStorageDown)
require.NoError(t, err, "Error setting node status to Degraded")
require.NoError(t, err, "Error setting node status to StorageDown")
defer func() {
err = driver.UpdateNodeStatus(node2Index, volume.NodeOnline)
require.NoError(t, err, "Error setting node status to Online")
22 changes: 22 additions & 0 deletions test/integration_test/common_test.go
@@ -1165,6 +1165,28 @@ func addRunToMilestone(testrailID int, testResult *string) (int, error) {
return runID, nil
}

func getPodsForApp(ctx *scheduler.Context) ([]v1.Pod, error) {
var pods []v1.Pod

for _, specObj := range ctx.App.SpecList {
if obj, ok := specObj.(*appsapi.Deployment); ok {
depPods, err := apps.Instance().GetDeploymentPods(obj)
if err != nil {
return nil, err
}
pods = append(pods, depPods...)
} else if obj, ok := specObj.(*appsapi.StatefulSet); ok {
ssPods, err := apps.Instance().GetStatefulSetPods(obj)
if err != nil {
return nil, err
}
pods = append(pods, ssPods...)
}
}

return pods, nil
}

func TestMain(m *testing.M) {
flag.IntVar(&snapshotScaleCount,
"snapshot-scale-count",
57 changes: 57 additions & 0 deletions test/integration_test/extender_test.go
@@ -36,6 +36,7 @@ func TestExtender(t *testing.T) {
t.Run("statefulsetTest", statefulsetTest)
t.Run("multiplePVCTest", multiplePVCTest)
t.Run("driverNodeErrorTest", driverNodeErrorTest)
t.Run("poolMaintenanceTest", poolMaintenanceTest)
t.Run("antihyperconvergenceTest", antihyperconvergenceTest)
t.Run("antihyperconvergenceTestPreferRemoteOnlyTest", antihyperconvergenceTestPreferRemoteOnlyTest)
t.Run("equalPodSpreadTest", equalPodSpreadTest)
@@ -215,6 +216,62 @@ func driverNodeErrorTest(t *testing.T) {
logrus.Infof("Test status at end of %s test: %s", t.Name(), testResult)
}

func poolMaintenanceTest(t *testing.T) {
var testrailID, testResult = 86080, testResultFail
runID := testrailSetupForTest(testrailID, &testResult)
defer updateTestRail(&testResult, testrailID, runID)

ctxs, err := schedulerDriver.Schedule(generateInstanceID(t, "pool-test"),
scheduler.ScheduleOptions{AppKeys: []string{"mysql-1-pvc"}})
require.NoError(t, err, "Error scheduling task")
require.Equal(t, 1, len(ctxs), "Only one task should have started")

err = schedulerDriver.WaitForRunning(ctxs[0], defaultWaitTimeout, defaultWaitInterval)
require.NoError(t, err, "Error waiting for pod to get to running state")

scheduledNodes, err := schedulerDriver.GetNodesForApp(ctxs[0])
require.NoError(t, err, "Error getting node for app")
require.Equal(t, 1, len(scheduledNodes), "App should be scheduled on one node")

volumeNames := getVolumeNames(t, ctxs[0])
require.Equal(t, 1, len(volumeNames), "Should have only one volume")

verifyScheduledNode(t, scheduledNodes[0], volumeNames)

err = volumeDriver.EnterPoolMaintenance(scheduledNodes[0])
require.NoError(t, err, "Error entering pool maintenance mode on scheduled node %+v", scheduledNodes[0])
poolMaintenanceNode := scheduledNodes[0]

// Wait for node to go into maintenance mode
time.Sleep(5 * time.Minute)

// Delete the pods so that they get rescheduled
pods, err := getPodsForApp(ctxs[0])
require.NoError(t, err, "Failed to get pods for app")
err = core.Instance().DeletePods(pods, false)
require.NoError(t, err, "Error deleting the pods")

err = schedulerDriver.WaitForRunning(ctxs[0], defaultWaitTimeout, defaultWaitInterval)
require.NoError(t, err, "Error waiting for pod to get to running state after deletion")

scheduledNodes, err = schedulerDriver.GetNodesForApp(ctxs[0])
require.NoError(t, err, "Error getting node for app")
require.Equal(t, 1, len(scheduledNodes), "App should be scheduled on one node")
require.NotEqual(t, poolMaintenanceNode.Name, scheduledNodes[0].Name, "Pod should not be scheduled on node in PoolMaintenance state")

err = volumeDriver.ExitPoolMaintenance(poolMaintenanceNode)
require.NoError(t, err, "Error exiting pool maintenance mode on node %+v", scheduledNodes[0])

err = volumeDriver.WaitDriverUpOnNode(poolMaintenanceNode, defaultWaitTimeout)
require.NoError(t, err, "Error waiting for Node to start %+v", scheduledNodes[0])

destroyAndWait(t, ctxs)

// If we are here then the test has passed
testResult = testResultPass
logrus.Infof("Test status at end of %s test: %s", t.Name(), testResult)
}

func pvcOwnershipTest(t *testing.T) {
var testrailID, testResult = 50781, testResultFail
runID := testrailSetupForTest(testrailID, &testResult)
48 changes: 48 additions & 0 deletions test/integration_test/health_monitor_test.go
@@ -31,6 +31,7 @@ func TestHealthMonitor(t *testing.T) {

t.Run("stopDriverTest", stopDriverTest)
t.Run("stopKubeletTest", stopKubeletTest)
t.Run("poolMaintenanceHealthTest", poolMaintenanceHealthTest)
t.Run("healthCheckFixTest", healthCheckFixTest)
t.Run("stopDriverCsiPodFailoverTest", stopDriverCsiPodFailoverTest)

@@ -154,6 +155,53 @@ func stopKubeletTest(t *testing.T) {

}

func poolMaintenanceHealthTest(t *testing.T) {
var testrailID, testResult = 86081, testResultFail
runID := testrailSetupForTest(testrailID, &testResult)
defer updateTestRail(&testResult, testrailID, runID)

ctxs, err := schedulerDriver.Schedule(generateInstanceID(t, "pool-health"),
scheduler.ScheduleOptions{AppKeys: []string{"mysql-1-pvc"}})
require.NoError(t, err, "Error scheduling task")
require.Equal(t, 1, len(ctxs), "Only one task should have started")

err = schedulerDriver.WaitForRunning(ctxs[0], defaultWaitTimeout, defaultWaitInterval)
require.NoError(t, err, "Error waiting for pod to get to running state")

scheduledNodesPre, err := schedulerDriver.GetNodesForApp(ctxs[0])
require.NoError(t, err, "Error getting node for app")
require.Equal(t, 1, len(scheduledNodesPre), "App should be scheduled on one node")

err = volumeDriver.EnterPoolMaintenance(scheduledNodesPre[0])
require.NoError(t, err, "Error entering pool maintenance on scheduled Node %+v", scheduledNodesPre[0])
poolMaintenanceNode := scheduledNodesPre[0]

// node timeout bumped to 4 mins from stork 2.9.0
// ref: https://github.com/libopenstorage/stork/pull/1028
time.Sleep(5 * time.Minute)

// The pod should not be deleted from a node which is in pool maintenance state
err = schedulerDriver.WaitForRunning(ctxs[0], defaultWaitTimeout, defaultWaitInterval)
require.NoError(t, err, "Error waiting for pod to get to running state after deletion")

scheduledNodesPost, err := schedulerDriver.GetNodesForApp(ctxs[0])
require.NoError(t, err, "Error getting node for app")
require.Equal(t, 1, len(scheduledNodesPost), "App should be scheduled on one node")
require.Equal(t, poolMaintenanceNode.Name, scheduledNodesPost[0].Name, "Pod should not be restarted on the pool maintenance node")

err = volumeDriver.ExitPoolMaintenance(poolMaintenanceNode)
require.NoError(t, err, "Error exiting pool maintenance on Node %+v", poolMaintenanceNode)

err = volumeDriver.WaitDriverUpOnNode(poolMaintenanceNode, defaultWaitTimeout)
require.NoError(t, err, "Error waiting for Node to start %+v", poolMaintenanceNode)

destroyAndWait(t, ctxs)

// If we are here then the test has passed
testResult = testResultPass
logrus.Infof("Test status at end of %s test: %s", t.Name(), testResult)
}

func healthCheckFixTest(t *testing.T) {
var testrailID, testResult = 85900, testResultFail
runID := testrailSetupForTest(testrailID, &testResult)