Skip to content

Commit

Permalink
Remove duplicated test case for unsuspending PyTorchJobs
Browse files: browse the repository at this point in the history
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
  • Loading branch information
tenzen-y committed Jul 17, 2023
1 parent ecd3eca commit 1ed3e8e
Showing 1 changed file with 25 additions and 68 deletions.
93 changes: 25 additions & 68 deletions pkg/controller.v1/pytorch/pytorchjob_controller_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -196,7 +196,7 @@ var _ = Describe("PyTorchJob controller", func() {
Expect(cond.Status).To(Equal(corev1.ConditionTrue))
})

It("Shouldn't create resources if PyTorchJob is suspended; Should create resources once PyTorchJob is unsuspended", func() {
It("Shouldn't create resources if PyTorchJob is suspended", func() {
By("By creating a new PyTorchJob with suspend=true")
job.Spec.RunPolicy.Suspend = pointer.Bool(true)
job.Spec.PyTorchReplicaSpecs[kubeflowv1.PyTorchJobReplicaTypeWorker].Replicas = pointer.Int32(1)
Expand Down Expand Up @@ -247,68 +247,6 @@ var _ = Describe("PyTorchJob controller", func() {
Message: fmt.Sprintf("PyTorchJob %s is suspended.", name),
},
}, testutil.IgnoreJobConditionsTimes))

By("Unsuspending the PyTorchJob")
Eventually(func() error {
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed())
created.Spec.RunPolicy.Suspend = pointer.Bool(false)
return testK8sClient.Update(ctx, created)
}, testutil.Timeout, testutil.Interval).Should(Succeed())
Eventually(func() *metav1.Time {
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed())
return created.Status.StartTime
}, testutil.Timeout, testutil.Interval).ShouldNot(BeNil())

By("Check if the pods and services are created")
Eventually(func() error {
return testK8sClient.Get(ctx, masterKey, masterPod)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, worker0Key, workerPod)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, masterKey, masterSvc)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, worker0Key, workerSvc)
}, testutil.Timeout, testutil.Interval).Should(BeNil())

By("Updating Pod's condition with running")
Eventually(func() error {
Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed())
masterPod.Status.Phase = corev1.PodRunning
return testK8sClient.Status().Update(ctx, masterPod)
}, testutil.Timeout, testutil.Interval).Should(Succeed())
Eventually(func() error {
Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed())
workerPod.Status.Phase = corev1.PodRunning
return testK8sClient.Status().Update(ctx, workerPod)
}, testutil.Timeout, testutil.Interval).Should(Succeed())

By("Checking the PyTorchJob has resumed conditions")
Eventually(func() []kubeflowv1.JobCondition {
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed())
return created.Status.Conditions
}, testutil.Timeout, testutil.Interval).Should(BeComparableTo([]kubeflowv1.JobCondition{
{
Type: kubeflowv1.JobCreated,
Status: corev1.ConditionTrue,
Reason: commonutil.NewReason(kubeflowv1.PytorchJobKind, commonutil.JobCreatedReason),
Message: fmt.Sprintf("PyTorchJob %s is created.", name),
},
{
Type: kubeflowv1.JobSuspended,
Status: corev1.ConditionFalse,
Reason: commonutil.NewReason(kubeflowv1.PytorchJobKind, commonutil.JobResumedReason),
Message: fmt.Sprintf("PyTorchJob %s is resumed.", name),
},
{
Type: kubeflowv1.JobRunning,
Status: corev1.ConditionTrue,
Reason: commonutil.NewReason(kubeflowv1.PytorchJobKind, commonutil.JobRunningReason),
Message: fmt.Sprintf("PyTorchJob %s is running.", name),
},
}, testutil.IgnoreJobConditionsTimes))
})

It("Should delete resources after PyTorchJob is suspended; Should resume PyTorchJob after PyTorchJob is unsuspended", func() {
Expand Down Expand Up @@ -440,23 +378,42 @@ var _ = Describe("PyTorchJob controller", func() {
},
}, testutil.IgnoreJobConditionsTimes))

By("Updating the PytorchJob with suspend=false")
By("Unsuspending the PyTorchJob")
Eventually(func() error {
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed())
created.Spec.RunPolicy.Suspend = pointer.Bool(false)
return testK8sClient.Update(ctx, created)
}, testutil.Timeout, testutil.Interval).Should(Succeed())
Eventually(func() *metav1.Time {
Expect(testK8sClient.Get(ctx, jobKey, created)).Should(Succeed())
return created.Status.StartTime
}, testutil.Timeout, testutil.Interval).ShouldNot(BeNil())

By("Updating the pod's phase with Running")
By("Check if the pods and services are created")
Eventually(func() error {
errMaster := testK8sClient.Get(ctx, masterKey, masterPod)
return errMaster
}, testutil.Timeout, testutil.Interval).Should(Succeed())
return testK8sClient.Get(ctx, masterKey, masterPod)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, worker0Key, workerPod)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, masterKey, masterSvc)
}, testutil.Timeout, testutil.Interval).Should(BeNil())
Eventually(func() error {
return testK8sClient.Get(ctx, worker0Key, workerSvc)
}, testutil.Timeout, testutil.Interval).Should(BeNil())

By("Updating Pod's condition with running")
Eventually(func() error {
Expect(testK8sClient.Get(ctx, masterKey, masterPod)).Should(Succeed())
masterPod.Status.Phase = corev1.PodRunning
return testK8sClient.Status().Update(ctx, masterPod)
}, testutil.Timeout, testutil.Interval).Should(Succeed())
Eventually(func() error {
Expect(testK8sClient.Get(ctx, worker0Key, workerPod)).Should(Succeed())
workerPod.Status.Phase = corev1.PodRunning
return testK8sClient.Status().Update(ctx, workerPod)
}, testutil.Timeout, testutil.Interval).Should(Succeed())

By("Checking if the PyTorchJob has resumed conditions")
Eventually(func() []kubeflowv1.JobCondition {
Expand Down

0 comments on commit 1ed3e8e

Please sign in to comment.