Skip to content

Commit

Permalink
Merge pull request #913 from alcorj-mizar/master
Browse files Browse the repository at this point in the history
e2e: additional cases to reclaim
  • Loading branch information
volcano-sh-bot committed Jul 8, 2020
2 parents 3d6384e + 6cf4d34 commit dea6b59
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 35 deletions.
214 changes: 179 additions & 35 deletions test/e2e/reclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (

var _ = Describe("Reclaim E2E Test", func() {

CreateReclaimJob := func(ctx *testContext, req v1.ResourceList, name string, queue string, pri string) (*batchv1alpha1.Job, error) {
CreateReclaimJob := func(ctx *testContext, req v1.ResourceList, name string, queue string, pri string, nodeName string, waitTaskReady bool) (*batchv1alpha1.Job, error) {
job := &jobSpec{
tasks: []taskSpec{
{
Expand All @@ -41,8 +41,9 @@ var _ = Describe("Reclaim E2E Test", func() {
rep: 1,
},
},
name: name,
queue: queue,
name: name,
queue: queue,
nodeName: nodeName,
}
if pri != "" {
job.pri = pri
Expand All @@ -51,7 +52,9 @@ var _ = Describe("Reclaim E2E Test", func() {
if err != nil {
return nil, err
}
err = waitTasksReady(ctx, batchJob, 1)
if waitTaskReady {
err = waitTasksReady(ctx, batchJob, 1)
}
return batchJob, err
}

Expand All @@ -66,6 +69,8 @@ var _ = Describe("Reclaim E2E Test", func() {
return queue.Status.State == schedulingv1beta1.QueueStateOpen, nil
case "Pending":
return queue.Status.Pending == num, nil
case "Inqueue":
return queue.Status.Inqueue == num, nil
default:
return false, nil
}
Expand All @@ -86,10 +91,10 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
Expand All @@ -100,7 +105,7 @@ var _ = Describe("Reclaim E2E Test", func() {
err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

By("Make sure all job running")
Expand Down Expand Up @@ -137,10 +142,10 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, j1, q1, "")
_, err := CreateReclaimJob(ctx, CPU1Mem1, j1, q1, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, j2, q2, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, j2, q2, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
Expand All @@ -151,7 +156,7 @@ var _ = Describe("Reclaim E2E Test", func() {
err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, j3, q3, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, j3, q3, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

// delete pod of job3 to make sure reclaim-j3 podgroup is pending
Expand Down Expand Up @@ -197,16 +202,16 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming job")
q3 := "reclaim-q3"

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "", "", true)
Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")

By("Make sure all job running")
Expand Down Expand Up @@ -236,10 +241,10 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "high-priority")
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "high-priority", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "high-priority")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "high-priority", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
Expand All @@ -248,7 +253,7 @@ var _ = Describe("Reclaim E2E Test", func() {
err = WaitQueueStatus(ctx, "Open", 1, q1)
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "low-priority")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "low-priority", "", true)
Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")

By("Make sure all job running")
Expand Down Expand Up @@ -278,30 +283,18 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

By("Create job4 to testing overused cases.")
job := &jobSpec{
tasks: []taskSpec{
{
img: defaultNginxImage,
req: CPU1Mem1,
min: 1,
rep: 1,
},
},
name: "reclaim-j4",
queue: q3,
}

createJob(ctx, job)
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j4", q3, "", "", false)
Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed")

By("Make sure all job running")

Expand All @@ -318,6 +311,49 @@ var _ = Describe("Reclaim E2E Test", func() {
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue pending")
})

It("Reclaim Case 7: New queue with job created no reclaim when job not satisfied with predicates", func() {
	// Scenario: two queues each hold one job; a third queue then receives a
	// job whose pod template names the node "fake-node". The spec expects the
	// first two queues to stay at one "Running" and the third at one "Pending".
	firstQueue := defaultQueue
	secondQueue := "reclaim-q2"

	ctx := initTestContext(options{
		queues:             []string{secondQueue},
		nodesNumLimit:      3,
		nodesResourceLimit: CPU1Mem1,
	})
	defer cleanupTestContext(ctx)

	By("Setup initial jobs")

	// Both initial jobs are created with waitTaskReady=true, so each call
	// also waits for the job's single task to become ready.
	_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", firstQueue, "", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", secondQueue, "", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

	By("Create new comming queue and job")

	thirdQueue := "reclaim-q3"
	ctx.queues = append(ctx.queues, thirdQueue)
	createQueues(ctx)

	err = WaitQueueStatus(ctx, "Open", 1, firstQueue)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

	// waitTaskReady=false: this job is pinned to "fake-node", so the spec
	// does not wait for its task to become ready.
	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", thirdQueue, "", "fake-node", false)
	Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

	By("Make sure all job running")

	// Expected final state per queue, checked in the same order as before.
	expectations := []struct {
		state   string
		queue   string
		failMsg string
	}{
		{"Running", firstQueue, "Error waiting for queue running"},
		{"Running", secondQueue, "Error waiting for queue running"},
		{"Pending", thirdQueue, "Error waiting for queue pending"},
	}
	for _, want := range expectations {
		err = WaitQueueStatus(ctx, want.state, 1, want.queue)
		Expect(err).NotTo(HaveOccurred(), want.failMsg)
	}
})

It("Reclaim Case 8: New queue with job created no reclaim when task resources less than reclaimable resource", func() {
q1 := defaultQueue
q2 := "reclaim-q2"
Expand All @@ -335,10 +371,10 @@ var _ = Describe("Reclaim E2E Test", func() {

By("Setup initial jobs")

_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "")
_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "")
_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "", "", true)
Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

By("Create new comming queue and job")
Expand Down Expand Up @@ -375,6 +411,114 @@ var _ = Describe("Reclaim E2E Test", func() {
Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
})

It("Reclaim Case 9: New queue with job created, all queues.spec.reclaimable is false, no reclaim", func() {
	// Scenario: q1 and q2 each run one job and are then marked
	// non-reclaimable; a job submitted to a newly created q3 must still reach
	// "Running" while q1 and q2 each keep their one "Running" entry.
	q1 := defaultQueue
	q2 := "reclaim-q2"
	ctx := initTestContext(options{
		queues:             []string{q2},
		nodesNumLimit:      3,
		nodesResourceLimit: CPU1Mem1,
	})

	defer cleanupTestContext(ctx)

	By("Setup initial jobs")

	_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q2, "", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

	By("Create new comming queue and job")
	q3 := "reclaim-q3"
	ctx.queues = append(ctx.queues, q3)
	createQueues(ctx)

	// Register the restore BEFORE mutating the queues: setQueueReclaimable
	// asserts with Expect, so if it fails after updating q1 but before
	// finishing q2, a defer registered afterwards would never run and q1
	// would stay non-reclaimable for later specs.
	// Only q1 is restored, as in the original spec; presumably q2 is removed
	// by cleanupTestContext - TODO confirm.
	defer setQueueReclaimable(ctx, []string{q1}, true)
	setQueueReclaimable(ctx, []string{q1, q2}, false)

	err = WaitQueueStatus(ctx, "Open", 1, q1)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q3, "", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

	By("Make sure all job running")

	err = WaitQueueStatus(ctx, "Running", 1, q1)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

	err = WaitQueueStatus(ctx, "Running", 1, q2)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

	err = WaitQueueStatus(ctx, "Running", 1, q3)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

})

// Reclaim relies on priority here, which is a bug.
It("Reclaim Case 10: Multi reclaimed queue", func() {
	// Scenario: q1 and q2 each hold two low-priority jobs; q3 and q4 then each
	// receive one high-priority job. The spec expects q1 and q2 to each end up
	// with one "Running" and one "Inqueue" entry, and q3 and q4 to each have
	// one "Running" entry.
	q1 := defaultQueue
	q2 := "reclaim-q2"
	q3 := "reclaim-q3"
	q4 := "reclaim-q4"
	ctx := initTestContext(options{
		queues:             []string{q2, q3, q4},
		nodesNumLimit:      4,
		nodesResourceLimit: CPU1Mem1,
		priorityClasses: map[string]int32{
			"low-priority":  10,
			"high-priority": 10000,
		},
	})

	defer cleanupTestContext(ctx)

	By("Setup initial jobs")

	_, err := CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j1", q1, "low-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j2", q1, "low-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j3", q2, "low-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j4", q2, "low-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed")

	By("Create coming jobs")

	// Failure messages name the job actually being created (previously both
	// said "job4", which pointed at the wrong job when these steps failed).
	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j5", q3, "high-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job5 failed")

	_, err = CreateReclaimJob(ctx, CPU1Mem1, "reclaim-j6", q4, "high-priority", "", true)
	Expect(err).NotTo(HaveOccurred(), "Wait for job6 failed")

	By("Make sure all job running")

	err = WaitQueueStatus(ctx, "Running", 1, q1)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

	// The "Inqueue" checks report their own state on failure instead of the
	// copy-pasted "running" message.
	err = WaitQueueStatus(ctx, "Inqueue", 1, q1)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue inqueue")

	err = WaitQueueStatus(ctx, "Running", 1, q2)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

	err = WaitQueueStatus(ctx, "Inqueue", 1, q2)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue inqueue")

	err = WaitQueueStatus(ctx, "Running", 1, q3)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

	err = WaitQueueStatus(ctx, "Running", 1, q4)
	Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")

})

It("Reclaim", func() {
q1, q2 := "reclaim-q1", "reclaim-q2"
ctx := initTestContext(options{
Expand Down
14 changes: 14 additions & 0 deletions test/e2e/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ func cleanupTestContext(ctx *testContext) {
Expect(err).NotTo(HaveOccurred())
}

// setQueueReclaimable sets Spec.Reclaimable to the given value on each of the
// named queues. Every queue is fetched and then written back through the
// Volcano scheduling client, and both the get and the update are asserted
// with Expect.
func setQueueReclaimable(cxt *testContext, queues []string, reclaimable bool) {
	queueClient := cxt.vcclient.SchedulingV1beta1().Queues()

	for i := range queues {
		name := queues[i]

		current, getErr := queueClient.Get(context.TODO(), name, metav1.GetOptions{})
		Expect(getErr).NotTo(HaveOccurred(), "Queue get failed.")

		// Spec.Reclaimable is a pointer field, so it takes the address of the
		// reclaimable parameter.
		current.Spec.Reclaimable = &reclaimable

		_, updateErr := queueClient.Update(context.TODO(), current, metav1.UpdateOptions{})
		Expect(updateErr).NotTo(HaveOccurred(), "Queue reclaimed failed.")
	}
}

func createQueues(cxt *testContext) {
for _, q := range cxt.queues {

Expand Down Expand Up @@ -317,6 +327,7 @@ type jobSpec struct {
pri string
plugins map[string][]string
volumes []batchv1alpha1.VolumeSpec
nodeName string
// ttl seconds after job finished
ttl *int32
}
Expand Down Expand Up @@ -467,6 +478,9 @@ func createJobInner(ctx *testContext, jobSpec *jobSpec) (*batchv1alpha1.Job, err
},
},
}
if jobSpec.nodeName != "" {
ts.Template.Spec.NodeName = jobSpec.nodeName
}

if task.defaultGracefulPeriod != nil {
ts.Template.Spec.TerminationGracePeriodSeconds = task.defaultGracefulPeriod
Expand Down

0 comments on commit dea6b59

Please sign in to comment.