diff --git a/test/e2e/apps/broadcastjob.go b/test/e2e/apps/broadcastjob.go
index 6627d4084c..b1485bbc11 100644
--- a/test/e2e/apps/broadcastjob.go
+++ b/test/e2e/apps/broadcastjob.go
@@ -20,20 +20,19 @@ import (
 	"context"
 	"time"
 
-	"k8s.io/client-go/util/retry"
-
+	"github.com/onsi/ginkgo"
+	"github.com/onsi/gomega"
 	appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
 	kruiseclientset "github.com/openkruise/kruise/pkg/client/clientset/versioned"
 	"github.com/openkruise/kruise/pkg/util"
 	"github.com/openkruise/kruise/test/e2e/framework"
-
-	"github.com/onsi/ginkgo"
-	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/rand"
 	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/util/retry"
 	"k8s.io/utils/integer"
 )
 
@@ -156,4 +155,92 @@ var _ = SIGDescribe("BroadcastJob", func() {
 			}, 60*time.Second, time.Second).Should(gomega.Equal(int32(len(nodes))))
 		})
 	})
+
+	framework.KruiseDescribe("BroadcastJob uncordon handling", func() {
+		// Cordons a fake node, creates a BroadcastJob, and asserts that Desired and
+		// Active are one short of the node count until the node is uncordoned.
+		framework.ConformanceIt("creates missing pod after node uncordon", func() {
+			// Create fake node
+			fakeNode, err := nodeTester.CreateFakeNode(randStr)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			// Cordon the fake node before the job is created, so the job starts out
+			// with one unschedulable node.
+			ginkgo.By("Cordoning fake node " + fakeNode.Name)
+			_, err = c.CoreV1().Nodes().Patch(context.TODO(), fakeNode.Name,
+				types.StrategicMergePatchType,
+				[]byte(`{"spec":{"unschedulable":true}}`),
+				metav1.PatchOptions{})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			// Build BroadcastJob "job-<randStr>" with a framework.E2eFakeKey toleration.
+			// NOTE(review): the container exits after "sleep 30"; the Active checks below
+			// presumably need the pods to still be running. Confirm the timing margin.
+			job := &appsv1alpha1.BroadcastJob{
+				ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: "job-" + randStr},
+				Spec: appsv1alpha1.BroadcastJobSpec{
+					Template: v1.PodTemplateSpec{
+						Spec: v1.PodSpec{
+							Tolerations: []v1.Toleration{{Key: framework.E2eFakeKey, Operator: v1.TolerationOpEqual, Value: randStr, Effect: v1.TaintEffectNoSchedule}},
+							Containers: []v1.Container{{
+								Name:    "box",
+								Image:   BusyboxImage,
+								Command: []string{"/bin/sh", "-c", "sleep 30"},
+							}},
+							RestartPolicy: v1.RestartPolicyNever,
+						},
+					},
+					CompletionPolicy: appsv1alpha1.CompletionPolicy{Type: appsv1alpha1.Always},
+				},
+			}
+
+			// Parallelism is set to the number of listed nodes.
+			// NOTE(review): totalNodes is assumed to count the cordoned fake node; confirm.
+			nodes, err := nodeTester.ListRealNodesWithFake(job.Spec.Template.Spec.Tolerations)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+			totalNodes := int32(len(nodes))
+			parallelism := intstr.FromInt(len(nodes))
+			job.Spec.Parallelism = &parallelism
+
+			ginkgo.By("Creating BroadcastJob " + job.Name)
+			job, err = tester.CreateBroadcastJob(job)
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			ginkgo.By("Verify desired count equals total nodes - 1 (due to cordoned node)")
+			gomega.Eventually(func() int32 {
+				job, err = tester.GetBroadcastJob(job.Name)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				return job.Status.Desired
+			}, 30*time.Second, time.Second).Should(gomega.Equal(totalNodes - 1))
+
+			ginkgo.By("Verify active pods equals total nodes - 1 (as pod is not created on cordoned node)")
+			gomega.Eventually(func() int32 {
+				job, err = tester.GetBroadcastJob(job.Name)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				return job.Status.Active
+			}, 60*time.Second, 3*time.Second).Should(gomega.Equal(totalNodes - 1))
+
+			// Uncordon the fake node
+			ginkgo.By("Uncordoning fake node " + fakeNode.Name)
+			_, err = c.CoreV1().Nodes().Patch(context.TODO(), fakeNode.Name,
+				types.StrategicMergePatchType,
+				[]byte(`{"spec":{"unschedulable":false}}`),
+				metav1.PatchOptions{})
+			gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+			ginkgo.By("Verify desired count becomes total nodes after uncordon")
+			gomega.Eventually(func() int32 {
+				job, err = tester.GetBroadcastJob(job.Name)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				return job.Status.Desired
+			}, 30*time.Second, time.Second).Should(gomega.Equal(totalNodes))
+
+			ginkgo.By("Verify active pods becomes total nodes after uncordon (missing pod now created)")
+			gomega.Eventually(func() int32 {
+				job, err = tester.GetBroadcastJob(job.Name)
+				gomega.Expect(err).NotTo(gomega.HaveOccurred())
+				return job.Status.Active
+			}, 60*time.Second, 3*time.Second).Should(gomega.Equal(totalNodes))
+		})
+	})
 })