Skip to content

Commit cc4890b

Browse files
authored
Merge pull request kubernetes-sigs#608 from mbobrovskyi/feature/wait-for-the-webhook-service-to-be-listening-before-advertising-the-jobset-replica-as-ready
Wait for the webhook service to be listening before advertising the Jobset replica as ready.
2 parents 58d5da2 + 3c8290b commit cc4890b

File tree

2 files changed

+50
-23
lines changed

2 files changed

+50
-23
lines changed

main.go

+20-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"errors"
2021
"flag"
22+
"net/http"
2123
"os"
2224

2325
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
@@ -136,7 +138,7 @@ func main() {
136138
// Controllers who register after manager starts will start directly.
137139
go setupControllers(mgr, certsReady)
138140

139-
setupHealthzAndReadyzCheck(mgr)
141+
setupHealthzAndReadyzCheck(mgr, certsReady)
140142

141143
setupLog.Info("starting manager")
142144
if err := mgr.Start(ctx); err != nil {
@@ -186,14 +188,29 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
186188
//+kubebuilder:scaffold:builder
187189
}
188190

189-
func setupHealthzAndReadyzCheck(mgr ctrl.Manager) {
191+
func setupHealthzAndReadyzCheck(mgr ctrl.Manager, certsReady <-chan struct{}) {
190192
defer setupLog.Info("both healthz and readyz check are finished and configured")
191193

192194
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
193195
setupLog.Error(err, "unable to set up health check")
194196
os.Exit(1)
195197
}
196-
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
198+
199+
// Wait for the webhook server to be listening before advertising the
200+
// Jobset deployment replica as ready. This allows users to wait with sending
201+
// the first requests, requiring webhooks, until the Jobset deployment is
202+
// available, so that the early requests are not rejected during the Jobset's
203+
// startup. We wrap the call to GetWebhookServer in a closure to delay calling
204+
// the function, otherwise a not fully-initialized webhook server (without
205+
// ready certs) fails the start of the manager.
206+
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
207+
select {
208+
case <-certsReady:
209+
return mgr.GetWebhookServer().StartedChecker()(req)
210+
default:
211+
return errors.New("certificates are not ready")
212+
}
213+
}); err != nil {
197214
setupLog.Error(err, "unable to set up ready check")
198215
os.Exit(1)
199216
}

test/e2e/suite_test.go

+30-20
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,21 @@ package e2e
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"testing"
1920
"time"
2021

22+
"github.com/google/go-cmp/cmp/cmpopts"
2123
"github.com/onsi/ginkgo/v2"
2224
"github.com/onsi/gomega"
25+
appsv1 "k8s.io/api/apps/v1"
26+
corev1 "k8s.io/api/core/v1"
2327
"k8s.io/apimachinery/pkg/types"
2428
"k8s.io/client-go/kubernetes/scheme"
2529
"sigs.k8s.io/controller-runtime/pkg/client"
2630
"sigs.k8s.io/controller-runtime/pkg/client/config"
2731

2832
jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
29-
testutils "sigs.k8s.io/jobset/pkg/util/testing"
3033
//+kubebuilder:scaffold:imports
3134
)
3235

@@ -59,27 +62,34 @@ var _ = ginkgo.BeforeSuite(func() {
5962
gomega.Expect(err).NotTo(gomega.HaveOccurred())
6063
gomega.Expect(k8sClient).NotTo(gomega.BeNil())
6164

62-
JobSetReadyForTesting(k8sClient)
65+
jobSetReadyForTesting(k8sClient)
6366
})
6467

65-
func JobSetReadyForTesting(client client.Client) {
68+
func jobSetReadyForTesting(k8sClient client.Client) {
6669
ginkgo.By("waiting for resources to be ready for testing")
67-
// To verify that webhooks are ready, let's create a simple jobset.
68-
js := testutils.MakeJobSet("js", "default").
69-
ReplicatedJob(testutils.MakeReplicatedJob("rjob").
70-
Job(testutils.MakeJobTemplate("job", "default").
71-
PodSpec(testutils.TestPodSpec).Obj()).
72-
Obj()).Obj()
73-
74-
// Once the creation succeeds, that means the webhooks are ready
75-
// and we can begin testing.
76-
gomega.Eventually(func() error {
77-
return client.Create(context.Background(), js)
70+
deploymentKey := types.NamespacedName{Namespace: "jobset-system", Name: "jobset-controller-manager"}
71+
deployment := &appsv1.Deployment{}
72+
pods := &corev1.PodList{}
73+
gomega.Eventually(func(g gomega.Gomega) error {
74+
// Get controller-manager deployment.
75+
g.Expect(k8sClient.Get(ctx, deploymentKey, deployment)).To(gomega.Succeed())
76+
// List the pods that match the controller-manager deployment's selector.
77+
g.Expect(k8sClient.List(ctx, pods, client.InNamespace(deploymentKey.Namespace), client.MatchingLabels(deployment.Spec.Selector.MatchLabels))).To(gomega.Succeed())
78+
for _, pod := range pods.Items {
79+
for _, cs := range pod.Status.ContainerStatuses {
80+
// To make sure that we don't have restarts of controller-manager.
81+
// If there are any, that means something went wrong, and there is
82+
// no need to keep checking for availability.
83+
if cs.RestartCount > 0 {
84+
return gomega.StopTrying(fmt.Sprintf("%q in %q has restarted %d times", cs.Name, pod.Name, cs.RestartCount))
85+
}
86+
}
87+
}
88+
// To verify that webhooks are ready, check that the deployment has the condition Available=True.
89+
g.Expect(deployment.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(
90+
appsv1.DeploymentCondition{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue},
91+
cmpopts.IgnoreFields(appsv1.DeploymentCondition{}, "Reason", "Message", "LastUpdateTime", "LastTransitionTime")),
92+
))
93+
return nil
7894
}, timeout, interval).Should(gomega.Succeed())
79-
80-
// Delete this jobset before beginning tests.
81-
gomega.Expect(client.Delete(ctx, js))
82-
gomega.Eventually(func() error {
83-
return client.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{})
84-
}).ShouldNot(gomega.Succeed())
8595
}

0 commit comments

Comments
 (0)