Skip to content

Commit 1d7e20d

Browse files
authored
[TPU Webhook] Import headless service suffix from KubeRay (#1003)
Import headless service suffix from KubeRay Signed-off-by: Ryan O'Leary <[email protected]>
1 parent 7d72605 commit 1d7e20d

File tree

2 files changed

+45
-48
lines changed

2 files changed

+45
-48
lines changed

ray-on-gke/tpu/kuberay-tpu-webhook/main.go

+3-6
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import (
3333
"time"
3434

3535
ray "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
36-
utils "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
36+
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
3737
admissionv1 "k8s.io/api/admission/v1"
3838
corev1 "k8s.io/api/core/v1"
3939
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -74,9 +74,6 @@ var (
7474
keyPath = "/etc/kuberay-tpu-webhook/tls/tls.key"
7575
tpuResourceName = corev1.ResourceName("google.com/tpu")
7676

77-
// headless svc will be of the form: {kuberay-cluster-name}-headless-worker-svc
78-
headlessServiceSuffix = "headless-worker-svc"
79-
8077
// Flag arguments.
8178
BindAddr string
8279
CACert string
@@ -247,7 +244,7 @@ func extractRayCluster(admissionReview *admissionv1.AdmissionReview) (*ray.RayCl
247244

248245
// generateHeadlessServiceName returns the expected TPU headless service name for a RayCluster
249246
func generateHeadlessServiceName(clusterName string) string {
250-
serviceName := fmt.Sprintf("%s-%s", clusterName, headlessServiceSuffix)
247+
serviceName := fmt.Sprintf("%s-%s", clusterName, utils.HeadlessServiceSuffix)
251248

252249
// Apply the same truncation as in the RayCluster controller when generating the headless service
253250
// name. This is to maintain the up-to 63 char compatibility guarantee for hostnames (RFC 1123).
@@ -262,7 +259,7 @@ func genDNSHostnames(numOfHosts int32, groupName string, clusterName string, nam
262259
}
263260
headlessServiceName := generateHeadlessServiceName(clusterName)
264261
hostNames := make([]string, numOfHosts)
265-
// Host names will be of the form {WORKER_GROUP_NAME}-{REPLICA_INDEX}-{HOST_INDEX}.{CLUSTER_NAME}-headless-worker-svc
262+
// Host names will be of the form {WORKER_GROUP_NAME}-{REPLICA_INDEX}-{HOST_INDEX}.{CLUSTER_NAME}-headless
266263
for j := 0; j < int(numOfHosts); j++ {
267264
hostNames[j] = fmt.Sprintf("%s-%d-%d.%s", groupName, replicaIndex, j, headlessServiceName)
268265
}

ray-on-gke/tpu/kuberay-tpu-webhook/webhook_main_test.go

+42-42
Original file line numberDiff line numberDiff line change
@@ -797,26 +797,26 @@ func Test_GenDNSHostnames(t *testing.T) {
797797
clusterName: "test-cluster",
798798
replicaIndex: 0,
799799
numOfHosts: int32(1),
800-
expectedHostnames: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", headlessServiceSuffix),
800+
expectedHostnames: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", utils.HeadlessServiceSuffix),
801801
},
802802
"genDNSHostnames with NumOfHosts > 1": {
803803
// multi-host worker group, should return a string list of DNS hostnames for the given replica
804804
clusterName: "test-cluster",
805805
replicaIndex: 1,
806806
numOfHosts: int32(4),
807-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", headlessServiceSuffix),
808-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", headlessServiceSuffix),
809-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", headlessServiceSuffix),
810-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", headlessServiceSuffix),
807+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", utils.HeadlessServiceSuffix),
808+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", utils.HeadlessServiceSuffix),
809+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", utils.HeadlessServiceSuffix),
810+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", utils.HeadlessServiceSuffix),
811811
}, ","),
812812
},
813813
"genDNSHostnames with long RayCluster name": {
814814
// Multi-host worker group in a RayCluster with a name that will be truncated
815815
clusterName: "long-raycluster-name-to-be-truncated",
816816
replicaIndex: 1,
817817
numOfHosts: int32(2),
818-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "aycluster-name-to-be-truncated", headlessServiceSuffix),
819-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "aycluster-name-to-be-truncated", headlessServiceSuffix),
818+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "aycluster-name-to-be-truncated", utils.HeadlessServiceSuffix),
819+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "aycluster-name-to-be-truncated", utils.HeadlessServiceSuffix),
820820
}, ","),
821821
},
822822
}
@@ -846,23 +846,23 @@ func Test_InjectHostnames(t *testing.T) {
846846
// This function is only called for multi-host TPU worker groups.
847847
clusterName: "test-cluster",
848848
groupName: "test-group-name",
849-
expectedSubdomain: fmt.Sprintf("%s-%s", "test-cluster", headlessServiceSuffix),
850-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", headlessServiceSuffix),
851-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", headlessServiceSuffix),
852-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", headlessServiceSuffix),
853-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", headlessServiceSuffix),
849+
expectedSubdomain: fmt.Sprintf("%s-%s", "test-cluster", utils.HeadlessServiceSuffix),
850+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", utils.HeadlessServiceSuffix),
851+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", utils.HeadlessServiceSuffix),
852+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", utils.HeadlessServiceSuffix),
853+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", utils.HeadlessServiceSuffix),
854854
}, ","),
855855
},
856856
"injectHostnames for multi-host worker group with truncated service name": {
857857
// Should create a patch to set the subdomain and TPU_WORKER_HOSTNAMES for all hosts, with the
858858
// correct subdomain truncated to match the created service name.
859859
clusterName: "extremely-long-test-raycluster-name",
860860
groupName: "test-group-name",
861-
expectedSubdomain: fmt.Sprintf("%s-%s", "mely-long-test-raycluster-name", headlessServiceSuffix),
862-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "mely-long-test-raycluster-name", headlessServiceSuffix),
863-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "mely-long-test-raycluster-name", headlessServiceSuffix),
864-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "mely-long-test-raycluster-name", headlessServiceSuffix),
865-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "mely-long-test-raycluster-name", headlessServiceSuffix),
861+
expectedSubdomain: fmt.Sprintf("%s-%s", "mely-long-test-raycluster-name", utils.HeadlessServiceSuffix),
862+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "mely-long-test-raycluster-name", utils.HeadlessServiceSuffix),
863+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "mely-long-test-raycluster-name", utils.HeadlessServiceSuffix),
864+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "mely-long-test-raycluster-name", utils.HeadlessServiceSuffix),
865+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "mely-long-test-raycluster-name", utils.HeadlessServiceSuffix),
866866
}, ","),
867867
},
868868
}
@@ -1154,7 +1154,7 @@ func Test_GetEnvironmentVariable(t *testing.T) {
11541154
}
11551155
workerHostnames := corev1.EnvVar{
11561156
Name: "TPU_WORKER_HOSTNAMES",
1157-
Value: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", headlessServiceSuffix),
1157+
Value: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", utils.HeadlessServiceSuffix),
11581158
}
11591159
podContainer.Env = []corev1.EnvVar{workerID, workerName, workerHostnames}
11601160

@@ -1179,7 +1179,7 @@ func Test_GetEnvironmentVariable(t *testing.T) {
11791179
// returns TPU_WORKER_HOSTNAMES env var value
11801180
variableName: "TPU_WORKER_HOSTNAMES",
11811181
container: podContainer,
1182-
expectedValue: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", headlessServiceSuffix),
1182+
expectedValue: fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", utils.HeadlessServiceSuffix),
11831183
},
11841184
}
11851185

@@ -1369,10 +1369,10 @@ func Test_MutatePod(t *testing.T) {
13691369
expectedWorkerID: "0",
13701370
expectedReplicaID: 0,
13711371
expectedWorkerName: fmt.Sprintf("%s-%d", "test-group", 0),
1372-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", headlessServiceSuffix),
1373-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 1, "test-cluster", headlessServiceSuffix),
1374-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 2, "test-cluster", headlessServiceSuffix),
1375-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 3, "test-cluster", headlessServiceSuffix),
1372+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", utils.HeadlessServiceSuffix),
1373+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 1, "test-cluster", utils.HeadlessServiceSuffix),
1374+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 2, "test-cluster", utils.HeadlessServiceSuffix),
1375+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 3, "test-cluster", utils.HeadlessServiceSuffix),
13761376
}, ","),
13771377
expectedReplicaLabel: fmt.Sprintf("%s-%d", "test-group", 0),
13781378
},
@@ -1386,10 +1386,10 @@ func Test_MutatePod(t *testing.T) {
13861386
expectedWorkerID: "3",
13871387
expectedReplicaID: 0,
13881388
expectedWorkerName: fmt.Sprintf("%s-%d", "test-group", 0),
1389-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", headlessServiceSuffix),
1390-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 1, "test-cluster", headlessServiceSuffix),
1391-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 2, "test-cluster", headlessServiceSuffix),
1392-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 3, "test-cluster", headlessServiceSuffix),
1389+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 0, "test-cluster", utils.HeadlessServiceSuffix),
1390+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 1, "test-cluster", utils.HeadlessServiceSuffix),
1391+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 2, "test-cluster", utils.HeadlessServiceSuffix),
1392+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 0, 3, "test-cluster", utils.HeadlessServiceSuffix),
13931393
}, ","),
13941394
expectedReplicaLabel: fmt.Sprintf("%s-%d", "test-group", 0),
13951395
},
@@ -1403,10 +1403,10 @@ func Test_MutatePod(t *testing.T) {
14031403
expectedWorkerID: "0",
14041404
expectedReplicaID: 1,
14051405
expectedWorkerName: fmt.Sprintf("%s-%d", "test-group", 1),
1406-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", headlessServiceSuffix),
1407-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", headlessServiceSuffix),
1408-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", headlessServiceSuffix),
1409-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", headlessServiceSuffix),
1406+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", utils.HeadlessServiceSuffix),
1407+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", utils.HeadlessServiceSuffix),
1408+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", utils.HeadlessServiceSuffix),
1409+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", utils.HeadlessServiceSuffix),
14101410
}, ","),
14111411
expectedReplicaLabel: fmt.Sprintf("%s-%d", "test-group", 1),
14121412
},
@@ -1420,10 +1420,10 @@ func Test_MutatePod(t *testing.T) {
14201420
expectedWorkerID: "1",
14211421
expectedReplicaID: 1,
14221422
expectedWorkerName: fmt.Sprintf("%s-%d", "test-group", 1),
1423-
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", headlessServiceSuffix),
1424-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", headlessServiceSuffix),
1425-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", headlessServiceSuffix),
1426-
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", headlessServiceSuffix),
1423+
expectedHostnames: strings.Join([]string{fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 0, "test-cluster", utils.HeadlessServiceSuffix),
1424+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 1, "test-cluster", utils.HeadlessServiceSuffix),
1425+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 2, "test-cluster", utils.HeadlessServiceSuffix),
1426+
fmt.Sprintf("%s-%d-%d.%s-%s", "test-group", 1, 3, "test-cluster", utils.HeadlessServiceSuffix),
14271427
}, ","),
14281428
expectedReplicaLabel: fmt.Sprintf("%s-%d", "test-group", 1),
14291429
},
@@ -1474,7 +1474,7 @@ func Test_MutatePod(t *testing.T) {
14741474
expectedHostnamesPatch := []interface{}([]interface{}{map[string]interface{}{"name": "TPU_WORKER_HOSTNAMES", "value": tc.expectedHostnames}})
14751475
assert.Equal(t, tc.expectedReplicaLabel, patches[0]["value"])
14761476
assert.Equal(t, fmt.Sprintf("%s-%s", tc.expectedReplicaLabel, tc.expectedWorkerID), patches[1]["value"])
1477-
assert.Equal(t, fmt.Sprintf("%s-%s", "test-cluster", headlessServiceSuffix), patches[3]["value"])
1477+
assert.Equal(t, fmt.Sprintf("%s-%s", "test-cluster", utils.HeadlessServiceSuffix), patches[3]["value"])
14781478
assert.Equal(t, expectedHostnamesPatch, patches[4]["value"])
14791479
assert.Equal(t, expectedIDPatch, patches[5]["value"])
14801480
assert.Equal(t, expectedNamePatch, patches[6]["value"])
@@ -1489,13 +1489,13 @@ func Test_GenerateHeadlessServiceName(t *testing.T) {
14891489
testRayClusterName string
14901490
expectedServiceName string
14911491
}{
1492-
"RayCluster name + headless-worker-svc is less than 50 chars, no truncation": {
1493-
testRayClusterName: "test-raycluster", // 15 chars
1494-
expectedServiceName: "test-raycluster-headless-worker-svc", // 35 chars
1492+
"RayCluster name + -{HEADLESS_SERVICE_SUFFIX} is less than 50 chars, no truncation": {
1493+
testRayClusterName: "test-raycluster", // 15 chars
1494+
expectedServiceName: utils.CheckName(fmt.Sprintf("%s-%s", "test-raycluster", utils.HeadlessServiceSuffix)),
14951495
},
1496-
"RayCluster name + headless-worker-svc is more than 50 chars, name is truncated": {
1497-
testRayClusterName: "extremely-long-test-raycluster-name", // 35 chars
1498-
expectedServiceName: "mely-long-test-raycluster-name-headless-worker-svc", // 50 chars
1496+
"RayCluster name + -{HEADLESS_SERVICE_SUFFIX} is more than 50 chars, name is truncated": {
1497+
testRayClusterName: "extremely-really-really-long-test-raycluster-name", // 49 chars
1498+
expectedServiceName: utils.CheckName(fmt.Sprintf("%s-%s", "extremely-really-really-long-test-raycluster-name", utils.HeadlessServiceSuffix)),
14991499
},
15001500
}
15011501

0 commit comments

Comments
 (0)