Skip to content

Commit 17dc9b7

Browse files
matthew29tang and copybara-github
authored and committed
feat: Verify client and cluster Ray versions match in create_ray_cluster
PiperOrigin-RevId: 591950445
1 parent 5f6ad8d commit 17dc9b7

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

google/cloud/aiplatform/preview/vertex_ray/cluster_init.py

+17 -7
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,14 @@ def create_ray_cluster(
115115
"[Ray on Vertex]: No VPC network configured. It is required for client connection."
116116
)
117117

118+
local_ray_verion = _validation_utils.get_local_ray_version()
119+
if ray_version != local_ray_verion:
120+
logging.info(
121+
f"[Ray on Vertex]: Local runtime has Ray version {local_ray_verion}"
122+
+ f", but the requested cluster runtime has {ray_version}. Please "
123+
+ "ensure that the Ray versions match for client connectivity."
124+
)
125+
118126
if cluster_name is None:
119127
cluster_name = "ray-cluster-" + utils.timestamped_unique_name()
120128

@@ -351,10 +359,9 @@ def update_ray_cluster(
351359
)
352360
if additional_replica_count > 0 and i != j:
353361
raise ValueError(
354-
"[Ray on Vertex AI]: Worker_node_types have duplicate machine specs: ",
355-
worker_node_types[i],
356-
"and ",
357-
worker_node_types[j],
362+
"[Ray on Vertex AI]: Worker_node_types have duplicate "
363+
+ f"machine specs: {worker_node_types[i]} "
364+
+ f"and {worker_node_types[j]}"
358365
)
359366

360367
persistent_resource = _gapic_utils.get_persistent_resource(
@@ -371,10 +378,12 @@ def update_ray_cluster(
371378
# new worker_node_types and previous_worker_node_types should be the same length.
372379
if len(worker_node_types) != len(previous_worker_node_types):
373380
raise ValueError(
374-
f"[Ray on Vertex AI]: Desired number of worker_node_types ({len(worker_node_types)}) does not match the number of the existing worker_node_type({len(previous_worker_node_types)}).",
381+
"[Ray on Vertex AI]: Desired number of worker_node_types "
382+
+ f"({len(worker_node_types)}) does not match the number of the "
383+
+ f"existing worker_node_type({len(previous_worker_node_types)})."
375384
)
376385

377-
# Merge worker_node_type and head_node_type if the share
386+
# Merge worker_node_type and head_node_type if they share
378387
# the same machine spec.
379388
not_merged = 1
380389
for i in range(len(worker_node_types)):
@@ -399,7 +408,8 @@ def update_ray_cluster(
399408
# and head_node_type are merged due to the same machine specs.
400409
if worker_node_types[i].node_count == 0:
401410
raise ValueError(
402-
f"[Ray on Vertex AI]: Worker_node_type ({worker_node_types[i]}) must update to >= 1 nodes",
411+
"[Ray on Vertex AI]: Worker_node_type "
412+
+ f"({worker_node_types[i]}) must update to >= 1 nodes",
403413
)
404414

405415
request = persistent_resource_service.UpdatePersistentResourceRequest(

0 commit comments

Comments
 (0)