43
43
44
44
def create_ray_cluster (
45
45
head_node_type : Optional [resources .Resources ] = resources .Resources (),
46
- python_version : Optional [str ] = "3_10 " ,
47
- ray_version : Optional [str ] = "2_4 " ,
46
+ python_version : Optional [str ] = "3.10 " ,
47
+ ray_version : Optional [str ] = "2.9 " ,
48
48
network : Optional [str ] = None ,
49
49
cluster_name : Optional [str ] = None ,
50
50
worker_node_types : Optional [List [resources .Resources ]] = None ,
@@ -62,19 +62,22 @@ def create_ray_cluster(
62
62
node_count=1,
63
63
accelerator_type="NVIDIA_TESLA_K80",
64
64
accelerator_count=1,
65
+ custom_image="us-docker.pkg.dev/my-project/ray-cpu-image.2.9:latest", # Optional
65
66
)
66
67
67
68
worker_node_types = [Resources(
68
69
machine_type="n1-standard-8",
69
70
node_count=2,
70
71
accelerator_type="NVIDIA_TESLA_K80",
71
72
accelerator_count=1,
73
+ custom_image="us-docker.pkg.dev/my-project/ray-gpu-image.2.9:latest", # Optional
72
74
)]
73
75
74
76
cluster_resource_name = vertex_ray.create_ray_cluster(
75
77
head_node_type=head_node_type,
76
78
network="projects/my-project-number/global/networks/my-vpc-name",
77
79
worker_node_types=worker_node_types,
80
+ ray_version="2.9",
78
81
)
79
82
80
83
After a ray cluster is set up, you can call
@@ -100,7 +103,10 @@ def create_ray_cluster(
100
103
worker_node_types: The list of Resources of the worker nodes. The same
101
104
Resources object should not appear multiple times in the list.
102
105
custom_images: The NodeImages which specifies head node and worker nodes
103
- images. Allowlist only.
106
+ images. All the workers will share the same image. If each Resource
107
+ has a specific custom image, use `Resources.custom_image` for
108
+ head/worker_node_type(s). Note that configuring `Resources.custom_image`
109
+ will override `custom_images` here. Allowlist only.
104
110
labels:
105
111
The labels with user-defined metadata to organize Ray cluster.
106
112
@@ -121,14 +127,24 @@ def create_ray_cluster(
121
127
122
128
local_ray_verion = _validation_utils .get_local_ray_version ()
123
129
if ray_version != local_ray_verion :
124
- install_ray_version = "." .join (ray_version .split ("_" ))
125
- logging .info (
126
- f"[Ray on Vertex]: Local runtime has Ray version { local_ray_verion } "
127
- + f", but the requested cluster runtime has { ray_version } . Please "
128
- + "ensure that the Ray versions match for client connectivity. You may "
129
- + f'"pip install --user --force-reinstall ray[default]=={ install_ray_version } "'
130
- + " and restart runtime before cluster connection."
131
- )
130
+ if custom_images is None and head_node_type .custom_image is None :
131
+ install_ray_version = "2.9.3" if ray_version == "2.9" else "2.4.0"
132
+ logging .info (
133
+ "[Ray on Vertex]: Local runtime has Ray version %s"
134
+ ", but the requested cluster runtime has %s. Please "
135
+ "ensure that the Ray versions match for client connectivity. You may "
136
+ '"pip install --user --force-reinstall ray[default]==%s"'
137
+ " and restart runtime before cluster connection." ,
138
+ local_ray_verion ,
139
+ ray_version ,
140
+ install_ray_version ,
141
+ )
142
+ else :
143
+ logging .info (
144
+ "[Ray on Vertex]: Local runtime has Ray version %s."
145
+ "Please ensure that the Ray versions match for client connectivity." ,
146
+ local_ray_verion ,
147
+ )
132
148
133
149
if cluster_name is None :
134
150
cluster_name = "ray-cluster-" + utils .timestamped_unique_name ()
@@ -161,15 +177,18 @@ def create_ray_cluster(
161
177
resource_pool_0 .disk_spec .boot_disk_size_gb = head_node_type .boot_disk_size_gb
162
178
163
179
enable_cuda = True if head_node_type .accelerator_count > 0 else False
164
- image_uri = _validation_utils .get_image_uri (
165
- ray_version , python_version , enable_cuda
166
- )
167
- if custom_images is not None :
168
- if custom_images .head is None or custom_images .worker is None :
169
- raise ValueError (
170
- "[Ray on Vertex AI]: custom_images.head and custom_images.worker must be specified when custom_images is set."
171
- )
180
+ if head_node_type .custom_image is not None :
181
+ image_uri = head_node_type .custom_image
182
+ elif custom_images is None :
183
+ image_uri = _validation_utils .get_image_uri (
184
+ ray_version , python_version , enable_cuda
185
+ )
186
+ elif custom_images .head is not None and custom_images .worker is not None :
172
187
image_uri = custom_images .head
188
+ else :
189
+ raise ValueError (
190
+ "[Ray on Vertex AI]: custom_images.head and custom_images.worker must be specified when custom_images is set."
191
+ )
173
192
174
193
resource_pool_images [resource_pool_0 .id ] = image_uri
175
194
@@ -210,11 +229,16 @@ def create_ray_cluster(
210
229
)
211
230
worker_pools .append (resource_pool )
212
231
enable_cuda = True if worker_node_type .accelerator_count > 0 else False
213
- image_uri = _validation_utils .get_image_uri (
214
- ray_version , python_version , enable_cuda
215
- )
216
- if custom_images is not None :
232
+
233
+ if worker_node_type .custom_image is not None :
234
+ image_uri = worker_node_type .custom_image
235
+ elif custom_images is None :
236
+ image_uri = _validation_utils .get_image_uri (
237
+ ray_version , python_version , enable_cuda
238
+ )
239
+ else :
217
240
image_uri = custom_images .worker
241
+
218
242
resource_pool_images [resource_pool .id ] = image_uri
219
243
220
244
i += 1
@@ -395,8 +419,10 @@ def update_ray_cluster(
395
419
if len (worker_node_types ) != len (previous_worker_node_types ):
396
420
raise ValueError (
397
421
"[Ray on Vertex AI]: Desired number of worker_node_types "
398
- + f"({ len (worker_node_types )} ) does not match the number of the "
399
- + f"existing worker_node_type({ len (previous_worker_node_types )} )."
422
+ + "(%i) does not match the number of the "
423
+ + "existing worker_node_type(%i)." ,
424
+ len (worker_node_types ),
425
+ len (previous_worker_node_types ),
400
426
)
401
427
402
428
# Merge worker_node_type and head_node_type if they share
0 commit comments