Skip to content

Commit 2bfbcd7

Browse files
authored
Enable Ray Autoscaler for the Rag example application (#722)
* Enable Ray Autoscaler for the Rag example application
* Update the ray application template
1 parent f2883eb commit 2bfbcd7

File tree

3 files changed

+10
-8
lines changed

3 files changed

+10
-8
lines changed

applications/rag/variables.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ variable "gpu_pools" {
398398
name = "gpu-pool-l4"
399399
machine_type = "g2-standard-24"
400400
autoscaling = true
401-
min_count = 1
401+
min_count = 0
402402
max_count = 3
403403
disk_size_gb = 200
404404
disk_type = "pd-balanced"

applications/ray/variables.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ variable "gpu_pools" {
172172
name = "gpu-pool-l4"
173173
machine_type = "g2-standard-24"
174174
autoscaling = true
175-
min_count = 1
175+
min_count = 0
176176
max_count = 3
177177
disk_size_gb = 100
178178
disk_type = "pd-balanced"

modules/kuberay-cluster/values.yaml

+8-6
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ head:
3636
# If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
3737
# Ray autoscaler integration is supported only for Ray versions >= 1.11.0
3838
# Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
39-
# enableInTreeAutoscaling: true
39+
enableInTreeAutoscaling: true
4040
# autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
4141
# The example configuration shown below represents the DEFAULT values.
4242
# autoscalerOptions:
@@ -95,17 +95,17 @@ head:
9595
# Ray recommends at least 8G memory for production workloads.
9696
memory: "8G"
9797
# Sum of ephemeral storage requests must be max 10Gi on Autopilot default class.
98-
# This includes, ray-head, gcsfuse-sidecar, and fluent-bit.
99-
ephemeral-storage: 4Gi
98+
# This includes ray-head, gcsfuse-sidecar, fluent-bit, and the Ray autoscaler sidecar, which requests 1Gi by default.
99+
ephemeral-storage: 3Gi
100100
requests:
101101
cpu: "4"
102102
memory: "8G"
103-
ephemeral-storage: 4Gi
103+
ephemeral-storage: 3Gi
104104
annotations:
105105
gke-gcsfuse/volumes: "true"
106106
gke-gcsfuse/cpu-limit: "1"
107107
gke-gcsfuse/memory-limit: 2Gi
108-
gke-gcsfuse/ephemeral-storage-limit: 4Gi
108+
gke-gcsfuse/ephemeral-storage-limit: 3Gi
109109
nodeSelector:
110110
iam.gke.io/gke-metadata-server-enabled: "true"
111111
tolerations: []
@@ -165,7 +165,9 @@ worker:
165165
# uncomment the line below
166166
# disabled: true
167167
groupName: workerGroup
168-
replicas: 1
168+
replicas: 0
169+
minReplicas: 0
170+
maxReplicas: 5
169171
type: worker
170172
labels:
171173
cloud.google.com/gke-ray-node-type: worker

0 commit comments

Comments
 (0)