Hello!
Is the KubeRay autoscaler compatible with RayService (configured in rayClusterConfig), or only with a plain RayCluster resource?
I have already configured the Kubernetes Cluster Autoscaler (it is working with other pods), and I want a GPU machine to scale up when a new job is launched and scale back down when the job finishes. I submit the jobs with the Job Submission API from a Python client.
When I launch a job through the submission API, it remains pending and the autoscaler is never triggered.
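For reference, this is roughly how I submit the job (a minimal sketch; the head service address, entrypoint script, and resource amounts below are placeholders, not my exact code):

from ray.job_submission import JobSubmissionClient

# Placeholder address: the Ray head dashboard service exposed on port 8265.
client = JobSubmissionClient("http://ray-model-server-head-svc:8265")

# Placeholder entrypoint; the real job asks for one GPU, which is why a GPU
# worker would need to be scaled up before the job can leave PENDING.
submission_id = client.submit_job(
    entrypoint="python train.py",
    entrypoint_num_gpus=1,
    runtime_env={"working_dir": "./"},
)
print(client.get_job_status(submission_id))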
My template looks like this (I use Helm with Argo CD); do I need to create a standalone RayCluster instead of a RayService? A sketch of that alternative is at the end, after the template.
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: ray-model-server
spec:
  serveConfigV2: |
    logging_config:
      encoding: TEXT
      log_level: DEBUG
      logs_dir: null
      enable_access_log: true
    applications:
      - name: model-server
        route_prefix: /
        import_path: model_server:app
        deployments:
          - name: model-server
            num_replicas: {{ .Values.server_replica }}
            max_ongoing_requests: {{ .Values.server_max_ongoing_requests }}
            max_queued_requests: {{ .Values.server_max_queued_requests }}
            ray_actor_options:
              num_gpus: {{ .Values.server_gpus }}
              num_cpus: {{ .Values.server_cpu }}
              accelerator_type: {{ .Values.server_accelerator_type }}
  rayClusterConfig:
    enableInTreeAutoscaling: true
    autoscalerOptions:
      version: v2
      upscalingMode: Default
      idleTimeoutSeconds: 60
      imagePullPolicy: IfNotPresent
      # Optionally specify the Autoscaler container's securityContext.
      securityContext: {}
      envFrom: []
      resources:
        limits:
          cpu: "500m"
          memory: "512Mi"
        requests:
          cpu: "500m"
          memory: "512Mi"
          nvidia.com/gpu: 1
    rayVersion: "{{ .Values.rayVersion }}"
    # Ray head pod template.
    headGroupSpec:
      rayStartParams:
        dashboard-host: "0.0.0.0"
        metrics-export-port: "8080"
      # Pod template
      template:
        spec:
          serviceAccountName: {{ .Values.serviceAccountName }}
          nodeSelector:
            eks.amazonaws.com/nodegroup: {{ .Values.head_nodegroup }}
          containers:
            - name: ray-head
              image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
              resources:
                limits:
                  cpu: {{ .Values.head_cpu }}
                  memory: {{ .Values.head_memory }}
                requests:
                  cpu: {{ .Values.head_cpu }}
                  memory: {{ .Values.head_memory }}
              ports:
                - containerPort: 6379
                  name: gcs-server
                  protocol: TCP
                - containerPort: 8265 # Ray dashboard
                  name: dashboard
                  protocol: TCP
                - containerPort: 10001
                  name: client
                  protocol: TCP
                - containerPort: 8000
                  name: serve
                  protocol: TCP
                - containerPort: 8080
                  name: metrics
                  protocol: TCP
                - containerPort: 44217
                  name: as-metrics # autoscaler
                  protocol: TCP
                - containerPort: 44227
                  name: dash-metrics # dashboard
                  protocol: TCP
              env:
                - name: RAY_GRAFANA_IFRAME_HOST
                  value: {{ .Values.grafana_iframe_host }}
                - name: RAY_GRAFANA_HOST
                  value: {{ .Values.grafana_host }}
                - name: RAY_PROMETHEUS_HOST
                  value: {{ .Values.prometheus_host }}
                - name: RAY_RUNTIME_ENV_HOOK
                  value: ray._private.runtime_env.uv_runtime_env_hook.hook
    workerGroupSpecs:
      - replicas: {{ .Values.initialWorkers }}
        minReplicas: {{ .Values.minWorkers }}
        maxReplicas: {{ .Values.maxWorkers }}
        numOfHosts: 1
        groupName: {{ .Values.nodegroup }}
        rayStartParams: {}
        # Pod template
        template:
          spec:
            serviceAccountName: {{ .Values.serviceAccountName }}
            nodeSelector:
              eks.amazonaws.com/nodegroup: {{ .Values.nodegroup }}
            tolerations:
              - key: "nvidia.com/gpu"
                operator: "Exists"
                effect: "NoSchedule"
            volumes:
              - name: model-cache
                persistentVolumeClaim:
                  claimName: ray-efs-pvc
            containers:
              - name: ray-worker
                image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
                lifecycle:
                  preStop:
                    exec:
                      command: [ "/bin/sh", "-c", "ray stop" ]
                resources:
                  limits:
                    cpu: {{ .Values.workerCpu }}
                    memory: {{ .Values.workerMemory }}
                  requests:
                    cpu: {{ .Values.workerCpu }}
                    memory: {{ .Values.workerMemory }}
                    nvidia.com/gpu: 1
                volumeMounts:
                env:
                  - name: ENVIRONMENT
                    value: {{ .Values.environment }}
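
For comparison, this is the shape of the standalone RayCluster I would switch to if that is what's required (a sketch only, with a hypothetical name and the pod templates trimmed; they would be the same head and GPU worker templates as above):

# Sketch of the alternative: the same cluster config, but as a standalone
# RayCluster instead of being embedded in RayService.rayClusterConfig.
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: ray-job-cluster  # hypothetical name for illustration
spec:
  enableInTreeAutoscaling: true
  autoscalerOptions:
    version: v2
    upscalingMode: Default
    idleTimeoutSeconds: 60
  rayVersion: "{{ .Values.rayVersion }}"
  headGroupSpec:
    rayStartParams:
      dashboard-host: "0.0.0.0"
    template:
      # ... same head pod template as above ...
  workerGroupSpecs:
    - replicas: {{ .Values.initialWorkers }}
      minReplicas: {{ .Values.minWorkers }}
      maxReplicas: {{ .Values.maxWorkers }}
      groupName: {{ .Values.nodegroup }}
      rayStartParams: {}
      template:
        # ... same GPU worker pod template as above ...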
