Commit 00461e5

Jetstream Maxtext Deployment Module: All scale rules now in a single HPA (GoogleCloudPlatform#730)
first commit
1 parent 4e11bb2 commit 00461e5

7 files changed: +30 -52 lines changed

modules/jetstream-maxtext-deployment/main.tf

+8 -30

@@ -73,41 +73,19 @@ module "prometheus_adapter" {
 }
 
 resource "kubernetes_manifest" "prometheus_adapter_hpa_custom_metric" {
-  for_each = {
-    for index, rule in var.hpa_config.rules :
-    index => {
-      index = index
-      target_query = rule.target_query
-      average_value_target = rule.average_value_target
-    }
-    if var.maxengine_deployment_settings.custom_metrics_enabled && var.hpa_config.metrics_adapter == "prometheus-adapter"
-  }
-
+  count = var.hpa_config.metrics_adapter == "prometheus-adapter" ? 1 : 0
   manifest = yamldecode(templatefile(local.prometheus_jetstream_hpa_template, {
-    index = each.value.index
-    hpa_type = try(each.value.target_query, "")
-    hpa_averagevalue_target = try(each.value.average_value_target, 1)
-    hpa_min_replicas = var.hpa_config.min_replicas
-    hpa_max_replicas = var.hpa_config.max_replicas
+    hpa_min_replicas = var.hpa_config.min_replicas
+    hpa_max_replicas = var.hpa_config.max_replicas
+    rules = var.hpa_config.rules
   }))
 }
 
 resource "kubernetes_manifest" "cmsa_hpa_custom_metric" {
-  for_each = {
-    for index, rule in var.hpa_config.rules :
-    index => {
-      index = index
-      target_query = rule.target_query
-      average_value_target = rule.average_value_target
-    }
-    if var.maxengine_deployment_settings.custom_metrics_enabled && var.hpa_config.metrics_adapter == "custom-metrics-stackdriver-adapter"
-  }
-
+  count = var.hpa_config.metrics_adapter == "custom-metrics-stackdriver-adapter" ? 1 : 0
   manifest = yamldecode(templatefile(local.cmsa_jetstream_hpa_template, {
-    index = each.value.index
-    hpa_type = try(each.value.target_query, "")
-    hpa_averagevalue_target = try(each.value.average_value_target, 1)
-    hpa_min_replicas = var.hpa_config.min_replicas
-    hpa_max_replicas = var.hpa_config.max_replicas
+    hpa_min_replicas = var.hpa_config.min_replicas
+    hpa_max_replicas = var.hpa_config.max_replicas
+    rules = var.hpa_config.rules
   }))
 }
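
With the per-rule for_each removed, each adapter resource is created at most once (via count) and the whole rules list is handed to a single templatefile call, so every scaling rule becomes a metrics entry in one HorizontalPodAutoscaler instead of one HPA per rule. A minimal sketch of what the hpa_config input might look like after this change; the metric names and thresholds below are illustrative assumptions, not values taken from this commit:

hpa_config = {
  metrics_adapter = "prometheus-adapter" // or "custom-metrics-stackdriver-adapter"
  min_replicas    = 1
  max_replicas    = 4
  rules = [
    {
      // Hypothetical Jetstream metric; matches the "jetstream_.*" branch of the templates
      target_query         = "jetstream_prefill_backlog_size"
      average_value_target = 10
    },
    {
      // Hypothetical accelerator metric; falls through to the External/accelerator branch
      target_query         = "memory_used"
      average_value_target = 20000000000
    },
  ]
}

Both rules are rendered into the single jetstream-hpa object, where previously they would have produced jetstream-hpa-0 and jetstream-hpa-1.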

Prometheus adapter Jetstream HPA template (referenced as local.prometheus_jetstream_hpa_template in main.tf; file path not shown in this view)

@@ -1,7 +1,7 @@
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
-  name: jetstream-hpa-${index}
+  name: jetstream-hpa
   namespace: default
 spec:
   scaleTargetRef:
@@ -11,20 +11,22 @@ spec:
   minReplicas: ${hpa_min_replicas}
   maxReplicas: ${hpa_max_replicas}
   metrics:
-  %{ if length(regexall("jetstream_.*", hpa_type)) > 0 }
+  %{ for rule in rules }
+  %{ if length(regexall("jetstream_.*", rule.target_query)) > 0 }
   - type: Pods
     pods:
       metric:
-        name: prometheus.googleapis.com|${hpa_type}|gauge
+        name: ${rule.target_query}
       target:
         type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
+        averageValue: ${rule.average_value_target}
   %{ else }
   - type: External
     external:
       metric:
-        name: kubernetes.io|node|accelerator|${hpa_type}
+        name: kubernetes.io|node|accelerator|${rule.target_query}
       target:
         type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
-  %{ endif }
+        averageValue: ${rule.average_value_target}
+  %{ endif }
+  %{ endfor ~}

Custom Metrics Stackdriver Adapter Jetstream HPA template (referenced as local.cmsa_jetstream_hpa_template in main.tf; file path not shown in this view)

@@ -1,7 +1,7 @@
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
-  name: jetstream-hpa-${index}
+  name: jetstream-hpa
   namespace: default
 spec:
   scaleTargetRef:
@@ -11,20 +11,22 @@ spec:
   minReplicas: ${hpa_min_replicas}
   maxReplicas: ${hpa_max_replicas}
   metrics:
-  %{ if length(regexall("jetstream_.*", hpa_type)) > 0 }
-  - type: Pods
-    pods:
+  %{ for rule in rules }
+  %{ if length(regexall("jetstream_.*", rule.target_query)) > 0 }
+  - type: External
+    external:
      metric:
-        name: ${hpa_type}
+        name: ${rule.target_query}
      target:
        type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
+        averageValue: ${rule.average_value_target}
  %{ else }
  - type: External
    external:
      metric:
-        name: ${hpa_type}
+        name: ${rule.target_query}
      target:
        type: AverageValue
-        averageValue: ${hpa_averagevalue_target}
-  %{ endif }
+        averageValue: ${rule.average_value_target}
+  %{ endif }
+  %{ endfor ~}
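
Because both templates now iterate over the full rules list with %{ for rule in rules }, one way to sanity-check the change is to render a template with a couple of rules and count the resulting metrics entries. A rough sketch, using a hypothetical template path and illustrative rule values (the module itself points at its templates through local.prometheus_jetstream_hpa_template and local.cmsa_jetstream_hpa_template):

locals {
  // Hypothetical path and values, for illustration only; substitute the module's real template file.
  rendered_hpa = templatefile("${path.module}/templates/jetstream-hpa.yaml.tftpl", {
    hpa_min_replicas = 1
    hpa_max_replicas = 4
    rules = [
      { target_query = "jetstream_prefill_backlog_size", average_value_target = 10 },
      { target_query = "memory_used", average_value_target = 20000000000 },
    ]
  })
}

// Expect a single HPA manifest whose spec.metrics has one entry per rule (2 here).
output "rendered_metric_count" {
  value = length(yamldecode(local.rendered_hpa).spec.metrics)
}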

modules/jetstream-maxtext-deployment/variables.tf

+1 -2

@@ -31,8 +31,7 @@ variable "maxengine_deployment_settings" {
 
     model_name = string // Name of your LLM (for example: "gemma-7b")
     parameters_path = string // Path to the paramters for your model
-    metrics_port = optional(number) // Emit Jetstream metrics on this port of each contaienr
-    custom_metrics_enabled = bool // Whether or not custom metrics are also emitted
+    metrics_port = optional(number) // Emit Jetstream metrics on this port of each container
     metrics_scrape_interval = optional(number) // Interval for scraping metrics (default: 10s)
 
     accelerator_selectors = object({

tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/README.md

-1

@@ -137,7 +137,6 @@ For deploying autoscaling components via terraform, a few more variables to be s
 
 ```
 maxengine_deployment_settings = {
-  custom_metrics_enabled = true
   metrics_port = <same as above>
   metrics_scrape_interval
 }

tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/terraform/sample-terraform.tfvars

-1

@@ -2,7 +2,6 @@ maxengine_deployment_settings = {
   maxengine_server_image = "us-docker.pkg.dev/cloud-tpu-images/inference/maxengine-server:v0.2.2"
   jetstream_http_server_image = "us-docker.pkg.dev/cloud-tpu-images/inference/jetstream-http:v0.2.2"
 
-  custom_metrics_enabled = true
   metrics_port = 9100
   metrics_scrape_interval = 10
   accelerator_selectors = {

tutorials-and-examples/inference-servers/jetstream/maxtext/single-host-inference/terraform/variables.tf

+1 -2

@@ -56,8 +56,7 @@ variable "maxengine_deployment_settings" {
 
     model_name = string // Name of your LLM (for example: "gemma-7b")
     parameters_path = string // Path to the parameters for your model
-    metrics_port = optional(number) // Emit Jetstream metrics on this port of each contaienr
-    custom_metrics_enabled = bool // Whether or not custom metrics are also emitted
+    metrics_port = optional(number) // Emit Jetstream metrics on this port of each container
     metrics_scrape_interval = optional(number) // Interval for scraping metrics (default: 10s)
 
     accelerator_selectors = object({
