Skip to content

Commit a8f3f73

Browse files
committed
Refactored workloads and Kubernetes manifest actions (#119)
1 parent da6d336 commit a8f3f73

File tree

100 files changed

+2615
-133
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

100 files changed

+2615
-133
lines changed

.github/workflows/dictionary/terraform.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
abspath
22
cidrhost
3+
direxists
34
endfor
45
filemd
56
fileset

platforms/gke/base/_shared_config/cluster_variables.tf

+7-10
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@
1818
#
1919

2020
locals {
21-
cluster_credentials_command_private = "gcloud container clusters get-credentials ${local.cluster_name} --internal-ip --location ${var.cluster_region} --project ${var.cluster_project_id}"
22-
cluster_credentials_command_public = "gcloud container clusters get-credentials ${local.cluster_name} --location ${var.cluster_region} --project ${var.cluster_project_id}"
23-
cluster_credentials_command_gke = var.cluster_enable_private_endpoint ? local.cluster_credentials_command_private : local.cluster_credentials_command_public
24-
cluster_credentials_command_gkee = "gcloud container fleet memberships get-credentials ${local.cluster_name} --project ${var.cluster_project_id}"
25-
cluster_credentials_command = var.cluster_use_connect_gateway ? local.cluster_credentials_command_gkee : local.cluster_credentials_command_gke
21+
cluster_credentials_command_gke = "gcloud container clusters get-credentials ${local.cluster_name} --dns-endpoint --location ${var.cluster_region} --project ${var.cluster_project_id}"
22+
cluster_credentials_command_gkee = "gcloud container fleet memberships get-credentials ${local.cluster_name} --project ${var.cluster_project_id}"
23+
cluster_credentials_command = var.cluster_use_connect_gateway ? local.cluster_credentials_command_gkee : local.cluster_credentials_command_gke
2624

2725
cluster_name = local.unique_identifier_prefix
2826

@@ -31,9 +29,6 @@ locals {
3129
cluster_node_pool_service_account_id = var.cluster_node_pool_default_service_account_id != null ? var.cluster_node_pool_default_service_account_id : "vm-${local.cluster_name}"
3230
cluster_node_pool_service_account_project_id = var.cluster_node_pool_default_service_account_project_id != null ? var.cluster_node_pool_default_service_account_project_id : var.cluster_project_id
3331

34-
kubeconfig_directory = abspath("${path.module}/../kubeconfig")
35-
kubeconfig_file = abspath("${local.kubeconfig_directory}/${var.cluster_project_id}-${local.unique_identifier_prefix}")
36-
3732
# Minimal roles for nodepool SA https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster#use_least_privilege_sa
3833
cluster_sa_roles = [
3934
"roles/artifactregistry.reader",
@@ -44,6 +39,8 @@ locals {
4439
"roles/serviceusage.serviceUsageConsumer",
4540
"roles/stackdriver.resourceMetadata.writer",
4641
]
42+
43+
kubeconfig_file_name = "${var.cluster_project_id}-${local.cluster_name}"
4744
}
4845

4946
variable "cluster_binary_authorization_evaluation_mode" {
@@ -231,7 +228,7 @@ variable "cluster_system_node_pool_machine_type" {
231228
}
232229

233230
variable "cluster_use_connect_gateway" {
234-
default = true
235-
description = "Use Connect gateway to connect to the cluster, require GKE Enterprise. (https://cloud.google.com/kubernetes-engine/enterprise/multicluster-management/gateway)"
231+
default = false
232+
description = "Use Connect gateway to connect to the cluster, requires GKE Enterprise. (https://cloud.google.com/kubernetes-engine/enterprise/multicluster-management/gateway)"
236233
type = bool
237234
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
set -o errexit
18+
set -o nounset
19+
set -o pipefail
20+
21+
SHARED_CONFIG_DIRECTORY=${1}
22+
SHARED_CONFIG_NAME=${2}
23+
24+
if [[ ${SHARED_CONFIG_DIRECTORY} != \.* ]]; then
25+
echo "The shared config directory path must be a relative path!"
26+
exit 1
27+
fi
28+
29+
if test ! -d "${SHARED_CONFIG_DIRECTORY}"; then
30+
echo "Shared config directory '${SHARED_CONFIG_DIRECTORY}' does not exist!"
31+
exit 2
32+
fi
33+
34+
if test ! -f "${SHARED_CONFIG_DIRECTORY}/${SHARED_CONFIG_NAME}_variables.tf"; then
35+
echo "Shared config '${SHARED_CONFIG_NAME}' does not exist in '${SHARED_CONFIG_DIRECTORY}'!"
36+
exit 3
37+
fi
38+
39+
# Link the shared variables file into the current directory; expansions are quoted so paths containing spaces or glob characters stay a single argument.
ln -s "${SHARED_CONFIG_DIRECTORY}/${SHARED_CONFIG_NAME}_variables.tf" "_${SHARED_CONFIG_NAME}_variables.tf"
40+
# Link the shared auto.tfvars file into the current directory; expansions are quoted so paths containing spaces or glob characters stay a single argument.
ln -s "${SHARED_CONFIG_DIRECTORY}/${SHARED_CONFIG_NAME}.auto.tfvars" "_${SHARED_CONFIG_NAME}.auto.tfvars"
41+
42+
echo "Successfully linked shared config '${SHARED_CONFIG_NAME}' from '${SHARED_CONFIG_DIRECTORY}'."

platforms/gke/base/_shared_config/workloads_variables.tf

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
#
1818

1919
locals {
20-
manifests_directory = abspath("${path.module}/../manifests")
20+
manifests_directory_root = "${path.module}/../../../kubernetes/manifests"
2121
}
2222

2323
variable "kueue_version" {
24-
default = "0.10.0"
24+
default = "0.10.2"
2525
description = "Version of Kueue (https://kueue.sigs.k8s.io/) to install."
2626
type = string
2727
}

platforms/gke/base/core/container_cluster/container_cluster.tf

+1
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ resource "google_container_node_pool" "system" {
247247
node_config {
248248
# Variables
249249
labels = {
250+
"node-provisioning-model" : "on-demand"
250251
"resource-type" : "system"
251252
}
252253
machine_type = var.cluster_system_node_pool_machine_type

platforms/gke/base/core/container_node_pool/cpu/region/us-central1/container_node_pool_cpu_n4.tf

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ resource "google_container_node_pool" "cpu_n4s8" {
4848
node_config {
4949
# Variables
5050
labels = {
51+
"node-provisioning-model" : "on-demand"
5152
"resource-model" : "n4"
5253
"resource-type" : "cpu"
5354
}
@@ -118,6 +119,7 @@ resource "google_container_node_pool" "cpu_n4s8_spot" {
118119
node_config {
119120
# Variables
120121
labels = {
122+
"node-provisioning-model" : "spot"
121123
"resource-model" : "n4"
122124
"resource-type" : "cpu"
123125
}

platforms/gke/base/core/container_node_pool/cpu/region/us-east4/container_node_pool_cpu_n4.tf

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ resource "google_container_node_pool" "cpu_n4s8" {
4848
node_config {
4949
# Variables
5050
labels = {
51+
"node-provisioning-model" : "on-demand"
5152
"resource-model" : "n4"
5253
"resource-type" : "cpu"
5354
}
@@ -117,6 +118,7 @@ resource "google_container_node_pool" "cpu_n4s8_spot" {
117118
node_config {
118119
# Variables
119120
labels = {
121+
"node-provisioning-model" : "spot"
120122
"resource-model" : "n4"
121123
"resource-type" : "cpu"
122124
}

platforms/gke/base/core/container_node_pool/gpu/region/us-central1/container_node_pool_gpu_a100.tf

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2h2" {
5656
node_config {
5757
# Variables
5858
labels = {
59+
"node-provisioning-model" : "on-demand"
5960
"resource-model" : "a100"
6061
"resource-type" : "gpu"
6162
"resource-variant" : "40GB"
@@ -143,6 +144,8 @@ resource "google_container_node_pool" "gpu_a100x2_a2h2_dws" {
143144
node_config {
144145
# Variables
145146
labels = {
147+
"node-provisioning-model" : "on-demand"
148+
"queued-provisioning" : "true"
146149
"resource-model" : "a100"
147150
"resource-type" : "gpu"
148151
"resource-variant" : "40GB"
@@ -234,6 +237,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2h2_res" {
234237
node_config {
235238
# Variables
236239
labels = {
240+
"node-provisioning-model" : "reservation"
237241
"resource-model" : "a100"
238242
"resource-type" : "gpu"
239243
"resource-variant" : "40GB"
@@ -321,6 +325,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2h2_spot" {
321325
node_config {
322326
# Variables
323327
labels = {
328+
"node-provisioning-model" : "spot"
324329
"resource-model" : "a100"
325330
"resource-type" : "gpu"
326331
"resource-variant" : "40GB"

platforms/gke/base/core/container_node_pool/gpu/region/us-central1/container_node_pool_gpu_h100.tf

+5
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8" {
5454
node_config {
5555
# Variables
5656
labels = {
57+
"node-provisioning-model" : "on-demand"
5758
"resource-model" : "h100"
5859
"resource-type" : "gpu"
5960
}
@@ -142,6 +143,8 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_dws" {
142143
node_config {
143144
# Variables
144145
labels = {
146+
"node-provisioning-model" : "on-demand"
147+
"queued-provisioning" : "true"
145148
"resource-model" : "h100"
146149
"resource-type" : "gpu"
147150
}
@@ -234,6 +237,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_res" {
234237
node_config {
235238
# Variables
236239
labels = {
240+
"node-provisioning-model" : "reservation"
237241
"resource-model" : "h100"
238242
"resource-type" : "gpu"
239243
}
@@ -322,6 +326,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_spot" {
322326
node_config {
323327
# Variables
324328
labels = {
329+
"node-provisioning-model" : "spot"
325330
"resource-model" : "h100"
326331
"resource-type" : "gpu"
327332
}

platforms/gke/base/core/container_node_pool/gpu/region/us-central1/container_node_pool_gpu_l4.tf

+5
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24" {
5454
node_config {
5555
# Variables
5656
labels = {
57+
"node-provisioning-model" : "on-demand"
5758
"resource-model" : "l4"
5859
"resource-type" : "gpu"
5960
}
@@ -138,6 +139,8 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_dws" {
138139
node_config {
139140
# Variables
140141
labels = {
142+
"node-provisioning-model" : "on-demand"
143+
"queued-provisioning" : "true"
141144
"resource-model" : "l4"
142145
"resource-type" : "gpu"
143146
}
@@ -226,6 +229,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_res" {
226229
node_config {
227230
# Variables
228231
labels = {
232+
"node-provisioning-model" : "reservation"
229233
"resource-model" : "l4"
230234
"resource-type" : "gpu"
231235
}
@@ -311,6 +315,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_spot" {
311315
node_config {
312316
# Variables
313317
labels = {
318+
"node-provisioning-model" : "spot"
314319
"resource-model" : "l4"
315320
"resource-type" : "gpu"
316321
}

platforms/gke/base/core/container_node_pool/gpu/region/us-east4/container_node_pool_gpu_a100.tf

+5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2u2" {
5555
node_config {
5656
# Variables
5757
labels = {
58+
"node-provisioning-model" : "on-demand"
5859
"resource-model" : "a100"
5960
"resource-type" : "gpu"
6061
"resource-variant" : "80GB"
@@ -141,6 +142,8 @@ resource "google_container_node_pool" "gpu_a100x2_a2u2_dws" {
141142
node_config {
142143
# Variables
143144
labels = {
145+
"node-provisioning-model" : "on-demand"
146+
"queued-provisioning" : "true"
144147
"resource-model" : "a100"
145148
"resource-type" : "gpu"
146149
"resource-variant" : "80GB"
@@ -231,6 +234,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2u2_res" {
231234
node_config {
232235
# Variables
233236
labels = {
237+
"node-provisioning-model" : "reservation"
234238
"resource-model" : "a100"
235239
"resource-type" : "gpu"
236240
"resource-variant" : "80GB"
@@ -317,6 +321,7 @@ resource "google_container_node_pool" "gpu_a100x2_a2u2_spot" {
317321
node_config {
318322
# Variables
319323
labels = {
324+
"node-provisioning-model" : "spot"
320325
"resource-model" : "a100"
321326
"resource-type" : "gpu"
322327
"resource-variant" : "80GB"

platforms/gke/base/core/container_node_pool/gpu/region/us-east4/container_node_pool_gpu_h100.tf

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8" {
5656
node_config {
5757
# Variables
5858
labels = {
59+
"node-provisioning-model" : "on-demand"
5960
"resource-model" : "h100"
6061
"resource-type" : "gpu"
6162
}
@@ -146,6 +147,8 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_dws" {
146147
node_config {
147148
# Variables
148149
labels = {
150+
"node-provisioning-model" : "on-demand"
151+
"queued-provisioning" : "true"
149152
"resource-model" : "h100"
150153
"resource-type" : "gpu"
151154
}
@@ -240,6 +243,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_res" {
240243
node_config {
241244
# Variables
242245
labels = {
246+
"node-provisioning-model" : "reservation"
243247
"resource-model" : "h100"
244248
"resource-type" : "gpu"
245249
}
@@ -330,6 +334,7 @@ resource "google_container_node_pool" "gpu_h100x8_a3h8_spot" {
330334
node_config {
331335
# Variables
332336
labels = {
337+
"node-provisioning-model" : "spot"
333338
"resource-model" : "h100"
334339
"resource-type" : "gpu"
335340
}

platforms/gke/base/core/container_node_pool/gpu/region/us-east4/container_node_pool_gpu_l4.tf

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24" {
5656
node_config {
5757
# Variables
5858
labels = {
59+
"node-provisioning-model" : "on-demand"
5960
"resource-model" : "l4"
6061
"resource-type" : "gpu"
6162
}
@@ -142,6 +143,8 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_dws" {
142143
node_config {
143144
# Variables
144145
labels = {
146+
"node-provisioning-model" : "on-demand"
147+
"queued-provisioning" : "true"
145148
"resource-model" : "l4"
146149
"resource-type" : "gpu"
147150
}
@@ -232,6 +235,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_res" {
232235
node_config {
233236
# Variables
234237
labels = {
238+
"node-provisioning-model" : "reservation"
235239
"resource-model" : "l4"
236240
"resource-type" : "gpu"
237241
}
@@ -318,6 +322,7 @@ resource "google_container_node_pool" "gpu_l4x2_g2s24_spot" {
318322
node_config {
319323
# Variables
320324
labels = {
325+
"node-provisioning-model" : "spot"
321326
"resource-model" : "l4"
322327
"resource-type" : "gpu"
323328
}

platforms/gke/base/core/deploy.sh

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ else
3535
"gke_enterprise/fleet_membership"
3636
# Disable gke_enterprise/servicemesh due to b/376312292
3737
# "gke_enterprise/servicemesh"
38+
"workloads/cluster_credentials"
3839
"workloads/kueue"
3940
)
4041
fi

platforms/gke/base/core/workloads/cluster_credentials/.terraform.lock.hcl

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../_shared_config/cluster.auto.tfvars
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../_shared_config/cluster_variables.tf
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../_shared_config/platform.auto.tfvars
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../_shared_config/platform_variables.tf

0 commit comments

Comments
 (0)