Skip to content

Commit 7634ed1

Browse files
authored
TPU Provisioner: Add confidential disk config (#1005)
1 parent 184a6d5 commit 7634ed1

File tree

4 files changed

+75
-25
lines changed

4 files changed

+75
-25
lines changed

tpu-provisioner/cmd/main.go

+18-11
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ func main() {
8585
GCPNodeSecureBoot bool `envconfig:"GCP_NODE_SECURE_BOOT" default:"true"`
8686
GCPNodeAdditionalNetworks string `envconfig:"GCP_NODE_ADDITIONAL_NETWORKS" default:""`
8787

88+
GCPNodeDiskType string `envconfig:"GCP_NODE_DISK_TYPE"`
89+
GCPNodeConfidentialStorage bool `envconfig:"GCP_NODE_CONFIDENTIAL_STORAGE"`
90+
GCPNodeBootDiskKMSKey string `envconfig:"GCP_NODE_BOOT_DISK_KMS_KEY"`
91+
8892
// GCPForceOnDemand forces the controller to create nodes on demand, even if
8993
// the Pod requests a reservation or spot.
9094
GCPForceOnDemand bool `envconfig:"GCP_FORCE_ON_DEMAND" default:"false"`
@@ -202,17 +206,20 @@ func main() {
202206
provider = &cloud.GKE{
203207
Service: containers,
204208
ClusterContext: cloud.GKEContext{
205-
ProjectID: cfg.GCPProjectID,
206-
ClusterLocation: cfg.GCPClusterLocation,
207-
Cluster: cfg.GCPCluster,
208-
NodeZone: cfg.GCPZone,
209-
NodeServiceAccount: cfg.GCPNodeServiceAccount,
210-
NodeAdditionalNetworks: cfg.GCPNodeAdditionalNetworks,
211-
NodeSecondaryDisk: cfg.GCPNodeSecondaryDisk,
212-
NodeTags: cfg.GCPNodeTags,
213-
PodToNodeLabels: cfg.GCPPodToNodeLabels,
214-
NodeSecureBoot: cfg.GCPNodeSecureBoot,
215-
ForceOnDemand: cfg.GCPForceOnDemand,
209+
ProjectID: cfg.GCPProjectID,
210+
ClusterLocation: cfg.GCPClusterLocation,
211+
Cluster: cfg.GCPCluster,
212+
NodeZone: cfg.GCPZone,
213+
NodeServiceAccount: cfg.GCPNodeServiceAccount,
214+
NodeAdditionalNetworks: cfg.GCPNodeAdditionalNetworks,
215+
NodeSecondaryDisk: cfg.GCPNodeSecondaryDisk,
216+
NodeTags: cfg.GCPNodeTags,
217+
NodeDiskType: cfg.GCPNodeDiskType,
218+
NodeConfidentialStorage: cfg.GCPNodeConfidentialStorage,
219+
NodeBootDiskKMSKey: cfg.GCPNodeBootDiskKMSKey,
220+
PodToNodeLabels: cfg.GCPPodToNodeLabels,
221+
NodeSecureBoot: cfg.GCPNodeSecureBoot,
222+
ForceOnDemand: cfg.GCPForceOnDemand,
216223
},
217224
Recorder: mgr.GetEventRecorderFor("tpu-provisioner"),
218225
}

tpu-provisioner/internal/cloud/gke.go

+14-6
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,11 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node
405405
placementPolicy.Type = "COMPACT"
406406
}
407407

408+
var diskType string
409+
if g.ClusterContext.NodeDiskType != "" {
410+
diskType = g.ClusterContext.NodeDiskType
411+
}
412+
408413
return &containerv1beta1.NodePool{
409414
Name: name,
410415
Config: &containerv1beta1.NodeConfig{
@@ -416,12 +421,15 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node
416421
Tags: g.ClusterContext.NodeTags,
417422
// NOTE: vendor/ was manually updated to include the field because
418423
// it was not yet available at the time of writing:
419-
SecondaryBootDisks: secondaryDisks,
420-
MachineType: machineType,
421-
ReservationAffinity: reservation,
422-
Labels: labels,
423-
Spot: spot,
424-
Taints: taints,
424+
SecondaryBootDisks: secondaryDisks,
425+
MachineType: machineType,
426+
ReservationAffinity: reservation,
427+
Labels: labels,
428+
Spot: spot,
429+
Taints: taints,
430+
BootDiskKmsKey: g.ClusterContext.NodeBootDiskKMSKey,
431+
DiskType: diskType,
432+
EnableConfidentialStorage: g.ClusterContext.NodeConfidentialStorage,
425433
},
426434
InitialNodeCount: int64(nodeCount),
427435
Locations: []string{g.ClusterContext.NodeZone},

tpu-provisioner/internal/cloud/gke_context.go

+11-8
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@ package cloud
33
import "fmt"
44

55
type GKEContext struct {
6-
ProjectID string
7-
ClusterLocation string
8-
Cluster string
9-
NodeZone string
10-
NodeServiceAccount string
11-
NodeAdditionalNetworks string
12-
NodeSecondaryDisk string
13-
NodeTags []string
6+
ProjectID string
7+
ClusterLocation string
8+
Cluster string
9+
NodeZone string
10+
NodeServiceAccount string
11+
NodeAdditionalNetworks string
12+
NodeSecondaryDisk string
13+
NodeTags []string
14+
NodeDiskType string
15+
NodeConfidentialStorage bool
16+
NodeBootDiskKMSKey string
1417
// PodToNodeLabels is a list of key=value pairs that will be copied from the Pod
1518
// to the Node.
1619
PodToNodeLabels []string

tpu-provisioner/internal/cloud/gke_test.go

+32
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,38 @@ func TestNodePoolForPod(t *testing.T) {
691691
},
692692
},
693693
},
694+
{
695+
desc: "confidential disk configured in cluster context",
696+
gkeContext: GKEContext{
697+
NodeConfidentialStorage: true,
698+
NodeDiskType: "hyperdisk-balanced",
699+
NodeBootDiskKMSKey: "my-kms-key",
700+
},
701+
want: &containerv1beta1.NodePool{
702+
Config: &container.NodeConfig{
703+
Labels: map[string]string{
704+
"google.com/nodepool-manager": "tpu-provisioner",
705+
"google.com/tpu-provisioner-jobset-name": "jobset-test",
706+
"google.com/tpu-provisioner-jobset-namespace": "default",
707+
"google.com/tpu-provisioner-parent-kind": "job",
708+
"google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0",
709+
"google.com/tpu-provisioner-parent-namespace": "default",
710+
},
711+
MachineType: "ct5p-hightpu-4t",
712+
ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true},
713+
EnableConfidentialStorage: true,
714+
BootDiskKmsKey: "my-kms-key",
715+
DiskType: "hyperdisk-balanced",
716+
},
717+
InitialNodeCount: 512,
718+
Locations: []string{""},
719+
Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
720+
MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
721+
Name: "test-pool",
722+
PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
723+
UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
724+
},
725+
},
694726
}
695727
for _, tc := range tests {
696728
t.Run(tc.desc, func(t *testing.T) {

0 commit comments

Comments
 (0)