|
1 | | -// Copyright 2021 Google LLC |
| 1 | +// Copyright 2022 Google LLC |
2 | 2 | //
|
3 | 3 | // Licensed under the Apache License, Version 2.0 (the "License");
|
4 | 4 | // you may not use this file except in compliance with the License.
|
@@ -108,6 +108,179 @@ message RuntimeInfo {
|
108 | 108 | string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
|
109 | 109 | }
|
110 | 110 |
|
| 111 | +// The cluster's GKE config. |
| 112 | +message GkeClusterConfig { |
| 113 | + // Optional. A target GKE cluster to deploy to. It must be in the same project and |
| 114 | + // region as the Dataproc cluster (the GKE cluster can be zonal or regional). |
| 115 | + // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}' |
| 116 | + string gke_cluster_target = 2 [ |
| 117 | + (google.api.field_behavior) = OPTIONAL |
| 118 | + ]; |
| 119 | + |
| 120 | + // Optional. GKE NodePools where workloads will be scheduled. At least one node pool |
| 121 | + // must be assigned the 'default' role. Each role can be given to only a |
| 122 | + // single NodePoolTarget. All NodePools must have the same location settings. |
| 123 | + // If a nodePoolTarget is not specified, Dataproc constructs a default |
| 124 | + // nodePoolTarget. |
| 125 | + repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL]; |
| 126 | +} |
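For illustration only (not part of this diff): a GkeClusterConfig populated with the two fields above might look like the following in proto text format. The project, region, cluster, and node pool names are placeholders, and the single node pool target carries the required 'default' role.

    # Hypothetical GkeClusterConfig in proto text format; all resource names are placeholders.
    gke_cluster_target: "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
    node_pool_target {
      node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/dp-default"
      roles: DEFAULT
    }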
| 127 | + |
| 128 | +// The configuration for running the Dataproc cluster on Kubernetes. |
| 129 | +message KubernetesClusterConfig { |
| 130 | + // Optional. A namespace within the Kubernetes cluster to deploy into. If this namespace |
| 131 | + // does not exist, it is created. If it exists, Dataproc |
| 132 | + // verifies that another Dataproc VirtualCluster is not installed |
| 133 | + // into it. If not specified, the name of the Dataproc Cluster is used. |
| 134 | + string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL]; |
| 135 | + |
| 136 | + oneof config { |
| 137 | + // Required. The configuration for running the Dataproc cluster on GKE. |
| 138 | + GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED]; |
| 139 | + } |
| 140 | + |
| 141 | + // Optional. The software configuration for this Dataproc cluster running on Kubernetes. |
| 142 | + KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL]; |
| 143 | +} |
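A minimal sketch of how KubernetesClusterConfig nests these fields, again in proto text format; the namespace and cluster name are placeholder values, and gke_cluster_config is the only member of the config oneof defined above.

    # Hypothetical KubernetesClusterConfig; all values are placeholders.
    kubernetes_namespace: "my-dataproc-namespace"
    gke_cluster_config {
      gke_cluster_target: "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
    }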
| 144 | + |
| 145 | +// The software configuration for this Dataproc cluster running on Kubernetes. |
| 146 | +message KubernetesSoftwareConfig { |
| 147 | + // The components that should be installed in this Dataproc cluster. The key |
| 148 | + // must be a string from the KubernetesComponent enumeration. The value is |
| 149 | + // the version of the software to be installed. |
| 150 | + // At least one entry must be specified. |
| 151 | + map<string, string> component_version = 1; |
| 152 | + |
| 153 | + // The properties to set on daemon config files. |
| 154 | + // |
| 155 | + // Property keys are specified in `prefix:property` format, for example |
| 156 | + // `spark:spark.kubernetes.container.image`. The following are supported |
| 157 | + // prefixes and their mappings: |
| 158 | + // |
| 159 | + // * spark: `spark-defaults.conf` |
| 160 | + // |
| 161 | + // For more information, see [Cluster |
| 162 | + // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties). |
| 163 | + map<string, string> properties = 2; |
| 164 | +} |
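As a sketch, a KubernetesSoftwareConfig could pair a component key with a version string and set a spark:-prefixed property as described above. The SPARK key, the version string, and the container image below are illustrative placeholders, not verified values.

    # Hypothetical KubernetesSoftwareConfig; component key/version and image are placeholders.
    component_version { key: "SPARK" value: "3.1-dataproc-7" }
    properties {
      key: "spark:spark.kubernetes.container.image"
      value: "gcr.io/my-project/my-spark-image:latest"
    }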
| 165 | + |
| 166 | +// GKE NodePools that Dataproc workloads run on. |
| 167 | +message GkeNodePoolTarget { |
| 168 | + // `Role` specifies whose tasks will run on the NodePool. The roles can be |
| 169 | + // specific to workloads. Exactly one GkeNodePoolTarget within the |
| 170 | + // VirtualCluster must have the 'default' role, which is used to run all workloads |
| 171 | + // that are not associated with a NodePool. |
| 172 | + enum Role { |
| 173 | + // Role is unspecified. |
| 174 | + ROLE_UNSPECIFIED = 0; |
| 175 | + |
| 176 | + // Any roles that are not directly assigned to a NodePool run on the |
| 177 | + // `default` role's NodePool. |
| 178 | + DEFAULT = 1; |
| 179 | + |
| 180 | + // Run controllers and webhooks. |
| 181 | + CONTROLLER = 2; |
| 182 | + |
| 183 | + // Run Spark drivers. |
| 184 | + SPARK_DRIVER = 3; |
| 185 | + |
| 186 | + // Run Spark executors. |
| 187 | + SPARK_EXECUTOR = 4; |
| 188 | + } |
| 189 | + |
| 190 | + // Required. The target GKE NodePool. |
| 191 | + // Format: |
| 192 | + // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}' |
| 193 | + string node_pool = 1 [ |
| 194 | + (google.api.field_behavior) = REQUIRED |
| 195 | + ]; |
| 196 | + |
| 197 | + // Required. The roles associated with the GKE NodePool. |
| 198 | + repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED]; |
| 199 | + |
| 200 | + // Optional. The configuration for the GKE NodePool. |
| 201 | + // |
| 202 | + // If specified, Dataproc attempts to create a NodePool with the |
| 203 | + // specified shape. If one with the same name already exists, it is |
| 204 | + // verified against all specified fields. If a field differs, the |
| 205 | + // virtual cluster creation will fail. |
| 206 | + // |
| 207 | + // If omitted, any NodePool with the specified name is used. If a |
| 208 | + // NodePool with the specified name does not exist, Dataproc creates a NodePool |
| 209 | + // with default values. |
| 210 | + GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL]; |
| 211 | +} |
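To illustrate a non-default role, a GkeNodePoolTarget might pin Spark drivers to a dedicated node pool and request a shape via node_pool_config; the resource names and machine type below are placeholders.

    # Hypothetical GkeNodePoolTarget for Spark drivers; names and machine type are placeholders.
    node_pool: "projects/my-project/locations/us-central1/clusters/my-gke-cluster/nodePools/driver-pool"
    roles: SPARK_DRIVER
    node_pool_config {
      config { machine_type: "n1-standard-4" }
    }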
| 212 | + |
| 213 | +// The configuration of a GKE NodePool used by a [Dataproc-on-GKE |
| 214 | +// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster). |
| 215 | +message GkeNodePoolConfig { |
| 216 | + // Parameters that describe cluster nodes. |
| 217 | + message GkeNodeConfig { |
| 218 | + // Optional. The name of a Compute Engine [machine |
| 219 | + // type](https://cloud.google.com/compute/docs/machine-types). |
| 220 | + string machine_type = 1 [(google.api.field_behavior) = OPTIONAL]; |
| 221 | + |
| 222 | + // Optional. Whether the nodes are created as [preemptible VM |
| 223 | + // instances](https://cloud.google.com/compute/docs/instances/preemptible). |
| 224 | + bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL]; |
| 225 | + |
| 226 | + // Optional. The number of local SSD disks to attach to the node, which is limited by |
| 227 | + // the maximum number of disks allowable per zone (see [Adding Local |
| 228 | + // SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)). |
| 229 | + int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL]; |
| 230 | + |
| 231 | + // Optional. A list of [hardware |
| 232 | + // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to |
| 233 | + // each node. |
| 234 | + repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL]; |
| 235 | + |
| 236 | + // Optional. [Minimum CPU |
| 237 | + // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform) |
| 238 | + // to be used by this instance. The instance may be scheduled on the |
| 239 | + // specified or a newer CPU platform. Specify the friendly names of CPU |
| 240 | + // platforms, such as "Intel Haswell" or "Intel Sandy Bridge". |
| 241 | + string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL]; |
| 242 | + } |
| 243 | + |
| 244 | + // A GkeNodePoolAcceleratorConfig represents a Hardware Accelerator request |
| 245 | + // for a NodePool. |
| 246 | + message GkeNodePoolAcceleratorConfig { |
| 247 | + // The number of accelerator cards exposed to an instance. |
| 248 | + int64 accelerator_count = 1; |
| 249 | + |
| 250 | + // The accelerator type resource name (see [GPUs on Compute Engine](https://cloud.google.com/compute/docs/gpus)). |
| 251 | + string accelerator_type = 2; |
| 252 | + } |
| 253 | + |
| 254 | + // GkeNodePoolAutoscalingConfig contains information the cluster autoscaler needs to |
| 255 | + // adjust the size of the node pool to the current cluster usage. |
| 256 | + message GkeNodePoolAutoscalingConfig { |
| 257 | + // The minimum number of nodes in the NodePool. Must be >= 0 and <= |
| 258 | + // max_node_count. |
| 259 | + int32 min_node_count = 2; |
| 260 | + |
| 261 | + // The maximum number of nodes in the NodePool. Must be >= min_node_count. |
| 262 | + // **Note:** Quota must be sufficient to scale up the cluster. |
| 263 | + int32 max_node_count = 3; |
| 264 | + } |
| 265 | + |
| 266 | + // Optional. The node pool configuration. |
| 267 | + GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL]; |
| 268 | + |
| 269 | + // Optional. The list of Compute Engine |
| 270 | + // [zones](https://cloud.google.com/compute/docs/zones#available) where |
| 271 | + // NodePool's nodes will be located. |
| 272 | + // |
| 273 | + // **Note:** Currently, only one zone may be specified. |
| 274 | + // |
| 275 | + // If a location is not specified during NodePool creation, Dataproc will |
| 276 | + // choose a location. |
| 277 | + repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL]; |
| 278 | + |
| 279 | + // Optional. The autoscaler configuration for this NodePool. The autoscaler is enabled |
| 280 | + // only when a valid configuration is present. |
| 281 | + GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL]; |
| 282 | +} |
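Finally, a standalone GkeNodePoolConfig sketch combining node shape, a single zone, and autoscaling bounds; the zone, machine type, and counts are placeholders chosen for illustration.

    # Hypothetical GkeNodePoolConfig; zone, machine type, and counts are placeholders.
    config {
      machine_type: "n1-standard-8"
      local_ssd_count: 1
    }
    locations: "us-central1-a"
    autoscaling {
      min_node_count: 1
      max_node_count: 5
    }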
| 283 | + |
111 | 284 | // Cluster components that can be activated.
|
112 | 285 | enum Component {
|
113 | 286 | // Unspecified component. Specifying this will cause Cluster creation to fail.
|