Skip to content

Commit 0f765f0

Browse files
HeiglAnna, AnnaHeigl, and juliocc
authored and committed
Add cAdvisor Metrics to Autopilot/Standard GKE Cluster (GoogleCloudPlatform#2841)
* Add cAdvisor Metrics to Autopilot/Standard GKE Cluster
* Fix tests

---------

Co-authored-by: AnnaHeigl <[email protected]>
Co-authored-by: Julio Castillo <[email protected]>
1 parent 776edf0 commit 0f765f0

File tree

10 files changed

+29
-16
lines changed

10 files changed

+29
-16
lines changed

blueprints/gke/autopilot/cluster.tf

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ module "cluster" {
4646
# enable_pod_metrics = true
4747
# enable_statefulset_metrics = true
4848
# enable_storage_metrics = true
49+
# enable_cadvisor_metrics = true
4950
# }
5051
# cluster_autoscaling = {
5152
# auto_provisioning_defaults = {

blueprints/gke/patterns/autopilot-cluster/cluster.tf

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ module "cluster" {
107107
enable_api_server_metrics = true
108108
enable_controller_manager_metrics = true
109109
enable_scheduler_metrics = true
110+
enable_cadvisor_metrics = true
110111
}
111112
logging_config = {
112113
enable_api_server_logs = true

modules/gke-cluster-autopilot/README.md

+8-7
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ module "cluster-1" {
146146
secondary_range_names = {} # use default names "pods" and "services"
147147
}
148148
monitoring_config = {
149+
enable_cadvisor_metrics = true
149150
enable_daemonset_metrics = true
150151
enable_deployment_metrics = true
151152
enable_hpa_metrics = true
@@ -205,9 +206,9 @@ module "cluster-1" {
205206
| name | description | type | required | default |
206207
|---|---|:---:|:---:|:---:|
207208
| [location](variables.tf#L143) | Autopilot clusters are always regional. | <code>string</code> || |
208-
| [name](variables.tf#L220) | Cluster name. | <code>string</code> || |
209-
| [project_id](variables.tf#L251) | Cluster project ID. | <code>string</code> || |
210-
| [vpc_config](variables.tf#L267) | VPC-level configuration. | <code title="object&#40;&#123;&#10; disable_default_snat &#61; optional&#40;bool&#41;&#10; network &#61; string&#10; subnetwork &#61; string&#10; secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#41;&#10; secondary_range_names &#61; optional&#40;object&#40;&#123;&#10; pods &#61; optional&#40;string&#41;&#10; services &#61; optional&#40;string&#41;&#10; &#125;&#41;&#41;&#10; additional_ranges &#61; optional&#40;list&#40;string&#41;&#41;&#10; stack_type &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> || |
209+
| [name](variables.tf#L222) | Cluster name. | <code>string</code> || |
210+
| [project_id](variables.tf#L253) | Cluster project ID. | <code>string</code> || |
211+
| [vpc_config](variables.tf#L269) | VPC-level configuration. | <code title="object&#40;&#123;&#10; disable_default_snat &#61; optional&#40;bool&#41;&#10; network &#61; string&#10; subnetwork &#61; string&#10; secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10; pods &#61; string&#10; services &#61; string&#10; &#125;&#41;&#41;&#10; secondary_range_names &#61; optional&#40;object&#40;&#123;&#10; pods &#61; optional&#40;string&#41;&#10; services &#61; optional&#40;string&#41;&#10; &#125;&#41;&#41;&#10; additional_ranges &#61; optional&#40;list&#40;string&#41;&#41;&#10; stack_type &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> || |
211212
| [access_config](variables.tf#L17) | Control plane endpoint and nodes access configurations. | <code title="object&#40;&#123;&#10; dns_access &#61; optional&#40;bool, true&#41;&#10; ip_access &#61; optional&#40;object&#40;&#123;&#10; authorized_ranges &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; disable_public_endpoint &#61; optional&#40;bool, true&#41;&#10; private_endpoint_config &#61; optional&#40;object&#40;&#123;&#10; endpoint_subnetwork &#61; optional&#40;string&#41;&#10; global_access &#61; optional&#40;bool, true&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; private_nodes &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
212213
| [backup_configs](variables.tf#L42) | Configuration for Backup for GKE. | <code title="object&#40;&#123;&#10; enable_backup_agent &#61; optional&#40;bool, false&#41;&#10; backup_plans &#61; optional&#40;map&#40;object&#40;&#123;&#10; encryption_key &#61; optional&#40;string&#41;&#10; include_secrets &#61; optional&#40;bool, true&#41;&#10; include_volume_data &#61; optional&#40;bool, true&#41;&#10; labels &#61; optional&#40;map&#40;string&#41;&#41;&#10; namespaces &#61; optional&#40;list&#40;string&#41;&#41;&#10; region &#61; string&#10; schedule &#61; string&#10; retention_policy_days &#61; optional&#40;string&#41;&#10; retention_policy_lock &#61; optional&#40;bool, false&#41;&#10; retention_policy_delete_lock_days &#61; optional&#40;string&#41;&#10; &#125;&#41;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
213214
| [deletion_protection](variables.tf#L63) | Whether or not to allow Terraform to destroy the cluster. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the cluster will fail. | <code>bool</code> | | <code>true</code> |
@@ -219,10 +220,10 @@ module "cluster-1" {
219220
| [logging_config](variables.tf#L148) | Logging configuration. | <code title="object&#40;&#123;&#10; enable_api_server_logs &#61; optional&#40;bool, false&#41;&#10; enable_scheduler_logs &#61; optional&#40;bool, false&#41;&#10; enable_controller_manager_logs &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
220221
| [maintenance_config](variables.tf#L159) | Maintenance window configuration. | <code title="object&#40;&#123;&#10; daily_window_start_time &#61; optional&#40;string&#41;&#10; recurring_window &#61; optional&#40;object&#40;&#123;&#10; start_time &#61; string&#10; end_time &#61; string&#10; recurrence &#61; string&#10; &#125;&#41;&#41;&#10; maintenance_exclusions &#61; optional&#40;list&#40;object&#40;&#123;&#10; name &#61; string&#10; start_time &#61; string&#10; end_time &#61; string&#10; scope &#61; optional&#40;string&#41;&#10; &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code title="&#123;&#10; daily_window_start_time &#61; &#34;03:00&#34;&#10; recurring_window &#61; null&#10; maintenance_exclusion &#61; &#91;&#93;&#10;&#125;">&#123;&#8230;&#125;</code> |
221222
| [min_master_version](variables.tf#L182) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> | | <code>null</code> |
222-
| [monitoring_config](variables.tf#L188) | Monitoring configuration. System metrics collection cannot be disabled. Control plane metrics are optional. Kube state metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10; enable_api_server_metrics &#61; optional&#40;bool, false&#41;&#10; enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10; enable_scheduler_metrics &#61; optional&#40;bool, false&#41;&#10; enable_daemonset_metrics &#61; optional&#40;bool, false&#41;&#10; enable_deployment_metrics &#61; optional&#40;bool, false&#41;&#10; enable_hpa_metrics &#61; optional&#40;bool, false&#41;&#10; enable_pod_metrics &#61; optional&#40;bool, false&#41;&#10; enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10; enable_storage_metrics &#61; optional&#40;bool, false&#41;&#10; enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
223-
| [node_config](variables.tf#L225) | Configuration for nodes and nodepools. | <code title="object&#40;&#123;&#10; boot_disk_kms_key &#61; optional&#40;string&#41;&#10; service_account &#61; optional&#40;string&#41;&#10; tags &#61; optional&#40;list&#40;string&#41;&#41;&#10; workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
224-
| [node_locations](variables.tf#L244) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> | | <code>&#91;&#93;</code> |
225-
| [release_channel](variables.tf#L256) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | <code>string</code> | | <code>&#34;REGULAR&#34;</code> |
223+
| [monitoring_config](variables.tf#L188) | Monitoring configuration. System metrics collection cannot be disabled. Control plane metrics are optional. Kube state metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10; enable_api_server_metrics &#61; optional&#40;bool, false&#41;&#10; enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10; enable_scheduler_metrics &#61; optional&#40;bool, false&#41;&#10; enable_daemonset_metrics &#61; optional&#40;bool, false&#41;&#10; enable_deployment_metrics &#61; optional&#40;bool, false&#41;&#10; enable_hpa_metrics &#61; optional&#40;bool, false&#41;&#10; enable_pod_metrics &#61; optional&#40;bool, false&#41;&#10; enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10; enable_storage_metrics &#61; optional&#40;bool, false&#41;&#10; enable_cadvisor_metrics &#61; optional&#40;bool, false&#41;&#10; enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
224+
| [node_config](variables.tf#L227) | Configuration for nodes and nodepools. | <code title="object&#40;&#123;&#10; boot_disk_kms_key &#61; optional&#40;string&#41;&#10; service_account &#61; optional&#40;string&#41;&#10; tags &#61; optional&#40;list&#40;string&#41;&#41;&#10; workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
225+
| [node_locations](variables.tf#L246) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> | | <code>&#91;&#93;</code> |
226+
| [release_channel](variables.tf#L258) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | <code>string</code> | | <code>&#34;REGULAR&#34;</code> |
226227

227228
## Outputs
228229

modules/gke-cluster-autopilot/main.tf

+1
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ resource "google_container_cluster" "cluster" {
241241
var.monitoring_config.enable_pod_metrics ? "POD" : null,
242242
var.monitoring_config.enable_statefulset_metrics ? "STATEFULSET" : null,
243243
var.monitoring_config.enable_storage_metrics ? "STORAGE" : null,
244+
var.monitoring_config.enable_cadvisor_metrics ? "CADVISOR" : null,
244245
]))
245246
managed_prometheus {
246247
enabled = var.monitoring_config.enable_managed_prometheus

modules/gke-cluster-autopilot/variables.tf

+2
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ variable "monitoring_config" {
199199
enable_pod_metrics = optional(bool, false)
200200
enable_statefulset_metrics = optional(bool, false)
201201
enable_storage_metrics = optional(bool, false)
202+
enable_cadvisor_metrics = optional(bool, false)
202203
# Google Cloud Managed Service for Prometheus. Autopilot clusters version >= 1.25 must have this on.
203204
enable_managed_prometheus = optional(bool, true)
204205
})
@@ -212,6 +213,7 @@ variable "monitoring_config" {
212213
var.monitoring_config.enable_pod_metrics,
213214
var.monitoring_config.enable_statefulset_metrics,
214215
var.monitoring_config.enable_storage_metrics,
216+
var.monitoring_config.enable_cadvisor_metrics,
215217
]) ? var.monitoring_config.enable_managed_prometheus : true
216218
error_message = "Kube state metrics collection requires Google Cloud Managed Service for Prometheus to be enabled."
217219
}

0 commit comments

Comments
 (0)