Add TargetMemoryUtilization metric for AutoScaling (#1223)

kevinearls · web-flow · commit 61accfd0591c · 2022-12-01T10:48:42.000+01:00
* Add TargetMemoryUtilization metric for AutoScaling

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Add changes to v2beta2 as there is no way to run e2e tests just for one version

Signed-off-by: Kevin Earls <kearls@redhat.com>

* See if we just have a race condition

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Reset kuttl timeout

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Add some debugging code to help analyze failures on github

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Try to appease the linter

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Restore autoscale tests

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Cleanup

Signed-off-by: Kevin Earls <kearls@redhat.com>

* More cleanup

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Respond to comments

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Cleanup whitespace so linter will rerun

Signed-off-by: Kevin Earls <kearls@redhat.com>

* Don't set TargetCPUUtilization to default if another metric is set

Signed-off-by: Kevin Earls <kearls@redhat.com>

Signed-off-by: Kevin Earls <kearls@redhat.com>
diff --git a/apis/v1alpha1/opentelemetrycollector_types.go b/apis/v1alpha1/opentelemetrycollector_types.go
@@ -277,6 +277,9 @@ type AutoscalerSpec struct {
 	// If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent.
 	// +optional
 	TargetCPUUtilization *int32 `json:"targetCPUUtilization,omitempty"`
+	// +optional
+	// TargetMemoryUtilization sets the target average memory utilization across all replicas
+	TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"`
 }
 
 func init() {
diff --git a/apis/v1alpha1/opentelemetrycollector_webhook.go b/apis/v1alpha1/opentelemetrycollector_webhook.go
@@ -67,13 +67,15 @@ func (r *OpenTelemetryCollector) Default() {
 		r.Spec.TargetAllocator.Replicas = &one
 	}
 
-	// Set default targetCPUUtilization for autoscaler
-	if r.Spec.MaxReplicas != nil && (r.Spec.Autoscaler == nil || r.Spec.Autoscaler.TargetCPUUtilization == nil) {
-		defaultCPUTarget := int32(90)
+	if r.Spec.MaxReplicas != nil {
 		if r.Spec.Autoscaler == nil {
 			r.Spec.Autoscaler = &AutoscalerSpec{}
 		}
-		r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget
+
+		if r.Spec.Autoscaler.TargetMemoryUtilization == nil && r.Spec.Autoscaler.TargetCPUUtilization == nil {
+			defaultCPUTarget := int32(90)
+			r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget
+		}
 	}
 }
 
@@ -176,7 +178,9 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
 		if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetCPUUtilization != nil && (*r.Spec.Autoscaler.TargetCPUUtilization < int32(1) || *r.Spec.Autoscaler.TargetCPUUtilization > int32(99)) {
 			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetCPUUtilization should be greater than 0 and less than 100")
 		}
-
+		if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) || *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) {
+			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")
+		}
 	}
 
 	if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {
diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go
diff --git a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml
@@ -1016,6 +1016,11 @@ spec:
                       the HPA will scale up. Defaults to 90 percent.
                     format: int32
                     type: integer
+                  targetMemoryUtilization:
+                    description: TargetMemoryUtilization sets the target average memory
+                      utilization across all replicas
+                    format: int32
+                    type: integer
                 type: object
               config:
                 description: Config is the raw JSON to be used as the collector's
diff --git a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml
@@ -1014,6 +1014,11 @@ spec:
                       the HPA will scale up. Defaults to 90 percent.
                     format: int32
                     type: integer
+                  targetMemoryUtilization:
+                    description: TargetMemoryUtilization sets the target average memory
+                      utilization across all replicas
+                    format: int32
+                    type: integer
                 type: object
               config:
                 description: Config is the raw JSON to be used as the collector's
diff --git a/docs/api.md b/docs/api.md
@@ -3228,6 +3228,15 @@ Autoscaler specifies the pod autoscaling configuration to use for the OpenTeleme
             <i>Format</i>: int32<br/>
         </td>
         <td>false</td>
+      </tr><tr>
+        <td><b>targetMemoryUtilization</b></td>
+        <td>integer</td>
+        <td>
+          TargetMemoryUtilization sets the target average memory utilization across all replicas<br/>
+          <br/>
+            <i>Format</i>: int32<br/>
+        </td>
+        <td>false</td>
       </tr></tbody>
 </table>
 
diff --git a/pkg/collector/horizontalpodautoscaler.go b/pkg/collector/horizontalpodautoscaler.go
@@ -33,9 +33,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al
 
 	labels := Labels(otelcol, cfg.LabelsFilter())
 	labels["app.kubernetes.io/name"] = naming.Collector(otelcol)
-
 	annotations := Annotations(otelcol)
-
 	var result client.Object
 
 	objectMeta := metav1.ObjectMeta{
@@ -46,6 +44,22 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al
 	}
 
 	if autoscalingVersion == autodetect.AutoscalingVersionV2Beta2 {
+		metrics := []autoscalingv2beta2.MetricSpec{}
+
+		if otelcol.Spec.Autoscaler.TargetMemoryUtilization != nil {
+			utilizationTarget := autoscalingv2beta2.MetricSpec{
+				Type: autoscalingv2beta2.ResourceMetricSourceType,
+				Resource: &autoscalingv2beta2.ResourceMetricSource{
+					Name: corev1.ResourceMemory,
+					Target: autoscalingv2beta2.MetricTarget{
+						Type:               autoscalingv2beta2.UtilizationMetricType,
+						AverageUtilization: otelcol.Spec.Autoscaler.TargetMemoryUtilization,
+					},
+				},
+			}
+			metrics = append(metrics, utilizationTarget)
+		}
+
 		targetCPUUtilization := autoscalingv2beta2.MetricSpec{
 			Type: autoscalingv2beta2.ResourceMetricSourceType,
 			Resource: &autoscalingv2beta2.ResourceMetricSource{
@@ -56,7 +70,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al
 				},
 			},
 		}
-		metrics := []autoscalingv2beta2.MetricSpec{targetCPUUtilization}
+		metrics = append(metrics, targetCPUUtilization)
 
 		autoscaler := autoscalingv2beta2.HorizontalPodAutoscaler{
 			ObjectMeta: objectMeta,
@@ -79,17 +93,35 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al
 
 		result = &autoscaler
 	} else {
-		targetCPUUtilization := autoscalingv2.MetricSpec{
-			Type: autoscalingv2.ResourceMetricSourceType,
-			Resource: &autoscalingv2.ResourceMetricSource{
-				Name: corev1.ResourceCPU,
-				Target: autoscalingv2.MetricTarget{
-					Type:               autoscalingv2.UtilizationMetricType,
-					AverageUtilization: otelcol.Spec.Autoscaler.TargetCPUUtilization,
+		metrics := []autoscalingv2.MetricSpec{}
+
+		if otelcol.Spec.Autoscaler.TargetMemoryUtilization != nil {
+			utilizationTarget := autoscalingv2.MetricSpec{
+				Type: autoscalingv2.ResourceMetricSourceType,
+				Resource: &autoscalingv2.ResourceMetricSource{
+					Name: corev1.ResourceMemory,
+					Target: autoscalingv2.MetricTarget{
+						Type:               autoscalingv2.UtilizationMetricType,
+						AverageUtilization: otelcol.Spec.Autoscaler.TargetMemoryUtilization,
+					},
 				},
-			},
+			}
+			metrics = append(metrics, utilizationTarget)
+		}
+
+		if otelcol.Spec.Autoscaler.TargetCPUUtilization != nil {
+			targetCPUUtilization := autoscalingv2.MetricSpec{
+				Type: autoscalingv2.ResourceMetricSourceType,
+				Resource: &autoscalingv2.ResourceMetricSource{
+					Name: corev1.ResourceCPU,
+					Target: autoscalingv2.MetricTarget{
+						Type:               autoscalingv2.UtilizationMetricType,
+						AverageUtilization: otelcol.Spec.Autoscaler.TargetCPUUtilization,
+					},
+				},
+			}
+			metrics = append(metrics, targetCPUUtilization)
 		}
-		metrics := []autoscalingv2.MetricSpec{targetCPUUtilization}
 
 		autoscaler := autoscalingv2.HorizontalPodAutoscaler{
 			ObjectMeta: objectMeta,
@@ -104,7 +136,7 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al
 				Metrics:     metrics,
 			},
 		}
-		if otelcol.Spec.Autoscaler != nil && otelcol.Spec.Autoscaler.Behavior != nil {
+		if otelcol.Spec.Autoscaler.Behavior != nil {
 			autoscaler.Spec.Behavior = otelcol.Spec.Autoscaler.Behavior
 		}
 		result = &autoscaler
diff --git a/pkg/collector/horizontalpodautoscaler_test.go b/pkg/collector/horizontalpodautoscaler_test.go
@@ -41,7 +41,8 @@ func TestHPA(t *testing.T) {
 
 	var minReplicas int32 = 3
 	var maxReplicas int32 = 5
-	var cpuUtilization int32 = 90
+	var cpuUtilization int32 = 66
+	var memoryUtilization int32 = 77
 
 	otelcol := v1alpha1.OpenTelemetryCollector{
 		ObjectMeta: metav1.ObjectMeta{
@@ -51,7 +52,8 @@ func TestHPA(t *testing.T) {
 			Replicas:    &minReplicas,
 			MaxReplicas: &maxReplicas,
 			Autoscaler: &v1alpha1.AutoscalerSpec{
-				TargetCPUUtilization: &cpuUtilization,
+				TargetCPUUtilization:    &cpuUtilization,
+				TargetMemoryUtilization: &memoryUtilization,
 			},
 		},
 	}
@@ -76,9 +78,13 @@ func TestHPA(t *testing.T) {
 				assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
 				assert.Equal(t, int32(3), *hpa.Spec.MinReplicas)
 				assert.Equal(t, int32(5), hpa.Spec.MaxReplicas)
-				assert.Equal(t, 1, len(hpa.Spec.Metrics))
-				assert.Equal(t, corev1.ResourceCPU, hpa.Spec.Metrics[0].Resource.Name)
-				assert.Equal(t, int32(90), *hpa.Spec.Metrics[0].Resource.Target.AverageUtilization)
+				for _, metric := range hpa.Spec.Metrics {
+					if metric.Resource.Name == corev1.ResourceCPU {
+						assert.Equal(t, cpuUtilization, *metric.Resource.Target.AverageUtilization)
+					} else if metric.Resource.Name == corev1.ResourceMemory {
+						assert.Equal(t, memoryUtilization, *metric.Resource.Target.AverageUtilization)
+					}
+				}
 			} else {
 				hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler)
 
@@ -87,9 +93,15 @@ func TestHPA(t *testing.T) {
 				assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
 				assert.Equal(t, int32(3), *hpa.Spec.MinReplicas)
 				assert.Equal(t, int32(5), hpa.Spec.MaxReplicas)
-				assert.Equal(t, 1, len(hpa.Spec.Metrics))
-				assert.Equal(t, corev1.ResourceCPU, hpa.Spec.Metrics[0].Resource.Name)
-				assert.Equal(t, int32(90), *hpa.Spec.Metrics[0].Resource.Target.AverageUtilization)
+				assert.Equal(t, 2, len(hpa.Spec.Metrics))
+
+				for _, metric := range hpa.Spec.Metrics {
+					if metric.Resource.Name == corev1.ResourceCPU {
+						assert.Equal(t, cpuUtilization, *metric.Resource.Target.AverageUtilization)
+					} else if metric.Resource.Name == corev1.ResourceMemory {
+						assert.Equal(t, memoryUtilization, *metric.Resource.Target.AverageUtilization)
+					}
+				}
 			}
 		})
 	}
diff --git a/pkg/collector/reconcile/horizontalpodautoscaler.go b/pkg/collector/reconcile/horizontalpodautoscaler.go
@@ -20,6 +20,7 @@ import (
 
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2"
+	corev1 "k8s.io/api/core/v1"
 	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/types"
@@ -127,7 +128,15 @@ func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingV
 			} else {
 				updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MinReplicas = &one
 			}
-			updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics[0].Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization
+
+			// This will update memory and CPU usage for now, and can be used to update other metrics in the future
+			for _, metric := range updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics {
+				if metric.Resource.Name == corev1.ResourceCPU {
+					metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization
+				} else if metric.Resource.Name == corev1.ResourceMemory {
+					metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetMemoryUtilization
+				}
+			}
 		}
 	}
 }
diff --git a/tests/e2e/autoscale/00-install.yaml b/tests/e2e/autoscale/00-install.yaml
@@ -1,3 +1,6 @@
+# This creates two different deployments. The first one will be used to see if we scale properly.  (Note that we are
+# only scaling up to 2 because of limitations of KUTTL). The second is to check the targetCPUUtilization option.
+#
 apiVersion: opentelemetry.io/v1alpha1
 kind: OpenTelemetryCollector
 metadata:
diff --git a/tests/e2e/autoscale/01-assert.yaml b/tests/e2e/autoscale/01-assert.yaml
@@ -1,3 +1,4 @@
+# Wait until tracegen has completed and the simplest deployment has scaled up to 2
 apiVersion: batch/v1
 kind: Job
 metadata:
diff --git a/tests/e2e/autoscale/02-assert.yaml b/tests/e2e/autoscale/02-assert.yaml
@@ -1,3 +1,4 @@
+# Wait for the collector to scale back down to 1
 apiVersion: opentelemetry.io/v1alpha1
 kind: OpenTelemetryCollector
 

Original file line number	Diff line number	Diff line change
`@@ -277,6 +277,9 @@ type AutoscalerSpec struct {`
`277`	`277`	`// If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent.`
`278`	`278`	`// +optional`
`279`	`279`	TargetCPUUtilization *int32 `json:"targetCPUUtilization,omitempty"`
	`280`	`+ // +optional`
	`281`	`+ // TargetMemoryUtilization sets the target average memory utilization across all replicas`
	`282`	+ TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"`
`280`	`283`	`}`
`281`	`284`
`282`	`285`	`func init() {`
Original file line number	Diff line number	Diff line change
`@@ -67,13 +67,15 @@ func (r *OpenTelemetryCollector) Default() {`
`67`	`67`	`r.Spec.TargetAllocator.Replicas = &one`
`68`	`68`	`}`
`69`	`69`
`70`		`- // Set default targetCPUUtilization for autoscaler`
`71`		`- if r.Spec.MaxReplicas != nil && (r.Spec.Autoscaler == nil \|\| r.Spec.Autoscaler.TargetCPUUtilization == nil) {`
`72`		`- defaultCPUTarget := int32(90)`
	`70`	`+ if r.Spec.MaxReplicas != nil {`
`73`	`71`	`if r.Spec.Autoscaler == nil {`
`74`	`72`	`r.Spec.Autoscaler = &AutoscalerSpec{}`
`75`	`73`	`}`
`76`		`- r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget`
	`74`	`+`
	`75`	`+ if r.Spec.Autoscaler.TargetMemoryUtilization == nil && r.Spec.Autoscaler.TargetCPUUtilization == nil {`
	`76`	`+ defaultCPUTarget := int32(90)`
	`77`	`+ r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget`
	`78`	`+ }`
`77`	`79`	`}`
`78`	`80`	`}`
`79`	`81`
`@@ -176,7 +178,9 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {`
`176`	`178`	`if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetCPUUtilization != nil && (*r.Spec.Autoscaler.TargetCPUUtilization < int32(1) \|\| *r.Spec.Autoscaler.TargetCPUUtilization > int32(99)) {`
`177`	`179`	`return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetCPUUtilization should be greater than 0 and less than 100")`
`178`	`180`	`}`
`179`		`-`
	`181`	`+ if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) \|\| *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) {`
	`182`	`+ return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")`
	`183`	`+ }`
`180`	`184`	`}`
`181`	`185`
`182`	`186`	`if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {`
Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@ import (`
`20`	`20`
`21`	`21`	`autoscalingv2 "k8s.io/api/autoscaling/v2"`
`22`	`22`	`autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2"`
	`23`	`+ corev1 "k8s.io/api/core/v1"`
`23`	`24`	`k8serrors "k8s.io/apimachinery/pkg/api/errors"`
`24`	`25`	`"k8s.io/apimachinery/pkg/api/meta"`
`25`	`26`	`"k8s.io/apimachinery/pkg/types"`
`@@ -127,7 +128,15 @@ func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingV`
`127`	`128`	`} else {`
`128`	`129`	`updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MinReplicas = &one`
`129`	`130`	`}`
`130`		`- updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics[0].Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization`
	`131`	`+`
	`132`	`+ // This will update memory and CPU usage for now, and can be used to update other metrics in the future`
	`133`	`+ for _, metric := range updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics {`
	`134`	`+ if metric.Resource.Name == corev1.ResourceCPU {`
	`135`	`+ metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization`
	`136`	`+ } else if metric.Resource.Name == corev1.ResourceMemory {`
	`137`	`+ metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetMemoryUtilization`
	`138`	`+ }`
	`139`	`+ }`
`131`	`140`	`}`
`132`	`141`	`}`
`133`	`142`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Wait until tracegen has completed and the simplest deployment has scaled up to 2`
`1`	`2`	`apiVersion: batch/v1`
`2`	`3`	`kind: Job`
`3`	`4`	`metadata:`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Wait for the collector to scale back down to 1`
`1`	`2`	`apiVersion: opentelemetry.io/v1alpha1`
`2`	`3`	`kind: OpenTelemetryCollector`
`3`	`4`