Skip to content

Commit 6aa3722

Browse files
Test Tempo instance metrics when multitenancy is enabled (#1230)
Assisted by: Cursor IDE using claude-4-sonnet Co-authored-by: Andreas Gerstmayr <[email protected]>
1 parent 97e4d03 commit 6aa3722

File tree

13 files changed

+216
-3
lines changed

13 files changed

+216
-3
lines changed

tests/e2e-openshift/monitoring-monolithic/check_metrics.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ TOKEN=$(oc create token e2e-test-metrics-reader -n $NAMESPACE)
77
THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host')
88

99
# Check TempoMonolithic metrics
10-
metrics="tempo_distributor_bytes_received_total tempo_distributor_spans_received_total tempo_ingester_bytes_received_total tempo_distributor_traces_per_batch_count tempo_build_info"
10+
metrics="tempo_query_frontend_queries_total tempo_distributor_bytes_received_total tempo_distributor_spans_received_total tempo_ingester_bytes_received_total tempo_distributor_traces_per_batch_count tempo_build_info"
1111

1212
for metric in $metrics; do
1313
query="$metric"

tests/e2e-openshift/monitoring/check_metrics.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ TOKEN=$(oc create token e2e-test-metrics-reader -n $NAMESPACE)
77
THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host')
88

99
# Check metrics used in the Prometheus rules created for TempoStack. Refer to issue https://issues.redhat.com/browse/TRACING-3399 for skipped metrics.
10-
metrics="tempo_request_duration_seconds_count tempo_request_duration_seconds_sum tempo_request_duration_seconds_bucket tempo_build_info tempo_ingester_bytes_received_total tempo_ingester_flush_failed_retries_total tempo_ingester_failed_flushes_total tempo_ring_members tempo_operator_tempostack_managed tempo_operator_tempostack_storage_backend tempo_operator_tempostack_multi_tenancy"
10+
metrics="tempo_query_frontend_queries_total tempo_request_duration_seconds_count tempo_request_duration_seconds_sum tempo_request_duration_seconds_bucket tempo_build_info tempo_ingester_bytes_received_total tempo_ingester_flush_failed_retries_total tempo_ingester_failed_flushes_total tempo_ring_members tempo_operator_tempostack_managed tempo_operator_tempostack_storage_backend tempo_operator_tempostack_multi_tenancy"
1111

1212
for metric in $metrics; do
1313
query="$metric"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: prometheus-operator
5+
namespace: openshift-user-workload-monitoring
6+
(status.replicas == spec.replicas): true
7+
spec:
8+
(replicas >= `1`): true
9+
10+
---
11+
apiVersion: apps/v1
12+
kind: StatefulSet
13+
metadata:
14+
name: prometheus-user-workload
15+
namespace: openshift-user-workload-monitoring
16+
(status.replicas == spec.replicas): true
17+
spec:
18+
(replicas >= `1`): true
19+
20+
---
21+
apiVersion: apps/v1
22+
kind: StatefulSet
23+
metadata:
24+
name: thanos-ruler-user-workload
25+
namespace: openshift-user-workload-monitoring
26+
(status.replicas == spec.replicas): true
27+
spec:
28+
(replicas >= `1`): true
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# oc -n openshift-user-workload-monitoring get pod
2+
# https://docs.openshift.com/container-platform/4.13/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects
3+
4+
apiVersion: v1
5+
kind: ConfigMap
6+
metadata:
7+
name: cluster-monitoring-config
8+
namespace: openshift-monitoring
9+
data:
10+
config.yaml: |
11+
enableUserWorkload: true
12+
alertmanagerMain:
13+
enableUserAlertmanagerConfig: true

tests/e2e-openshift/monolithic-multitenancy-openshift/01-assert.yaml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,40 @@ subjects:
9292
- kind: ServiceAccount
9393
name: tempo-mmo
9494
namespace: chainsaw-monolithic-multitenancy
95+
96+
---
97+
apiVersion: monitoring.coreos.com/v1
98+
kind: ServiceMonitor
99+
metadata:
100+
labels:
101+
app.kubernetes.io/component: gateway
102+
app.kubernetes.io/instance: mmo
103+
app.kubernetes.io/managed-by: tempo-operator
104+
app.kubernetes.io/name: tempo-monolithic
105+
name: tempo-mmo-gateway
106+
namespace: chainsaw-monolithic-multitenancy
107+
spec:
108+
endpoints:
109+
- path: /metrics
110+
port: internal
111+
relabelings:
112+
- action: replace
113+
sourceLabels:
114+
- __meta_kubernetes_service_label_app_kubernetes_io_instance
115+
targetLabel: cluster
116+
- action: replace
117+
separator: /
118+
sourceLabels:
119+
- __meta_kubernetes_namespace
120+
- __meta_kubernetes_service_label_app_kubernetes_io_component
121+
targetLabel: job
122+
scheme: http
123+
namespaceSelector:
124+
matchNames:
125+
- chainsaw-monolithic-multitenancy
126+
selector:
127+
matchLabels:
128+
app.kubernetes.io/component: gateway
129+
app.kubernetes.io/instance: mmo
130+
app.kubernetes.io/managed-by: tempo-operator
131+
app.kubernetes.io/name: tempo-monolithic

tests/e2e-openshift/monolithic-multitenancy-openshift/01-install-tempo.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ spec:
77
enabled: true
88
route:
99
enabled: true
10+
observability:
11+
metrics:
12+
prometheusRules:
13+
enabled: true
14+
serviceMonitors:
15+
enabled: true
1016
multitenancy:
1117
enabled: true
1218
mode: openshift

tests/e2e-openshift/monolithic-multitenancy-openshift/chainsaw-test.yaml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22
apiVersion: chainsaw.kyverno.io/v1alpha1
33
kind: Test
44
metadata:
5+
creationTimestamp: null
56
name: monolithic-multitenancy-openshift
67
spec:
7-
# this test must use a known namespace because of the CN field of the TLS certificate and the ClusterRoleBinding
8+
# Avoid running this test case in parallel to prevent the deletion of shared resources used by multiple tests, specifically in the context of OpenShift user workload monitoring.
9+
concurrent: false
810
namespace: chainsaw-monolithic-multitenancy
911
steps:
12+
- name: step-00-workload-monitoring
13+
try:
14+
- apply:
15+
file: 00-workload-monitoring.yaml
16+
- assert:
17+
file: 00-workload-monitoring-assert.yaml
1018
- name: step-01
1119
try:
1220
- apply:
@@ -41,3 +49,8 @@ spec:
4149
tail: 50
4250
- podLogs:
4351
selector: app.kubernetes.io/name=tempo-monolithic
52+
- name: step-05-check-metrics
53+
try:
54+
- script:
55+
timeout: 5m
56+
content: ./check_metrics.sh
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Waits until every metric listed in `metrics` returns at least one series
# from the cluster's Thanos Querier.
#
# Environment:
#   NAMESPACE - namespace in which the metrics-reader service account is
#               created (expected to be provided by the test runner).
# Requires: oc, jq, curl.
#
# The polling loop has no upper bound of its own; the caller is expected to
# enforce a timeout on the whole script.

: "${NAMESPACE:?NAMESPACE must be set}"

# Failures of these two commands are deliberately not fatal (e.g. the service
# account may be left over from a previous run); a real problem surfaces
# below when the token cannot be created.
oc create serviceaccount e2e-test-metrics-reader -n "$NAMESPACE"
oc adm policy add-cluster-role-to-user cluster-monitoring-view "system:serviceaccount:${NAMESPACE}:e2e-test-metrics-reader"

TOKEN=$(oc create token e2e-test-metrics-reader -n "$NAMESPACE")
THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host')

# Without a token or a route host every query below would fail forever, so
# stop right away with a clear message. jq prints "null" for a missing field.
if [[ -z "$TOKEN" || -z "$THANOS_QUERIER_HOST" || "$THANOS_QUERIER_HOST" == "null" ]]; then
  echo "Failed to obtain a service account token or the thanos-querier route host." >&2
  exit 1
fi

# Check TempoMonolithic metrics
# Tempo component metrics not exposed due to bug. https://issues.redhat.com/browse/TRACING-5472
#metrics="tempo_query_frontend_queries_total tempo_distributor_bytes_received_total tempo_distributor_spans_received_total tempo_ingester_bytes_received_total tempo_distributor_traces_per_batch_count tempo_build_info"
metrics=(http_request_duration_seconds_bucket)

for metric in "${metrics[@]}"; do
  query="$metric"
  count=0

  # Keep fetching and checking the metrics until metrics with value is present.
  while [[ "$count" -eq 0 ]]; do
    # -sS: no progress meter, but still report transfer errors.
    # -G with --data-urlencode: send the query as a properly encoded GET parameter.
    response=$(curl -k -sS -G \
      -H "Authorization: Bearer $TOKEN" \
      -H "Content-type: application/json" \
      --data-urlencode "query=$query" \
      "https://$THANOS_QUERIER_HOST/api/v1/query")

    # A non-JSON response makes jq fail and print nothing; treat anything
    # that is not a plain number as "no results yet" and retry.
    count=$(printf '%s' "$response" | jq -r '.data.result | length' 2>/dev/null)
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    if [[ "$count" -eq 0 ]]; then
      echo "No metric '$metric' with value present. Retrying..."
      sleep 5 # Wait for 5 seconds before retrying
    else
      echo "Metric '$metric' with value is present."
    fi
  done
done
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: prometheus-operator
5+
namespace: openshift-user-workload-monitoring
6+
(status.replicas == spec.replicas): true
7+
spec:
8+
(replicas >= `1`): true
9+
10+
---
11+
apiVersion: apps/v1
12+
kind: StatefulSet
13+
metadata:
14+
name: prometheus-user-workload
15+
namespace: openshift-user-workload-monitoring
16+
(status.replicas == spec.replicas): true
17+
spec:
18+
(replicas >= `1`): true
19+
20+
---
21+
apiVersion: apps/v1
22+
kind: StatefulSet
23+
metadata:
24+
name: thanos-ruler-user-workload
25+
namespace: openshift-user-workload-monitoring
26+
(status.replicas == spec.replicas): true
27+
spec:
28+
(replicas >= `1`): true
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# oc -n openshift-user-workload-monitoring get pod
2+
# https://docs.openshift.com/container-platform/4.13/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects
3+
4+
apiVersion: v1
5+
kind: ConfigMap
6+
metadata:
7+
name: cluster-monitoring-config
8+
namespace: openshift-monitoring
9+
data:
10+
config.yaml: |
11+
enableUserWorkload: true
12+
alertmanagerMain:
13+
enableUserAlertmanagerConfig: true

0 commit comments

Comments
 (0)