File tree 1 file changed +53
-0
lines changed
1 file changed +53
-0
lines changed Original file line number Diff line number Diff line change
# Prometheus alerting rules evaluated over otelcol_* metrics.
# Every rule below must hold continuously for 15m before it fires and is
# labelled severity=warning.
groups:
  - name: jaeger_alerts
    rules:
      # Share of HTTP server requests answered with a 5xx status code, as a
      # percentage per (instance, job), computed from 1m request rates.
      # Fires above 1%.
      - alert: OtelHttpServerErrors
        annotations:
          message: |-
            {{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors.
        expr: |
          100 * sum(rate(otelcol_http_server_duration_count{http_status_code=~"5.."}[1m])) by (instance, job) /
            sum(rate(otelcol_http_server_duration_count[1m])) by (instance, job) > 1
        for: 15m
        labels:
          severity: warning

      # Exporter queue fill level. The expression yields a percentage of
      # capacity (not an item count), so the message reports it as a percent.
      - alert: OtelExporterQueueFull
        annotations:
          message: |-
            {{ $labels.job }} {{ $labels.instance }} exporter queue is at {{ printf "%.2f" $value }}% of capacity (threshold 80%).
        expr: |
          100 * otelcol_exporter_queue_size / otelcol_exporter_queue_capacity > 80
        for: 15m
        labels:
          severity: warning

      # Resident memory above 100000000 (~100 MB if the metric is in bytes).
      # humanize1024 renders the value with binary prefixes (Ki, Mi, Gi), so
      # the trailing "B" produces e.g. "95.37MiB".
      - alert: OtelHighMemoryUsage
        annotations:
          message: |-
            {{ $labels.job }} {{ $labels.instance }} memory usage is high at {{ humanize1024 $value }}B.
        expr: |
          otelcol_process_memory_rss > 100000000
        for: 15m
        labels:
          severity: warning

      # rate() returns a per-second average over the 5m window. Assuming the
      # metric counts CPU seconds, the value is CPU-seconds consumed per
      # second, i.e. the number of cores in use; 0.8 means 80% of one core.
      - alert: OtelHighCpuUsage
        annotations:
          message: |-
            {{ $labels.job }} {{ $labels.instance }} CPU usage is high ({{ printf "%.2f" $value }} CPU cores used, averaged over 5m).
        expr: |
          rate(otelcol_process_cpu_seconds[5m]) > 0.8
        for: 15m
        labels:
          severity: warning

      # Batch processor metadata cardinality above 1000 distinct combinations.
      - alert: OtelProcessorBatchHighCardinality
        annotations:
          message: |-
            {{ $labels.job }} {{ $labels.instance }} has high metadata cardinality ({{ printf "%.0f" $value }} combinations).
        expr: |
          otelcol_processor_batch_metadata_cardinality > 1000
        for: 15m
        labels:
          severity: warning
You can’t perform that action at this time.
0 commit comments