|
1 | 1 | {
|
2 |
| - groups+: [ |
3 |
| - { |
4 |
| - name: 'traefik', |
5 |
| - rules: [ |
6 |
| - // TraefikConfigReloadFailuresIncreasing |
7 |
| - { |
8 |
| - alert: 'TraefikConfigReloadFailuresIncreasing', |
9 |
| - expr: 'sum(rate(traefik_config_reloads_failure_total[5m])) > 0', |
10 |
| - 'for': '5m', |
11 |
| - labels: { |
12 |
| - severity: 'critical', |
| 2 | + prometheusAlerts+:: { |
| 3 | + groups+: [ |
| 4 | + { |
| 5 | + name: 'traefik', |
| 6 | + rules: [ |
| 7 | + // TraefikConfigReloadFailuresIncreasing |
| 8 | + { |
| 9 | + alert: 'TraefikConfigReloadFailuresIncreasing', |
| 10 | + expr: ||| |
| 11 | + sum by (%(sumByLabels)s) (rate(traefik_config_reloads_failure_total{%(timeSeriesLabels)s}[5m])) > 0 |
| 12 | + ||| % $._config, |
| 13 | + 'for': '5m', |
| 14 | + labels: { |
| 15 | + severity: 'critical', |
| 16 | + } + std.get($._config, 'alertLabels', {}), |
| 17 | + annotations: { |
| 18 | + description: 'Traefik is failing to reload its config', |
| 19 | + } + std.get($._config, 'alertAnnotations', {}), |
13 | 20 | },
|
14 |
| - annotations: { |
15 |
| - description: 'Traefik is failing to reload its config', |
| 21 | + // TraefikTLSCertificatesExpiring (critical) |
| 22 | + { |
| 23 | + alert: 'TraefikTLSCertificatesExpiring', |
| 24 | + expr: ||| |
| 25 | + max by (%(maxByLabels)s) ((last_over_time(traefik_tls_certs_not_after{%(timeSeriesLabels)s}[5m]) - time()) / 86400) < %(traefik_tls_expiry_days_critical)s |
| 26 | + ||| % $._config, |
| 27 | + 'for': '5m', |
| 28 | + labels: { |
| 29 | + severity: 'critical', |
| 30 | + } + std.get($._config, 'alertLabels', {}), |
| 31 | + annotations: { |
| 32 | + description: ||| |
| 33 | + The minimum number of days until a Traefik-served certificate expires is {{ printf "%%.0f" $value }} days on {{ $labels.sans }} which is below the critical threshold of %(traefik_tls_expiry_days_critical)s. |
| 34 | + ||| % $._config, |
| 35 | + } + std.get($._config, 'alertAnnotations', {}), |
16 | 36 | },
|
17 |
| - }, |
18 |
| - // TraefikTLSCertificatesExpiring |
19 |
| - { |
20 |
| - alert: 'TraefikTLSCertificatesExpiring', |
21 |
| - expr: 'max by (sans) ((last_over_time(traefik_tls_certs_not_after[5m]) - time()) / 86400) < 7', |
22 |
| - 'for': '5m', |
23 |
| - labels: { |
24 |
| - severity: 'critical', |
| 37 | + // TraefikTLSCertificatesExpiringSoon (warning) |
| 38 | + { |
| 39 | + alert: 'TraefikTLSCertificatesExpiringSoon', |
| 40 | + expr: ||| |
| 41 | + max by (%(maxByLabels)s) ((last_over_time(traefik_tls_certs_not_after{%(timeSeriesLabels)s}[5m]) - time()) / 86400) < %(traefik_tls_expiry_days_warning)s |
| 42 | + ||| % $._config, |
| 43 | + 'for': '5m', |
| 44 | + labels: { |
| 45 | + severity: 'warning', |
| 46 | + } + std.get($._config, 'alertLabels', {}), |
| 47 | + annotations: { |
| 48 | + description: ||| |
| 49 | + The minimum number of days until a Traefik-served certificate expires is {{ printf "%%.0f" $value }} days on {{ $labels.sans }} which is below the warning threshold of %(traefik_tls_expiry_days_warning)s. |
| 50 | + ||| % $._config, |
| 51 | + } + std.get($._config, 'alertAnnotations', {}), |
25 | 52 | },
|
26 |
| - annotations: { |
27 |
| - description: 'Traefik is serving certificates that will expire soon', |
28 |
| - }, |
29 |
| - }, |
30 |
| - ], |
31 |
| - }, |
32 |
| - ], |
| 53 | + ], |
| 54 | + }, |
| 55 | + ], |
| 56 | + }, |
33 | 57 | }
|
0 commit comments