Skip to content

Commit 3192967

Browse files
authored
Extend alertmanager limits to cover all integrations. (#4163)
* Extend alertmanager limits to cover all integrations. Signed-off-by: Peter Štibraný <[email protected]> * Fix changelog entry. Signed-off-by: Peter Štibraný <[email protected]> * End sentence with period. Signed-off-by: Peter Štibraný <[email protected]> * Fix tests. Signed-off-by: Peter Štibraný <[email protected]> * Fix typo. Signed-off-by: Peter Štibraný <[email protected]> * Feedback review. Signed-off-by: Peter Štibraný <[email protected]> * Simplify rate limit config by using burst-size=rate limit. Signed-off-by: Peter Štibraný <[email protected]> * Fix flag names. Signed-off-by: Peter Štibraný <[email protected]>
1 parent de52eff commit 3192967

10 files changed

+421
-53
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
- `-alertmanager.receivers-firewall.block.cidr-networks` renamed to `-alertmanager.receivers-firewall-block-cidr-networks`
88
- `-alertmanager.receivers-firewall.block.private-addresses` renamed to `-alertmanager.receivers-firewall-block-private-addresses`
99
* [CHANGE] Change default value of `-server.grpc.keepalive.min-time-between-pings` to `10s` and `-server.grpc.keepalive.ping-without-stream-allowed` to `true`. #4168
10-
* [FEATURE] Alertmanager: Added rate-limits to email notifier. Rate limits can be configured using `-alertmanager.email-notification-rate-limit` and `-alertmanager.email-notification-burst-size`. These limits are applied on individual alertmanagers. Rate-limited email notifications are failed notifications. It is possible to monitor rate-limited notifications via new `cortex_alertmanager_notification_rate_limited_total` metric. #4135
10+
* [FEATURE] Alertmanager: Added rate-limits to notifiers. Rate limits used by all integrations can be configured using `-alertmanager.notification-rate-limit`, while per-integration rate limits can be specified via the `-alertmanager.notification-rate-limit-per-integration` parameter. Both shared and per-integration limits can be overridden using the overrides mechanism. These limits are applied on individual (per-tenant) alertmanagers. Rate-limited notifications are treated as failed notifications. It is possible to monitor rate-limited notifications via the new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
1111
* [ENHANCEMENT] Alertmanager: introduced new metrics to monitor operation when using `-alertmanager.sharding-enabled`: #4149
1212
* `cortex_alertmanager_state_fetch_replica_state_total`
1313
* `cortex_alertmanager_state_fetch_replica_state_failed_total`

docs/configuration/config-file-reference.md

+14-10
Original file line numberDiff line numberDiff line change
@@ -4107,16 +4107,20 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
41074107
# CLI flag: -alertmanager.receivers-firewall-block-private-addresses
41084108
[alertmanager_receivers_firewall_block_private_addresses: <boolean> | default = false]
41094109
4110-
# Per-user rate limit for sending email notifications from Alertmanager in
4111-
# emails/sec. 0 = rate limit disabled. Negative value = no emails are allowed.
4112-
# CLI flag: -alertmanager.email-notification-rate-limit
4113-
[alertmanager_email_notification_rate_limit: <float> | default = 0]
4114-
4115-
# Per-user burst size for email notifications. If set to 0, no email
4116-
# notifications will be sent, unless rate-limit is disabled, in which case all
4117-
# email notifications are allowed.
4118-
# CLI flag: -alertmanager.email-notification-burst-size
4119-
[alertmanager_email_notification_burst_size: <int> | default = 1]
4110+
# Per-user rate limit for sending notifications from Alertmanager in
4111+
# notifications/sec. 0 = rate limit disabled. Negative value = no notifications
4112+
# are allowed.
4113+
# CLI flag: -alertmanager.notification-rate-limit
4114+
[alertmanager_notification_rate_limit: <float> | default = 0]
4115+
4116+
# Per-integration notification rate limits. Value is a map, where each key is
4117+
# integration name and value is a rate-limit (float). On command line, this map
4118+
# is given in JSON format. Rate limit has the same meaning as
4119+
# -alertmanager.notification-rate-limit, but only applies for specific
4120+
# integration. Allowed integration names: webhook, email, pagerduty, opsgenie,
4121+
# wechat, slack, victorops, pushover.
4122+
# CLI flag: -alertmanager.notification-rate-limit-per-integration
4123+
[alertmanager_notification_rate_limit_per_integration: <map of string to float64> | default = {}]
41204124
```
41214125

41224126
### `redis_config`

docs/configuration/v1-guarantees.md

+1
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,4 @@ Currently experimental features are:
7171
- `-ingester_stream_chunks_when_using_blocks` (boolean) field in runtime config file
7272
- Instance limits in ingester and distributor
7373
- Exemplar storage, currently in-memory only within the Ingester based on Prometheus exemplar storage (`-blocks-storage.tsdb.max-exemplars`)
74+
- Alertmanager: notification rate limits. (`-alertmanager.notification-rate-limit` and `-alertmanager.notification-rate-limit-per-integration`)

pkg/alertmanager/alertmanager.go

+10-7
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,11 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config, rawCfg s
346346
firewallDialer := util_net.NewFirewallDialer(newFirewallDialerConfigProvider(userID, am.cfg.Limits))
347347

348348
integrationsMap, err := buildIntegrationsMap(conf.Receivers, tmpl, firewallDialer, am.logger, func(integrationName string, notifier notify.Notifier) notify.Notifier {
349-
if integrationName == "email" && am.cfg.Limits != nil {
349+
if am.cfg.Limits != nil {
350350
rl := &tenantRateLimits{
351-
tenant: userID,
352-
limits: am.cfg.Limits,
351+
tenant: userID,
352+
limits: am.cfg.Limits,
353+
integration: integrationName,
353354
}
354355

355356
return newRateLimitedNotifier(notifier, rl, 10*time.Second, am.rateLimitedNotifications.WithLabelValues(integrationName))
@@ -507,6 +508,7 @@ func buildReceiverIntegrations(nc *config.Receiver, tmpl *template.Template, fir
507508
for i, c := range nc.PushoverConfigs {
508509
add("pushover", i, c, func(l log.Logger) (notify.Notifier, error) { return pushover.New(c, tmpl, l, httpOps...) })
509510
}
511+
// If we add support for more integrations, we need to add them to validation as well. See the allowedIntegrationNames variable in the validation package.
510512
if errs.Len() > 0 {
511513
return nil, &errs
512514
}
@@ -560,14 +562,15 @@ func (p firewallDialerConfigProvider) BlockPrivateAddresses() bool {
560562
}
561563

562564
type tenantRateLimits struct {
563-
tenant string
564-
limits Limits
565+
tenant string
566+
integration string
567+
limits Limits
565568
}
566569

567570
func (t *tenantRateLimits) RateLimit() rate.Limit {
568-
return t.limits.EmailNotificationRateLimit(t.tenant)
571+
return t.limits.NotificationRateLimit(t.tenant, t.integration)
569572
}
570573

571574
func (t *tenantRateLimits) Burst() int {
572-
return t.limits.EmailNotificationBurst(t.tenant)
575+
return t.limits.NotificationBurstSize(t.tenant, t.integration)
573576
}

pkg/alertmanager/multitenant.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -235,16 +235,17 @@ type Limits interface {
235235
// in the Alertmanager receivers for the given user.
236236
AlertmanagerReceiversBlockPrivateAddresses(user string) bool
237237

238-
// EmailNotificationRateLimit returns limit used by rate-limiter. If set to 0, no emails are allowed.
239-
// rate.Inf = all emails are allowed.
238+
// NotificationRateLimit returns the limit used by the rate-limiter for the given integration.
239+
// If set to 0, no notifications are allowed.
240+
// rate.Inf = all notifications are allowed.
240241
//
241-
// Note that when negative or zero values specified by user are translated to rate.Limit by Overrides,
242+
// Note that negative or zero values specified by the user are translated to rate.Limit by Overrides,
242243
// and may have different meaning there.
243-
EmailNotificationRateLimit(tenant string) rate.Limit
244+
NotificationRateLimit(tenant string, integration string) rate.Limit
244245

245-
// EmailNotificationBurst returns burst-size for rate limiter. If 0, no notifications are allowed except
246+
// NotificationBurstSize returns burst-size for rate limiter for given integration type. If 0, no notifications are allowed except
246247
// when limit == rate.Inf.
247-
EmailNotificationBurst(tenant string) int
248+
NotificationBurstSize(tenant string, integration string) int
248249
}
249250

250251
// A MultitenantAlertmanager manages Alertmanager instances for multiple

pkg/alertmanager/multitenant_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1952,10 +1952,10 @@ func (m mockAlertManagerLimits) AlertmanagerReceiversBlockPrivateAddresses(user
19521952
panic("implement me")
19531953
}
19541954

1955-
func (m mockAlertManagerLimits) EmailNotificationRateLimit(_ string) rate.Limit {
1955+
func (m mockAlertManagerLimits) NotificationRateLimit(_ string, integration string) rate.Limit {
19561956
return m.emailNotificationRateLimit
19571957
}
19581958

1959-
func (m mockAlertManagerLimits) EmailNotificationBurst(_ string) int {
1959+
func (m mockAlertManagerLimits) NotificationBurstSize(_ string, integration string) int {
19601960
return m.emailNotificationBurst
19611961
}

pkg/util/validation/limits.go

+57-12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"flag"
77
"math"
8+
"strings"
89
"time"
910

1011
"github.com/prometheus/common/model"
@@ -103,8 +104,8 @@ type Limits struct {
103104
AlertmanagerReceiversBlockPrivateAddresses bool `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`
104105

105106
// Alertmanager limits
106-
EmailNotificationRateLimit float64 `yaml:"alertmanager_email_notification_rate_limit" json:"alertmanager_email_notification_rate_limit"`
107-
EmailNotificationBurstSize int `yaml:"alertmanager_email_notification_burst_size" json:"alertmanager_email_notification_burst_size"`
107+
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
108+
NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`
108109
}
109110

110111
// RegisterFlags adds the flags required to config this to the given FlagSet
@@ -165,8 +166,13 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
165166
// Alertmanager.
166167
f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
167168
f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.")
168-
f.Float64Var(&l.EmailNotificationRateLimit, "alertmanager.email-notification-rate-limit", 0, "Per-user rate limit for sending email notifications from Alertmanager in emails/sec. 0 = rate limit disabled. Negative value = no emails are allowed.")
169-
f.IntVar(&l.EmailNotificationBurstSize, "alertmanager.email-notification-burst-size", 1, "Per-user burst size for email notifications. If set to 0, no email notifications will be sent, unless rate-limit is disabled, in which case all email notifications are allowed.")
169+
170+
f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")
171+
172+
if l.NotificationRateLimitPerIntegration == nil {
173+
l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
174+
}
175+
f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
170176
}
171177

172178
// Validate the limits config and returns an error if the validation
@@ -190,6 +196,8 @@ func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
190196
// During startup we won't have a default value so we don't want to overwrite them
191197
if defaultLimits != nil {
192198
*l = *defaultLimits
199+
// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
200+
l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
193201
}
194202
type plain Limits
195203
return unmarshal((*plain)(l))
@@ -202,12 +210,21 @@ func (l *Limits) UnmarshalJSON(data []byte) error {
202210
// behind type indirection.
203211
if defaultLimits != nil {
204212
*l = *defaultLimits
213+
// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
214+
l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
205215
}
206216

207217
type plain Limits
208218
return json.Unmarshal(data, (*plain)(l))
209219
}
210220

221+
func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) {
222+
l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults))
223+
for k, v := range defaults {
224+
l.NotificationRateLimitPerIntegration[k] = v
225+
}
226+
}
227+
211228
// When we load YAML from disk, we want the various per-customer limits
212229
// to default to any values specified on the command line, not default
213230
// command line values. This global contains those values. I (Tom) cannot
@@ -508,24 +525,52 @@ func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool
508525
return o.getOverridesForUser(user).AlertmanagerReceiversBlockPrivateAddresses
509526
}
510527

511-
func (o *Overrides) EmailNotificationRateLimit(user string) rate.Limit {
512-
l := o.getOverridesForUser(user).EmailNotificationRateLimit
528+
// Notification limits are special. The first limit found in the following order is returned:
529+
// 1. per-tenant limits for given integration
530+
// 2. default limits for given integration
531+
// 3. per-tenant limits
532+
// 4. default limits
533+
func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 {
534+
u := o.getOverridesForUser(user)
535+
if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok {
536+
return n
537+
}
538+
539+
return u.NotificationRateLimit
540+
}
541+
542+
func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit {
543+
l := o.getNotificationLimitForUser(user, integration)
513544
if l == 0 || math.IsInf(l, 1) {
514545
return rate.Inf // No rate limit.
515546
}
516547

517548
if l < 0 {
518-
l = 0 // No emails will be sent.
549+
l = 0 // No notifications will be sent.
519550
}
520551
return rate.Limit(l)
521552
}
522553

523-
func (o *Overrides) EmailNotificationBurst(user string) int {
524-
b := o.getOverridesForUser(user).EmailNotificationBurstSize
525-
if b < 0 {
526-
b = 0
554+
const maxInt = int(^uint(0) >> 1)
555+
556+
func (o *Overrides) NotificationBurstSize(user string, integration string) int {
557+
// Burst size is computed from rate limit. Rate limit is already normalized to [0, +inf], where 0 means that no notifications are allowed and +inf (rate.Inf) means that rate limiting is disabled.
558+
l := o.NotificationRateLimit(user, integration)
559+
if l == 0 {
560+
return 0
561+
}
562+
563+
// floats can be larger than max int. This also handles case where l == rate.Inf.
564+
if float64(l) >= float64(maxInt) {
565+
return maxInt
566+
}
567+
568+
// For limits in the range (0, 1), use a burst size of 1, i.e. allow a single notification every 1/limit seconds.
569+
if l < 1 {
570+
return 1
527571
}
528-
return b
572+
573+
return int(l)
529574
}
530575

531576
func (o *Overrides) getOverridesForUser(userID string) *Limits {

0 commit comments

Comments
 (0)