Skip to content

Commit f15714b

Browse files
committed
Simplify rate limit config by using burst-size=rate limit.
Signed-off-by: Peter Štibraný <[email protected]>
1 parent 2c6db68 commit f15714b

File tree

8 files changed

+115
-138
lines changed

8 files changed

+115
-138
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
- `-alertmanager.receivers-firewall.block.cidr-networks` renamed to `-alertmanager.receivers-firewall-block-cidr-networks`
88
- `-alertmanager.receivers-firewall.block.private-addresses` renamed to `-alertmanager.receivers-firewall-block-private-addresses`
99
* [CHANGE] Change default value of `-server.grpc.keepalive.min-time-between-pings` to `10s` and `-server.grpc.keepalive.ping-without-stream-allowed` to `true`. #4168
10-
* [FEATURE] Alertmanager: Added rate-limits to notifiers. Rate limits used by all integrations can be configured using `-alertmanager.notification-limits.rate-limit` and `-alertmanager.notification-limits.burst-size`, while per-integration limits can be specified via `-alertmanager.notification-limits.per-integration` parameter. Both shared and per-integration limits can be overwritten using overrides mechanism. These limits are applied on individual (per-tenant) alertmanagers. Rate-limited notifications are failed notifications. It is possible to monitor rate-limited notifications via new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
10+
* [FEATURE] Alertmanager: Added rate limits to notifiers. The rate limit shared by all integrations can be configured using `-alertmanager.notification-rate-limit`, while per-integration rate limits can be specified via the `-alertmanager.notification-rate-limit-per-integration` parameter. Both shared and per-integration limits can be overridden using the overrides mechanism. These limits are applied on individual (per-tenant) alertmanagers. Rate-limited notifications are counted as failed notifications. It is possible to monitor rate-limited notifications via the new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
1111
* [ENHANCEMENT] Alertmanager: introduced new metrics to monitor operation when using `-alertmanager.sharding-enabled`: #4149
1212
* `cortex_alertmanager_state_fetch_replica_state_total`
1313
* `cortex_alertmanager_state_fetch_replica_state_failed_total`

docs/configuration/config-file-reference.md

+11-20
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ To specify which configuration file to load, pass the `-config.file` flag at the
2626
* `<prefix>`: a CLI flag prefix based on the context (look at the parent configuration block to see which CLI flags prefix should be used)
2727
* `<relabel_config>`: a [Prometheus relabeling configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
2828
* `<time>`: a timestamp, with available formats: `2006-01-20` (midnight, local timezone), `2006-01-20T15:04` (local timezone), and RFC 3339 formats: `2006-01-20T15:04:05Z` (UTC) or `2006-01-20T15:04:05+07:00` (explicit timezone)
29-
* `<limits_map>`: Map with integration names as keys, mapped to object with two fields: `rate_limit` and `burst_size`. If either of them is specified, this per-integration limit will take precedence over shared notification limit, even if other field is not specified (in which case, it defaults to 0!).
3029

3130
### Use environment variables in the configuration
3231

@@ -4108,28 +4107,20 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
41084107
# CLI flag: -alertmanager.receivers-firewall-block-private-addresses
41094108
[alertmanager_receivers_firewall_block_private_addresses: <boolean> | default = false]
41104109
4111-
alertmanager_notification_limits:
4112-
# Per-user rate limit for sending notifications from Alertmanager in
4113-
# notifications/sec. 0 = rate limit disabled. Negative value = no
4114-
# notifications are allowed.
4115-
# CLI flag: -alertmanager.notification-limits.rate-limit
4116-
[rate_limit: <float> | default = 0]
4110+
# Per-user rate limit for sending notifications from Alertmanager in
4111+
# notifications/sec. 0 = rate limit disabled. Negative value = no notifications
4112+
# are allowed.
4113+
# CLI flag: -alertmanager.notification-rate-limit
4114+
[alertmanager_notification_rate_limit: <float> | default = 0]
41174115
4118-
# Per-user burst size for notifications. If set to 0, no notifications will be
4119-
# sent, unless rate-limit is disabled, in which case all notifications are
4120-
# allowed.
4121-
# CLI flag: -alertmanager.notification-limits.burst-size
4122-
[burst_size: <int> | default = 1]
4123-
4124-
# Per-integration notification limits. Value is a map, where each key is
4125-
# integration name and value is an object with rate_limit and burst_size fields.
4126-
# On command line, this map is given in JSON format. Rate limit and burst size
4127-
# have the same meaning as -alertmanager.notification-limits.rate-limit and
4128-
# -alertmanager.notification-limits.burst-size, but only apply for specific
4116+
# Per-integration notification rate limits. Value is a map, where each key is
4117+
# integration name and value is a rate-limit (float). On command line, this map
4118+
# is given in JSON format. Rate limit has the same meaning as
4119+
# -alertmanager.notification-rate-limit, but only applies for specific
41294120
# integration. Allowed integration names: webhook, email, pagerduty, opsgenie,
41304121
# wechat, slack, victorops, pushover.
4131-
# CLI flag: -alertmanager.notification-limits.per-integration
4132-
[alertmanager_notification_limits_per_integration: <limits_map> | default = {}]
4122+
# CLI flag: -alertmanager.notification-rate-limit-per-integration
4123+
[alertmanager_notification_rate_limit_per_integration: <map of string to float64> | default = {}]
41334124
```
41344125

41354126
### `redis_config`

docs/configuration/config-file-reference.template

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ To specify which configuration file to load, pass the `-config.file` flag at the
2626
* `<prefix>`: a CLI flag prefix based on the context (look at the parent configuration block to see which CLI flags prefix should be used)
2727
* `<relabel_config>`: a [Prometheus relabeling configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config).
2828
* `<time>`: a timestamp, with available formats: `2006-01-20` (midnight, local timezone), `2006-01-20T15:04` (local timezone), and RFC 3339 formats: `2006-01-20T15:04:05Z` (UTC) or `2006-01-20T15:04:05+07:00` (explicit timezone)
29-
* `<limits_map>`: Map with integration names as keys, mapped to object with two fields: `rate_limit` and `burst_size`. If either of them is specified, this per-integration limit will take precedence over shared notification limit, even if other field is not specified (in which case, it defaults to 0!).
3029

3130
### Use environment variables in the configuration
3231

pkg/util/validation/limits.go

+33-25
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,8 @@ type Limits struct {
104104
AlertmanagerReceiversBlockPrivateAddresses bool `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`
105105

106106
// Alertmanager limits
107-
NotificationLimits NotificationLimits `yaml:"alertmanager_notification_limits" json:"alertmanager_notification_limits"`
108-
NotificationIntegrationLimits NotificationLimitsMap `yaml:"alertmanager_notification_limits_per_integration" json:"alertmanager_notification_limits_per_integration"`
109-
}
110-
111-
type NotificationLimits struct {
112-
RateLimit float64 `yaml:"rate_limit" json:"rate_limit"`
113-
BurstSize int `yaml:"burst_size" json:"burst_size"`
107+
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
108+
NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`
114109
}
115110

116111
// RegisterFlags adds the flags required to config this to the given FlagSet
@@ -172,13 +167,12 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
172167
f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
173168
f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.")
174169

175-
f.Float64Var(&l.NotificationLimits.RateLimit, "alertmanager.notification-limits.rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")
176-
f.IntVar(&l.NotificationLimits.BurstSize, "alertmanager.notification-limits.burst-size", 1, "Per-user burst size for notifications. If set to 0, no notifications will be sent, unless rate-limit is disabled, in which case all notifications are allowed.")
170+
f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")
177171

178-
if l.NotificationIntegrationLimits == nil {
179-
l.NotificationIntegrationLimits = NotificationLimitsMap{}
172+
if l.NotificationRateLimitPerIntegration == nil {
173+
l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
180174
}
181-
f.Var(&l.NotificationIntegrationLimits, "alertmanager.notification-limits.per-integration", "Per-integration notification limits. Value is a map, where each key is integration name and value is an object with rate_limit and burst_size fields. On command line, this map is given in JSON format. Rate limit and burst size have the same meaning as -alertmanager.notification-limits.rate-limit and -alertmanager.notification-limits.burst-size, but only apply for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
175+
f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
182176
}
183177

184178
// Validate the limits config and returns an error if the validation
@@ -203,7 +197,7 @@ func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
203197
if defaultLimits != nil {
204198
*l = *defaultLimits
205199
// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
206-
l.copyNotificationIntegrationLimits(defaultLimits.NotificationIntegrationLimits)
200+
l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
207201
}
208202
type plain Limits
209203
return unmarshal((*plain)(l))
@@ -217,17 +211,17 @@ func (l *Limits) UnmarshalJSON(data []byte) error {
217211
if defaultLimits != nil {
218212
*l = *defaultLimits
219213
// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
220-
l.copyNotificationIntegrationLimits(defaultLimits.NotificationIntegrationLimits)
214+
l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
221215
}
222216

223217
type plain Limits
224218
return json.Unmarshal(data, (*plain)(l))
225219
}
226220

227-
func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationLimitsMap) {
228-
l.NotificationIntegrationLimits = make(map[string]NotificationLimits, len(defaults))
221+
func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) {
222+
l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults))
229223
for k, v := range defaults {
230-
l.NotificationIntegrationLimits[k] = v
224+
l.NotificationRateLimitPerIntegration[k] = v
231225
}
232226
}
233227

@@ -536,17 +530,17 @@ func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool
536530
// 2. default limits for given integration
537531
// 3. per-tenant limits
538532
// 4. default limits
539-
func (o *Overrides) getNotificationLimitForUser(user, integration string) NotificationLimits {
533+
func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 {
540534
u := o.getOverridesForUser(user)
541-
if n, ok := u.NotificationIntegrationLimits[integration]; ok {
535+
if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok {
542536
return n
543537
}
544538

545-
return u.NotificationLimits
539+
return u.NotificationRateLimit
546540
}
547541

548542
func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit {
549-
l := o.getNotificationLimitForUser(user, integration).RateLimit
543+
l := o.getNotificationLimitForUser(user, integration)
550544
if l == 0 || math.IsInf(l, 1) {
551545
return rate.Inf // No rate limit.
552546
}
@@ -557,12 +551,26 @@ func (o *Overrides) NotificationRateLimit(user string, integration string) rate.
557551
return rate.Limit(l)
558552
}
559553

554+
const maxInt = int(^uint(0) >> 1)
555+
560556
func (o *Overrides) NotificationBurstSize(user string, integration string) int {
561-
b := o.getNotificationLimitForUser(user, integration).BurstSize
562-
if b < 0 {
563-
b = 0
557+
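// Burst size is computed from the rate limit. The rate limit returned by NotificationRateLimit is already normalized to [0, +inf]: a configured value of 0 (rate limit disabled) has been mapped to rate.Inf, so 0 here means that no notifications are allowed.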
558+
l := o.NotificationRateLimit(user, integration)
559+
if l == 0 {
560+
return 0
564561
}
565-
return b
562+
563+
// Floats can be larger than max int. This also handles the case where l == rate.Inf.
564+
if float64(l) >= float64(maxInt) {
565+
return maxInt
566+
}
567+
568+
// For values in (0, 1), use a burst size of 1 so that a single notification is still allowed every 1/limit seconds (truncating to int would yield 0).
569+
if l < 1 {
570+
return 1
571+
}
572+
573+
return int(l)
566574
}
567575

568576
func (o *Overrides) getOverridesForUser(userID string) *Limits {

pkg/util/validation/limits_test.go

+29-56
Original file line numberDiff line numberDiff line change
@@ -349,59 +349,47 @@ func TestAlertmanagerNotificationLimits(t *testing.T) {
349349
}{
350350
"no email specific limit": {
351351
inputYAML: `
352-
alertmanager_notification_limits:
353-
rate_limit: 100
354-
burst_size: 100
352+
alertmanager_notification_rate_limit: 100
355353
`,
356354
expectedRateLimit: 100,
357355
expectedBurstSize: 100,
358356
},
359357
"zero limit": {
360358
inputYAML: `
361-
alertmanager_notification_limits:
362-
rate_limit: 100
363-
burst_size: 100
364-
365-
alertmanager_notification_limits_per_integration:
366-
email:
367-
rate_limit: 0
368-
burst_size: 0
359+
alertmanager_notification_rate_limit: 100
360+
361+
alertmanager_notification_rate_limit_per_integration:
362+
email: 0
369363
`,
370364
expectedRateLimit: rate.Inf,
371-
expectedBurstSize: 0,
365+
expectedBurstSize: maxInt,
372366
},
373367

374368
"negative limit": {
375369
inputYAML: `
376-
alertmanager_notification_limits_per_integration:
377-
email:
378-
rate_limit: -10
379-
burst_size: 5
370+
alertmanager_notification_rate_limit_per_integration:
371+
email: -10
380372
`,
381373
expectedRateLimit: 0,
382-
expectedBurstSize: 5,
374+
expectedBurstSize: 0,
383375
},
384376

385377
"positive limit, negative burst": {
386378
inputYAML: `
387-
alertmanager_notification_limits_per_integration:
388-
email:
389-
rate_limit: 222
390-
burst_size: -1
379+
alertmanager_notification_rate_limit_per_integration:
380+
email: 222
391381
`,
392382
expectedRateLimit: 222,
393-
expectedBurstSize: 0,
383+
expectedBurstSize: 222,
394384
},
395385

396386
"infinte limit": {
397387
inputYAML: `
398-
alertmanager_notification_limits_per_integration:
399-
email:
400-
rate_limit: .inf
401-
burst_size: 50
388+
alertmanager_notification_rate_limit_per_integration:
389+
email: .inf
402390
`,
403391
expectedRateLimit: rate.Inf,
404-
expectedBurstSize: 50,
392+
expectedBurstSize: maxInt,
405393
},
406394
} {
407395
t.Run(name, func(t *testing.T) {
@@ -420,49 +408,35 @@ alertmanager_notification_limits_per_integration:
420408

421409
func TestAlertmanagerNotificationLimitsOverrides(t *testing.T) {
422410
baseYaml := `
423-
alertmanager_notification_limits:
424-
rate_limit: 5
425-
burst_size: 5
426-
427-
alertmanager_notification_limits_per_integration:
428-
email:
429-
rate_limit: 100
430-
burst_size: 100
411+
alertmanager_notification_rate_limit: 5
412+
413+
alertmanager_notification_rate_limit_per_integration:
414+
email: 100
431415
`
432416

433417
overrideGenericLimitsOnly := `
434418
testuser:
435-
alertmanager_notification_limits:
436-
rate_limit: 333
437-
burst_size: 333
419+
alertmanager_notification_rate_limit: 333
438420
`
439421

440422
overrideEmailLimits := `
441423
testuser:
442-
alertmanager_notification_limits_per_integration:
443-
email:
444-
rate_limit: 7777
445-
burst_size: 7777
424+
alertmanager_notification_rate_limit_per_integration:
425+
email: 7777
446426
`
447427

448428
overrideGenericLimitsAndEmailLimits := `
449429
testuser:
450-
alertmanager_notification_limits:
451-
rate_limit: 333
452-
burst_size: 333
430+
alertmanager_notification_rate_limit: 333
453431
454-
alertmanager_notification_limits_per_integration:
455-
email:
456-
rate_limit: 7777
457-
burst_size: 7777
432+
alertmanager_notification_rate_limit_per_integration:
433+
email: 7777
458434
`
459435

460436
differentUserOverride := `
461437
differentuser:
462438
alertmanager_notification_limits_per_integration:
463-
email:
464-
rate_limit: 500
465-
burst_size: 20
439+
email: 500
466440
`
467441

468442
for name, tc := range map[string]struct {
@@ -529,12 +503,11 @@ differentuser:
529503
testedIntegration: "email",
530504
overrides: `
531505
testuser:
532-
alertmanager_notification_limits_per_integration:
533-
email:
534-
rate_limit: 500
506+
alertmanager_notification_rate_limit_per_integration:
507+
email: 500
535508
`,
536509
expectedRateLimit: 500, // overridden
537-
expectedBurstSize: 0, // also overridden, but not visible
510+
expectedBurstSize: 500, // same as rate limit
538511
},
539512

540513
"different user override, pushover": {

pkg/util/validation/notifications_limit_flag.go

+12-12
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,32 @@ var allowedIntegrationNames = []string{
1313
"webhook", "email", "pagerduty", "opsgenie", "wechat", "slack", "victorops", "pushover",
1414
}
1515

16-
type NotificationLimitsMap map[string]NotificationLimits
16+
type NotificationRateLimitMap map[string]float64
1717

1818
// String implements flag.Value
19-
func (m NotificationLimitsMap) String() string {
20-
out, err := json.Marshal(map[string]NotificationLimits(m))
19+
func (m NotificationRateLimitMap) String() string {
20+
out, err := json.Marshal(map[string]float64(m))
2121
if err != nil {
2222
return fmt.Sprintf("failed to marshal: %v", err)
2323
}
2424
return string(out)
2525
}
2626

2727
// Set implements flag.Value
28-
func (m NotificationLimitsMap) Set(s string) error {
29-
newMap := map[string]NotificationLimits{}
28+
func (m NotificationRateLimitMap) Set(s string) error {
29+
newMap := map[string]float64{}
3030
return m.updateMap(json.Unmarshal([]byte(s), &newMap), newMap)
3131
}
3232

3333
// UnmarshalYAML implements yaml.Unmarshaler.
34-
func (m NotificationLimitsMap) UnmarshalYAML(unmarshal func(interface{}) error) error {
35-
newMap := map[string]NotificationLimits{}
34+
func (m NotificationRateLimitMap) UnmarshalYAML(unmarshal func(interface{}) error) error {
35+
newMap := map[string]float64{}
3636
return m.updateMap(unmarshal(newMap), newMap)
3737
}
3838

39-
func (m NotificationLimitsMap) updateMap(err error, newMap map[string]NotificationLimits) error {
40-
if err != nil {
41-
return err
39+
func (m NotificationRateLimitMap) updateMap(unmarshalErr error, newMap map[string]float64) error {
40+
if unmarshalErr != nil {
41+
return unmarshalErr
4242
}
4343

4444
for k, v := range newMap {
@@ -51,6 +51,6 @@ func (m NotificationLimitsMap) updateMap(err error, newMap map[string]Notificati
5151
}
5252

5353
// MarshalYAML implements yaml.Marshaler.
54-
func (m NotificationLimitsMap) MarshalYAML() (interface{}, error) {
55-
return map[string]NotificationLimits(m), nil
54+
func (m NotificationRateLimitMap) MarshalYAML() (interface{}, error) {
55+
return map[string]float64(m), nil
5656
}

0 commit comments

Comments
 (0)