Skip to content

Commit f16afd8

Browse files
authored
Alertmanager template limits. (#4223)
* More alertmanager template limits. Signed-off-by: Peter Štibraný <[email protected]> * Added PR number. Signed-off-by: Peter Štibraný <[email protected]>
1 parent 68048b7 commit f16afd8

File tree

7 files changed

+128
-6
lines changed

7 files changed

+128
-6
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* [FEATURE] Querier: Added new `-querier.max-fetched-series-per-query` flag. When Cortex is running with blocks storage, the max series per query limit is enforced in the querier and applies to unique series received from ingesters and store-gateway (long-term storage). #4179
1111
* [FEATURE] Alertmanager: Added rate limits to notifiers. Rate limits used by all integrations can be configured using `-alertmanager.notification-rate-limit`, while per-integration rate limits can be specified via the `-alertmanager.notification-rate-limit-per-integration` parameter. Both shared and per-integration limits can be overridden using the overrides mechanism. These limits are applied to individual (per-tenant) Alertmanagers. Rate-limited notifications are failed notifications. It is possible to monitor rate-limited notifications via the new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
1212
* [FEATURE] Alertmanager: Added `-alertmanager.max-config-size-bytes` limit to control size of configuration files that Cortex users can upload to Alertmanager via API. This limit is configurable per-tenant. #4201
13+
* [FEATURE] Alertmanager: Added `-alertmanager.max-templates-count` and `-alertmanager.max-template-size-bytes` options to control the number of templates and the size of each individual template uploaded to Alertmanager via the API. These limits are configurable per-tenant. #4223
1314
* [FEATURE] Added flag `-debug.block-profile-rate` to enable goroutine blocking events profiling. #4217
1415
* [ENHANCEMENT] Alertmanager: introduced new metrics to monitor operation when using `-alertmanager.sharding-enabled`: #4149
1516
* `cortex_alertmanager_state_fetch_replica_state_total`

docs/configuration/config-file-reference.md

+10
Original file line numberDiff line numberDiff line change
@@ -4143,6 +4143,16 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
41434143
# Alertmanager API. 0 = no limit.
41444144
# CLI flag: -alertmanager.max-config-size-bytes
41454145
[alertmanager_max_config_size_bytes: <int> | default = 0]
4146+
4147+
# Maximum number of templates in tenant's Alertmanager configuration uploaded
4148+
# via Alertmanager API. 0 = no limit.
4149+
# CLI flag: -alertmanager.max-templates-count
4150+
[alertmanager_max_templates_count: <int> | default = 0]
4151+
4152+
# Maximum size of single template in tenant's Alertmanager configuration
4153+
# uploaded via Alertmanager API. 0 = no limit.
4154+
# CLI flag: -alertmanager.max-template-size-bytes
4155+
[alertmanager_max_template_size_bytes: <int> | default = 0]
41464156
```
41474157

41484158
### `redis_config`

pkg/alertmanager/api.go

+17-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ const (
3434
errNoOrgID = "unable to determine the OrgID"
3535
errListAllUser = "unable to list the Alertmanager users"
3636
errConfigurationTooBig = "Alertmanager configuration is too big, limit: %d bytes"
37+
errTooManyTemplates = "too many templates in the configuration: %d (limit: %d)"
38+
errTemplateTooBig = "template %s is too big: %d bytes (limit: %d bytes)"
3739

3840
fetchConcurrency = 16
3941
)
@@ -133,7 +135,7 @@ func (am *MultitenantAlertmanager) SetUserConfig(w http.ResponseWriter, r *http.
133135
}
134136

135137
cfgDesc := alertspb.ToProto(cfg.AlertmanagerConfig, cfg.TemplateFiles, userID)
136-
if err := validateUserConfig(logger, cfgDesc); err != nil {
138+
if err := validateUserConfig(logger, cfgDesc, am.limits, userID); err != nil {
137139
level.Warn(logger).Log("msg", errValidatingConfig, "err", err.Error())
138140
http.Error(w, fmt.Sprintf("%s: %s", errValidatingConfig, err.Error()), http.StatusBadRequest)
139141
return
@@ -171,7 +173,7 @@ func (am *MultitenantAlertmanager) DeleteUserConfig(w http.ResponseWriter, r *ht
171173
}
172174

173175
// Partially copied from: https://github.com/prometheus/alertmanager/blob/8e861c646bf67599a1704fc843c6a94d519ce312/cli/check_config.go#L65-L96
174-
func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc) error {
176+
func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc, limits Limits, user string) error {
175177
// We don't have a valid use case for empty configurations. If a tenant does not have a
176178
// configuration set and issues a request to the Alertmanager, we'll a) upload an empty
177179
// config and b) immediately start an Alertmanager instance for them if a fallback
@@ -197,6 +199,19 @@ func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc) error {
197199
}
198200
}
199201

202+
// Check template limits.
203+
if l := limits.AlertmanagerMaxTemplatesCount(user); l > 0 && len(cfg.Templates) > l {
204+
return fmt.Errorf(errTooManyTemplates, len(cfg.Templates), l)
205+
}
206+
207+
if maxSize := limits.AlertmanagerMaxTemplateSize(user); maxSize > 0 {
208+
for _, tmpl := range cfg.Templates {
209+
if size := len(tmpl.GetBody()); size > maxSize {
210+
return fmt.Errorf(errTemplateTooBig, tmpl.GetFilename(), size, maxSize)
211+
}
212+
}
213+
}
214+
200215
// Validate template files.
201216
for _, tmpl := range cfg.Templates {
202217
if err := validateTemplateFilename(tmpl.Filename); err != nil {

pkg/alertmanager/api_test.go

+71-3
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ import (
3333

3434
func TestAMConfigValidationAPI(t *testing.T) {
3535
testCases := []struct {
36-
name string
37-
cfg string
38-
maxConfigSize int
36+
name string
37+
cfg string
38+
maxConfigSize int
39+
maxTemplates int
40+
maxTemplateSize int
3941

4042
response string
4143
err error
@@ -486,6 +488,70 @@ alertmanager_config: |
486488
maxConfigSize: 1000,
487489
err: nil,
488490
},
491+
{
492+
name: "templates limit reached",
493+
cfg: `
494+
alertmanager_config: |
495+
route:
496+
receiver: 'default-receiver'
497+
receivers:
498+
- name: default-receiver
499+
template_files:
500+
"t1.tmpl": "Some template"
501+
"t2.tmpl": "Some template"
502+
"t3.tmpl": "Some template"
503+
"t4.tmpl": "Some template"
504+
"t5.tmpl": "Some template"
505+
`,
506+
maxTemplates: 3,
507+
err: errors.Wrap(fmt.Errorf(errTooManyTemplates, 5, 3), "error validating Alertmanager config"),
508+
},
509+
{
510+
name: "templates limit not reached",
511+
cfg: `
512+
alertmanager_config: |
513+
route:
514+
receiver: 'default-receiver'
515+
receivers:
516+
- name: default-receiver
517+
template_files:
518+
"t1.tmpl": "Some template"
519+
"t2.tmpl": "Some template"
520+
"t3.tmpl": "Some template"
521+
"t4.tmpl": "Some template"
522+
"t5.tmpl": "Some template"
523+
`,
524+
maxTemplates: 10,
525+
err: nil,
526+
},
527+
{
528+
name: "template size limit reached",
529+
cfg: `
530+
alertmanager_config: |
531+
route:
532+
receiver: 'default-receiver'
533+
receivers:
534+
- name: default-receiver
535+
template_files:
536+
"t1.tmpl": "Very big template"
537+
`,
538+
maxTemplateSize: 5,
539+
err: errors.Wrap(fmt.Errorf(errTemplateTooBig, "t1.tmpl", 17, 5), "error validating Alertmanager config"),
540+
},
541+
{
542+
name: "template size limit ok",
543+
cfg: `
544+
alertmanager_config: |
545+
route:
546+
receiver: 'default-receiver'
547+
receivers:
548+
- name: default-receiver
549+
template_files:
550+
"t1.tmpl": "Very big template"
551+
`,
552+
maxTemplateSize: 20,
553+
err: nil,
554+
},
489555
}
490556

491557
limits := &mockAlertManagerLimits{}
@@ -497,6 +563,8 @@ alertmanager_config: |
497563
for _, tc := range testCases {
498564
t.Run(tc.name, func(t *testing.T) {
499565
limits.maxConfigSize = tc.maxConfigSize
566+
limits.maxTemplatesCount = tc.maxTemplates
567+
limits.maxSizeOfTemplate = tc.maxTemplateSize
500568

501569
req := httptest.NewRequest(http.MethodPost, "http://alertmanager/api/v1/alerts", bytes.NewReader([]byte(tc.cfg)))
502570
ctx := user.InjectOrgID(req.Context(), "testing")

pkg/alertmanager/multitenant.go

+6
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,12 @@ type Limits interface {
253253

254254
// AlertmanagerMaxConfigSize returns max size of configuration file that user is allowed to upload. If 0, there is no limit.
255255
AlertmanagerMaxConfigSize(tenant string) int
256+
257+
// AlertmanagerMaxTemplatesCount returns max number of templates that tenant can use in the configuration. 0 = no limit.
258+
AlertmanagerMaxTemplatesCount(tenant string) int
259+
260+
// AlertmanagerMaxTemplateSize returns max size (in bytes) of an individual template. 0 = no limit.
261+
AlertmanagerMaxTemplateSize(tenant string) int
256262
}
257263

258264
// A MultitenantAlertmanager manages Alertmanager instances for multiple

pkg/alertmanager/multitenant_test.go

+10
Original file line numberDiff line numberDiff line change
@@ -2022,12 +2022,22 @@ type mockAlertManagerLimits struct {
20222022
emailNotificationRateLimit rate.Limit
20232023
emailNotificationBurst int
20242024
maxConfigSize int
2025+
maxTemplatesCount int
2026+
maxSizeOfTemplate int
20252027
}
20262028

20272029
func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
20282030
return m.maxConfigSize
20292031
}
20302032

2033+
func (m *mockAlertManagerLimits) AlertmanagerMaxTemplatesCount(tenant string) int {
2034+
return m.maxTemplatesCount
2035+
}
2036+
2037+
func (m *mockAlertManagerLimits) AlertmanagerMaxTemplateSize(tenant string) int {
2038+
return m.maxSizeOfTemplate
2039+
}
2040+
20312041
func (m *mockAlertManagerLimits) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
20322042
panic("implement me")
20332043
}

pkg/util/validation/limits.go

+13-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ type Limits struct {
107107
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
108108
NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`
109109

110-
AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
110+
AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
111+
AlertmanagerMaxTemplatesCount int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
112+
AlertmanagerMaxTemplateSizeBytes int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
111113
}
112114

113115
// RegisterFlags adds the flags required to config this to the given FlagSet
@@ -177,6 +179,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
177179
}
178180
f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
179181
f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
182+
f.IntVar(&l.AlertmanagerMaxTemplatesCount, "alertmanager.max-templates-count", 0, "Maximum number of templates in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
183+
f.IntVar(&l.AlertmanagerMaxTemplateSizeBytes, "alertmanager.max-template-size-bytes", 0, "Maximum size of single template in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
180184
}
181185

182186
// Validate the limits config and returns an error if the validation
@@ -587,6 +591,14 @@ func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int {
587591
return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes
588592
}
589593

594+
func (o *Overrides) AlertmanagerMaxTemplatesCount(userID string) int {
595+
return o.getOverridesForUser(userID).AlertmanagerMaxTemplatesCount
596+
}
597+
598+
func (o *Overrides) AlertmanagerMaxTemplateSize(userID string) int {
599+
return o.getOverridesForUser(userID).AlertmanagerMaxTemplateSizeBytes
600+
}
601+
590602
func (o *Overrides) getOverridesForUser(userID string) *Limits {
591603
if o.tenantLimits != nil {
592604
l := o.tenantLimits.ByUserID(userID)

0 commit comments

Comments
 (0)