Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a limit on the size of the configuration file that a tenant can upload to Alertmanager. #4201

Merged
merged 5 commits into from
May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* [CHANGE] Change default value of `-server.grpc.keepalive.min-time-between-pings` to `10s` and `-server.grpc.keepalive.ping-without-stream-allowed` to `true`. #4168
* [FEATURE] Querier: Added new `-querier.max-fetched-series-per-query` flag. When Cortex is running with blocks storage, the max series per query limit is enforced in the querier and applies to unique series received from ingesters and store-gateway (long-term storage). #4179
* [FEATURE] Alertmanager: Added rate-limits to notifiers. Rate limits used by all integrations can be configured using `-alertmanager.notification-rate-limit`, while per-integration rate limits can be specified via `-alertmanager.notification-rate-limit-per-integration` parameter. Both shared and per-integration limits can be overwritten using overrides mechanism. These limits are applied on individual (per-tenant) alertmanagers. Rate-limited notifications are failed notifications. It is possible to monitor rate-limited notifications via new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
* [FEATURE] Alertmanager: Added `-alertmanager.max-config-size-bytes` limit to control the size of configuration files that Cortex users can upload to Alertmanager via the API. This limit is configurable per-tenant. #4201
* [FEATURE] Added flag `-debug.block-profile-rate` to enable goroutine blocking events profiling. #4217
* [ENHANCEMENT] Alertmanager: introduced new metrics to monitor operation when using `-alertmanager.sharding-enabled`: #4149
* `cortex_alertmanager_state_fetch_replica_state_total`
Expand Down
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -4128,6 +4128,11 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
# wechat, slack, victorops, pushover.
# CLI flag: -alertmanager.notification-rate-limit-per-integration
[alertmanager_notification_rate_limit_per_integration: <map of string to float64> | default = {}]

# Maximum size of configuration file for Alertmanager that tenant can upload via
# Alertmanager API. 0 = no limit.
# CLI flag: -alertmanager.max-config-size-bytes
[alertmanager_max_config_size_bytes: <int> | default = 0]
```

### `redis_config`
Expand Down
21 changes: 20 additions & 1 deletion pkg/alertmanager/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package alertmanager
import (
"context"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
Expand Down Expand Up @@ -32,6 +33,7 @@ const (
errDeletingConfiguration = "unable to delete the Alertmanager config"
errNoOrgID = "unable to determine the OrgID"
errListAllUser = "unable to list the Alertmanager users"
errConfigurationTooBig = "Alertmanager configuration is too big, limit: %d bytes"

fetchConcurrency = 16
)
Expand Down Expand Up @@ -98,13 +100,30 @@ func (am *MultitenantAlertmanager) SetUserConfig(w http.ResponseWriter, r *http.
return
}

payload, err := ioutil.ReadAll(r.Body)
var input io.Reader
maxConfigSize := am.limits.AlertmanagerMaxConfigSize(userID)
if maxConfigSize > 0 {
// LimitReader will return EOF after reading specified number of bytes. To check if
// we have read too many bytes, allow one extra byte.
input = io.LimitReader(r.Body, int64(maxConfigSize)+1)
} else {
input = r.Body
}

payload, err := ioutil.ReadAll(input)
if err != nil {
level.Error(logger).Log("msg", errReadingConfiguration, "err", err.Error())
http.Error(w, fmt.Sprintf("%s: %s", errReadingConfiguration, err.Error()), http.StatusBadRequest)
return
}

if maxConfigSize > 0 && len(payload) > maxConfigSize {
msg := fmt.Sprintf(errConfigurationTooBig, maxConfigSize)
level.Warn(logger).Log("msg", msg)
http.Error(w, msg, http.StatusBadRequest)
return
}

cfg := &UserConfig{}
err = yaml.Unmarshal(payload, cfg)
if err != nil {
Expand Down
42 changes: 40 additions & 2 deletions pkg/alertmanager/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ import (

func TestAMConfigValidationAPI(t *testing.T) {
testCases := []struct {
name string
cfg string
name string
cfg string
maxConfigSize int

response string
err error
}{
Expand Down Expand Up @@ -452,14 +454,50 @@ template_files:
`,
err: fmt.Errorf(`error validating Alertmanager config: template: test.tmpl:1: function "invalid" not defined`),
},
{
name: "config too big",
cfg: `
alertmanager_config: |
route:
receiver: 'default-receiver'
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
group_by: [cluster, alertname]
receivers:
- name: default-receiver
`,
maxConfigSize: 10,
err: fmt.Errorf(errConfigurationTooBig, 10),
},
{
name: "config size OK",
cfg: `
alertmanager_config: |
route:
receiver: 'default-receiver'
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
group_by: [cluster, alertname]
receivers:
- name: default-receiver
`,
maxConfigSize: 1000,
err: nil,
},
}

limits := &mockAlertManagerLimits{}
am := &MultitenantAlertmanager{
store: prepareInMemoryAlertStore(),
logger: util_log.Logger,
limits: limits,
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
limits.maxConfigSize = tc.maxConfigSize

req := httptest.NewRequest(http.MethodPost, "http://alertmanager/api/v1/alerts", bytes.NewReader([]byte(tc.cfg)))
ctx := user.InjectOrgID(req.Context(), "testing")
w := httptest.NewRecorder()
Expand Down
3 changes: 3 additions & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@ type Limits interface {
// NotificationBurstSize returns burst-size for rate limiter for given integration type. If 0, no notifications are allowed except
// when limit == rate.Inf.
NotificationBurstSize(tenant string, integration string) int

// AlertmanagerMaxConfigSize returns the maximum size, in bytes, of the configuration file that the tenant is allowed to upload. If 0, there is no limit.
AlertmanagerMaxConfigSize(tenant string) int
}

// A MultitenantAlertmanager manages Alertmanager instances for multiple
Expand Down
15 changes: 10 additions & 5 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1867,7 +1867,7 @@ receivers:

reg := prometheus.NewPedanticRegistry()
cfg := mockAlertmanagerConfig(t)
am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, limits, log.NewNopLogger(), reg)
am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, &limits, log.NewNopLogger(), reg)
require.NoError(t, err)

err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
Expand Down Expand Up @@ -1942,20 +1942,25 @@ func (f *passthroughAlertmanagerClientPool) GetClientFor(addr string) (Client, e
// mockAlertManagerLimits is a test double for Alertmanager limits. Its fields
// hold the fixed values handed back by the accessor methods defined on it.
type mockAlertManagerLimits struct {
// emailNotificationRateLimit is the value returned by NotificationRateLimit.
emailNotificationRateLimit rate.Limit
// emailNotificationBurst is the value returned by NotificationBurstSize.
emailNotificationBurst int
// maxConfigSize is the value returned by AlertmanagerMaxConfigSize.
maxConfigSize int
}

func (m mockAlertManagerLimits) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
// AlertmanagerMaxConfigSize returns the mock's maxConfigSize field. The tenant
// argument is not consulted, so every tenant gets the same value.
func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
return m.maxConfigSize
}

// AlertmanagerReceiversBlockCIDRNetworks is an unimplemented stub on the mock:
// it always panics and never returns a value.
func (m *mockAlertManagerLimits) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
panic("implement me")
}

func (m mockAlertManagerLimits) AlertmanagerReceiversBlockPrivateAddresses(user string) bool {
// AlertmanagerReceiversBlockPrivateAddresses is an unimplemented stub on the
// mock: it always panics and never returns a value.
func (m *mockAlertManagerLimits) AlertmanagerReceiversBlockPrivateAddresses(user string) bool {
panic("implement me")
}

func (m mockAlertManagerLimits) NotificationRateLimit(_ string, integration string) rate.Limit {
// NotificationRateLimit returns the mock's emailNotificationRateLimit field.
// Neither the tenant nor the integration argument is consulted, so the same
// limit is returned for every integration.
func (m *mockAlertManagerLimits) NotificationRateLimit(_ string, integration string) rate.Limit {
return m.emailNotificationRateLimit
}

func (m mockAlertManagerLimits) NotificationBurstSize(_ string, integration string) int {
// NotificationBurstSize returns the mock's emailNotificationBurst field.
// Neither the tenant nor the integration argument is consulted, so the same
// burst size is returned for every integration.
func (m *mockAlertManagerLimits) NotificationBurstSize(_ string, integration string) int {
return m.emailNotificationBurst
}
8 changes: 7 additions & 1 deletion pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,10 @@ type Limits struct {
AlertmanagerReceiversBlockCIDRNetworks flagext.CIDRSliceCSV `yaml:"alertmanager_receivers_firewall_block_cidr_networks" json:"alertmanager_receivers_firewall_block_cidr_networks"`
AlertmanagerReceiversBlockPrivateAddresses bool `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`

// Alertmanager limits
NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
}

// RegisterFlags adds the flags required to config this to the given FlagSet
Expand Down Expand Up @@ -175,6 +176,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
}
f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
}

// Validate the limits config and returns an error if the validation
Expand Down Expand Up @@ -581,6 +583,10 @@ func (o *Overrides) NotificationBurstSize(user string, integration string) int {
return int(l)
}

// AlertmanagerMaxConfigSize returns the maximum size, in bytes, of the
// Alertmanager configuration that the given user may upload. The value is read
// from the limits resolved for userID; 0 means there is no limit.
func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int {
	userLimits := o.getOverridesForUser(userID)
	return userLimits.AlertmanagerMaxConfigSizeBytes
}

func (o *Overrides) getOverridesForUser(userID string) *Limits {
if o.tenantLimits != nil {
l := o.tenantLimits.ByUserID(userID)
Expand Down