Skip to content

Fix -ruler.alertmanager-url flag #2989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -1011,12 +1011,12 @@ storage:
# CLI flag: -ruler.rule-path
[rule_path: <string> | default = "/rules"]

# Space-separated list of URL(s) of the Alertmanager(s) to send notifications
# Comma-separated list of URL(s) of the Alertmanager(s) to send notifications
# to. Each Alertmanager URL is treated as a separate group in the configuration.
# Multiple Alertmanagers in HA per group can be supported by using DNS
# resolution via -ruler.alertmanager-discovery.
# CLI flag: -ruler.alertmanager-url
[alertmanager_url: <list of string> | default = ]
[alertmanager_url: <string> | default = ""]

# Use DNS SRV records to discover Alertmanager hosts.
# CLI flag: -ruler.alertmanager-discovery
Expand Down Expand Up @@ -1152,7 +1152,7 @@ The `alertmanager_config` configures the Cortex alertmanager.

# Initial peers (may be repeated).
# CLI flag: -cluster.peer
[peers: <list of string> | default = ]
[peers: <list of string> | default = []]

# Time to wait between peers to send notifications.
# CLI flag: -cluster.peer-timeout
Expand Down Expand Up @@ -1888,7 +1888,7 @@ cassandra:
# If set, when authenticating with cassandra a custom authenticator will be
# expected during the handshake. This flag can be set multiple times.
# CLI flag: -cassandra.custom-authenticator
[custom_authenticators: <list of string> | default = ]
[custom_authenticators: <list of string> | default = []]

# Timeout when connecting to cassandra.
# CLI flag: -cassandra.timeout
Expand Down Expand Up @@ -2514,7 +2514,7 @@ The `memberlist_config` configures the Gossip memberlist.
# https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery
# for more details).
# CLI flag: -memberlist.join
[join_members: <list of string> | default = ]
[join_members: <list of string> | default = []]

# Min backoff duration to join other cluster members.
# CLI flag: -memberlist.min-join-backoff
Expand Down Expand Up @@ -2552,7 +2552,7 @@ The `memberlist_config` configures the Gossip memberlist.
# IP address to listen on for gossip messages. Multiple addresses may be
# specified. Defaults to 0.0.0.0
# CLI flag: -memberlist.bind-addr
[bind_addr: <list of string> | default = ]
[bind_addr: <list of string> | default = []]

# Port to listen on for gossip messages.
# CLI flag: -memberlist.bind-port
Expand Down Expand Up @@ -2602,7 +2602,7 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
# ingestion within the distributor and can be repeated in order to drop multiple
# labels.
# CLI flag: -distributor.drop-label
[drop_labels: <list of string> | default = ]
[drop_labels: <list of string> | default = []]

# Maximum length accepted for label names
# CLI flag: -validation.max-length-label-name
Expand Down
89 changes: 74 additions & 15 deletions integration/api_ruler_test.go → integration/ruler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main

import (
"path/filepath"
"strings"
"testing"

"github.com/prometheus/prometheus/pkg/labels"
Expand All @@ -20,22 +21,9 @@ func TestRulerAPI(t *testing.T) {
var (
namespaceOne = "test_/encoded_+namespace/?"
namespaceTwo = "test_/encoded_+namespace/?/two"

recordNode = yaml.Node{}
exprNode = yaml.Node{}
)
recordNode.SetString("test_rule")
exprNode.SetString("up")
ruleGroup := rulefmt.RuleGroup{
Name: "test_encoded_+\"+group_name/?",
Interval: 100,
Rules: []rulefmt.RuleNode{
{
Record: recordNode,
Expr: exprNode,
},
},
}
ruleGroup := createTestRuleGroup(t)

s, err := e2e.NewScenario(networkName)
require.NoError(t, err)
defer s.Close()
Expand Down Expand Up @@ -144,3 +132,74 @@ func TestRulerAPISingleBinary(t *testing.T) {
require.NoError(t, cortex.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"prometheus_engine_queries"}, e2e.WithLabelMatchers(
labels.MustNewMatcher(labels.MatchEqual, "engine", "ruler"))))
}

func TestRulerAlertmanager(t *testing.T) {
var namespaceOne = "test_/encoded_+namespace/?"
ruleGroup := createTestRuleGroup(t)

s, err := e2e.NewScenario(networkName)
require.NoError(t, err)
defer s.Close()

// Start dependencies.
dynamo := e2edb.NewDynamoDB()
minio := e2edb.NewMinio(9000, RulerConfigs["-ruler.storage.s3.buckets"])
require.NoError(t, s.StartAndWaitReady(minio, dynamo))

// Have at least one alertmanager configuration.
require.NoError(t, writeFileToSharedDir(s, "alertmanager_configs/user-1.yaml", []byte(cortexAlertmanagerUserConfigYaml)))

// Start Alertmanagers.
am1 := e2ecortex.NewAlertmanager("alertmanager1", mergeFlags(AlertmanagerFlags, AlertmanagerLocalFlags), "")
require.NoError(t, s.StartAndWaitReady(am1))
am2 := e2ecortex.NewAlertmanager("alertmanager2", mergeFlags(AlertmanagerFlags, AlertmanagerLocalFlags), "")
require.NoError(t, s.StartAndWaitReady(am2))

am1URL := "http://" + am1.HTTPEndpoint()
am2URL := "http://" + am2.HTTPEndpoint()

// Connect the ruler to Alertmanagers
configOverrides := map[string]string{
"-ruler.alertmanager-url": strings.Join([]string{am1URL, am2URL}, ","),
}

// Start Ruler.
require.NoError(t, writeFileToSharedDir(s, cortexSchemaConfigFile, []byte(cortexSchemaConfigYaml)))
ruler := e2ecortex.NewRuler("ruler", mergeFlags(ChunksStorageFlags, RulerConfigs, configOverrides), "")
require.NoError(t, s.StartAndWaitReady(ruler))

// Create a client with the ruler address configured
c, err := e2ecortex.NewClient("", "", "", ruler.HTTPEndpoint(), "user-1")
require.NoError(t, err)

// Set the rule group into the ruler
require.NoError(t, c.SetRuleGroup(ruleGroup, namespaceOne))

// Wait until the user manager is created
require.NoError(t, ruler.WaitSumMetrics(e2e.Equals(1), "cortex_ruler_managers_total"))

// Wait until we've discovered the alertmanagers.
require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"cortex_prometheus_notifications_alertmanagers_discovered"}, e2e.WaitMissingMetrics))
}

func createTestRuleGroup(t *testing.T) rulefmt.RuleGroup {
t.Helper()

var (
recordNode = yaml.Node{}
exprNode = yaml.Node{}
)

recordNode.SetString("test_rule")
exprNode.SetString("up")
return rulefmt.RuleGroup{
Name: "test_encoded_+\"+group_name/?",
Interval: 100,
Rules: []rulefmt.RuleNode{
{
Record: recordNode,
Expr: exprNode,
},
},
}
}
2 changes: 1 addition & 1 deletion pkg/compactor/compactor_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "compactor.ring.instance-interface", "Name of network interface to read address from.")
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "compactor.ring.instance-interface", "Name of network interface to read address from.")
f.StringVar(&cfg.InstanceAddr, "compactor.ring.instance-addr", "", "IP address to advertise in the ring.")
f.IntVar(&cfg.InstancePort, "compactor.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, "compactor.ring.instance-id", hostname, "Instance ID to register in the ring.")
Expand Down
2 changes: 1 addition & 1 deletion pkg/distributor/distributor_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "distributor.ring.instance-interface", "Name of network interface to read address from.")
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "distributor.ring.instance-interface", "Name of network interface to read address from.")
f.StringVar(&cfg.InstanceAddr, "distributor.ring.instance-addr", "", "IP address to advertise in the ring.")
f.IntVar(&cfg.InstancePort, "distributor.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, "distributor.ring.instance-id", hostname, "Instance ID to register in the ring.")
Expand Down
2 changes: 1 addition & 1 deletion pkg/ring/kv/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type Client struct {
// RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet.
func (cfg *Config) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
cfg.Endpoints = []string{}
f.Var((*flagext.Strings)(&cfg.Endpoints), prefix+"etcd.endpoints", "The etcd endpoints to connect to.")
f.Var((*flagext.StringSlice)(&cfg.Endpoints), prefix+"etcd.endpoints", "The etcd endpoints to connect to.")
f.DurationVar(&cfg.DialTimeout, prefix+"etcd.dial-timeout", 10*time.Second, "The dial timeout for the etcd connection.")
f.IntVar(&cfg.MaxRetries, prefix+"etcd.max-retries", 10, "The maximum number of retries to do for failed ops.")
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/ring/lifecycler.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (cfg *LifecyclerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.Flag
}

cfg.InfNames = []string{"eth0", "en0"}
f.Var((*flagext.Strings)(&cfg.InfNames), prefix+"lifecycler.interface", "Name of network interface to read address from.")
f.Var((*flagext.StringSlice)(&cfg.InfNames), prefix+"lifecycler.interface", "Name of network interface to read address from.")
f.StringVar(&cfg.Addr, prefix+"lifecycler.addr", "", "IP address to advertise in consul.")
f.IntVar(&cfg.Port, prefix+"lifecycler.port", 0, "port to advertise in consul (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.ID, prefix+"lifecycler.ID", hostname, "ID to register into consul.")
Expand Down
6 changes: 4 additions & 2 deletions pkg/ruler/notifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net/url"
"regexp"
"strings"
"sync"

gklog "github.com/go-kit/kit/log"
Expand Down Expand Up @@ -75,10 +76,11 @@ func (rn *rulerNotifier) stop() {
// Builds a Prometheus config.Config from a ruler.Config with just the required
// options to configure notifications to Alertmanager.
func buildNotifierConfig(rulerConfig *Config) (*config.Config, error) {
validURLs := make([]*url.URL, 0, len(rulerConfig.AlertmanagerURL))
amURLs := strings.Split(rulerConfig.AlertmanagerURL, ",")
validURLs := make([]*url.URL, 0, len(amURLs))

srvDNSregexp := regexp.MustCompile(`^_.+._.+`)
for _, h := range rulerConfig.AlertmanagerURL {
for _, h := range amURLs {
url, err := url.Parse(h)
if err != nil {
return nil, err
Expand Down
20 changes: 6 additions & 14 deletions pkg/ruler/notifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func TestBuildNotifierConfig(t *testing.T) {
{
name: "with a single URL and no service discovery",
cfg: &Config{
AlertmanagerURL: []string{"http://alertmanager.default.svc.cluster.local/alertmanager"},
AlertmanagerURL: "http://alertmanager.default.svc.cluster.local/alertmanager",
},
ncfg: &config.Config{
AlertingConfig: config.AlertingConfig{
Expand All @@ -49,7 +49,7 @@ func TestBuildNotifierConfig(t *testing.T) {
{
name: "with a single URL and service discovery",
cfg: &Config{
AlertmanagerURL: []string{"http://_http._tcp.alertmanager.default.svc.cluster.local/alertmanager"},
AlertmanagerURL: "http://_http._tcp.alertmanager.default.svc.cluster.local/alertmanager",
AlertmanagerDiscovery: true,
AlertmanagerRefreshInterval: time.Duration(60),
},
Expand All @@ -74,18 +74,15 @@ func TestBuildNotifierConfig(t *testing.T) {
{
name: "with service discovery and an invalid URL",
cfg: &Config{
AlertmanagerURL: []string{"http://_http.default.svc.cluster.local/alertmanager"},
AlertmanagerURL: "http://_http.default.svc.cluster.local/alertmanager",
AlertmanagerDiscovery: true,
},
err: fmt.Errorf("when alertmanager-discovery is on, host name must be of the form _portname._tcp.service.fqdn (is \"alertmanager.default.svc.cluster.local\")"),
},
{
name: "with multiple URLs and no service discovery",
cfg: &Config{
AlertmanagerURL: []string{
"http://alertmanager-0.default.svc.cluster.local/alertmanager",
"http://alertmanager-1.default.svc.cluster.local/alertmanager",
},
AlertmanagerURL: "http://alertmanager-0.default.svc.cluster.local/alertmanager,http://alertmanager-1.default.svc.cluster.local/alertmanager",
},
ncfg: &config.Config{
AlertingConfig: config.AlertingConfig{
Expand Down Expand Up @@ -113,10 +110,7 @@ func TestBuildNotifierConfig(t *testing.T) {
{
name: "with multiple URLs and service discovery",
cfg: &Config{
AlertmanagerURL: []string{
"http://_http._tcp.alertmanager-0.default.svc.cluster.local/alertmanager",
"http://_http._tcp.alertmanager-1.default.svc.cluster.local/alertmanager",
},
AlertmanagerURL: "http://_http._tcp.alertmanager-0.default.svc.cluster.local/alertmanager,http://_http._tcp.alertmanager-1.default.svc.cluster.local/alertmanager",
AlertmanagerDiscovery: true,
AlertmanagerRefreshInterval: time.Duration(60),
},
Expand Down Expand Up @@ -152,9 +146,7 @@ func TestBuildNotifierConfig(t *testing.T) {
{
name: "with Basic Authentication",
cfg: &Config{
AlertmanagerURL: []string{
"http://marco:[email protected]/alertmanager",
},
AlertmanagerURL: "http://marco:[email protected]/alertmanager",
},
ncfg: &config.Config{
AlertingConfig: config.AlertingConfig{
Expand Down
4 changes: 2 additions & 2 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ type Config struct {
RulePath string `yaml:"rule_path"`

// URL of the Alertmanager to send notifications to.
AlertmanagerURL flagext.StringSlice `yaml:"alertmanager_url"`
AlertmanagerURL string `yaml:"alertmanager_url"`
// Whether to use DNS SRV records to discover Alertmanager.
AlertmanagerDiscovery bool `yaml:"enable_alertmanager_discovery"`
// How long to wait between refreshing the list of Alertmanager based on DNS service discovery.
Expand Down Expand Up @@ -132,7 +132,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex.")
f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")

f.Var(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "Space-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover Alertmanager hosts.")
f.DurationVar(&cfg.AlertmanagerRefreshInterval, "ruler.alertmanager-refresh-interval", 1*time.Minute, "How long to wait between refreshing DNS resolutions of Alertmanager hosts.")
f.BoolVar(&cfg.AlertmanangerEnableV2API, "ruler.alertmanager-use-v2", false, "If enabled requests to Alertmanager will utilize the V2 API.")
Expand Down
2 changes: 1 addition & 1 deletion pkg/ruler/ruler_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface", "Name of network interface to read address from.")
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface", "Name of network interface to read address from.")
f.StringVar(&cfg.InstanceAddr, "ruler.ring.instance-addr", "", "IP address to advertise in the ring.")
f.IntVar(&cfg.InstancePort, "ruler.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, "ruler.ring.instance-id", hostname, "Instance ID to register in the ring.")
Expand Down
3 changes: 1 addition & 2 deletions pkg/ruler/ruler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ func TestNotifierSendsUserIDHeader(t *testing.T) {
cfg, cleanup := defaultRulerConfig(newMockRuleStore(nil))
defer cleanup()

err := cfg.AlertmanagerURL.Set(ts.URL)
require.NoError(t, err)
cfg.AlertmanagerURL = ts.URL
cfg.AlertmanagerDiscovery = false

r, rcleanup := newTestRuler(t, cfg)
Expand Down
2 changes: 1 addition & 1 deletion pkg/storegateway/gateway_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
f.Var((*flagext.Strings)(&cfg.InstanceInterfaceNames), "experimental.store-gateway.sharding-ring.instance-interface", "Name of network interface to read address from.")
f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "experimental.store-gateway.sharding-ring.instance-interface", "Name of network interface to read address from.")
f.StringVar(&cfg.InstanceAddr, "experimental.store-gateway.sharding-ring.instance-addr", "", "IP address to advertise in the ring.")
f.IntVar(&cfg.InstancePort, "experimental.store-gateway.sharding-ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, "experimental.store-gateway.sharding-ring.instance-id", hostname, "Instance ID to register in the ring.")
Expand Down
19 changes: 0 additions & 19 deletions pkg/util/flagext/strings.go

This file was deleted.

6 changes: 2 additions & 4 deletions pkg/util/flagext/stringslice.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package flagext

import (
"strings"
)
import "fmt"

// StringSlice is a slice of strings that implements flag.Value
type StringSlice []string

// String implements flag.Value
func (v StringSlice) String() string {
return strings.Join(v, " ")
return fmt.Sprintf("%s", []string(v))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unification of the flag extensions is basically that I kept the name StringSlice but "borrowed" the String casting function.

I like this because for empty values it renders [] instead of "" which seems to be more accurate.

}

// Set implements flag.Value
Expand Down