Skip to content

Commit f8b08a3

Browse files
authored
Expose default configuration values for memberlist. (#4276)
* Expose default configuration values for memberlist. Set the defaults for various memberlist configuration values based on the "Default LAN" configuration. The only result of this change is that the defaults are now visible and are in the documentation. This also means that if the default values change, then the changes are visible in the documentation, whereas before they would have gone unnoticed. To prevent this being a breaking change, the existing behaviour is retained, in case anyone is explicitly setting the values to zero and expecting the default to be used. Signed-off-by: Steve Simpson <[email protected]> * Remove use of zero value as default value indicator. Signed-off-by: Steve Simpson <[email protected]> * Review comments. Signed-off-by: Steve Simpson <[email protected]> * Review comments. Signed-off-by: Steve Simpson <[email protected]>
1 parent 95fedaa commit f8b08a3

File tree

6 files changed

+122
-119
lines changed

6 files changed

+122
-119
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@
1919
* [CHANGE] Change default value of `-server.grpc.keepalive.min-time-between-pings` from `5m` to `10s` and `-server.grpc.keepalive.ping-without-stream-allowed` to `true`. #4168
2020
* [CHANGE] Ingester: Change default value of `-ingester.active-series-metrics-enabled` to `true`. This incurs a small increase in memory usage, between 1.2% and 1.6% as measured on ingesters with 1.3M active series. #4257
2121
* [CHANGE] Dependency: update go-redis from v8.2.3 to v8.9.0. #4236
22+
* [CHANGE] Memberlist: Expose default configuration values to the command line options. Note that setting these explicitly to zero will no longer cause the default to be used. If the default is desired, then do not set the option. The following are affected: #4276
23+
- `-memberlist.stream-timeout`
24+
- `-memberlist.retransmit-factor`
25+
- `-memberlist.pullpush-interval`
26+
- `-memberlist.gossip-interval`
27+
- `-memberlist.gossip-nodes`
28+
- `-memberlist.gossip-to-dead-nodes-time`
29+
- `-memberlist.dead-node-reclaim-time`
2230
* [FEATURE] Querier: Added new `-querier.max-fetched-series-per-query` flag. When Cortex is running with blocks storage, the max series per query limit is enforced in the querier and applies to unique series received from ingesters and store-gateway (long-term storage). #4179
2331
* [FEATURE] Querier/Ruler: Added new `-querier.max-fetched-chunk-bytes-per-query` flag. When Cortex is running with blocks storage, the max chunk bytes limit is enforced in the querier and ruler and limits the size of all aggregated chunks returned from ingesters and storage as bytes for a query. #4216
2432
* [FEATURE] Alertmanager: support negative matchers, time-based muting - [upstream release notes](https://github.com/prometheus/alertmanager/releases/tag/v0.22.0). #4237

docs/configuration/config-file-reference.md

+12-13
Original file line numberDiff line numberDiff line change
@@ -3761,33 +3761,32 @@ The `memberlist_config` configures the Gossip memberlist.
37613761
[randomize_node_name: <boolean> | default = true]
37623762
37633763
# The timeout for establishing a connection with a remote node, and for
3764-
# read/write operations. Uses memberlist LAN defaults if 0.
3764+
# read/write operations.
37653765
# CLI flag: -memberlist.stream-timeout
3766-
[stream_timeout: <duration> | default = 0s]
3766+
[stream_timeout: <duration> | default = 10s]
37673767
37683768
# Multiplication factor used when sending out messages (factor * log(N+1)).
37693769
# CLI flag: -memberlist.retransmit-factor
3770-
[retransmit_factor: <int> | default = 0]
3770+
[retransmit_factor: <int> | default = 4]
37713771
3772-
# How often to use pull/push sync. Uses memberlist LAN defaults if 0.
3772+
# How often to use pull/push sync.
37733773
# CLI flag: -memberlist.pullpush-interval
3774-
[pull_push_interval: <duration> | default = 0s]
3774+
[pull_push_interval: <duration> | default = 30s]
37753775
3776-
# How often to gossip. Uses memberlist LAN defaults if 0.
3776+
# How often to gossip.
37773777
# CLI flag: -memberlist.gossip-interval
3778-
[gossip_interval: <duration> | default = 0s]
3778+
[gossip_interval: <duration> | default = 200ms]
37793779
3780-
# How many nodes to gossip to. Uses memberlist LAN defaults if 0.
3780+
# How many nodes to gossip to.
37813781
# CLI flag: -memberlist.gossip-nodes
3782-
[gossip_nodes: <int> | default = 0]
3782+
[gossip_nodes: <int> | default = 3]
37833783
37843784
# How long to keep gossiping to dead nodes, to give them chance to refute their
3785-
# death. Uses memberlist LAN defaults if 0.
3785+
# death.
37863786
# CLI flag: -memberlist.gossip-to-dead-nodes-time
3787-
[gossip_to_dead_nodes_time: <duration> | default = 0s]
3787+
[gossip_to_dead_nodes_time: <duration> | default = 30s]
37883788
3789-
# How soon can dead node's name be reclaimed with new address. Defaults to 0,
3790-
# which is disabled.
3789+
# How soon can dead node's name be reclaimed with new address. 0 to disable.
37913790
# CLI flag: -memberlist.dead-node-reclaim-time
37923791
[dead_node_reclaim_time: <duration> | default = 0s]
37933792

pkg/cortex/cortex.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) {
170170
c.Alertmanager.RegisterFlags(f)
171171
c.AlertmanagerStorage.RegisterFlags(f)
172172
c.RuntimeConfig.RegisterFlags(f)
173-
c.MemberlistKV.RegisterFlags(f, "")
173+
c.MemberlistKV.RegisterFlags(f)
174174
c.QueryScheduler.RegisterFlags(f)
175175

176176
// These don't seem to have a home.

pkg/ring/kv/memberlist/kv_init_service_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77

88
"github.com/hashicorp/memberlist"
99
"github.com/stretchr/testify/require"
10+
11+
"github.com/cortexproject/cortex/pkg/util/flagext"
1012
)
1113

1214
func TestPage(t *testing.T) {
@@ -53,6 +55,7 @@ func TestPage(t *testing.T) {
5355

5456
func TestStop(t *testing.T) {
5557
var cfg KVConfig
58+
flagext.DefaultValues(&cfg)
5659
kvinit := NewKVInitService(&cfg, nil)
5760
require.NoError(t, kvinit.stopping(nil))
5861
}

pkg/ring/kv/memberlist/memberlist_client.go

+27-30
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,14 @@ type KVConfig struct {
165165
}
166166

167167
// RegisterFlagsWithPrefix registers flags, prepending prefix to each flag name.
168-
func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
168+
func (cfg *KVConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
169+
mlDefaults := defaultMemberlistConfig()
170+
169171
// "Defaults to hostname" -- memberlist sets it to hostname by default.
170172
f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", "", "Name of the node in memberlist cluster. Defaults to hostname.") // memberlist.DefaultLANConfig will put hostname here.
171173
f.BoolVar(&cfg.RandomizeNodeName, prefix+"memberlist.randomize-node-name", true, "Add random suffix to the node name.")
172-
f.DurationVar(&cfg.StreamTimeout, prefix+"memberlist.stream-timeout", 0, "The timeout for establishing a connection with a remote node, and for read/write operations. Uses memberlist LAN defaults if 0.")
173-
f.IntVar(&cfg.RetransmitMult, prefix+"memberlist.retransmit-factor", 0, "Multiplication factor used when sending out messages (factor * log(N+1)).")
174+
f.DurationVar(&cfg.StreamTimeout, prefix+"memberlist.stream-timeout", mlDefaults.TCPTimeout, "The timeout for establishing a connection with a remote node, and for read/write operations.")
175+
f.IntVar(&cfg.RetransmitMult, prefix+"memberlist.retransmit-factor", mlDefaults.RetransmitMult, "Multiplication factor used when sending out messages (factor * log(N+1)).")
174176
f.Var(&cfg.JoinMembers, prefix+"memberlist.join", "Other cluster members to join. Can be specified multiple times. It can be an IP, hostname or an entry specified in the DNS Service Discovery format (see https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery for more details).")
175177
f.DurationVar(&cfg.MinJoinBackoff, prefix+"memberlist.min-join-backoff", 1*time.Second, "Min backoff duration to join other cluster members.")
176178
f.DurationVar(&cfg.MaxJoinBackoff, prefix+"memberlist.max-join-backoff", 1*time.Minute, "Max backoff duration to join other cluster members.")
@@ -179,16 +181,20 @@ func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
179181
f.DurationVar(&cfg.RejoinInterval, prefix+"memberlist.rejoin-interval", 0, "If not 0, how often to rejoin the cluster. Occasional rejoin can help to fix the cluster split issue, and is harmless otherwise. For example when using only few components as a seed nodes (via -memberlist.join), then it's recommended to use rejoin. If -memberlist.join points to dynamic service that resolves to all gossiping nodes (eg. Kubernetes headless service), then rejoin is not needed.")
180182
f.DurationVar(&cfg.LeftIngestersTimeout, prefix+"memberlist.left-ingesters-timeout", 5*time.Minute, "How long to keep LEFT ingesters in the ring.")
181183
f.DurationVar(&cfg.LeaveTimeout, prefix+"memberlist.leave-timeout", 5*time.Second, "Timeout for leaving memberlist cluster.")
182-
f.DurationVar(&cfg.GossipInterval, prefix+"memberlist.gossip-interval", 0, "How often to gossip. Uses memberlist LAN defaults if 0.")
183-
f.IntVar(&cfg.GossipNodes, prefix+"memberlist.gossip-nodes", 0, "How many nodes to gossip to. Uses memberlist LAN defaults if 0.")
184-
f.DurationVar(&cfg.PushPullInterval, prefix+"memberlist.pullpush-interval", 0, "How often to use pull/push sync. Uses memberlist LAN defaults if 0.")
185-
f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", 0, "How long to keep gossiping to dead nodes, to give them chance to refute their death. Uses memberlist LAN defaults if 0.")
186-
f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", 0, "How soon can dead node's name be reclaimed with new address. Defaults to 0, which is disabled.")
184+
f.DurationVar(&cfg.GossipInterval, prefix+"memberlist.gossip-interval", mlDefaults.GossipInterval, "How often to gossip.")
185+
f.IntVar(&cfg.GossipNodes, prefix+"memberlist.gossip-nodes", mlDefaults.GossipNodes, "How many nodes to gossip to.")
186+
f.DurationVar(&cfg.PushPullInterval, prefix+"memberlist.pullpush-interval", mlDefaults.PushPullInterval, "How often to use pull/push sync.")
187+
f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", mlDefaults.GossipToTheDeadTime, "How long to keep gossiping to dead nodes, to give them chance to refute their death.")
188+
f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", mlDefaults.DeadNodeReclaimTime, "How soon can dead node's name be reclaimed with new address. 0 to disable.")
187189
f.IntVar(&cfg.MessageHistoryBufferBytes, prefix+"memberlist.message-history-buffer-bytes", 0, "How much space to use for keeping received and sent messages in memory for troubleshooting (two buffers). 0 to disable.")
188190

189191
cfg.TCPTransport.RegisterFlags(f, prefix)
190192
}
191193

194+
func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet) {
195+
cfg.RegisterFlagsWithPrefix(f, "")
196+
}
197+
192198
func generateRandomSuffix() string {
193199
suffix := make([]byte, 4)
194200
_, err := rand.Read(suffix)
@@ -345,36 +351,27 @@ func NewKV(cfg KVConfig, logger log.Logger) *KV {
345351
return mlkv
346352
}
347353

354+
func defaultMemberlistConfig() *memberlist.Config {
355+
return memberlist.DefaultLANConfig()
356+
}
357+
348358
func (m *KV) buildMemberlistConfig() (*memberlist.Config, error) {
349359
tr, err := NewTCPTransport(m.cfg.TCPTransport, m.logger)
350360
if err != nil {
351361
return nil, fmt.Errorf("failed to create transport: %v", err)
352362
}
353363

354-
mlCfg := memberlist.DefaultLANConfig()
364+
mlCfg := defaultMemberlistConfig()
355365
mlCfg.Delegate = m
356366

357-
if m.cfg.StreamTimeout != 0 {
358-
mlCfg.TCPTimeout = m.cfg.StreamTimeout
359-
}
360-
if m.cfg.RetransmitMult != 0 {
361-
mlCfg.RetransmitMult = m.cfg.RetransmitMult
362-
}
363-
if m.cfg.PushPullInterval != 0 {
364-
mlCfg.PushPullInterval = m.cfg.PushPullInterval
365-
}
366-
if m.cfg.GossipInterval != 0 {
367-
mlCfg.GossipInterval = m.cfg.GossipInterval
368-
}
369-
if m.cfg.GossipNodes != 0 {
370-
mlCfg.GossipNodes = m.cfg.GossipNodes
371-
}
372-
if m.cfg.GossipToTheDeadTime > 0 {
373-
mlCfg.GossipToTheDeadTime = m.cfg.GossipToTheDeadTime
374-
}
375-
if m.cfg.DeadNodeReclaimTime > 0 {
376-
mlCfg.DeadNodeReclaimTime = m.cfg.DeadNodeReclaimTime
377-
}
367+
mlCfg.TCPTimeout = m.cfg.StreamTimeout
368+
mlCfg.RetransmitMult = m.cfg.RetransmitMult
369+
mlCfg.PushPullInterval = m.cfg.PushPullInterval
370+
mlCfg.GossipInterval = m.cfg.GossipInterval
371+
mlCfg.GossipNodes = m.cfg.GossipNodes
372+
mlCfg.GossipToTheDeadTime = m.cfg.GossipToTheDeadTime
373+
mlCfg.DeadNodeReclaimTime = m.cfg.DeadNodeReclaimTime
374+
378375
if m.cfg.NodeName != "" {
379376
mlCfg.Name = m.cfg.NodeName
380377
}

0 commit comments

Comments
 (0)