Skip to content

Commit 40b623b

Browse files
authored
Expose configuration of memberlist packet compression. (#4346)
* Expose configuration of memberlist packet compression. Allows manually specifying whether memberlist should compress packets via a new configuration flag: `-memberlist.compression-enabled`. This typically has little benefit for Cortex: the ring state messages are already compressed with Snappy, so the second layer of compression does not achieve any additional saving. It's not clear-cut whether there might still be some benefit for internal memberlist messages; this needs to be evaluated in an environment of some reasonable scale. Signed-off-by: Steve Simpson <[email protected]> * Review comments. Signed-off-by: Steve Simpson <[email protected]> * Review comments. Signed-off-by: Steve Simpson <[email protected]>
1 parent 6b8bd5a commit 40b623b

File tree

4 files changed

+27
-11
lines changed

4 files changed

+27
-11
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* `-compactor.ring.heartbeat-period`
2323
* `-store-gateway.sharding-ring.heartbeat-period`
2424
* [ENHANCEMENT] Memberlist: optimized receive path for processing ring state updates, to help reduce CPU utilization in large clusters. #4345
25+
* [ENHANCEMENT] Memberlist: expose configuration of memberlist packet compression via `-memberlist.compression-enabled`. #4346
2526
* [BUGFIX] HA Tracker: when cleaning up obsolete elected replicas from KV store, tracker didn't update number of cluster per user correctly. #4336
2627

2728
## 1.10.0-rc.0 / 2021-06-28

docs/configuration/config-file-reference.md

+5
Original file line numberDiff line numberDiff line change
@@ -3796,6 +3796,11 @@ The `memberlist_config` configures the Gossip memberlist.
37963796
# CLI flag: -memberlist.dead-node-reclaim-time
37973797
[dead_node_reclaim_time: <duration> | default = 0s]
37983798
3799+
# Enable message compression. This can be used to reduce bandwidth usage at the
3800+
# cost of slightly more CPU utilization.
3801+
# CLI flag: -memberlist.compression-enabled
3802+
[compression_enabled: <boolean> | default = true]
3803+
37993804
# Other cluster members to join. Can be specified multiple times. It can be an
38003805
# IP, hostname or an entry specified in the DNS Service Discovery format (see
38013806
# https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery

integration/integration_memberlist_single_binary_test.go

+18-11
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,21 @@ import (
2323

2424
func TestSingleBinaryWithMemberlist(t *testing.T) {
2525
t.Run("default", func(t *testing.T) {
26-
testSingleBinaryEnv(t, false)
26+
testSingleBinaryEnv(t, false, nil)
2727
})
2828

2929
t.Run("tls", func(t *testing.T) {
30-
testSingleBinaryEnv(t, true)
30+
testSingleBinaryEnv(t, true, nil)
31+
})
32+
33+
t.Run("compression-disabled", func(t *testing.T) {
34+
testSingleBinaryEnv(t, false, map[string]string{
35+
"-memberlist.compression-enabled": "false",
36+
})
3137
})
3238
}
3339

34-
func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) {
40+
func testSingleBinaryEnv(t *testing.T, tlsEnabled bool, flags map[string]string) {
3541
s, err := e2e.NewScenario(networkName)
3642
require.NoError(t, err)
3743
defer s.Close()
@@ -65,13 +71,13 @@ func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) {
6571
filepath.Join(s.SharedDir(), clientKeyFile),
6672
))
6773

68-
cortex1 = newSingleBinary("cortex-1", memberlistDNS, "")
69-
cortex2 = newSingleBinary("cortex-2", memberlistDNS, networkName+"-cortex-1:8000")
70-
cortex3 = newSingleBinary("cortex-3", memberlistDNS, networkName+"-cortex-1:8000")
74+
cortex1 = newSingleBinary("cortex-1", memberlistDNS, "", flags)
75+
cortex2 = newSingleBinary("cortex-2", memberlistDNS, networkName+"-cortex-1:8000", flags)
76+
cortex3 = newSingleBinary("cortex-3", memberlistDNS, networkName+"-cortex-1:8000", flags)
7177
} else {
72-
cortex1 = newSingleBinary("cortex-1", "", "")
73-
cortex2 = newSingleBinary("cortex-2", "", networkName+"-cortex-1:8000")
74-
cortex3 = newSingleBinary("cortex-3", "", networkName+"-cortex-1:8000")
78+
cortex1 = newSingleBinary("cortex-1", "", "", flags)
79+
cortex2 = newSingleBinary("cortex-2", "", networkName+"-cortex-1:8000", flags)
80+
cortex3 = newSingleBinary("cortex-3", "", networkName+"-cortex-1:8000", flags)
7581
}
7682

7783
// start cortex-1 first, as cortex-2 and cortex-3 both connect to cortex-1
@@ -109,7 +115,7 @@ func testSingleBinaryEnv(t *testing.T, tlsEnabled bool) {
109115
require.NoError(t, s.Stop(cortex3))
110116
}
111117

112-
func newSingleBinary(name string, servername string, join string) *e2ecortex.CortexService {
118+
func newSingleBinary(name string, servername string, join string, testFlags map[string]string) *e2ecortex.CortexService {
113119
flags := map[string]string{
114120
"-ingester.final-sleep": "0s",
115121
"-ingester.join-after": "0s", // join quickly
@@ -132,6 +138,7 @@ func newSingleBinary(name string, servername string, join string) *e2ecortex.Cor
132138
mergeFlags(
133139
ChunksStorageFlags(),
134140
flags,
141+
testFlags,
135142
getTLSFlagsWithPrefix("memberlist", servername, servername == ""),
136143
),
137144
"",
@@ -170,7 +177,7 @@ func TestSingleBinaryWithMemberlistScaling(t *testing.T) {
170177
if i > 0 {
171178
join = e2e.NetworkContainerHostPort(networkName, "cortex-1", 8000)
172179
}
173-
c := newSingleBinary(name, "", join)
180+
c := newSingleBinary(name, "", join, nil)
174181
require.NoError(t, s.StartAndWaitReady(c))
175182
instances = append(instances, c)
176183
}

pkg/ring/kv/memberlist/memberlist_client.go

+3
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ type KVConfig struct {
136136
GossipNodes int `yaml:"gossip_nodes"`
137137
GossipToTheDeadTime time.Duration `yaml:"gossip_to_dead_nodes_time"`
138138
DeadNodeReclaimTime time.Duration `yaml:"dead_node_reclaim_time"`
139+
EnableCompression bool `yaml:"compression_enabled"`
139140

140141
// List of members to join
141142
JoinMembers flagext.StringSlice `yaml:"join_members"`
@@ -187,6 +188,7 @@ func (cfg *KVConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
187188
f.DurationVar(&cfg.GossipToTheDeadTime, prefix+"memberlist.gossip-to-dead-nodes-time", mlDefaults.GossipToTheDeadTime, "How long to keep gossiping to dead nodes, to give them chance to refute their death.")
188189
f.DurationVar(&cfg.DeadNodeReclaimTime, prefix+"memberlist.dead-node-reclaim-time", mlDefaults.DeadNodeReclaimTime, "How soon can dead node's name be reclaimed with new address. 0 to disable.")
189190
f.IntVar(&cfg.MessageHistoryBufferBytes, prefix+"memberlist.message-history-buffer-bytes", 0, "How much space to use for keeping received and sent messages in memory for troubleshooting (two buffers). 0 to disable.")
191+
f.BoolVar(&cfg.EnableCompression, prefix+"memberlist.compression-enabled", mlDefaults.EnableCompression, "Enable message compression. This can be used to reduce bandwidth usage at the cost of slightly more CPU utilization.")
190192

191193
cfg.TCPTransport.RegisterFlags(f, prefix)
192194
}
@@ -380,6 +382,7 @@ func (m *KV) buildMemberlistConfig() (*memberlist.Config, error) {
380382
mlCfg.GossipNodes = m.cfg.GossipNodes
381383
mlCfg.GossipToTheDeadTime = m.cfg.GossipToTheDeadTime
382384
mlCfg.DeadNodeReclaimTime = m.cfg.DeadNodeReclaimTime
385+
mlCfg.EnableCompression = m.cfg.EnableCompression
383386

384387
if m.cfg.NodeName != "" {
385388
mlCfg.Name = m.cfg.NodeName

0 commit comments

Comments
 (0)