Skip to content

Commit b38abf6

Browse files
pierresouchaymkeeler
authored andcommitted
Avoid to taking into account wrong versions of protocols in Vsn. (#178)
* Avoid to take into account wrong versions of protocols in Vsn. On Consul, sometimes, nodes do send a pMin = pMan = 0 in Vsn This causes a corruption of the acceptable versions of protocol and thus requiring version = [0, 1]. After this corruption occurs, all new nodes cannot join anymore, it then force the restart of all Consul servers to resume normal operations. While not fixing the root cause, this patch discards alive nodes claiming version 0,0,0 and will avoid this breakage. See hashicorp/consul#3217 * Always set the Vsn when creating state, so race condition cannot happen * Do not move m.encodeBroadcastNotify(a.Node, aliveMsg, a, notify) since not needed * Test the bare minimum for size of Vsn Co-Authored-By: pierresouchay <[email protected]> * Fixed test TestMemberList_ProbeNode_Awareness_OldProtocol * Avoid to crash when len(Vsn) is incorrect and ignore the message when there is an Alive delegate
1 parent a9da52f commit b38abf6

File tree

4 files changed

+156
-77
lines changed

4 files changed

+156
-77
lines changed

memberlist.go

+11-10
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,15 @@ type Memberlist struct {
7272
logger *log.Logger
7373
}
7474

75+
// BuildVsnArray creates the array of Vsn
76+
func (conf *Config) BuildVsnArray() []uint8 {
77+
return []uint8{
78+
ProtocolVersionMin, ProtocolVersionMax, conf.ProtocolVersion,
79+
conf.DelegateProtocolMin, conf.DelegateProtocolMax,
80+
conf.DelegateProtocolVersion,
81+
}
82+
}
83+
7584
// newMemberlist creates the network listeners.
7685
// Does not schedule execution of background maintenance.
7786
func newMemberlist(conf *Config) (*Memberlist, error) {
@@ -402,11 +411,7 @@ func (m *Memberlist) setAlive() error {
402411
Addr: addr,
403412
Port: uint16(port),
404413
Meta: meta,
405-
Vsn: []uint8{
406-
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
407-
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
408-
m.config.DelegateProtocolVersion,
409-
},
414+
Vsn: m.config.BuildVsnArray(),
410415
}
411416
m.aliveNode(&a, nil, true)
412417
return nil
@@ -447,11 +452,7 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error {
447452
Addr: state.Addr,
448453
Port: state.Port,
449454
Meta: meta,
450-
Vsn: []uint8{
451-
ProtocolVersionMin, ProtocolVersionMax, m.config.ProtocolVersion,
452-
m.config.DelegateProtocolMin, m.config.DelegateProtocolMax,
453-
m.config.DelegateProtocolVersion,
454-
},
455+
Vsn: m.config.BuildVsnArray(),
455456
}
456457
notifyCh := make(chan struct{})
457458
m.aliveNode(&a, notifyCh, true)

net_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,10 @@ func TestSendMsg_Piggyback(t *testing.T) {
569569
Node: "rand",
570570
Addr: []byte{127, 0, 0, 255},
571571
Meta: nil,
572+
Vsn: []uint8{
573+
ProtocolVersionMin, ProtocolVersionMax, ProtocolVersionMin,
574+
1, 1, 1,
575+
},
572576
}
573577
m.encodeAndBroadcast("rand", aliveMsg, &a)
574578

state.go

+24-2
Original file line numberDiff line numberDiff line change
@@ -850,11 +850,26 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
850850
return
851851
}
852852

853+
if len(a.Vsn) >= 3 {
854+
pMin := a.Vsn[0]
855+
pMax := a.Vsn[1]
856+
pCur := a.Vsn[2]
857+
if pMin == 0 || pMax == 0 || pMin > pMax {
858+
m.logger.Printf("[WARN] memberlist: Ignoring an alive message for '%s' (%v:%d) because protocol version(s) are wrong: %d <= %d <= %d should be >0", a.Node, net.IP(a.Addr), a.Port, pMin, pCur, pMax)
859+
return
860+
}
861+
}
862+
853863
// Invoke the Alive delegate if any. This can be used to filter out
854864
// alive messages based on custom logic. For example, using a cluster name.
855865
// Using a merge delegate is not enough, as it is possible for passive
856866
// cluster merging to still occur.
857867
if m.config.Alive != nil {
868+
if len(a.Vsn) < 6 {
869+
m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s' (%v:%d) because Vsn is not present",
870+
a.Node, net.IP(a.Addr), a.Port)
871+
return
872+
}
858873
node := &Node{
859874
Name: a.Node,
860875
Addr: a.Addr,
@@ -886,6 +901,14 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
886901
},
887902
State: stateDead,
888903
}
904+
if len(a.Vsn) > 5 {
905+
state.PMin = a.Vsn[0]
906+
state.PMax = a.Vsn[1]
907+
state.PCur = a.Vsn[2]
908+
state.DMin = a.Vsn[3]
909+
state.DMax = a.Vsn[4]
910+
state.DCur = a.Vsn[5]
911+
}
889912

890913
// Add to map
891914
m.nodeMap[a.Node] = state
@@ -965,9 +988,8 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
965988
bytes.Equal(a.Vsn, versions) {
966989
return
967990
}
968-
969991
m.refute(state, a.Incarnation)
970-
m.logger.Printf("[WARN] memberlist: Refuting an alive message")
992+
m.logger.Printf("[WARN] memberlist: Refuting an alive message for '%s' (%v:%d) meta:(%v VS %v), vsn:(%v VS %v)", a.Node, net.IP(a.Addr), a.Port, a.Meta, state.Meta, a.Vsn, versions)
971993
} else {
972994
m.encodeBroadcastNotify(a.Node, aliveMsg, a, notify)
973995

0 commit comments

Comments
 (0)