@@ -22,7 +22,8 @@ type Delegate interface {
22
22
NotifyHealth (OperatorHealthReply )
23
23
PromoteNonVoters (* Config , OperatorHealthReply ) ([]raft.Server , error )
24
24
Raft () * raft.Raft
25
- Serf () * serf.Serf
25
+ SerfLAN () * serf.Serf
26
+ SerfWAN () * serf.Serf
26
27
}
27
28
28
29
// Autopilot is a mechanism for automatically managing the Raft
@@ -182,7 +183,7 @@ func (a *Autopilot) pruneDeadServers() error {
182
183
183
184
// Failed servers are known to Serf and marked failed, and stale servers
184
185
// are known to Raft but not Serf.
185
- var failed []string
186
+ var failed []serf. Member
186
187
staleRaftServers := make (map [string ]raft.Server )
187
188
raftNode := a .delegate .Raft ()
188
189
future := raftNode .GetConfiguration ()
@@ -194,8 +195,8 @@ func (a *Autopilot) pruneDeadServers() error {
194
195
for _ , server := range raftConfig .Servers {
195
196
staleRaftServers [string (server .Address )] = server
196
197
}
197
-
198
- serfLAN := a .delegate .Serf ()
198
+ serfWAN := a . delegate . SerfWAN ()
199
+ serfLAN := a .delegate .SerfLAN ()
199
200
for _ , member := range serfLAN .Members () {
200
201
server , err := a .delegate .IsServer (member )
201
202
if err != nil {
@@ -214,8 +215,12 @@ func (a *Autopilot) pruneDeadServers() error {
214
215
if found && s .Suffrage == raft .Nonvoter {
215
216
a .logger .Printf ("[INFO] autopilot: Attempting removal of failed server node %q" , member .Name )
216
217
go serfLAN .RemoveFailedNode (member .Name )
218
+ if serfWAN != nil {
219
+ go serfWAN .RemoveFailedNode (member .Name )
220
+ }
217
221
} else {
218
- failed = append (failed , member .Name )
222
+ failed = append (failed , member )
223
+
219
224
}
220
225
}
221
226
}
@@ -231,8 +236,12 @@ func (a *Autopilot) pruneDeadServers() error {
231
236
peers := NumPeers (raftConfig )
232
237
if removalCount < peers / 2 {
233
238
for _ , node := range failed {
234
- a .logger .Printf ("[INFO] autopilot: Attempting removal of failed server node %q" , node )
235
- go serfLAN .RemoveFailedNode (node )
239
+ a .logger .Printf ("[INFO] autopilot: Attempting removal of failed server node %q" , node .Name )
240
+ go serfLAN .RemoveFailedNode (node .Name )
241
+ if serfWAN != nil {
242
+ go serfWAN .RemoveFailedNode (fmt .Sprintf ("%s.%s" , node .Name , node .Tags ["dc" ]))
243
+ }
244
+
236
245
}
237
246
238
247
minRaftProtocol , err := a .MinRaftProtocol ()
@@ -260,7 +269,7 @@ func (a *Autopilot) pruneDeadServers() error {
260
269
261
270
// MinRaftProtocol returns the lowest supported Raft protocol among alive servers
262
271
func (a * Autopilot ) MinRaftProtocol () (int , error ) {
263
- return minRaftProtocol (a .delegate .Serf ().Members (), a .delegate .IsServer )
272
+ return minRaftProtocol (a .delegate .SerfLAN ().Members (), a .delegate .IsServer )
264
273
}
265
274
266
275
func minRaftProtocol (members []serf.Member , serverFunc func (serf.Member ) (* ServerInfo , error )) (int , error ) {
@@ -369,7 +378,7 @@ func (a *Autopilot) updateClusterHealth() error {
369
378
// Get the serf members which are Consul servers
370
379
var serverMembers []serf.Member
371
380
serverMap := make (map [string ]* ServerInfo )
372
- for _ , member := range a .delegate .Serf ().Members () {
381
+ for _ , member := range a .delegate .SerfLAN ().Members () {
373
382
if member .Status == serf .StatusLeft {
374
383
continue
375
384
}
0 commit comments