@@ -913,22 +913,29 @@ func TestNRGNoResetOnAppendEntryResponse(t *testing.T) {
913
913
follower := rg .nonLeader ().node ().(* raft )
914
914
lsm := rg .leader ().(* stateAdder )
915
915
916
+ // Ensure all servers can get quorum on an initial set.
917
+ c .waitOnAllCurrent ()
918
+ lsm .proposeDelta (5 )
919
+ rg .waitOnTotal (t , 5 )
920
+
916
921
// Subscribe for append entries that aren't heartbeats and respond to
917
922
// each of them as though it's a non-success and with a higher term.
918
923
// The higher term in this case is what would cause the leader previously
919
924
// to reset the entire log which it shouldn't do.
925
+ var once bool
920
926
_ , err := nc .Subscribe (fmt .Sprintf (raftAppendSubj , "TEST" ), func (msg * nats.Msg ) {
921
- if ae , err := follower .decodeAppendEntry (msg .Data , nil , msg .Reply ); err == nil && len (ae .entries ) > 0 {
927
+ if ae , err := follower .decodeAppendEntry (msg .Data , nil , msg .Reply ); err == nil && len (ae .entries ) > 0 && ! once {
922
928
ar := newAppendEntryResponse (ae .term + 1 , ae .commit , follower .id , false )
923
929
require_NoError (t , msg .Respond (ar .encode (nil )))
930
+ once = true
924
931
}
925
932
})
926
933
require_NoError (t , err )
927
934
928
935
// Generate an append entry that the subscriber above can respond to.
929
936
c .waitOnAllCurrent ()
930
937
lsm .proposeDelta (5 )
931
- rg .waitOnTotal (t , 5 )
938
+ rg .waitOnTotal (t , 10 )
932
939
933
940
// The was-leader should now have stepped down, make sure that it
934
941
// didn't blow away its log in the process.
@@ -2363,6 +2370,105 @@ func TestNRGSignalLeadChangeFalseIfCampaignImmediately(t *testing.T) {
2363
2370
}
2364
2371
}
2365
2372
2373
+ func TestNRGClearWALOnStepDownWhenZeroCommits (t * testing.T ) {
2374
+ n , cleanup := initSingleMemRaftNode (t )
2375
+ defer cleanup ()
2376
+
2377
+ // Create a sample entry, the content doesn't matter, just that it's stored.
2378
+ esm := encodeStreamMsgAllowCompress ("foo" , "_INBOX.foo" , nil , nil , 0 , 0 , true )
2379
+ entries := []* Entry {newEntry (EntryNormal , esm )}
2380
+
2381
+ // Switch this node to leader, and send one entry. It will not get quorum.
2382
+ n .term ++
2383
+ n .switchToLeader ()
2384
+ require_Equal (t , n .term , 1 )
2385
+ require_Equal (t , n .pindex , 0 )
2386
+ n .sendAppendEntry (entries )
2387
+ require_Equal (t , n .pindex , 1 )
2388
+ require_Equal (t , n .pterm , 1 )
2389
+
2390
+ // Shouldn't have any commits up to this point.
2391
+ require_Equal (t , n .commit , 0 )
2392
+
2393
+ // If we now step down, we clear our WAL, we had no commits.
2394
+ n .stepdown (noLeader )
2395
+ require_Equal (t , n .commit , 0 )
2396
+ require_Equal (t , n .pindex , 0 )
2397
+ require_Equal (t , n .pterm , 0 )
2398
+ }
2399
+
2400
+ func TestNRGTruncateDownToPreviousTermOnStepDown (t * testing.T ) {
2401
+ tests := []struct {
2402
+ title string
2403
+ commit uint64
2404
+ }{
2405
+ // The current leader's term is fully removed. None of them had quorum.
2406
+ {title : "no-commits" , commit : 0 },
2407
+ {title : "half-previous-term" , commit : 1 },
2408
+ {title : "full-previous-term" , commit : 2 },
2409
+ // At least one entry in the leader's term had quorum, need to preserve all of them.
2410
+ {title : "half-leader-term" , commit : 3 },
2411
+ }
2412
+ for _ , test := range tests {
2413
+ t .Run (test .title , func (t * testing.T ) {
2414
+ n , cleanup := initSingleMemRaftNode (t )
2415
+ defer cleanup ()
2416
+
2417
+ // Create a sample entry, the content doesn't matter, just that it's stored.
2418
+ esm := encodeStreamMsgAllowCompress ("foo" , "_INBOX.foo" , nil , nil , 0 , 0 , true )
2419
+ entries := []* Entry {newEntry (EntryNormal , esm )}
2420
+
2421
+ // Switch this node to leader, and send two entries. The first will get quorum, the second will not.
2422
+ n .term ++
2423
+ n .switchToLeader ()
2424
+ require_Equal (t , n .term , 1 )
2425
+ require_Equal (t , n .pindex , 0 )
2426
+ n .sendAppendEntry (entries )
2427
+ n .sendAppendEntry (entries )
2428
+ require_Equal (t , n .pindex , 2 )
2429
+ require_Equal (t , n .pterm , 1 )
2430
+
2431
+ // Shouldn't have any commits up to this point.
2432
+ require_Equal (t , n .commit , 0 )
2433
+
2434
+ // We get quorum on the first entry.
2435
+ for i := uint64 (1 ); i <= test .commit && i <= 2 ; i ++ {
2436
+ require_NoError (t , n .applyCommit (i ))
2437
+ require_Equal (t , n .commit , i )
2438
+ }
2439
+
2440
+ // Act like the next entry is sent under a new term.
2441
+ // That will verify we can revert back to the pterm at time of last commit.
2442
+ n .term ++
2443
+ n .switchToLeader ()
2444
+ n .sendAppendEntry (entries )
2445
+ n .sendAppendEntry (entries )
2446
+ require_Equal (t , n .pindex , 4 )
2447
+ require_Equal (t , n .pterm , 2 )
2448
+
2449
+ if test .commit == 3 {
2450
+ require_NoError (t , n .applyCommit (3 ))
2451
+ require_Equal (t , n .commit , 3 )
2452
+ }
2453
+
2454
+ // If we now step down, we should remove all entries from the leader's term if none had quorum.
2455
+ // This is required, otherwise we could try to become leader again and trick a follower into
2456
+ // voting for us based on a pterm without quorum. That could result in overwriting an entry
2457
+ // at a pindex where a previous leader knew we had quorum on, but the follower didn't.
2458
+ n .stepdown (noLeader )
2459
+
2460
+ require_Equal (t , n .commit , test .commit )
2461
+ if test .commit < 3 {
2462
+ require_Equal (t , n .pindex , 2 )
2463
+ require_Equal (t , n .pterm , 1 )
2464
+ } else {
2465
+ require_Equal (t , n .pindex , 4 )
2466
+ require_Equal (t , n .pterm , 2 )
2467
+ }
2468
+ })
2469
+ }
2470
+ }
2471
+
2366
2472
// This is a RaftChainOfBlocks test where a block is proposed and then we wait for all replicas to apply it before
2367
2473
// proposing the next one.
2368
2474
// The test may fail if:
0 commit comments