Skip to content

Commit 71f7988

Browse files
committed
eth/downloader: create repro testcase for beacon header loss
1 parent 686f743 commit 71f7988

File tree

2 files changed

+120
-39
lines changed

2 files changed

+120
-39
lines changed

eth/downloader/skeleton.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ func (s *skeleton) initSync(head *types.Header) {
520520
}
521521
break
522522
}
523-
// If the last subchain can be extended, we're lucky. Otherwise create
523+
// If the last subchain can be extended, we're lucky. Otherwise, create
524524
// a new subchain sync task.
525525
var extended bool
526526
if n := len(s.progress.Subchains); n > 0 {

eth/downloader/skeleton_test.go

Lines changed: 119 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import (
3737
type hookedBackfiller struct {
3838
// suspendHook is an optional hook to be called when the filler is requested
3939
// to be suspended.
40-
suspendHook func()
40+
suspendHook func() *types.Header
4141

4242
// resumeHook is an optional hook to be called when the filler is requested
4343
// to be resumed.
@@ -56,7 +56,7 @@ func newHookedBackfiller() backfiller {
5656
// on initial startup.
5757
func (hf *hookedBackfiller) suspend() *types.Header {
5858
if hf.suspendHook != nil {
59-
hf.suspendHook()
59+
return hf.suspendHook()
6060
}
6161
return nil // we don't really care about header cleanups for now
6262
}
@@ -525,9 +525,21 @@ func TestSkeletonSyncRetrievals(t *testing.T) {
525525
Number: big.NewInt(int64(i)),
526526
})
527527
}
528+
// Some tests require a forking side chain to trigger cornercases.
529+
var sidechain []*types.Header
530+
for i := 0; i < len(chain)/2; i++ { // Fork at block #5000
531+
sidechain = append(sidechain, chain[i])
532+
}
533+
for i := len(chain) / 2; i < len(chain); i++ {
534+
sidechain = append(sidechain, &types.Header{
535+
ParentHash: sidechain[i-1].Hash(),
536+
Number: big.NewInt(int64(i)),
537+
Extra: []byte("B"), // force a different hash
538+
})
539+
}
528540
tests := []struct {
529-
headers []*types.Header // Database content (beside the genesis)
530-
oldstate []*subchain // Old sync state with various interrupted subchains
541+
fill bool // Whether to run a real backfiller in this test case
542+
unpredictable bool // Whether to ignore drops/serves due to uncertain packet assignments
531543

532544
head *types.Header // New head header to announce to reorg to
533545
peers []*skeletonTestPeer // Initial peer set to start the sync with
@@ -760,11 +772,41 @@ func TestSkeletonSyncRetrievals(t *testing.T) {
760772
endstate: []*subchain{{Head: 2*requestHeaders + 2, Tail: 1}},
761773
endserve: 4 * requestHeaders,
762774
},
775+
// This test reproduces a bug caught by (@rjl493456442) where a skeleton
776+
// header goes missing, causing the sync to get stuck and/or panic.
777+
//
778+
// The setup requires a previously successfully synced chain up to a block
779+
// height N. That results in a single skeleton header (block N) and a single
780+
// subchain (Head N, Tail N) being stored on disk.
781+
//
782+
// The following step requires a new sync cycle to a new side chain of a
783+
// height higher than N, and an ancestor lower than N (e.g. ancestor at N-2, new head at N+2).
784+
// In this scenario, when processing a batch of headers, a link point of
785+
// N-2 will be found, meaning that N-1 and N have been overwritten.
786+
//
787+
// The link event triggers an early exit, noticing that the previous sub-
788+
// chain is a leftover and deletes it (with its skeleton header N). But
789+
// since skeleton header N has been overwritten to the new side chain, we
790+
// end up losing it and creating a gap.
791+
{
792+
fill: true,
793+
unpredictable: true, // We have good and bad peer too, bad may be dropped, test too short for certainty
794+
795+
head: chain[len(chain)/2+1], // Sync up until the sidechain common ancestor + 2
796+
peers: []*skeletonTestPeer{newSkeletonTestPeer("test-peer-oldchain", chain)},
797+
midstate: []*subchain{{Head: uint64(len(chain)/2 + 1), Tail: 1}},
798+
799+
newHead: sidechain[len(sidechain)/2+3], // Sync up until the sidechain common ancestor + 4
800+
newPeer: newSkeletonTestPeer("test-peer-newchain", sidechain),
801+
endstate: []*subchain{{Head: uint64(len(sidechain)/2 + 3), Tail: uint64(len(chain) / 2)}},
802+
},
763803
}
764804
for i, tt := range tests {
765805
// Create a fresh database and initialize it with the starting state
766806
db := rawdb.NewMemoryDatabase()
767-
rawdb.WriteHeader(db, chain[0])
807+
808+
rawdb.WriteBlock(db, types.NewBlockWithHeader(chain[0]))
809+
rawdb.WriteReceipts(db, chain[0].Hash(), chain[0].Number.Uint64(), types.Receipts{})
768810

769811
// Create a peer set to feed headers through
770812
peerset := newPeerSet()
@@ -780,8 +822,43 @@ func TestSkeletonSyncRetrievals(t *testing.T) {
780822
peerset.Unregister(peer)
781823
dropped[peer]++
782824
}
825+
// Create a backfiller if we need to run more advanced tests
826+
filler := newHookedBackfiller()
827+
if tt.fill {
828+
var filled *types.Header
829+
830+
filler = &hookedBackfiller{
831+
resumeHook: func() {
832+
var progress skeletonProgress
833+
json.Unmarshal(rawdb.ReadSkeletonSyncStatus(db), &progress)
834+
835+
for progress.Subchains[0].Tail < progress.Subchains[0].Head {
836+
header := rawdb.ReadSkeletonHeader(db, progress.Subchains[0].Tail)
837+
838+
rawdb.WriteBlock(db, types.NewBlockWithHeader(header))
839+
rawdb.WriteReceipts(db, header.Hash(), header.Number.Uint64(), types.Receipts{})
840+
841+
rawdb.DeleteSkeletonHeader(db, header.Number.Uint64())
842+
843+
progress.Subchains[0].Tail++
844+
progress.Subchains[0].Next = header.Hash()
845+
}
846+
filled = rawdb.ReadSkeletonHeader(db, progress.Subchains[0].Tail)
847+
848+
rawdb.WriteBlock(db, types.NewBlockWithHeader(filled))
849+
rawdb.WriteReceipts(db, filled.Hash(), filled.Number.Uint64(), types.Receipts{})
850+
},
851+
852+
suspendHook: func() *types.Header {
853+
prev := filled
854+
filled = nil
855+
856+
return prev
857+
},
858+
}
859+
}
783860
// Create a skeleton sync and run a cycle
784-
skeleton := newSkeleton(db, peerset, drop, newHookedBackfiller())
861+
skeleton := newSkeleton(db, peerset, drop, filler)
785862
skeleton.Sync(tt.head, true)
786863

787864
var progress skeletonProgress
@@ -815,19 +892,21 @@ func TestSkeletonSyncRetrievals(t *testing.T) {
815892
t.Error(err)
816893
continue
817894
}
818-
var served uint64
819-
for _, peer := range tt.peers {
820-
served += atomic.LoadUint64(&peer.served)
821-
}
822-
if served != tt.midserve {
823-
t.Errorf("test %d, mid state: served headers mismatch: have %d, want %d", i, served, tt.midserve)
824-
}
825-
var drops uint64
826-
for _, peer := range tt.peers {
827-
drops += atomic.LoadUint64(&peer.dropped)
828-
}
829-
if drops != tt.middrop {
830-
t.Errorf("test %d, mid state: dropped peers mismatch: have %d, want %d", i, drops, tt.middrop)
895+
if !tt.unpredictable {
896+
var served uint64
897+
for _, peer := range tt.peers {
898+
served += atomic.LoadUint64(&peer.served)
899+
}
900+
if served != tt.midserve {
901+
t.Errorf("test %d, mid state: served headers mismatch: have %d, want %d", i, served, tt.midserve)
902+
}
903+
var drops uint64
904+
for _, peer := range tt.peers {
905+
drops += atomic.LoadUint64(&peer.dropped)
906+
}
907+
if drops != tt.middrop {
908+
t.Errorf("test %d, mid state: dropped peers mismatch: have %d, want %d", i, drops, tt.middrop)
909+
}
831910
}
832911
// Apply the post-init events if there's any
833912
if tt.newHead != nil {
@@ -868,25 +947,27 @@ func TestSkeletonSyncRetrievals(t *testing.T) {
868947
continue
869948
}
870949
// Check that the peers served no more headers than we actually needed
871-
served = 0
872-
for _, peer := range tt.peers {
873-
served += atomic.LoadUint64(&peer.served)
874-
}
875-
if tt.newPeer != nil {
876-
served += atomic.LoadUint64(&tt.newPeer.served)
877-
}
878-
if served != tt.endserve {
879-
t.Errorf("test %d, end state: served headers mismatch: have %d, want %d", i, served, tt.endserve)
880-
}
881-
drops = 0
882-
for _, peer := range tt.peers {
883-
drops += atomic.LoadUint64(&peer.dropped)
884-
}
885-
if tt.newPeer != nil {
886-
drops += atomic.LoadUint64(&tt.newPeer.dropped)
887-
}
888-
if drops != tt.middrop {
889-
t.Errorf("test %d, end state: dropped peers mismatch: have %d, want %d", i, drops, tt.middrop)
950+
if !tt.unpredictable {
951+
served := uint64(0)
952+
for _, peer := range tt.peers {
953+
served += atomic.LoadUint64(&peer.served)
954+
}
955+
if tt.newPeer != nil {
956+
served += atomic.LoadUint64(&tt.newPeer.served)
957+
}
958+
if served != tt.endserve {
959+
t.Errorf("test %d, end state: served headers mismatch: have %d, want %d", i, served, tt.endserve)
960+
}
961+
drops := uint64(0)
962+
for _, peer := range tt.peers {
963+
drops += atomic.LoadUint64(&peer.dropped)
964+
}
965+
if tt.newPeer != nil {
966+
drops += atomic.LoadUint64(&tt.newPeer.dropped)
967+
}
968+
if drops != tt.enddrop {
969+
t.Errorf("test %d, end state: dropped peers mismatch: have %d, want %d", i, drops, tt.middrop)
970+
}
890971
}
891972
// Clean up any leftover skeleton sync resources
892973
skeleton.Terminate()

0 commit comments

Comments
 (0)