Skip to content

Commit c9757f7

Browse files
committed
[GH-659] Basic protocol version change handling.
1 parent 51119c3 commit c9757f7

File tree

11 files changed

+422
-190
lines changed

11 files changed

+422
-190
lines changed

nil/internal/collate/bootstrap.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,7 @@ func fetchSnapshot(
119119
return fetchShardSnap(ctx, nm, peerId, db, logger)
120120
}
121121

122-
func fetchGenesisBlockHash(ctx context.Context, nm *network.Manager, peerAddr network.AddrInfo) (common.Hash, error) {
123-
peerId, err := nm.Connect(ctx, peerAddr)
124-
if err != nil {
125-
return common.EmptyHash, fmt.Errorf("failed to connect to %s: %w", peerAddr, err)
126-
}
127-
122+
func fetchGenesisBlockHash(ctx context.Context, nm *network.Manager, peerId network.PeerID) (common.Hash, error) {
128123
resp, err := nm.SendRequestAndGetResponse(ctx, peerId, topicVersion, nil)
129124
if err != nil {
130125
return common.EmptyHash, fmt.Errorf("failed to fetch genesis block hash: %w", err)

nil/internal/collate/syncer.go

+69-18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@ import (
2222
"google.golang.org/protobuf/proto"
2323
)
2424

25+
type ProtocolVersionMismatchError struct {
26+
LocalVersion string
27+
RemoteVersion string
28+
}
29+
30+
func (e *ProtocolVersionMismatchError) Error() string {
31+
return fmt.Sprintf("protocol version mismatch; local: %s, remote: %s", e.LocalVersion, e.RemoteVersion)
32+
}
33+
2534
type SyncerConfig struct {
2635
execution.BlockGeneratorParams
2736

@@ -75,34 +84,67 @@ func (s *Syncer) shardIsEmpty(ctx context.Context) (bool, error) {
7584
return block == nil, nil
7685
}
7786

78-
func (s *Syncer) WaitComplete() {
79-
s.waitForSync.Wait()
87+
func (s *Syncer) WaitComplete(ctx context.Context) error {
88+
c := make(chan struct{}, 1)
89+
go func() {
90+
defer close(c)
91+
s.waitForSync.Wait()
92+
}()
93+
select {
94+
case <-ctx.Done():
95+
return ctx.Err()
96+
case <-c:
97+
return nil
98+
}
8099
}
81100

82-
func (s *Syncer) readLocalVersion(ctx context.Context) (common.Hash, error) {
101+
func (s *Syncer) getLocalVersion(ctx context.Context) (*NodeVersion, error) {
102+
protocolVersion := s.networkManager.ProtocolVersion()
103+
83104
rotx, err := s.db.CreateRoTx(ctx)
84105
if err != nil {
85-
return common.EmptyHash, err
106+
return nil, err
86107
}
87108
defer rotx.Rollback()
88109

89110
res, err := db.ReadBlockHashByNumber(rotx, types.MainShardId, 0)
90-
if errors.Is(err, db.ErrKeyNotFound) {
91-
return common.EmptyHash, nil
111+
if err != nil {
112+
if errors.Is(err, db.ErrKeyNotFound) {
113+
return &NodeVersion{protocolVersion, common.EmptyHash}, nil
114+
} else {
115+
return nil, err
116+
}
92117
}
93-
return res, err
118+
return &NodeVersion{protocolVersion, res}, err
94119
}
95120

96-
func (s *Syncer) fetchRemoteVersion(ctx context.Context) (common.Hash, error) {
121+
type NodeVersion struct {
122+
ProtocolVersion string
123+
GenesisBlockHash common.Hash
124+
}
125+
126+
func (s *Syncer) fetchRemoteVersion(ctx context.Context) (NodeVersion, error) {
97127
var err error
98128
for _, peer := range s.config.BootstrapPeers {
129+
var peerId network.PeerID
130+
peerId, err = s.networkManager.Connect(ctx, peer)
131+
if err != nil {
132+
continue
133+
}
134+
135+
var protocolVersion string
136+
protocolVersion, err = s.networkManager.GetPeerProtocolVersion(peerId)
137+
if err != nil {
138+
continue
139+
}
140+
99141
var res common.Hash
100-
res, err = fetchGenesisBlockHash(ctx, s.networkManager, peer)
142+
res, err = fetchGenesisBlockHash(ctx, s.networkManager, peerId)
101143
if err == nil {
102-
return res, nil
144+
return NodeVersion{protocolVersion, res}, nil
103145
}
104146
}
105-
return common.EmptyHash, fmt.Errorf("failed to fetch version from all peers; last error: %w", err)
147+
return NodeVersion{}, fmt.Errorf("failed to fetch version from all peers; last error: %w", err)
106148
}
107149

108150
func (s *Syncer) fetchSnapshot(ctx context.Context) error {
@@ -126,14 +168,10 @@ func (s *Syncer) Init(ctx context.Context, allowDbDrop bool) error {
126168
return nil
127169
}
128170

129-
version, err := s.readLocalVersion(ctx)
171+
version, err := s.getLocalVersion(ctx)
130172
if err != nil {
131173
return err
132174
}
133-
if version.Empty() {
134-
s.logger.Info().Msg("Local version is empty. Fetching snapshot...")
135-
return s.fetchSnapshot(ctx)
136-
}
137175

138176
remoteVersion, err := s.fetchRemoteVersion(ctx)
139177
if err != nil {
@@ -142,7 +180,20 @@ func (s *Syncer) Init(ctx context.Context, allowDbDrop bool) error {
142180
"Failed to fetch remote version. For now we assume that local version %s is up to date", version)
143181
return nil
144182
}
145-
if version == remoteVersion {
183+
184+
if version.ProtocolVersion != remoteVersion.ProtocolVersion {
185+
return &ProtocolVersionMismatchError{
186+
version.ProtocolVersion,
187+
remoteVersion.ProtocolVersion,
188+
}
189+
}
190+
191+
if version.GenesisBlockHash.Empty() {
192+
s.logger.Info().Msg("Local version is empty. Fetching snapshot...")
193+
return s.fetchSnapshot(ctx)
194+
}
195+
196+
if version.GenesisBlockHash == remoteVersion.GenesisBlockHash {
146197
s.logger.Info().Msgf("Local version %s is up to date. Finished initialization", version)
147198
return nil
148199
}
@@ -338,7 +389,7 @@ func (s *Syncer) saveBlock(ctx context.Context, block *types.BlockWithExtractedD
338389
return nil
339390
}
340391

341-
func (s *Syncer) GenerateZerostate(ctx context.Context) error {
392+
func (s *Syncer) GenerateZerostateIfShardIsEmpty(ctx context.Context) error {
342393
ctx, cancel := context.WithTimeout(ctx, s.config.Timeout)
343394
defer cancel()
344395

nil/internal/network/config.go

+18-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ import (
1010
type PeerID = peer.ID
1111

1212
type Config struct {
13-
PrivateKey PrivateKey `yaml:"-"`
13+
PrivateKey PrivateKey `yaml:"-"`
14+
ProtocolVersion string `yaml:"-"`
1415

1516
KeysPath string `yaml:"keysPath,omitempty"`
1617

@@ -32,6 +33,8 @@ type Config struct {
3233
Reachability network.Reachability `yaml:"-"`
3334
}
3435

36+
type Option func(cfg *Config) error
37+
3538
func NewDefaultConfig() *Config {
3639
return &Config{
3740
KeysPath: "network-keys.yaml",
@@ -44,3 +47,17 @@ func NewDefaultConfig() *Config {
4447
func (c *Config) Enabled() bool {
4548
return c.TcpPort != 0 || c.QuicPort != 0
4649
}
50+
51+
// Apply applies the given options to the config, returning the first error
52+
// encountered (if any).
53+
func (c *Config) Apply(opts ...Option) error {
54+
for _, opt := range opts {
55+
if opt == nil {
56+
continue
57+
}
58+
if err := opt(c); err != nil {
59+
return err
60+
}
61+
}
62+
return nil
63+
}

nil/internal/network/host.go

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func getCommonOptions(ctx context.Context, conf *Config) ([]libp2p.Option, loggi
5353
}
5454

5555
return []libp2p.Option{
56+
libp2p.ProtocolVersion(conf.ProtocolVersion),
5657
libp2p.Security(noise.ID, noise.New),
5758
libp2p.ConnectionManager(cm),
5859
libp2p.Identity(conf.PrivateKey),

nil/internal/network/manager.go

+28-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package network
22

33
import (
44
"context"
5+
"fmt"
56
"slices"
67
"strings"
78

@@ -16,8 +17,9 @@ import (
1617
)
1718

1819
type Manager struct {
19-
ctx context.Context
20-
prefix string
20+
ctx context.Context
21+
prefix string
22+
protocolVersion string
2123

2224
host Host
2325
pubSub *PubSub
@@ -73,13 +75,14 @@ func newManagerFromHost(
7375
}
7476

7577
return &Manager{
76-
ctx: ctx,
77-
prefix: conf.Prefix,
78-
host: h,
79-
pubSub: ps,
80-
dht: dht,
81-
meter: telemetry.NewMeter("github.com/NilFoundation/nil/nil/internal/network"),
82-
logger: logger,
78+
ctx: ctx,
79+
prefix: conf.Prefix,
80+
protocolVersion: conf.ProtocolVersion,
81+
host: h,
82+
pubSub: ps,
83+
dht: dht,
84+
meter: telemetry.NewMeter("github.com/NilFoundation/nil/nil/internal/network"),
85+
logger: logger,
8386
}, nil
8487
}
8588

@@ -123,6 +126,22 @@ func (m *Manager) PubSub() *PubSub {
123126
return m.pubSub
124127
}
125128

129+
func (m *Manager) ProtocolVersion() string {
130+
return m.protocolVersion
131+
}
132+
133+
func (m *Manager) GetPeerProtocolVersion(peer peer.ID) (string, error) {
134+
pv, err := m.host.Peerstore().Get(peer, "ProtocolVersion")
135+
if err != nil {
136+
return "", err
137+
}
138+
versionString, ok := pv.(string)
139+
if !ok {
140+
return "", fmt.Errorf("failed to convert protocol version to string for peer %s", peer)
141+
}
142+
return versionString, nil
143+
}
144+
126145
func (m *Manager) AllKnownPeers() []peer.ID {
127146
return slices.DeleteFunc(m.host.Peerstore().PeersWithAddrs(), func(i PeerID) bool {
128147
return m.host.ID() == i

nil/services/nilservice/service.go

+14-3
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ func initSyncers(ctx context.Context, syncers []*collate.Syncer, allowDbDrop boo
240240
return err
241241
}
242242
for _, syncer := range syncers {
243-
if err := syncer.GenerateZerostate(ctx); err != nil {
243+
if err := syncer.GenerateZerostateIfShardIsEmpty(ctx); err != nil {
244244
return err
245245
}
246246
}
@@ -322,7 +322,9 @@ func createSyncers(
322322
}))
323323
res.funcs = append(res.funcs, concurrent.WithSource(func(ctx context.Context) error {
324324
for _, syncer := range res.syncers {
325-
syncer.WaitComplete()
325+
if err := syncer.WaitComplete(ctx); err != nil {
326+
return err
327+
}
326328
}
327329
return res.syncers[0].SetHandlers(ctx)
328330
}))
@@ -339,7 +341,16 @@ type Node struct {
339341

340342
func (i *Node) Run() error {
341343
if err := concurrent.Run(i.ctx, i.funcs...); err != nil {
342-
i.logger.Error().Err(err).Msg("App encountered an error and will be terminated.")
344+
var executionErr *concurrent.ExecutionError
345+
var protocolVersionMismatchErr *collate.ProtocolVersionMismatchError
346+
if errors.As(err, &executionErr) && errors.As(executionErr.Err, &protocolVersionMismatchErr) {
347+
i.logger.Error().
348+
Str("localVersion", protocolVersionMismatchErr.LocalVersion).
349+
Str("remoteVersion", protocolVersionMismatchErr.RemoteVersion).
350+
Msg("Protocol version mismatch. Probably nild executable is outdated.")
351+
} else {
352+
i.logger.Error().Err(err).Msg("App encountered an error and will be terminated.")
353+
}
343354
return err
344355
}
345356
i.logger.Info().Msg("App is terminated.")

0 commit comments

Comments
 (0)