Skip to content

Commit dc1f8e6

Browse files
committed
[GH-659] Basic protocol version change handling.
1 parent 350416f commit dc1f8e6

File tree

17 files changed

+447
-215
lines changed

17 files changed

+447
-215
lines changed

nil/internal/collate/bootstrap.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,7 @@ func fetchSnapshot(
119119
return fetchShardSnap(ctx, nm, peerId, db, logger)
120120
}
121121

122-
func fetchGenesisBlockHash(ctx context.Context, nm *network.Manager, peerAddr network.AddrInfo) (common.Hash, error) {
123-
peerId, err := nm.Connect(ctx, peerAddr)
124-
if err != nil {
125-
return common.EmptyHash, fmt.Errorf("failed to connect to %s: %w", peerAddr, err)
126-
}
127-
122+
func fetchGenesisBlockHash(ctx context.Context, nm *network.Manager, peerId network.PeerID) (common.Hash, error) {
128123
resp, err := nm.SendRequestAndGetResponse(ctx, peerId, topicVersion, nil)
129124
if err != nil {
130125
return common.EmptyHash, fmt.Errorf("failed to fetch genesis block hash: %w", err)

nil/internal/collate/syncer.go

+57-16
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@ import (
2222
"google.golang.org/protobuf/proto"
2323
)
2424

25+
type ProtocolVersionMismatchError struct {
26+
LocalVersion string
27+
RemoteVersion string
28+
}
29+
30+
func (e *ProtocolVersionMismatchError) Error() string {
31+
return fmt.Sprintf("protocol version mismatch; local: %s, remote: %s", e.LocalVersion, e.RemoteVersion)
32+
}
33+
2534
type SyncerConfig struct {
2635
execution.BlockGeneratorParams
2736

@@ -79,30 +88,53 @@ func (s *Syncer) WaitComplete() {
7988
s.waitForSync.Wait()
8089
}
8190

82-
func (s *Syncer) readLocalVersion(ctx context.Context) (common.Hash, error) {
91+
func (s *Syncer) getLocalVersion(ctx context.Context) (*NodeVersion, error) {
92+
protocolVersion := s.networkManager.ProtocolVersion()
93+
8394
rotx, err := s.db.CreateRoTx(ctx)
8495
if err != nil {
85-
return common.EmptyHash, err
96+
return nil, err
8697
}
8798
defer rotx.Rollback()
8899

89100
res, err := db.ReadBlockHashByNumber(rotx, types.MainShardId, 0)
90-
if errors.Is(err, db.ErrKeyNotFound) {
91-
return common.EmptyHash, nil
101+
if err != nil {
102+
if errors.Is(err, db.ErrKeyNotFound) {
103+
return &NodeVersion{protocolVersion, common.EmptyHash}, nil
104+
} else {
105+
return nil, err
106+
}
92107
}
93-
return res, err
108+
return &NodeVersion{protocolVersion, res}, err
109+
}
110+
111+
type NodeVersion struct {
112+
ProtocolVersion string
113+
GenesisBlockHash common.Hash
94114
}
95115

96-
func (s *Syncer) fetchRemoteVersion(ctx context.Context) (common.Hash, error) {
116+
func (s *Syncer) fetchRemoteVersion(ctx context.Context) (NodeVersion, error) {
97117
var err error
98118
for _, peer := range s.config.BootstrapPeers {
119+
var peerId network.PeerID
120+
peerId, err = s.networkManager.Connect(ctx, peer)
121+
if err != nil {
122+
continue
123+
}
124+
125+
var protocolVersion string
126+
protocolVersion, err = s.networkManager.GetPeerProtocolVersion(peerId)
127+
if err != nil {
128+
continue
129+
}
130+
99131
var res common.Hash
100-
res, err = fetchGenesisBlockHash(ctx, s.networkManager, peer)
132+
res, err = fetchGenesisBlockHash(ctx, s.networkManager, peerId)
101133
if err == nil {
102-
return res, nil
134+
return NodeVersion{protocolVersion, res}, nil
103135
}
104136
}
105-
return common.EmptyHash, fmt.Errorf("failed to fetch version from all peers; last error: %w", err)
137+
return NodeVersion{}, fmt.Errorf("failed to fetch version from all peers; last error: %w", err)
106138
}
107139

108140
func (s *Syncer) fetchSnapshot(ctx context.Context) error {
@@ -126,14 +158,10 @@ func (s *Syncer) Init(ctx context.Context, allowDbDrop bool) error {
126158
return nil
127159
}
128160

129-
version, err := s.readLocalVersion(ctx)
161+
version, err := s.getLocalVersion(ctx)
130162
if err != nil {
131163
return err
132164
}
133-
if version.Empty() {
134-
s.logger.Info().Msg("Local version is empty. Fetching snapshot...")
135-
return s.fetchSnapshot(ctx)
136-
}
137165

138166
remoteVersion, err := s.fetchRemoteVersion(ctx)
139167
if err != nil {
@@ -142,7 +170,20 @@ func (s *Syncer) Init(ctx context.Context, allowDbDrop bool) error {
142170
"Failed to fetch remote version. For now we assume that local version %s is up to date", version)
143171
return nil
144172
}
145-
if version == remoteVersion {
173+
174+
if version.ProtocolVersion != remoteVersion.ProtocolVersion {
175+
return &ProtocolVersionMismatchError{
176+
version.ProtocolVersion,
177+
remoteVersion.ProtocolVersion,
178+
}
179+
}
180+
181+
if version.GenesisBlockHash.Empty() {
182+
s.logger.Info().Msg("Local version is empty. Fetching snapshot...")
183+
return s.fetchSnapshot(ctx)
184+
}
185+
186+
if version.GenesisBlockHash == remoteVersion.GenesisBlockHash {
146187
s.logger.Info().Msgf("Local version %s is up to date. Finished initialization", version)
147188
return nil
148189
}
@@ -338,7 +379,7 @@ func (s *Syncer) saveBlock(ctx context.Context, block *types.BlockWithExtractedD
338379
return nil
339380
}
340381

341-
func (s *Syncer) GenerateZerostate(ctx context.Context) error {
382+
func (s *Syncer) GenerateZerostateIfShardIsEmpty(ctx context.Context) error {
342383
ctx, cancel := context.WithTimeout(ctx, s.config.Timeout)
343384
defer cancel()
344385

nil/internal/network/config.go

+18-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ import (
1010
type PeerID = peer.ID
1111

1212
type Config struct {
13-
PrivateKey PrivateKey `yaml:"-"`
13+
PrivateKey PrivateKey `yaml:"-"`
14+
ProtocolVersion string `yaml:"-"`
1415

1516
KeysPath string `yaml:"keysPath,omitempty"`
1617

@@ -32,6 +33,8 @@ type Config struct {
3233
Reachability network.Reachability `yaml:"-"`
3334
}
3435

36+
type Option func(cfg *Config) error
37+
3538
func NewDefaultConfig() *Config {
3639
return &Config{
3740
KeysPath: "network-keys.yaml",
@@ -44,3 +47,17 @@ func NewDefaultConfig() *Config {
4447
func (c *Config) Enabled() bool {
4548
return c.TcpPort != 0 || c.QuicPort != 0
4649
}
50+
51+
// Apply applies the given options to the config, returning the first error
52+
// encountered (if any).
53+
func (c *Config) Apply(opts ...Option) error {
54+
for _, opt := range opts {
55+
if opt == nil {
56+
continue
57+
}
58+
if err := opt(c); err != nil {
59+
return err
60+
}
61+
}
62+
return nil
63+
}

nil/internal/network/host.go

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func getCommonOptions(ctx context.Context, conf *Config) ([]libp2p.Option, loggi
5353
}
5454

5555
return []libp2p.Option{
56+
libp2p.ProtocolVersion(conf.ProtocolVersion),
5657
libp2p.Security(noise.ID, noise.New),
5758
libp2p.ConnectionManager(cm),
5859
libp2p.Identity(conf.PrivateKey),

nil/internal/network/manager.go

+28-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package network
22

33
import (
44
"context"
5+
"fmt"
56
"slices"
67
"strings"
78

@@ -16,8 +17,9 @@ import (
1617
)
1718

1819
type Manager struct {
19-
ctx context.Context
20-
prefix string
20+
ctx context.Context
21+
prefix string
22+
protocolVersion string
2123

2224
host Host
2325
pubSub *PubSub
@@ -73,13 +75,14 @@ func newManagerFromHost(
7375
}
7476

7577
return &Manager{
76-
ctx: ctx,
77-
prefix: conf.Prefix,
78-
host: h,
79-
pubSub: ps,
80-
dht: dht,
81-
meter: telemetry.NewMeter("github.com/NilFoundation/nil/nil/internal/network"),
82-
logger: logger,
78+
ctx: ctx,
79+
prefix: conf.Prefix,
80+
protocolVersion: conf.ProtocolVersion,
81+
host: h,
82+
pubSub: ps,
83+
dht: dht,
84+
meter: telemetry.NewMeter("github.com/NilFoundation/nil/nil/internal/network"),
85+
logger: logger,
8386
}, nil
8487
}
8588

@@ -123,6 +126,22 @@ func (m *Manager) PubSub() *PubSub {
123126
return m.pubSub
124127
}
125128

129+
func (m *Manager) ProtocolVersion() string {
130+
return m.protocolVersion
131+
}
132+
133+
func (m *Manager) GetPeerProtocolVersion(peer peer.ID) (string, error) {
134+
pv, err := m.host.Peerstore().Get(peer, "ProtocolVersion")
135+
if err != nil {
136+
return "", err
137+
}
138+
versionString, ok := pv.(string)
139+
if !ok {
140+
return "", fmt.Errorf("failed to convert protocol version to string for peer %s", peer)
141+
}
142+
return versionString, nil
143+
}
144+
126145
func (m *Manager) AllKnownPeers() []peer.ID {
127146
return slices.DeleteFunc(m.host.Peerstore().PeersWithAddrs(), func(i PeerID) bool {
128147
return m.host.ID() == i

nil/services/nilservice/service.go

+11-2
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ func initSyncers(ctx context.Context, syncers []*collate.Syncer, allowDbDrop boo
232232
return err
233233
}
234234
for _, syncer := range syncers {
235-
if err := syncer.GenerateZerostate(ctx); err != nil {
235+
if err := syncer.GenerateZerostateIfShardIsEmpty(ctx); err != nil {
236236
return err
237237
}
238238
}
@@ -331,7 +331,16 @@ type Node struct {
331331

332332
func (i *Node) Run() error {
333333
if err := concurrent.Run(i.ctx, i.funcs...); err != nil {
334-
i.logger.Error().Err(err).Msg("App encountered an error and will be terminated.")
334+
var executionErr *concurrent.ExecutionError
335+
var protocolVersionMismatchErr *collate.ProtocolVersionMismatchError
336+
if errors.As(err, &executionErr) && errors.As(executionErr.Err, &protocolVersionMismatchErr) {
337+
i.logger.Error().
338+
Str("localVersion", protocolVersionMismatchErr.LocalVersion).
339+
Str("remoteVersion", protocolVersionMismatchErr.RemoteVersion).
340+
Msg("Protocol version mismatch. Probably nild executable is outdated.")
341+
} else {
342+
i.logger.Error().Err(err).Msg("App encountered an error and will be terminated.")
343+
}
335344
return err
336345
}
337346
i.logger.Info().Msg("App is terminated.")

0 commit comments

Comments
 (0)