Skip to content

Commit a9765be

Browse files
author
Sergey Shilov
committed
INDY-1112: change primaries election procedure for backup instances.
Now primaries for backup instances are always chosen in a round-robin manner, starting from the primary of the master instance. If the next node is already a primary for some instance, that node is skipped, so the first non-primary node is chosen as the primary for the current instance. This approach avoids electing replicas of the same node as primaries for different instances. The election procedure for the primary of the master instance is not changed. Signed-off-by: Sergey Shilov <[email protected]>
1 parent 9fbb725 commit a9765be

File tree

3 files changed

+83
-9
lines changed

3 files changed

+83
-9
lines changed

plenum/server/node.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,15 +2354,42 @@ def lost_master_primary(self):
23542354
self._schedule_view_change()
23552355

23562356
def select_primaries(self, nodeReg: Dict[str, HA]=None):
2357+
primaries = set()
2358+
primary_rank = None
2359+
'''
2360+
Build a set of names of primaries, it is needed to avoid
2361+
duplicates of primary nodes for different replicas.
2362+
'''
2363+
for instance_id, replica in enumerate(self.replicas):
2364+
if replica.primaryName is not None:
2365+
name = replica.primaryName.split(":", 1)[0]
2366+
primaries.add(name)
2367+
'''
2368+
Remember the rank of primary of master instance, it is needed
2369+
for calculation of primaries for backup instances.
2370+
'''
2371+
if instance_id == 0:
2372+
primary_rank = self.poolManager.get_rank_by_name(
2373+
replica.primaryName.split(":", 1)[0], nodeReg)
2374+
23572375
for instance_id, replica in enumerate(self.replicas):
23582376
if replica.primaryName is not None:
23592377
logger.debug('{} already has a primary'.format(replica))
23602378
continue
2361-
new_primary_name = self.elector.next_primary_replica_name(
2362-
instance_id, nodeReg=nodeReg)
2379+
if instance_id == 0:
2380+
new_primary_name, new_primary_instance_name =\
2381+
self.elector.next_primary_replica_name_for_master(nodeReg=nodeReg)
2382+
primary_rank = self.poolManager.get_rank_by_name(
2383+
new_primary_name, nodeReg)
2384+
else:
2385+
assert primary_rank != None
2386+
new_primary_name, new_primary_instance_name =\
2387+
self.elector.next_primary_replica_name_for_backup(
2388+
instance_id, primary_rank, primaries, nodeReg=nodeReg)
2389+
primaries.add(new_primary_name)
23632390
logger.display("{}{} selected primary {} for instance {} (view {})"
23642391
.format(PRIMARY_SELECTION_PREFIX, replica,
2365-
new_primary_name, instance_id, self.viewNo),
2392+
new_primary_instance_name, instance_id, self.viewNo),
23662393
extra={"cli": "ANNOUNCE",
23672394
"tags": ["node-election"]})
23682395
if instance_id == 0:
@@ -2372,7 +2399,7 @@ def select_primaries(self, nodeReg: Dict[str, HA]=None):
23722399
# participating.
23732400
self.start_participating()
23742401

2375-
replica.primaryChanged(new_primary_name)
2402+
replica.primaryChanged(new_primary_instance_name)
23762403
self.primary_selected(instance_id)
23772404

23782405
logger.display("{}{} declares view change {} as completed for "
@@ -2383,7 +2410,7 @@ def select_primaries(self, nodeReg: Dict[str, HA]=None):
23832410
replica,
23842411
self.viewNo,
23852412
instance_id,
2386-
new_primary_name,
2413+
new_primary_instance_name,
23872414
self.ledger_summary),
23882415
extra={"cli": "ANNOUNCE",
23892416
"tags": ["node-election"]})

plenum/server/pool_manager.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,11 @@ def get_rank_of(self, node_id, nodeReg=None) -> Optional[int]:
411411
return None
412412
return self._get_rank(node_id, self.node_ids_ordered_by_rank(nodeReg))
413413

414+
def get_rank_by_name(self, name, nodeReg=None) -> Optional[int]:
    """
    Look up the rank of the node called ``name``.

    Scans ``self._ordered_node_ids`` for the first entry whose value
    equals ``name`` and delegates to ``get_rank_of`` with that entry's
    key.

    :param name: node name to search for
    :param nodeReg: passed through unchanged to ``get_rank_of``
    :return: the result of ``get_rank_of`` for the matching key, or
        ``None`` when no entry carries that name
    """
    for nym, node_name in self._ordered_node_ids.items():
        if node_name == name:
            return self.get_rank_of(nym, nodeReg)
    # Unknown name: state the "not found" result explicitly instead of
    # relying on the implicit fall-through.
    return None
418+
414419
def get_name_by_rank(self, rank, nodeReg=None) -> Optional[str]:
415420
try:
416421
nym = self.node_ids_ordered_by_rank(nodeReg)[rank]

plenum/server/primary_selector.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,56 @@ def next_primary_node_name(self, instance_id, nodeReg=None):
5454

5555
return name
5656

57-
def next_primary_replica_name(self, instance_id, nodeReg=None):
57+
def next_primary_node_name_for_master(self, nodeReg=None):
    """
    Return the name of the node selected as primary of the master
    instance (instance id 0) for the current ``self.viewNo``.

    The rank comes from ``self._get_primary_id`` and is translated to a
    node name through ``self.node.get_name_by_rank``.

    :param nodeReg: node registry to use; ``self.node.nodeReg`` when None
    :return: the selected node name (asserted to be truthy)
    """
    registry = self.node.nodeReg if nodeReg is None else nodeReg
    master_rank = self._get_primary_id(self.viewNo, 0, len(registry))
    node_name = self.node.get_name_by_rank(master_rank, nodeReg=registry)

    trace_message = (
        "{} selected {} as next primary node for master instance, "
        "viewNo {} with rank {}, nodeReg {}"
    ).format(self, node_name, self.viewNo, master_rank, registry)
    logger.trace(trace_message)

    assert node_name, "{} failed to get next primary node name for master instance".format(self)
    return node_name
69+
70+
def next_primary_node_name_for_backup(self, instance_id, nodeReg=None):
    """
    Return the name of the node selected as primary of instance
    ``instance_id`` for the current ``self.viewNo``.

    The rank comes from ``self._get_primary_id`` and is translated to a
    node name through ``self.node.get_name_by_rank``.

    :param instance_id: id of the instance a primary is needed for
    :param nodeReg: node registry to use; ``self.node.nodeReg`` when None
    :return: the selected node name (asserted to be truthy)
    """
    registry = self.node.nodeReg if nodeReg is None else nodeReg
    chosen_rank = self._get_primary_id(self.viewNo, instance_id, len(registry))
    node_name = self.node.get_name_by_rank(chosen_rank, nodeReg=registry)

    trace_message = (
        "{} selected {} as next primary node for instId {}, "
        "viewNo {} with rank {}, nodeReg {}"
    ).format(self, node_name, instance_id, self.viewNo, chosen_rank, registry)
    logger.trace(trace_message)

    assert node_name, "{} failed to get next primary node name".format(self)
    return node_name
82+
83+
def next_primary_replica_name_for_master(self, nodeReg=None):
    """
    Return the next primary of the master instance as a pair.

    :param nodeReg: forwarded to ``next_primary_node_name_for_master``
    :return: tuple ``(node_name, replica_name)`` where ``replica_name``
        is ``Replica.generateName`` applied to ``node_name`` and
        instance id 0
    """
    node_name = self.next_primary_node_name_for_master(nodeReg)
    replica_name = Replica.generateName(nodeName=node_name, instId=0)
    return node_name, replica_name
90+
91+
def next_primary_replica_name_for_backup(self, instance_id, master_primary_rank,
                                         primaries, nodeReg=None):
    """
    Return the next primary of a backup instance as a pair.

    Candidates are examined in rank order, starting from the rank right
    after ``master_primary_rank`` and wrapping around the pool. The first
    node whose name is not in ``primaries`` is selected.

    :param instance_id: id of the backup instance
    :param master_primary_rank: rank of the master instance's primary
    :param primaries: names of nodes that are already primaries; only
        read here, never modified
    :param nodeReg: node registry to use; ``self.node.nodeReg`` when None
    :return: tuple ``(node_name, replica_name)`` where ``replica_name``
        is ``Replica.generateName`` applied to ``node_name`` and
        ``instance_id``
    :raises RuntimeError: if a full lap over the registry finds no node
        outside ``primaries`` (this includes an empty registry)
    """
    if nodeReg is None:
        nodeReg = self.node.nodeReg
    total_nodes = len(nodeReg)

    rank = master_primary_rank
    # One lap visits every rank exactly once. An unbounded ``while`` would
    # spin forever when every node is already a primary.
    for _ in range(total_nodes):
        rank = (rank + 1) % total_nodes
        name = self.node.get_name_by_rank(rank, nodeReg=nodeReg)
        if name not in primaries:
            return name, Replica.generateName(nodeName=name,
                                              instId=instance_id)

    raise RuntimeError(
        "{} found no node that is not already a primary for instance {} "
        "among {} nodes".format(self, instance_id, total_nodes))
65107

66108
# overridden method of PrimaryDecider
67109
def start_election_for_instance(self, instance_id):

0 commit comments

Comments
 (0)