@@ -113,6 +113,7 @@ public class ServiceUnitStateChannelImpl implements ServiceUnitStateChannel {
113
113
private static final long MAX_OWNED_BUNDLE_COUNT_DELAY_TIME_IN_MILLIS = 10 * 60 * 1000 ;
114
114
private static final long MAX_BROKER_HEALTH_CHECK_RETRY = 3 ;
115
115
private static final long MAX_BROKER_HEALTH_CHECK_DELAY_IN_MILLIS = 1000 ;
116
+
116
117
private final PulsarService pulsar ;
117
118
private final ServiceConfiguration config ;
118
119
private final Schema <ServiceUnitStateData > schema ;
@@ -1379,58 +1380,53 @@ private void scheduleCleanup(String broker, long delayInSecs) {
1379
1380
}
1380
1381
1381
1382
1382
- private void overrideOwnership (String serviceUnit , ServiceUnitStateData orphanData , String inactiveBroker ,
1383
- boolean gracefully ) {
1383
+ private CompletableFuture <Void > overrideOwnership (String serviceUnit ,
1384
+ ServiceUnitStateData orphanData ,
1385
+ String inactiveBroker ,
1386
+ boolean gracefully ) {
1384
1387
1385
1388
final var version = getNextVersionId (orphanData );
1386
- try {
1387
- selectBroker (serviceUnit , inactiveBroker )
1388
- .thenApply (selectedOpt ->
1389
- selectedOpt .map (selectedBroker -> {
1390
- if (orphanData .state () == Splitting ) {
1391
- // if Splitting, set orphan.dstBroker() as dst to indicate where it was from.
1392
- // (The src broker runs handleSplitEvent.)
1393
- return new ServiceUnitStateData (Splitting , orphanData .dstBroker (), selectedBroker ,
1394
- Map .copyOf (orphanData .splitServiceUnitToDestBroker ()), true , version );
1395
- } else if (orphanData .state () == Owned ) {
1396
- // if Owned, set orphan.dstBroker() as source to clean it up in case it is still
1397
- // alive.
1398
- var sourceBroker = selectedBroker .equals (orphanData .dstBroker ()) ? null :
1399
- orphanData .dstBroker ();
1400
- // if gracefully, try to release ownership first
1401
- var overrideState = gracefully && sourceBroker != null ? Releasing : Owned ;
1402
- return new ServiceUnitStateData (
1403
- overrideState ,
1404
- selectedBroker ,
1405
- sourceBroker ,
1406
- true , version );
1407
- } else {
1408
- // if Assigning or Releasing, set orphan.sourceBroker() as source
1409
- // to clean it up in case it is still alive.
1410
- return new ServiceUnitStateData (Owned , selectedBroker ,
1411
- selectedBroker .equals (orphanData .sourceBroker ()) ? null :
1412
- orphanData .sourceBroker (),
1413
- true , version );
1414
- }
1415
- // If no broker is selected(available), free the ownership.
1416
- // If the previous owner is still active, it will close the bundle(topic) ownership.
1417
- }).orElseGet (() -> new ServiceUnitStateData (Free , null ,
1418
- orphanData .state () == Owned ? orphanData .dstBroker () : orphanData .sourceBroker (),
1419
- true ,
1420
- version )))
1421
- .thenCompose (override -> {
1422
- log .info (
1423
- "Overriding inactiveBroker:{}, ownership serviceUnit:{} from orphanData:{} to "
1424
- + "overrideData:{}" ,
1425
- inactiveBroker , serviceUnit , orphanData , override );
1426
- return publishOverrideEventAsync (serviceUnit , override );
1427
- }).get (config .getMetadataStoreOperationTimeoutSeconds (), SECONDS );
1428
- } catch (Throwable e ) {
1429
- log .error (
1430
- "Failed to override inactiveBroker:{} ownership serviceUnit:{} orphanData:{}. "
1431
- + "totalCleanupErrorCnt:{}" ,
1432
- inactiveBroker , serviceUnit , orphanData , totalCleanupErrorCnt .incrementAndGet (), e );
1433
- }
1389
+ return selectBroker (serviceUnit , inactiveBroker )
1390
+ .thenApply (selectedOpt ->
1391
+ selectedOpt .map (selectedBroker -> {
1392
+ if (orphanData .state () == Splitting ) {
1393
+ // if Splitting, set orphan.dstBroker() as dst to indicate where it was from.
1394
+ // (The src broker runs handleSplitEvent.)
1395
+ return new ServiceUnitStateData (Splitting , orphanData .dstBroker (), selectedBroker ,
1396
+ Map .copyOf (orphanData .splitServiceUnitToDestBroker ()), true , version );
1397
+ } else if (orphanData .state () == Owned ) {
1398
+ // if Owned, set orphan.dstBroker() as source to clean it up in case it is still
1399
+ // alive.
1400
+ var sourceBroker = selectedBroker .equals (orphanData .dstBroker ()) ? null :
1401
+ orphanData .dstBroker ();
1402
+ // if gracefully, try to release ownership first
1403
+ var overrideState = gracefully && sourceBroker != null ? Releasing : Owned ;
1404
+ return new ServiceUnitStateData (
1405
+ overrideState ,
1406
+ selectedBroker ,
1407
+ sourceBroker ,
1408
+ true , version );
1409
+ } else {
1410
+ // if Assigning or Releasing, set orphan.sourceBroker() as source
1411
+ // to clean it up in case it is still alive.
1412
+ return new ServiceUnitStateData (Owned , selectedBroker ,
1413
+ selectedBroker .equals (orphanData .sourceBroker ()) ? null :
1414
+ orphanData .sourceBroker (),
1415
+ true , version );
1416
+ }
1417
+ // If no broker is selected(available), free the ownership.
1418
+ // If the previous owner is still active, it will close the bundle(topic) ownership.
1419
+ }).orElseGet (() -> new ServiceUnitStateData (Free , null ,
1420
+ orphanData .state () == Owned ? orphanData .dstBroker () : orphanData .sourceBroker (),
1421
+ true ,
1422
+ version )))
1423
+ .thenCompose (override -> {
1424
+ log .info (
1425
+ "Overriding inactiveBroker:{}, ownership serviceUnit:{} from orphanData:{} to "
1426
+ + "overrideData:{}" ,
1427
+ inactiveBroker , serviceUnit , orphanData , override );
1428
+ return publishOverrideEventAsync (serviceUnit , override );
1429
+ });
1434
1430
}
1435
1431
1436
1432
private void waitForCleanups (String broker , boolean excludeSystemTopics , int maxWaitTimeInMillis ) {
@@ -1505,7 +1501,21 @@ private void doHealthCheckBrokerAsyncWithRetries(String brokerId, int retry, Com
1505
1501
}
1506
1502
}
1507
1503
1508
- private synchronized void doCleanup (String broker , boolean gracefully ) {
1504
+ private void tryWaitForOverrides (List <CompletableFuture <Void >> overrideFutures , boolean force ) {
1505
+ if (overrideFutures .size () >= config .getLoadBalancerServiceUnitStateMaxConcurrentOverrides () || force ) {
1506
+ try {
1507
+ FutureUtil .waitForAll (overrideFutures )
1508
+ .get (config .getMetadataStoreOperationTimeoutSeconds (), SECONDS );
1509
+ } catch (Throwable e ) {
1510
+ log .error ("Failed to override ownership: totalCleanupErrorCnt:{}" ,
1511
+ totalCleanupErrorCnt .incrementAndGet (), e );
1512
+ } finally {
1513
+ overrideFutures .clear ();
1514
+ }
1515
+ }
1516
+ }
1517
+
1518
+ private void doCleanup (String broker , boolean gracefully ) {
1509
1519
try {
1510
1520
if (getChannelOwnerAsync ().get (MAX_CHANNEL_OWNER_ELECTION_WAITING_TIME_IN_SECS , TimeUnit .SECONDS )
1511
1521
.isEmpty ()) {
@@ -1549,8 +1559,11 @@ private synchronized void doCleanup(String broker, boolean gracefully) {
1549
1559
} catch (Exception e ) {
1550
1560
log .error ("Failed to flush" , e );
1551
1561
}
1562
+ List <CompletableFuture <Void >> overrideFutures = new ArrayList <>();
1552
1563
Map <String , ServiceUnitStateData > orphanSystemServiceUnits = new HashMap <>();
1553
- for (var etr : tableview .entrySet ()) {
1564
+ var iter = tableview .entrySet ().iterator ();
1565
+ while (iter .hasNext ()) {
1566
+ var etr = iter .next ();
1554
1567
var stateData = etr .getValue ();
1555
1568
var serviceUnit = etr .getKey ();
1556
1569
var state = state (stateData );
@@ -1559,7 +1572,8 @@ private synchronized void doCleanup(String broker, boolean gracefully) {
1559
1572
if (serviceUnit .startsWith (SYSTEM_NAMESPACE .toString ())) {
1560
1573
orphanSystemServiceUnits .put (serviceUnit , stateData );
1561
1574
} else {
1562
- overrideOwnership (serviceUnit , stateData , broker , gracefully );
1575
+ overrideFutures .add (overrideOwnership (serviceUnit , stateData , broker , gracefully ));
1576
+ tryWaitForOverrides (overrideFutures , !iter .hasNext ());
1563
1577
}
1564
1578
orphanServiceUnitCleanupCnt ++;
1565
1579
}
@@ -1584,9 +1598,14 @@ private synchronized void doCleanup(String broker, boolean gracefully) {
1584
1598
}
1585
1599
1586
1600
// clean system bundles in the end
1587
- for (var orphanSystemServiceUnit : orphanSystemServiceUnits .entrySet ()) {
1601
+ var orphanSystemServiceUnitIter = orphanSystemServiceUnits .entrySet ().iterator ();
1602
+ while (orphanSystemServiceUnitIter .hasNext ()) {
1603
+ var orphanSystemServiceUnit = iter .next ();
1588
1604
log .info ("Overriding orphan system service unit:{}" , orphanSystemServiceUnit .getKey ());
1589
- overrideOwnership (orphanSystemServiceUnit .getKey (), orphanSystemServiceUnit .getValue (), broker , gracefully );
1605
+ overrideFutures .add (
1606
+ overrideOwnership (orphanSystemServiceUnit .getKey (), orphanSystemServiceUnit .getValue (), broker ,
1607
+ gracefully ));
1608
+ tryWaitForOverrides (overrideFutures , !orphanSystemServiceUnitIter .hasNext ());
1590
1609
}
1591
1610
1592
1611
try {
@@ -1710,10 +1729,14 @@ protected void monitorOwnerships(List<String> brokers) {
1710
1729
// timedOutInFlightStateServiceUnits are the in-flight ones although their src and dst brokers are known to
1711
1730
// be active.
1712
1731
if (!timedOutInFlightStateServiceUnits .isEmpty ()) {
1713
- for (var etr : timedOutInFlightStateServiceUnits .entrySet ()) {
1732
+ List <CompletableFuture <Void >> overrideFutures = new ArrayList <>();
1733
+ var iter = timedOutInFlightStateServiceUnits .entrySet ().iterator ();
1734
+ while (iter .hasNext ()) {
1735
+ var etr = iter .next ();
1714
1736
var orphanServiceUnit = etr .getKey ();
1715
1737
var orphanData = etr .getValue ();
1716
- overrideOwnership (orphanServiceUnit , orphanData , null , false );
1738
+ overrideFutures .add (overrideOwnership (orphanServiceUnit , orphanData , null , false ));
1739
+ tryWaitForOverrides (overrideFutures , !iter .hasNext ());
1717
1740
orphanServiceUnitCleanupCnt ++;
1718
1741
}
1719
1742
}
0 commit comments