@@ -83,6 +83,7 @@ public enum TabletStatus {
83
83
COLOCATE_MISMATCH , // replicas do not all locate in right colocate backends set.
84
84
COLOCATE_REDUNDANT , // replicas match the colocate backends set, but redundant.
85
85
NEED_FURTHER_REPAIR , // one of replicas need a definite repair.
86
+ DISK_MIGRATION , // The disk where the replica is located is decommissioned.
86
87
}
87
88
88
89
// Most read only accesses to replicas should acquire db lock, to prevent
@@ -586,7 +587,8 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
586
587
587
588
int alive = 0 ;
588
589
int aliveAndVersionComplete = 0 ;
589
- int stable = 0 ;
590
+ int backendStable = 0 ;
591
+ int diskStable = 0 ;
590
592
591
593
Replica needFurtherRepairReplica = null ;
592
594
Set <String > hosts = Sets .newHashSet ();
@@ -617,7 +619,13 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
617
619
// this replica is alive, version complete, but backend is not available
618
620
continue ;
619
621
}
620
- stable ++;
622
+ backendStable ++;
623
+
624
+ if (backend .isDiskDecommissioned (replica .getPathHash ())) {
625
+ // disk in decommission state
626
+ continue ;
627
+ }
628
+ diskStable ++;
621
629
}
622
630
623
631
// 1. alive replicas are not enough
@@ -628,7 +636,7 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
628
636
if (needRecoverWithEmptyTablet (systemInfoService )) {
629
637
LOG .info ("need to forcefully recover with empty tablet for {}, replica info:{}" ,
630
638
id , getReplicaInfos ());
631
- return createRedundantSchedCtx (TabletStatus .FORCE_REDUNDANT , TabletSchedCtx . Priority .VERY_HIGH ,
639
+ return createRedundantSchedCtx (TabletStatus .FORCE_REDUNDANT , Priority .VERY_HIGH ,
632
640
needFurtherRepairReplica );
633
641
}
634
642
@@ -644,7 +652,7 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
644
652
// at least one backend for new replica.
645
653
// 4. replicationNum > 1: if replication num is set to 1, do not delete any replica, for safety reason
646
654
// For example: 3 replica, 3 be, one set bad, we need to forcefully delete one first
647
- return createRedundantSchedCtx (TabletStatus .FORCE_REDUNDANT , TabletSchedCtx . Priority .VERY_HIGH ,
655
+ return createRedundantSchedCtx (TabletStatus .FORCE_REDUNDANT , Priority .VERY_HIGH ,
648
656
needFurtherRepairReplica );
649
657
} else {
650
658
List <Long > availableBEs = systemInfoService .getAvailableBackendIds ();
@@ -654,26 +662,26 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
654
662
// of load task won't be blocked either.
655
663
if (availableBEs .size () > alive ) {
656
664
if (alive < (replicationNum / 2 ) + 1 ) {
657
- return Pair .create (TabletStatus .REPLICA_MISSING , TabletSchedCtx . Priority .HIGH );
665
+ return Pair .create (TabletStatus .REPLICA_MISSING , Priority .HIGH );
658
666
} else if (alive < replicationNum ) {
659
- return Pair .create (TabletStatus .REPLICA_MISSING , TabletSchedCtx . Priority .NORMAL );
667
+ return Pair .create (TabletStatus .REPLICA_MISSING , Priority .NORMAL );
660
668
}
661
669
}
662
670
}
663
671
664
672
// 2. version complete replicas are not enough
665
673
if (aliveAndVersionComplete < (replicationNum / 2 ) + 1 ) {
666
- return Pair .create (TabletStatus .VERSION_INCOMPLETE , TabletSchedCtx . Priority .HIGH );
674
+ return Pair .create (TabletStatus .VERSION_INCOMPLETE , Priority .HIGH );
667
675
} else if (aliveAndVersionComplete < replicationNum ) {
668
- return Pair .create (TabletStatus .VERSION_INCOMPLETE , TabletSchedCtx . Priority .NORMAL );
676
+ return Pair .create (TabletStatus .VERSION_INCOMPLETE , Priority .NORMAL );
669
677
} else if (aliveAndVersionComplete > replicationNum ) {
670
678
// we set REDUNDANT as VERY_HIGH, because deleting redundant replicas can free the space quickly.
671
- return createRedundantSchedCtx (TabletStatus .REDUNDANT , TabletSchedCtx . Priority .VERY_HIGH ,
679
+ return createRedundantSchedCtx (TabletStatus .REDUNDANT , Priority .VERY_HIGH ,
672
680
needFurtherRepairReplica );
673
681
}
674
682
675
683
// 3. replica is under relocating
676
- if (stable < replicationNum ) {
684
+ if (backendStable < replicationNum ) {
677
685
List <Long > replicaBeIds = replicas .stream ()
678
686
.map (Replica ::getBackendId ).collect (Collectors .toList ());
679
687
List <Long > availableBeIds = aliveBeIdsInCluster .stream ()
@@ -683,25 +691,30 @@ private Pair<TabletStatus, TabletSchedCtx.Priority> getHealthStatusWithPriorityU
683
691
&& availableBeIds .size () >= replicationNum
684
692
&& replicationNum > 1 ) { // Doesn't have any BE that can be chosen to create a new replica
685
693
return createRedundantSchedCtx (TabletStatus .FORCE_REDUNDANT ,
686
- stable < (replicationNum / 2 ) + 1 ? TabletSchedCtx . Priority .NORMAL :
687
- TabletSchedCtx . Priority .LOW , needFurtherRepairReplica );
694
+ backendStable < (replicationNum / 2 ) + 1 ? Priority .NORMAL :
695
+ Priority .LOW , needFurtherRepairReplica );
688
696
}
689
- if (stable < (replicationNum / 2 ) + 1 ) {
690
- return Pair .create (TabletStatus .REPLICA_RELOCATING , TabletSchedCtx . Priority .NORMAL );
697
+ if (backendStable < (replicationNum / 2 ) + 1 ) {
698
+ return Pair .create (TabletStatus .REPLICA_RELOCATING , Priority .NORMAL );
691
699
} else {
692
700
return Pair .create (TabletStatus .REPLICA_RELOCATING , Priority .LOW );
693
701
}
694
702
}
695
703
696
- // 4. replica redundant
704
+ // 4. disk decommission
705
+ if (diskStable < replicationNum ) {
706
+ return Pair .create (TabletStatus .DISK_MIGRATION , Priority .NORMAL );
707
+ }
708
+
709
+ // 5. replica redundant
697
710
if (replicas .size () > replicationNum ) {
698
711
// we set REDUNDANT as VERY_HIGH, because deleting redundant replicas can free the space quickly.
699
- return createRedundantSchedCtx (TabletStatus .REDUNDANT , TabletSchedCtx . Priority .VERY_HIGH ,
712
+ return createRedundantSchedCtx (TabletStatus .REDUNDANT , Priority .VERY_HIGH ,
700
713
needFurtherRepairReplica );
701
714
}
702
715
703
- // 5 . healthy
704
- return Pair .create (TabletStatus .HEALTHY , TabletSchedCtx . Priority .NORMAL );
716
+ // 6 . healthy
717
+ return Pair .create (TabletStatus .HEALTHY , Priority .NORMAL );
705
718
}
706
719
707
720
public TabletStatus getColocateHealthStatus (long visibleVersion ,
@@ -752,6 +765,7 @@ && containsAnyHighPrioBackend(replicaBackendIds, Config.tablet_sched_colocate_ba
752
765
}
753
766
}
754
767
768
+ int diskStableCnt = 0 ;
755
769
// 2. check version completeness
756
770
for (Replica replica : replicas ) {
757
771
// do not check the replica that is not in the colocate backend set,
@@ -771,9 +785,19 @@ && containsAnyHighPrioBackend(replicaBackendIds, Config.tablet_sched_colocate_ba
771
785
// this replica is alive but version incomplete
772
786
return TabletStatus .VERSION_INCOMPLETE ;
773
787
}
788
+
789
+ Backend backend = GlobalStateMgr .getCurrentSystemInfo ().getBackend (replica .getBackendId ());
790
+ if (backend != null && !backend .isDiskDecommissioned (replica .getPathHash ())) {
791
+ diskStableCnt ++;
792
+ }
793
+ }
794
+
795
+ // 3. check disk decommission
796
+ if (diskStableCnt < replicationNum ) {
797
+ return TabletStatus .DISK_MIGRATION ;
774
798
}
775
799
776
- // 3 . check redundant
800
+ // 4 . check redundant
777
801
if (replicas .size () > replicationNum ) {
778
802
return TabletStatus .COLOCATE_REDUNDANT ;
779
803
}
0 commit comments