Skip to content

Commit 543ce5d

Browse files
committed
feat: cleanup orphaned instances while evicting node
Signed-off-by: Raphanus Lo <[email protected]>
1 parent b90da50 commit 543ce5d

File tree

1 file changed

+38
-11
lines changed

1 file changed

+38
-11
lines changed

controller/node_controller.go

+38-11
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ func (nc *NodeController) syncNode(key string) (err error) {
388388

389389
kubeNode, err := nc.ds.GetKubernetesNodeRO(name)
390390
if err != nil {
391-
if apierrors.IsNotFound(err) {
391+
if datastore.ErrorIsNotFound(err) {
392392
// Directly record condition and return. The Kubernetes node controller should delete this Longhorn node
393393
// very soon. If we continue to reconcile with a nil pointer (e.g. on a node that is being removed), we are
394394
// guaranteed to run into an exception later on anyways.
@@ -555,7 +555,7 @@ func (nc *NodeController) enqueueReplica(obj interface{}) {
555555

556556
node, err := nc.ds.GetNodeRO(replica.Spec.NodeID)
557557
if err != nil {
558-
if !apierrors.IsNotFound(err) {
558+
if !datastore.ErrorIsNotFound(err) {
559559
utilruntime.HandleError(fmt.Errorf("failed to get node %v for replica %v: %v ",
560560
replica.Spec.NodeID, replica.Name, err))
561561
}
@@ -638,7 +638,7 @@ func (nc *NodeController) enqueueKubernetesNode(obj interface{}) {
638638

639639
nodeRO, err := nc.ds.GetNodeRO(kubernetesNode.Name)
640640
if err != nil {
641-
if !apierrors.IsNotFound(err) {
641+
if !datastore.ErrorIsNotFound(err) {
642642
utilruntime.HandleError(fmt.Errorf("failed to get longhorn node %v: %v ", kubernetesNode.Name, err))
643643
}
644644
return
@@ -1165,7 +1165,7 @@ func (nc *NodeController) cleanUpBackingImagesInDisks(node *longhorn.Node) error
11651165
for _, bi := range backingImages {
11661166
log := getLoggerForBackingImage(nc.logger, bi).WithField("node", node.Name)
11671167
bids, err := nc.ds.GetBackingImageDataSource(bi.Name)
1168-
if err != nil && !apierrors.IsNotFound(err) {
1168+
if err != nil && !datastore.ErrorIsNotFound(err) {
11691169
log.WithError(err).Warn("Failed to get the backing image data source when cleaning up the images in disks")
11701170
continue
11711171
}
@@ -1305,14 +1305,18 @@ func (nc *NodeController) syncOrphans(node *longhorn.Node, collectedDataInfo map
13051305
newOrphanedReplicaDataStores, missingOrphanedReplicaDataStores :=
13061306
nc.getNewAndMissingOrphanedReplicaDataStores(diskName, diskInfo.DiskUUID, diskInfo.Path, diskInfo.OrphanedReplicaDataStores)
13071307

1308-
if err := nc.createOrphans(node, diskName, diskInfo, newOrphanedReplicaDataStores); err != nil {
1308+
if err := nc.createOrphansForReplicaDataStore(node, diskName, diskInfo, newOrphanedReplicaDataStores); err != nil {
13091309
return errors.Wrapf(err, "failed to create orphans for disk %v", diskName)
13101310
}
1311-
if err := nc.deleteOrphans(node, diskName, diskInfo, missingOrphanedReplicaDataStores); err != nil {
1311+
if err := nc.deleteOrphansForReplicaDataStore(node, diskName, diskInfo, missingOrphanedReplicaDataStores); err != nil {
13121312
return errors.Wrapf(err, "failed to delete orphans for disk %v", diskName)
13131313
}
13141314
}
13151315

1316+
if node.Spec.EvictionRequested {
1317+
return nc.deleteOrphansForEngineAndReplicaInstances(node)
1318+
}
1319+
13161320
return nil
13171321
}
13181322

@@ -1356,7 +1360,30 @@ func (nc *NodeController) getNewAndMissingOrphanedReplicaDataStores(diskName, di
13561360
return newOrphanedReplicaDataStores, missingOrphanedReplicaDataStores
13571361
}
13581362

1359-
func (nc *NodeController) deleteOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, missingOrphanedReplicaDataStores map[string]string) error {
1363+
func (nc *NodeController) deleteOrphansForEngineAndReplicaInstances(node *longhorn.Node) error {
1364+
nc.logger.Infof("Deleting orphans on evicted node %v", node.Name)
1365+
1366+
orphans, err := nc.ds.ListOrphansByNodeRO(node.Name)
1367+
if err != nil {
1368+
return errors.Wrapf(err, "failed to list orphans to evict node %v", node.Name)
1369+
}
1370+
1371+
multiError := util.NewMultiError()
1372+
for _, orphan := range orphans {
1373+
switch orphan.Spec.Type {
1374+
case longhorn.OrphanTypeEngineInstance, longhorn.OrphanTypeReplicaInstance:
1375+
if err := nc.ds.DeleteOrphan(orphan.Name); err != nil && !datastore.ErrorIsNotFound(err) {
1376+
multiError.Append(util.NewMultiError(fmt.Sprintf("%v: %v", orphan.Name, err)))
1377+
}
1378+
}
1379+
}
1380+
if len(multiError) > 0 {
1381+
return fmt.Errorf("node controller failed to delete instance orphans: %v", multiError.Join())
1382+
}
1383+
return nil
1384+
}
1385+
1386+
func (nc *NodeController) deleteOrphansForReplicaDataStore(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, missingOrphanedReplicaDataStores map[string]string) error {
13601387
autoDeletionResourceTypes, err := nc.ds.GetSettingOrphanResourceAutoDeletion()
13611388
if err != nil {
13621389
return errors.Wrapf(err, "failed to get %v setting", types.SettingNameOrphanResourceAutoDeletion)
@@ -1365,7 +1392,7 @@ func (nc *NodeController) deleteOrphans(node *longhorn.Node, diskName string, di
13651392

13661393
for dataStore := range missingOrphanedReplicaDataStores {
13671394
orphanName := types.GetOrphanChecksumNameForOrphanedDataStore(node.Name, diskName, diskInfo.Path, diskInfo.DiskUUID, dataStore)
1368-
if err := nc.ds.DeleteOrphan(orphanName); err != nil && !apierrors.IsNotFound(err) {
1395+
if err := nc.ds.DeleteOrphan(orphanName); err != nil && !datastore.ErrorIsNotFound(err) {
13691396
return errors.Wrapf(err, "failed to delete orphan %v", orphanName)
13701397
}
13711398
}
@@ -1386,15 +1413,15 @@ func (nc *NodeController) deleteOrphans(node *longhorn.Node, diskName string, di
13861413
}
13871414

13881415
if autoDeletionEnabled || dataCleanableCondition.Status == longhorn.ConditionStatusFalse {
1389-
if err := nc.ds.DeleteOrphan(orphan.Name); err != nil && !apierrors.IsNotFound(err) {
1416+
if err := nc.ds.DeleteOrphan(orphan.Name); err != nil && !datastore.ErrorIsNotFound(err) {
13901417
return errors.Wrapf(err, "failed to delete orphan %v", orphan.Name)
13911418
}
13921419
}
13931420
}
13941421
return nil
13951422
}
13961423

1397-
func (nc *NodeController) createOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, newOrphanedReplicaDataStores map[string]string) error {
1424+
func (nc *NodeController) createOrphansForReplicaDataStore(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, newOrphanedReplicaDataStores map[string]string) error {
13981425
for dataStore := range newOrphanedReplicaDataStores {
13991426
if err := nc.createOrphan(node, diskName, dataStore, diskInfo); err != nil && !apierrors.IsAlreadyExists(err) {
14001427
return errors.Wrapf(err, "failed to create orphan for orphaned replica data store %v in disk %v on node %v",
@@ -1408,7 +1435,7 @@ func (nc *NodeController) createOrphan(node *longhorn.Node, diskName, replicaDat
14081435
name := types.GetOrphanChecksumNameForOrphanedDataStore(node.Name, diskName, diskInfo.Path, diskInfo.DiskUUID, replicaDataStore)
14091436

14101437
_, err := nc.ds.GetOrphanRO(name)
1411-
if err == nil || (err != nil && !apierrors.IsNotFound(err)) {
1438+
if err == nil || (err != nil && !datastore.ErrorIsNotFound(err)) {
14121439
return err
14131440
}
14141441

0 commit comments

Comments
 (0)