Skip to content

Commit bb7f39a

Browse files
committed
feat: cleanup orphaned instances while evicting node
Signed-off-by: Raphanus Lo <[email protected]>
1 parent b90da50 commit bb7f39a

File tree

2 files changed

+43
-6
lines changed

2 files changed

+43
-6
lines changed

controller/instance_manager_controller.go

+12-1
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,17 @@ func (imc *InstanceManagerController) deleteInstanceManagerPDB(im *longhorn.Inst
10041004
}
10051005

10061006
func (imc *InstanceManagerController) syncOrphans(im *longhorn.InstanceManager) error {
1007+
var isNodeDeletedOrEvictionRequested bool
1008+
node, err := imc.ds.GetNode(im.Spec.NodeID)
1009+
switch {
1010+
case datastore.ErrorIsNotFound(err):
1011+
isNodeDeletedOrEvictionRequested = true
1012+
case err != nil:
1013+
return errors.Wrapf(err, "failed to check node eviction status on %v", im.Spec.NodeID)
1014+
default:
1015+
isNodeDeletedOrEvictionRequested = node.Spec.EvictionRequested
1016+
}
1017+
10071018
// Instances live inside the instance manager pod.
10081019
// Remove the instance orphan CRs when the instance manager pod is deleted or not running.
10091020
var isInstanceManagerPodDeletedOrNotRunning bool
@@ -1025,7 +1036,7 @@ func (imc *InstanceManagerController) syncOrphans(im *longhorn.InstanceManager)
10251036
// Remove the corresponding orphan CRs; they will be resynced after the owner instance manager gets back.
10261037
instanceManagerOwnershipChanged := im.Spec.NodeID != imc.controllerID
10271038

1028-
isInstanceManagerTerminating := isInstanceManagerPodDeletedOrNotRunning || instanceManagerOwnershipChanged
1039+
isInstanceManagerTerminating := isNodeDeletedOrEvictionRequested || isInstanceManagerPodDeletedOrNotRunning || instanceManagerOwnershipChanged
10291040
return imc.deleteOrphans(im, isInstanceManagerTerminating)
10301041
}
10311042

controller/node_controller.go

+31-5
Original file line numberDiff line numberDiff line change
@@ -1305,15 +1305,15 @@ func (nc *NodeController) syncOrphans(node *longhorn.Node, collectedDataInfo map
13051305
newOrphanedReplicaDataStores, missingOrphanedReplicaDataStores :=
13061306
nc.getNewAndMissingOrphanedReplicaDataStores(diskName, diskInfo.DiskUUID, diskInfo.Path, diskInfo.OrphanedReplicaDataStores)
13071307

1308-
if err := nc.createOrphans(node, diskName, diskInfo, newOrphanedReplicaDataStores); err != nil {
1308+
if err := nc.createDataOrphans(node, diskName, diskInfo, newOrphanedReplicaDataStores); err != nil {
13091309
return errors.Wrapf(err, "failed to create orphans for disk %v", diskName)
13101310
}
1311-
if err := nc.deleteOrphans(node, diskName, diskInfo, missingOrphanedReplicaDataStores); err != nil {
1311+
if err := nc.deleteDataOrphans(node, diskName, diskInfo, missingOrphanedReplicaDataStores); err != nil {
13121312
return errors.Wrapf(err, "failed to delete orphans for disk %v", diskName)
13131313
}
13141314
}
13151315

1316-
return nil
1316+
return nc.deleteInstanceOrphans(node)
13171317
}
13181318

13191319
func (nc *NodeController) getNewAndMissingOrphanedReplicaDataStores(diskName, diskUUID, diskPath string, replicaDataStores map[string]string) (map[string]string, map[string]string) {
@@ -1356,7 +1356,33 @@ func (nc *NodeController) getNewAndMissingOrphanedReplicaDataStores(diskName, di
13561356
return newOrphanedReplicaDataStores, missingOrphanedReplicaDataStores
13571357
}
13581358

1359-
func (nc *NodeController) deleteOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, missingOrphanedReplicaDataStores map[string]string) error {
1359+
func (nc *NodeController) deleteInstanceOrphans(node *longhorn.Node) error {
1360+
if !node.Spec.EvictionRequested {
1361+
return nil
1362+
}
1363+
nc.logger.Debugf("Deleting orphaned instances to evict node %v", node.Name)
1364+
1365+
orphans, err := nc.ds.ListOrphansByNodeRO(node.Name)
1366+
if err != nil {
1367+
return errors.Wrapf(err, "failed to list orphans to evict node %v", node.Name)
1368+
}
1369+
1370+
multiError := util.NewMultiError()
1371+
for _, orphan := range orphans {
1372+
switch orphan.Spec.Type {
1373+
case longhorn.OrphanTypeEngineInstance, longhorn.OrphanTypeReplicaInstance:
1374+
if err := nc.ds.DeleteOrphan(orphan.Name); err != nil {
1375+
multiError.Append(util.NewMultiError(fmt.Sprintf("%v: %v", orphan.Name, err)))
1376+
}
1377+
}
1378+
}
1379+
if len(multiError) > 0 {
1380+
return fmt.Errorf("node controller failed to delete instance orphan CR: %v", multiError.Join())
1381+
}
1382+
return nil
1383+
}
1384+
1385+
func (nc *NodeController) deleteDataOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, missingOrphanedReplicaDataStores map[string]string) error {
13601386
autoDeletionResourceTypes, err := nc.ds.GetSettingOrphanResourceAutoDeletion()
13611387
if err != nil {
13621388
return errors.Wrapf(err, "failed to get %v setting", types.SettingNameOrphanResourceAutoDeletion)
@@ -1394,7 +1420,7 @@ func (nc *NodeController) deleteOrphans(node *longhorn.Node, diskName string, di
13941420
return nil
13951421
}
13961422

1397-
func (nc *NodeController) createOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, newOrphanedReplicaDataStores map[string]string) error {
1423+
func (nc *NodeController) createDataOrphans(node *longhorn.Node, diskName string, diskInfo *monitor.CollectedDiskInfo, newOrphanedReplicaDataStores map[string]string) error {
13981424
for dataStore := range newOrphanedReplicaDataStores {
13991425
if err := nc.createOrphan(node, diskName, dataStore, diskInfo); err != nil && !apierrors.IsAlreadyExists(err) {
14001426
return errors.Wrapf(err, "failed to create orphan for orphaned replica data store %v in disk %v on node %v",

0 commit comments

Comments
 (0)