Skip to content

Commit b5a1b75

Browse files
committed
fix(orphan): finalize orphan instance CR when instance manager unavailable
Signed-off-by: Raphanus Lo <[email protected]>
1 parent 0088687 commit b5a1b75

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

controller/orphan_controller.go

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,7 @@ func (oc *OrphanController) cleanupOrphanedResource(orphan *longhorn.Orphan) (is
315315
longhorn.OrphanConditionTypeError, longhorn.ConditionStatusTrue, "", err.Error())
316316
}()
317317

318-
// Make sure if the orphan nodeID and controller ID are same.
318+
// Check whether the orphan nodeID and the controller ID are the same.
319319
// If NO, just delete the orphan resource object and don't touch the data.
320320
if orphan.Spec.NodeID != oc.controllerID {
321321
log.WithFields(logrus.Fields{
@@ -370,7 +370,7 @@ func (oc *OrphanController) cleanupOrphanedEngineInstance(orphan *longhorn.Orpha
370370
} else {
371371
spec = &engineCR.Spec.InstanceSpec
372372
}
373-
return oc.cleanupOrphanedInstance(orphan, instance, imName, longhorn.InstanceManagerTypeEngine, spec)
373+
return oc.cleanupOrphanedInstance(orphan, instance, imName, longhorn.InstanceManagerTypeEngine, spec), nil
374374
}
375375

376376
func (oc *OrphanController) cleanupOrphanedReplicaInstance(orphan *longhorn.Orphan) (isCleanupComplete bool, err error) {
@@ -394,7 +394,7 @@ func (oc *OrphanController) cleanupOrphanedReplicaInstance(orphan *longhorn.Orph
394394
} else {
395395
spec = &replicaCR.Spec.InstanceSpec
396396
}
397-
return oc.cleanupOrphanedInstance(orphan, instance, imName, longhorn.InstanceManagerTypeReplica, spec)
397+
return oc.cleanupOrphanedInstance(orphan, instance, imName, longhorn.InstanceManagerTypeReplica, spec), nil
398398
}
399399

400400
func (oc *OrphanController) extractOrphanedInstanceInfo(orphan *longhorn.Orphan) (name, instanceManager string, err error) {
@@ -418,18 +418,21 @@ func (oc *OrphanController) extractOrphanedInstanceInfo(orphan *longhorn.Orphan)
418418
return name, instanceManager, nil
419419
}
420420

421-
func (oc *OrphanController) cleanupOrphanedInstance(orphan *longhorn.Orphan, instance, imName string, imType longhorn.InstanceManagerType, instanceCRSpec *longhorn.InstanceSpec) (isCleanupComplete bool, err error) {
421+
func (oc *OrphanController) cleanupOrphanedInstance(orphan *longhorn.Orphan, instance, imName string, imType longhorn.InstanceManagerType, instanceCRSpec *longhorn.InstanceSpec) bool {
422422
if instanceCRSpec != nil && instanceCRSpec.NodeID == orphan.Spec.NodeID {
423423
oc.logger.Infof("Orphan instance %v is scheduled back to current node %v. Skip cleaning up the instance resource and finalize the orphan CR.", instance, orphan.Spec.NodeID)
424-
return true, nil
424+
return true
425425
}
426426

427+
// If the instance manager client is unavailable or fails to delete the instance, continue finalizing the orphan.
428+
// If the orphaned instance is still reachable later, the orphan will be recreated.
427429
imc, err := oc.getRunningInstanceManagerClientForOrphan(orphan, imName)
428430
if err != nil {
429-
return false, errors.Wrapf(err, "failed to get running instance manager client for orphan %v", orphan.Name)
431+
oc.logger.WithError(err).Warnf("Failed to delete orphan instance %v due to instance manager client initialization failure. Continue to finalize orphan %v", instance, orphan.Name)
432+
return false
430433
} else if imc == nil {
431434
oc.logger.WithField("orphanInstanceNode", orphan.Spec.NodeID).Warnf("No running instance manager for deleting orphan instance %v", orphan.Name)
432-
return true, nil
435+
return true
433436
}
434437
defer func() {
435438
if closeErr := imc.Close(); closeErr != nil {
@@ -438,13 +441,21 @@ func (oc *OrphanController) cleanupOrphanedInstance(orphan *longhorn.Orphan, ins
438441
}()
439442

440443
if err := oc.deleteInstance(imc, instance, imType, orphan.Spec.DataEngine); err != nil {
441-
return false, err
444+
oc.logger.WithError(err).Warnf("Failed to delete orphan instance %v due to instance manager client error. Continue to finalize orphan %v", instance, orphan.Name)
445+
return true
442446
}
443447
isCleanupComplete, cleanupErr := oc.confirmOrphanInstanceCleanup(imc, instance, imType, orphan.Spec.DataEngine)
444-
if cleanupErr == nil && !isCleanupComplete {
445-
oc.logger.Infof("Orphan instance %v cleanup in progress", instance)
448+
switch {
449+
case cleanupErr != nil:
450+
oc.logger.WithError(err).Warnf("Failed to confirm deletion of orphan instance %v due to instance manager client error. Continue to finalize orphan %v", instance, orphan.Name)
451+
return true
452+
case !isCleanupComplete:
453+
oc.logger.Infof("Orphan instance %v cleanup in progress, waiting for instance state update", instance)
454+
return false
455+
default:
456+
oc.logger.Infof("Orphan instance %v cleanup complete", instance)
457+
return true
446458
}
447-
return isCleanupComplete, cleanupErr
448459
}
449460

450461
func (oc *OrphanController) getRunningInstanceManagerClientForOrphan(orphan *longhorn.Orphan, imName string) (*engineapi.InstanceManagerClient, error) {

0 commit comments

Comments
 (0)