Skip to content

Commit 885c51c

Browse files
sriram-30sajmera-pensando
authored and committed
CR change during Upgrade when non-upgrade related CR changes
1 parent 8af89e3 commit 885c51c

File tree

2 files changed

+28
-21
lines changed

2 files changed

+28
-21
lines changed

internal/controllers/mock_upgrademgr.go

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/controllers/upgrademgr.go

Lines changed: 23 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -131,13 +131,13 @@ func (n *upgradeMgr) HandleUpgrade(ctx context.Context, deviceConfig *amdv1alpha
131131
// trigger reboot only for nodes which are in UpgradeStarted but haven't rebooted yet
132132
if nodeObj.Status.NodeInfo.BootID == moduleStatus.BootId {
133133
log.FromContext(ctx).Info(fmt.Sprintf("Node: %v: Reboot is required for driver upgrade, triggering node reboot", nodeName))
134-
n.helper.handleNodeReboot(ctx, nodeObj, deviceConfig)
134+
n.helper.handleNodeReboot(ctx, nodeObj, *deviceConfig)
135135
// for nodes which are in UpgradeStarted but already rebooted. Schedule the reboot pod deletion
136136
} else {
137137
currentBootID := nodeObj.Status.NodeInfo.BootID
138138
n.helper.setBootID(nodeObj.Name, currentBootID)
139139
log.FromContext(ctx).Info(fmt.Sprintf("Node: %v: Node already rebooted, scheduling reboot pod deletion", nodeName))
140-
go n.helper.deleteRebootPod(ctx, nodeName, deviceConfig, false, deviceConfig.Generation)
140+
go n.helper.deleteRebootPod(ctx, nodeName, *deviceConfig, false)
141141
}
142142
}
143143
} else {
@@ -155,7 +155,7 @@ func (n *upgradeMgr) HandleUpgrade(ctx context.Context, deviceConfig *amdv1alpha
155155
n.helper.setNodeStatus(ctx, nodeName, amdv1alpha1.UpgradeStateInProgress)
156156
} else {
157157
n.helper.setNodeStatus(ctx, nodeName, moduleStatus.Status)
158-
go n.helper.deleteRebootPod(ctx, nodeName, deviceConfig, false, deviceConfig.Generation)
158+
go n.helper.deleteRebootPod(ctx, nodeName, *deviceConfig, false)
159159
}
160160
} else {
161161
n.helper.setNodeStatus(ctx, nodeName, moduleStatus.Status)
@@ -276,7 +276,7 @@ func (n *upgradeMgr) HandleDelete(ctx context.Context, deviceConfig *amdv1alpha1
276276
if err := n.helper.cordonOrUncordonNode(ctx, deviceConfig, &nodeList.Items[i], false); err != nil {
277277
log.FromContext(ctx).Error(err, fmt.Sprintf("Taint Removal failed for %v during deviceconfig delete:%v", &nodeList.Items[i].Name, err))
278278
}
279-
n.helper.deleteRebootPod(ctx, nodeList.Items[i].Name, deviceConfig, true, deviceConfig.Generation)
279+
n.helper.deleteRebootPod(ctx, nodeList.Items[i].Name, *deviceConfig, true)
280280
}
281281
n.helper.clearNodeStatus()
282282
return
@@ -322,8 +322,8 @@ type upgradeMgrHelperAPI interface {
322322
getPodsToDrainOrDelete(ctx context.Context, deviceConfig *amdv1alpha1.DeviceConfig, node *v1.Node) (newPods []v1.Pod, err error)
323323
deleteOrDrainPods(ctx context.Context, deviceConfig *amdv1alpha1.DeviceConfig, node *v1.Node) error
324324
updateModuleVersionOnNode(ctx context.Context, deviceConfig *amdv1alpha1.DeviceConfig, node *v1.Node) error
325-
handleNodeReboot(ctx context.Context, node *v1.Node, dc *amdv1alpha1.DeviceConfig)
326-
deleteRebootPod(ctx context.Context, nodeName string, dc *amdv1alpha1.DeviceConfig, force bool, genId int64)
325+
handleNodeReboot(ctx context.Context, node *v1.Node, dc amdv1alpha1.DeviceConfig)
326+
deleteRebootPod(ctx context.Context, nodeName string, dc amdv1alpha1.DeviceConfig, force bool)
327327
getRebootPod(nodeName string, dc *amdv1alpha1.DeviceConfig) *v1.Pod
328328

329329
// getters and setters
@@ -817,7 +817,7 @@ func (h *upgradeMgrHelper) handleNodeUpgrade(ctx context.Context, deviceConfig a
817817

818818
// Reboot the node if required
819819
if deviceConfig.Spec.Driver.UpgradePolicy.RebootRequired != nil && *deviceConfig.Spec.Driver.UpgradePolicy.RebootRequired {
820-
h.handleNodeReboot(ctx, &node, &deviceConfig)
820+
h.handleNodeReboot(ctx, &node, deviceConfig)
821821
} else {
822822
// Update expected module version on the node
823823
if err := h.updateModuleVersionOnNode(ctx, &deviceConfig, &node); err != nil {
@@ -956,9 +956,9 @@ func (h *upgradeMgrHelper) updateModuleVersionOnNode(ctx context.Context, device
956956
return nil
957957
}
958958

959-
func (h *upgradeMgrHelper) handleNodeReboot(ctx context.Context, node *v1.Node, dc *amdv1alpha1.DeviceConfig) {
959+
func (h *upgradeMgrHelper) handleNodeReboot(ctx context.Context, node *v1.Node, dc amdv1alpha1.DeviceConfig) {
960960
logger := log.FromContext(ctx)
961-
rebootPod := h.getRebootPod(node.Name, dc)
961+
rebootPod := h.getRebootPod(node.Name, &dc)
962962
// Delete the existing pod if present
963963
pod := &v1.Pod{}
964964
if err := h.client.Get(ctx, types.NamespacedName{Namespace: dc.Namespace, Name: rebootPod.Name}, pod); err == nil {
@@ -970,7 +970,7 @@ func (h *upgradeMgrHelper) handleNodeReboot(ctx context.Context, node *v1.Node,
970970
}
971971

972972
// Update expected module version on the node
973-
if err := h.updateModuleVersionOnNode(ctx, dc, node); err != nil {
973+
if err := h.updateModuleVersionOnNode(ctx, &dc, node); err != nil {
974974
logger.Error(err, fmt.Sprintf("Node: %v State: %v UpgradeFailed with Error: %v", node.Name, h.getNodeStatus(node.Name), err))
975975
// Mark the state as failed
976976
h.setNodeStatus(ctx, node.Name, amdv1alpha1.UpgradeStateFailed)
@@ -1037,15 +1037,22 @@ func (h *upgradeMgrHelper) handleNodeReboot(ctx context.Context, node *v1.Node,
10371037
// Wait for the rebootPod to get spawned
10381038
waitForRebootPod()
10391039

1040-
h.setNodeStatus(ctx, node.Name, amdv1alpha1.UpgradeStateRebootInProgress)
1041-
h.deleteRebootPod(ctx, node.Name, dc, false, dc.Generation)
1040+
fetchedDeviceConfig := &amdv1alpha1.DeviceConfig{}
1041+
if err := h.client.Get(ctx, types.NamespacedName{Namespace: dc.Namespace, Name: dc.Name}, fetchedDeviceConfig); err != nil {
1042+
logger.Error(err, "Failed to fetch DeviceConfig from API server")
1043+
return
1044+
}
1045+
if fetchedDeviceConfig.Spec.Driver.Version == dc.Spec.Driver.Version {
1046+
h.setNodeStatus(ctx, node.Name, amdv1alpha1.UpgradeStateRebootInProgress)
1047+
}
1048+
h.deleteRebootPod(ctx, node.Name, dc, false)
10421049

10431050
}
10441051

1045-
func (h *upgradeMgrHelper) deleteRebootPod(ctx context.Context, nodeName string, dc *amdv1alpha1.DeviceConfig, force bool, genId int64) {
1052+
func (h *upgradeMgrHelper) deleteRebootPod(ctx context.Context, nodeName string, dc amdv1alpha1.DeviceConfig, force bool) {
10461053

10471054
logger := log.FromContext(ctx)
1048-
rebootPod := h.getRebootPod(nodeName, dc)
1055+
rebootPod := h.getRebootPod(nodeName, &dc)
10491056
fetchedDeviceConfig := &amdv1alpha1.DeviceConfig{}
10501057
pod := &v1.Pod{}
10511058
if err := h.client.Get(ctx, types.NamespacedName{Namespace: dc.Namespace, Name: rebootPod.Name}, pod); err != nil {
@@ -1077,7 +1084,7 @@ func (h *upgradeMgrHelper) deleteRebootPod(ctx context.Context, nodeName string,
10771084
if err := h.client.Delete(ctx, rebootPod); err != nil {
10781085
logger.Error(err, fmt.Sprintf("Node: %v State: %v RebootPod Delete failed with Error: %v", nodeName, h.getNodeStatus(nodeName), err))
10791086
}
1080-
if fetchedDeviceConfig.Generation == genId {
1087+
if fetchedDeviceConfig.Spec.Driver.Version == dc.Spec.Driver.Version {
10811088
logger.Info("Setting to In-Progress after deleting reboot pod")
10821089
h.setNodeStatus(ctx, nodeName, amdv1alpha1.UpgradeStateInProgress)
10831090
}
@@ -1100,7 +1107,7 @@ func (h *upgradeMgrHelper) deleteRebootPod(ctx context.Context, nodeName string,
11001107
logger.Error(err, "Failed to fetch DeviceConfig from API server")
11011108
return
11021109
}
1103-
if fetchedDeviceConfig.Generation == genId {
1110+
if fetchedDeviceConfig.Spec.Driver.Version == dc.Spec.Driver.Version {
11041111
logger.Info("Setting to In-Progress after deleting reboot pod eventually")
11051112
h.setNodeStatus(ctx, nodeName, amdv1alpha1.UpgradeStateInProgress)
11061113
}

0 commit comments

Comments
 (0)