Skip to content

Commit 3753d2c

Browse files
isubasingheJoibel
authored and committed
fix: manual retries exit handler cleanup. Fixes #14180 (#14181)
(cherry picked from commit 7ecb17f) Signed-off-by: isubasinghe <[email protected]> Signed-off-by: Alan Clucas <[email protected]>
1 parent 3993b12 commit 3753d2c

File tree

2 files changed

+268
-3
lines changed

2 files changed

+268
-3
lines changed

workflow/util/util.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,14 +1317,14 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce
13171317
}
13181318
if n.Name == onExitNodeName {
13191319
queue := list.New()
1320-
queue.PushBack(n)
1320+
queue.PushBack(&n)
13211321
for {
13221322
currNode := queue.Front()
13231323
if currNode == nil {
13241324
break
13251325
}
1326-
curr := currNode.Value.(wfv1.NodeStatus)
1327-
deletedPods, podsToDelete = deletePodNodeDuringRetryWorkflow(wf, curr, deletedPods, podsToDelete)
1326+
curr := currNode.Value.(*wfv1.NodeStatus)
1327+
deletedPods, podsToDelete = deletePodNodeDuringRetryWorkflow(wf, *curr, deletedPods, podsToDelete)
13281328
for i := range curr.Children {
13291329
child, err := wf.Status.Nodes.Get(curr.Children[i])
13301330
if err != nil {

workflow/util/util_test.go

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,3 +3911,268 @@ func TestNestedDAG(t *testing.T) {
39113911
}
39123912

39133913
}
3914+
3915+
const onExitPanic = `apiVersion: argoproj.io/v1alpha1
3916+
kind: Workflow
3917+
metadata:
3918+
annotations:
3919+
workflows.argoproj.io/pod-name-format: v2
3920+
creationTimestamp: "2025-02-11T05:25:47Z"
3921+
generateName: exit-handlers-
3922+
generation: 21
3923+
labels:
3924+
default-label: thisLabelIsFromWorkflowDefaults
3925+
workflows.argoproj.io/completed: "true"
3926+
workflows.argoproj.io/phase: Failed
3927+
name: exit-handlers-n7s4n
3928+
namespace: argo
3929+
resourceVersion: "2255"
3930+
uid: 7b2f1451-9a9a-4f66-a0d9-0364f814d948
3931+
spec:
3932+
activeDeadlineSeconds: 300
3933+
arguments: {}
3934+
entrypoint: intentional-fail
3935+
onExit: exit-handler
3936+
podSpecPatch: |
3937+
terminationGracePeriodSeconds: 3
3938+
templates:
3939+
- container:
3940+
args:
3941+
- echo intentional failure; exit 1
3942+
command:
3943+
- sh
3944+
- -c
3945+
image: alpine:latest
3946+
name: ""
3947+
resources: {}
3948+
inputs: {}
3949+
metadata: {}
3950+
name: intentional-fail
3951+
outputs: {}
3952+
- inputs: {}
3953+
metadata: {}
3954+
name: exit-handler
3955+
outputs: {}
3956+
steps:
3957+
- - arguments: {}
3958+
name: notify
3959+
template: send-email
3960+
- arguments: {}
3961+
name: celebrate
3962+
template: celebrate
3963+
when: '{{workflow.status}} == Succeeded'
3964+
- arguments: {}
3965+
name: cry
3966+
template: cry
3967+
when: '{{workflow.status}} != Succeeded'
3968+
- container:
3969+
args:
3970+
- 'echo send e-mail: {{workflow.name}} {{workflow.status}} {{workflow.duration}}.
3971+
Failed steps {{workflow.failures}}'
3972+
command:
3973+
- sh
3974+
- -c
3975+
image: alpine:latest
3976+
name: ""
3977+
resources: {}
3978+
inputs: {}
3979+
metadata: {}
3980+
name: send-email
3981+
outputs: {}
3982+
- container:
3983+
args:
3984+
- echo hooray!
3985+
command:
3986+
- sh
3987+
- -c
3988+
image: alpine:latest
3989+
name: ""
3990+
resources: {}
3991+
inputs: {}
3992+
metadata: {}
3993+
name: celebrate
3994+
outputs: {}
3995+
- container:
3996+
args:
3997+
- echo boohoo!
3998+
command:
3999+
- sh
4000+
- -c
4001+
image: alpine:latest
4002+
name: ""
4003+
resources: {}
4004+
inputs: {}
4005+
metadata: {}
4006+
name: cry
4007+
outputs: {}
4008+
workflowMetadata:
4009+
labels:
4010+
default-label: thisLabelIsFromWorkflowDefaults
4011+
status:
4012+
artifactGCStatus:
4013+
notSpecified: true
4014+
artifactRepositoryRef:
4015+
artifactRepository:
4016+
archiveLogs: true
4017+
s3:
4018+
accessKeySecret:
4019+
key: accesskey
4020+
name: my-minio-cred
4021+
bucket: my-bucket
4022+
endpoint: minio:9000
4023+
insecure: true
4024+
secretKeySecret:
4025+
key: secretkey
4026+
name: my-minio-cred
4027+
configMap: artifact-repositories
4028+
key: default-v1
4029+
namespace: argo
4030+
conditions:
4031+
- status: "False"
4032+
type: PodRunning
4033+
- status: "True"
4034+
type: Completed
4035+
finishedAt: "2025-02-11T05:31:30Z"
4036+
message: 'main: Error (exit code 1)'
4037+
nodes:
4038+
exit-handlers-n7s4n:
4039+
displayName: exit-handlers-n7s4n
4040+
finishedAt: "2025-02-11T05:31:18Z"
4041+
hostNodeName: k3d-k3s-default-server-0
4042+
id: exit-handlers-n7s4n
4043+
message: 'main: Error (exit code 1)'
4044+
name: exit-handlers-n7s4n
4045+
outputs:
4046+
artifacts:
4047+
- name: main-logs
4048+
s3:
4049+
key: exit-handlers-n7s4n/exit-handlers-n7s4n/main.log
4050+
exitCode: "1"
4051+
phase: Failed
4052+
progress: 0/1
4053+
resourcesDuration:
4054+
cpu: 0
4055+
memory: 4
4056+
startedAt: "2025-02-11T05:31:12Z"
4057+
templateName: intentional-fail
4058+
templateScope: local/exit-handlers-n7s4n
4059+
type: Pod
4060+
exit-handlers-n7s4n-134905866:
4061+
boundaryID: exit-handlers-n7s4n-1410405845
4062+
displayName: celebrate
4063+
finishedAt: "2025-02-11T05:31:21Z"
4064+
id: exit-handlers-n7s4n-134905866
4065+
message: when 'Failed == Succeeded' evaluated false
4066+
name: exit-handlers-n7s4n.onExit[0].celebrate
4067+
nodeFlag: {}
4068+
phase: Skipped
4069+
startedAt: "2025-02-11T05:31:21Z"
4070+
templateName: celebrate
4071+
templateScope: local/exit-handlers-n7s4n
4072+
type: Skipped
4073+
exit-handlers-n7s4n-975057257:
4074+
boundaryID: exit-handlers-n7s4n-1410405845
4075+
children:
4076+
- exit-handlers-n7s4n-3201878844
4077+
- exit-handlers-n7s4n-134905866
4078+
- exit-handlers-n7s4n-2699669595
4079+
displayName: '[0]'
4080+
finishedAt: "2025-02-11T05:31:30Z"
4081+
id: exit-handlers-n7s4n-975057257
4082+
name: exit-handlers-n7s4n.onExit[0]
4083+
nodeFlag: {}
4084+
phase: Succeeded
4085+
progress: 2/2
4086+
resourcesDuration:
4087+
cpu: 0
4088+
memory: 6
4089+
startedAt: "2025-02-11T05:31:21Z"
4090+
templateScope: local/exit-handlers-n7s4n
4091+
type: StepGroup
4092+
exit-handlers-n7s4n-1410405845:
4093+
children:
4094+
- exit-handlers-n7s4n-975057257
4095+
displayName: exit-handlers-n7s4n.onExit
4096+
finishedAt: "2025-02-11T05:31:30Z"
4097+
id: exit-handlers-n7s4n-1410405845
4098+
name: exit-handlers-n7s4n.onExit
4099+
nodeFlag:
4100+
hooked: true
4101+
outboundNodes:
4102+
- exit-handlers-n7s4n-3201878844
4103+
- exit-handlers-n7s4n-134905866
4104+
- exit-handlers-n7s4n-2699669595
4105+
phase: Succeeded
4106+
progress: 2/2
4107+
resourcesDuration:
4108+
cpu: 0
4109+
memory: 6
4110+
startedAt: "2025-02-11T05:31:21Z"
4111+
templateName: exit-handler
4112+
templateScope: local/exit-handlers-n7s4n
4113+
type: Steps
4114+
exit-handlers-n7s4n-2699669595:
4115+
boundaryID: exit-handlers-n7s4n-1410405845
4116+
displayName: cry
4117+
finishedAt: "2025-02-11T05:31:27Z"
4118+
hostNodeName: k3d-k3s-default-server-0
4119+
id: exit-handlers-n7s4n-2699669595
4120+
name: exit-handlers-n7s4n.onExit[0].cry
4121+
outputs:
4122+
artifacts:
4123+
- name: main-logs
4124+
s3:
4125+
key: exit-handlers-n7s4n/exit-handlers-n7s4n-cry-2699669595/main.log
4126+
exitCode: "0"
4127+
phase: Succeeded
4128+
progress: 1/1
4129+
resourcesDuration:
4130+
cpu: 0
4131+
memory: 3
4132+
startedAt: "2025-02-11T05:31:21Z"
4133+
templateName: cry
4134+
templateScope: local/exit-handlers-n7s4n
4135+
type: Pod
4136+
exit-handlers-n7s4n-3201878844:
4137+
boundaryID: exit-handlers-n7s4n-1410405845
4138+
displayName: notify
4139+
finishedAt: "2025-02-11T05:31:27Z"
4140+
hostNodeName: k3d-k3s-default-server-0
4141+
id: exit-handlers-n7s4n-3201878844
4142+
name: exit-handlers-n7s4n.onExit[0].notify
4143+
outputs:
4144+
artifacts:
4145+
- name: main-logs
4146+
s3:
4147+
key: exit-handlers-n7s4n/exit-handlers-n7s4n-send-email-3201878844/main.log
4148+
exitCode: "0"
4149+
phase: Succeeded
4150+
progress: 1/1
4151+
resourcesDuration:
4152+
cpu: 0
4153+
memory: 3
4154+
startedAt: "2025-02-11T05:31:21Z"
4155+
templateName: send-email
4156+
templateScope: local/exit-handlers-n7s4n
4157+
type: Pod
4158+
phase: Failed
4159+
progress: 2/3
4160+
resourcesDuration:
4161+
cpu: 0
4162+
memory: 10
4163+
startedAt: "2025-02-11T05:31:12Z"
4164+
taskResultsCompletionStatus:
4165+
exit-handlers-n7s4n: true
4166+
exit-handlers-n7s4n-2699669595: true
4167+
exit-handlers-n7s4n-3201878844: true
4168+
`
4169+
4170+
// TestRegressions holds regression subtests for FormulateRetryWorkflow.
//
// The "exit handler" subtest loads the onExitPanic workflow fixture and
// retries it with a node field selector that targets the onExit[0]
// StepGroup node (id exit-handlers-n7s4n-975057257). It requires that the
// call returns no error and that the resulting workflow has no entries left
// in Status.Nodes.
//
// NOTE(review): the fixture name suggests this input used to panic inside
// FormulateRetryWorkflow — confirm against issue #14180.
func TestRegressions(t *testing.T) {
4171+
t.Run("exit handler", func(t *testing.T) {
4172+
wf := wfv1.MustUnmarshalWorkflow(onExitPanic)
4173+
// The third argument (true) is presumably restartSuccessful — TODO confirm
// against the FormulateRetryWorkflow signature.
newWf, _, err := FormulateRetryWorkflow(context.Background(), wf, true, "id=exit-handlers-n7s4n-975057257", []string{})
4174+
require.NoError(t, err)
4175+
// Exit handlers cannot be retried granularly yet, so the retried
// workflow is expected to have no nodes left.
assert.Empty(t, newWf.Status.Nodes)
4177+
})
4178+
}

0 commit comments

Comments
 (0)