@@ -31,6 +31,12 @@ const (
31
31
SaveEndResultRetryCount = 100
32
32
SaveEndResultRetryBaseDelay = 500 * time .Millisecond
33
33
34
+ GetExecutionRetryCount = 200
35
+ GetExecutionRetryDelay = 500 * time .Millisecond
36
+
37
+ MonitorRetryCount = 10
38
+ MonitorRetryDelay = 500 * time .Millisecond
39
+
34
40
inlinedGlobalTemplateName = "<inline-global-template>"
35
41
)
36
42
@@ -227,11 +233,18 @@ func (r *runner) Monitor(ctx context.Context, organizationId string, environment
227
233
return nil
228
234
}
229
235
230
- // Load the execution TODO: retry?
231
- execution , err := r .client .GetExecution (ctx , environmentId , id )
236
+ // Load the execution
237
+ var execution * testkube.TestWorkflowExecution
238
+ err := retry (GetExecutionRetryCount , GetExecutionRetryDelay , func () (err error ) {
239
+ execution , err = r .client .GetExecution (ctx , environmentId , id )
240
+ if err != nil {
241
+ log .DefaultLogger .Warnw ("failed to get execution for monitoring, retrying..." , "id" , id , "error" , err )
242
+ }
243
+ return err
244
+ })
232
245
if err != nil {
233
246
r .watching .Delete (id )
234
- log .DefaultLogger .Errorw ("failed to get execution" , "id" , id , "error" , err )
247
+ log .DefaultLogger .Errorw ("failed to get execution for monitoring " , "id" , id , "error" , err )
235
248
return err
236
249
}
237
250
@@ -278,15 +291,16 @@ func (r *runner) execute(request executionworkertypes.ExecuteRequest) (*executio
278
291
}
279
292
res , err := r .worker .Execute (context .Background (), request )
280
293
if err == nil {
281
- // TODO: consider retry?
282
294
go func () {
283
- for i := 0 ; i < 3 ; i ++ {
295
+ err := retry ( MonitorRetryCount , MonitorRetryDelay , func () error {
284
296
err := r .Monitor (context .Background (), request .Execution .OrganizationId , request .Execution .EnvironmentId , request .Execution .Id )
285
- if err = = nil {
286
- return
297
+ if err ! = nil {
298
+ log . DefaultLogger . Warnw ( "failed to monitor execution, retrying..." , "id" , request . Execution . Id , "error" , err )
287
299
}
300
+ return err
301
+ })
302
+ if err != nil {
288
303
log .DefaultLogger .Errorw ("failed to monitor execution" , "id" , request .Execution .Id , "error" , err )
289
- time .Sleep (1 * time .Second )
290
304
}
291
305
}()
292
306
}
0 commit comments