@@ -17,6 +17,8 @@ package engine
17
17
import (
18
18
"context"
19
19
"fmt"
20
+ "github.com/aws/amazon-ecs-agent/agent/logger"
21
+ "github.com/aws/amazon-ecs-agent/agent/logger/field"
20
22
"os"
21
23
"path/filepath"
22
24
"regexp"
@@ -102,11 +104,11 @@ const (
102
104
103
105
defaultMonitorExecAgentsInterval = 15 * time .Minute
104
106
105
- stopContainerBackoffMin = time .Second
106
- stopContainerBackoffMax = time .Second * 5
107
+ defaultStopContainerBackoffMin = time .Second
108
+ defaultStopContainerBackoffMax = time .Second * 5
107
109
stopContainerBackoffJitter = 0.2
108
110
stopContainerBackoffMultiplier = 1.3
109
- stopContainerMaxRetryCount = 3
111
+ stopContainerMaxRetryCount = 5
110
112
)
111
113
112
114
// newExponentialBackoff aliases retry.NewExponentialBackoff. stopDockerContainer
// builds its retry backoff through this variable rather than calling the
// constructor directly (presumably so tests can substitute it — TODO confirm).
var newExponentialBackoff = retry .NewExponentialBackoff
@@ -176,6 +178,8 @@ type DockerTaskEngine struct {
176
178
monitorExecAgentsTicker * time.Ticker
177
179
execCmdMgr execcmd.Manager
178
180
monitorExecAgentsInterval time.Duration
181
+ stopContainerBackoffMin time.Duration
182
+ stopContainerBackoffMax time.Duration
179
183
}
180
184
181
185
// NewDockerTaskEngine returns a created, but uninitialized, DockerTaskEngine.
@@ -214,6 +218,8 @@ func NewDockerTaskEngine(cfg *config.Config,
214
218
handleDelay : time .Sleep ,
215
219
execCmdMgr : execCmdMgr ,
216
220
monitorExecAgentsInterval : defaultMonitorExecAgentsInterval ,
221
+ stopContainerBackoffMin : defaultStopContainerBackoffMin ,
222
+ stopContainerBackoffMax : defaultStopContainerBackoffMax ,
217
223
}
218
224
219
225
dockerTaskEngine .initializeContainerStatusToTransitionFunction ()
@@ -1552,19 +1558,28 @@ func (engine *DockerTaskEngine) stopContainer(task *apitask.Task, container *api
1552
1558
// for more information, see: https://github.com/moby/moby/issues/41587
1553
1559
func (engine * DockerTaskEngine ) stopDockerContainer (dockerID , containerName string , apiTimeoutStopContainer time.Duration ) dockerapi.DockerContainerMetadata {
1554
1560
var md dockerapi.DockerContainerMetadata
1555
- backoff := newExponentialBackoff (stopContainerBackoffMin , stopContainerBackoffMax , stopContainerBackoffJitter , stopContainerBackoffMultiplier )
1561
+ backoff := newExponentialBackoff (engine . stopContainerBackoffMin , engine . stopContainerBackoffMax , stopContainerBackoffJitter , stopContainerBackoffMultiplier )
1556
1562
for i := 0 ; i < stopContainerMaxRetryCount ; i ++ {
1557
1563
md = engine .client .StopContainer (engine .ctx , dockerID , apiTimeoutStopContainer )
1558
- if md .Error == nil || md . Error . ErrorName () != dockerapi . DockerTimeoutErrorName {
1564
+ if md .Error == nil {
1559
1565
return md
1560
1566
}
1567
+ cannotStopContainerError , ok := md .Error .(cannotStopContainerError )
1568
+ if ok && ! cannotStopContainerError .IsRetriableError () {
1569
+ return md
1570
+ }
1571
+
1561
1572
if i < stopContainerMaxRetryCount - 1 {
1562
- time .Sleep (backoff .Duration ())
1573
+ retryIn := backoff .Duration ()
1574
+ logger .Warn (fmt .Sprintf ("Error stopping container, retrying in %v" , retryIn ), logger.Fields {
1575
+ field .Container : containerName ,
1576
+ field .RuntimeID : dockerID ,
1577
+ field .Error : md .Error ,
1578
+ "attempt" : i + 1 ,
1579
+ })
1580
+ time .Sleep (retryIn )
1563
1581
}
1564
1582
}
1565
- if md .Error != nil && md .Error .ErrorName () == dockerapi .DockerTimeoutErrorName {
1566
- seelog .Warnf ("Unable to stop container (%s) after %d attempts that timed out; giving up and marking container as stopped anyways" , containerName , stopContainerMaxRetryCount )
1567
- }
1568
1583
return md
1569
1584
}
1570
1585
0 commit comments