@@ -51,6 +51,7 @@ const (
51
51
// credentials from ACS; after the timeout it will check the credentials manager
52
52
// and start processing the task or start another round of waiting
53
53
waitForPullCredentialsTimeout = 1 * time .Minute
54
+ systemPingTimeout = 5 * time .Second
54
55
defaultTaskSteadyStatePollInterval = 5 * time .Minute
55
56
defaultTaskSteadyStatePollIntervalJitter = 30 * time .Second
56
57
transitionPollTime = 5 * time .Second
@@ -132,6 +133,7 @@ type managedTask struct {
132
133
cfg * config.Config
133
134
credentialsManager credentials.Manager
134
135
cniClient ecscni.CNIClient
136
+ dockerClient dockerapi.DockerClient
135
137
taskStopWG * utilsync.SequentialWaitGroup
136
138
137
139
acsMessages chan acsTransition
@@ -180,6 +182,7 @@ func (engine *DockerTaskEngine) newManagedTask(task *apitask.Task) *managedTask
180
182
containerChangeEventStream : engine .containerChangeEventStream ,
181
183
credentialsManager : engine .credentialsManager ,
182
184
cniClient : engine .cniClient ,
185
+ dockerClient : engine .client ,
183
186
taskStopWG : engine .taskStopGroup ,
184
187
steadyStatePollInterval : engine .taskSteadyStatePollInterval ,
185
188
steadyStatePollIntervalJitter : engine .taskSteadyStatePollIntervalJitter ,
@@ -930,16 +933,16 @@ func (mtask *managedTask) handleEventError(containerChange dockerContainerChange
930
933
func (mtask * managedTask ) handleContainerStoppedTransitionError (event dockerapi.DockerContainerChangeEvent ,
931
934
container * apicontainer.Container ,
932
935
currentKnownStatus apicontainerstatus.ContainerStatus ) bool {
933
- // If docker returned a transient error while trying to stop a container,
934
- // reset the known status to the current status and return
935
- cannotStopContainerError , ok := event . Error .( cannotStopContainerError )
936
- if ok && cannotStopContainerError . IsRetriableError () {
937
- logger . Info ( "Error stopping the container; ignoring state change" , logger. Fields {
938
- field .TaskARN : mtask . Arn ,
939
- field .Container : container .Name ,
940
- field . RuntimeID : container . GetRuntimeID (),
941
- "ErrorName" : event .Error .ErrorName (),
942
- field . Error : cannotStopContainerError .Error () ,
936
+
937
+ pr := mtask . dockerClient . SystemPing ( mtask . ctx , systemPingTimeout )
938
+ if pr . Error != nil {
939
+ logger . Info ( "Error stopping the container, but docker seems to be unresponsive; ignoring state change" , logger. Fields {
940
+ field . TaskARN : mtask . Arn ,
941
+ field .Container : container . Name ,
942
+ field .RuntimeID : container .GetRuntimeID () ,
943
+ "ErrorName" : event . Error . ErrorName (),
944
+ field . Error : event .Error .Error (),
945
+ "SystemPingError" : pr .Error ,
943
946
})
944
947
container .SetKnownStatus (currentKnownStatus )
945
948
return false
0 commit comments