@@ -43,7 +43,7 @@ function error()
43
43
# Emit a diagnostic message.
# When VERBOSE is "yes" the message is echoed to stdout prefixed with the
# output of `date`; the message is always handed to `logger` as well.
# Globals:   VERBOSE (read)
# Arguments: $@ - the words of the message
function debug()
{
    if [[ "${VERBOSE:-}" == "yes" ]]; then
        # Quote both expansions so the message is printed verbatim: no
        # word-splitting, no whitespace collapsing and no filename globbing
        # of characters such as '*' inside the message.
        echo "$(date)" "$@"
    fi
    logger "$@"
}
@@ -116,10 +116,10 @@ function clear_warm_boot()
116
116
{
117
117
common_clear
118
118
119
- result=` timeout 10s config warm_restart disable; if [[ $? == 124 ]]; then echo timeout; else echo " code ($? )" ; fi` || /bin/true
119
+ result=$( timeout 10s config warm_restart disable; res= $? ; if [[ $res == 124 ]]; then echo timeout; else echo " code ($res )" ; fi) || /bin/true
120
120
debug " Cancel warm-reboot: ${result} "
121
121
122
- TIMESTAMP=` date +%Y%m%d-%H%M%S`
122
+ TIMESTAMP=$( date +%Y%m%d-%H%M%S)
123
123
if [[ -f ${WARM_DIR} /${REDIS_FILE} ]]; then
124
124
mv -f ${WARM_DIR} /${REDIS_FILE} ${WARM_DIR} /${REDIS_FILE} .${TIMESTAMP} || /bin/true
125
125
fi
@@ -143,7 +143,7 @@ function initialize_pre_shutdown()
143
143
{
144
144
debug " Initialize pre-shutdown ..."
145
145
TABLE=" WARM_RESTART_TABLE|warm-shutdown"
146
- RESTORE_COUNT=` /usr/bin/redis-cli -n 6 hget " ${TABLE} " restore_count`
146
+ RESTORE_COUNT=$( /usr/bin/redis-cli -n 6 hget " ${TABLE} " restore_count)
147
147
if [[ -z " $RESTORE_COUNT " ]]; then
148
148
/usr/bin/redis-cli -n 6 hset " ${TABLE} " " restore_count" " 0" > /dev/null
149
149
fi
@@ -153,9 +153,10 @@ function initialize_pre_shutdown()
153
153
# Ask syncd to start its pre-shutdown sequence.
# Runs `syncd_request_shutdown --pre` inside the syncd container under a
# 5 second timeout and reports an error when the request times out OR exits
# with any other non-zero status (e.g. docker exec itself failing).
# Globals:   STATE (written: "timed out" on timeout, empty otherwise)
function request_pre_shutdown()
{
    local rc=0

    debug "Requesting pre-shutdown ..."
    # "|| rc=$?" captures the status without tripping errexit.
    timeout 5s docker exec syncd /usr/bin/syncd_request_shutdown --pre &> /dev/null || rc=$?

    # timeout exits with 124 when the time limit expired.
    STATE=""
    if [[ ${rc} -eq 124 ]]; then
        STATE="timed out"
    fi

    # A non-zero status of any kind means the request did not go through;
    # do not restrict the failure report to the timeout case.
    if [[ ${rc} -ne 0 ]]; then
        error "Failed to request pre-shutdown"
    fi
}
160
161
161
162
function recover_issu_bank_file_instruction()
@@ -201,33 +202,31 @@ function wait_for_pre_shutdown_complete_or_fail()
201
202
STATE=" requesting"
202
203
declare -i waitcount
203
204
declare -i retrycount
204
- waitcount=0
205
205
retrycount=0
206
+ start_time=$SECONDS
207
+ elapsed_time=$(( $SECONDS - $start_time ))
206
208
# Wait up to 60 seconds for pre-shutdown to complete
207
- while [[ ${waitcount } -lt 600 ]]; do
209
+ while [[ ${elapsed_time } -lt 60 ]]; do
208
210
# timeout doesn't work with -i option of "docker exec". Therefore we have
209
211
# to invoke docker exec directly below.
210
- STATE=` timeout 5s docker exec database redis-cli -n 6 hget " ${TABLE} " state; if [[ $? == 124 ]]; then echo " timed out" ; fi`
211
-
212
+ STATE=$( timeout 5s docker exec database redis-cli -n 6 hget " ${TABLE} " state; if [[ $? == 124 ]]; then echo " timed out" ; fi)
212
213
if [[ x" ${STATE} " == x" timed out" ]]; then
213
- waitcount+=50
214
214
retrycount+=1
215
- debug " Timed out getting pre-shutdown state ( ${waitcount} ) retry count ${retrycount} ..."
215
+ debug " Timed out getting pre-shutdown state, retry count ${retrycount} ..."
216
216
if [[ retrycount -gt 2 ]]; then
217
217
break
218
218
fi
219
219
elif [[ x" ${STATE} " != x" requesting" ]]; then
220
220
break
221
221
else
222
222
sleep 0.1
223
- waitcount+=1
224
223
fi
224
+ elapsed_time=$(( $SECONDS - $start_time ))
225
225
done
226
-
227
226
if [[ x" ${STATE} " != x" pre-shutdown-succeeded" ]]; then
228
- debug " Syncd pre-shutdown failed: ${STATE} ..."
227
+ debug " Syncd pre-shutdown failed, state : ${STATE} ..."
229
228
else
230
- debug " Pre-shutdown succeeded ..."
229
+ debug " Pre-shutdown succeeded, state: ${STATE} ..."
231
230
fi
232
231
}
233
232
@@ -248,7 +247,10 @@ function backup_database()
248
247
" 0 > /dev/null
249
248
redis-cli save > /dev/null
250
249
docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
251
- docker exec -i database rm /var/lib/redis/$REDIS_FILE
250
+ STATE=$( timeout 5s docker exec database rm /var/lib/redis/$REDIS_FILE ; if [[ $? == 124 ]]; then echo " timed out" ; fi)
251
+ if [[ x" ${STATE} " == x" timed out" ]]; then
252
+ error " Timedout during attempting to remove redis dump file from database container"
253
+ fi
252
254
}
253
255
254
256
function setup_control_plane_assistant()
@@ -289,10 +291,23 @@ function setup_reboot_variables()
289
291
INITRD=$( echo $KERNEL_IMAGE | sed ' s/vmlinuz/initrd.img/g' )
290
292
}
291
293
294
# Verify that `docker exec` responds within one second on each container in
# the list below. If any exec times out, report it and exit the script with
# EXIT_FAILURE. Non-timeout failures of docker exec are not treated as fatal.
# Globals:   STATE (written), EXIT_FAILURE (read)
function check_docker_exec()
{
    local -a containers=(radv bgp lldp swss database teamd syncd)
    local container
    local rc

    for container in "${containers[@]}"; do
        rc=0
        STATE=$(timeout 1s docker exec "${container}" echo "success") || rc=$?
        # Decide on timeout's exit status (124 = time limit hit) rather than
        # on a sentinel appended to captured stdout, so a timeout is still
        # detected when the command produced output before hanging.
        if [[ ${rc} -eq 124 ]]; then
            STATE="timed out"
            error "Docker exec on $container timedout"
            exit "${EXIT_FAILURE}"
        fi
    done
}
305
+
292
306
function reboot_pre_check()
293
307
{
308
+ check_docker_exec
294
309
# Make sure that the file system is normal: read-write able
295
- filename=" /host/test-` date +%Y%m%d-%H%M%S` "
310
+ filename=" /host/test-$( date +%Y%m%d-%H%M%S) "
296
311
if [[ ! -f ${filename} ]]; then
297
312
touch ${filename}
298
313
fi
@@ -456,10 +471,21 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; t
456
471
fi
457
472
fi
458
473
459
- # We are fully committed to reboot from this point on becasue critical
474
+ # We are fully committed to reboot from this point on because critical
460
475
# service will go down and we cannot recover from it.
461
476
set +e
462
477
478
# Disable the trap handlers which were set before.
trap '' EXIT HUP INT QUIT TERM KILL ABRT ALRM

# "systemctl stop <service>" is expected to prevent service/containers from
# restarting automatically. However, in some rare cases, systemctl stop doesn't
# work as expected, and services can still auto-restart after the RestartSec
# timer expires. Therefore, as a preventive measure, explicitly disable service
# auto-restart in the shutdown path.
debug "Disabling auto-restart for services ..."
# xargs -r (--no-run-if-empty): when grep lists no unit files, do not invoke
# sed at all instead of running it without any file operand.
grep -l "Restart=always" /usr/lib/systemd/system/*.service | xargs -r sed -i -e 's/\<Restart=always\>/Restart=no/'
systemctl daemon-reload
488
+
463
489
# Kill nat docker after saving the conntrack table
464
490
debug " Stopping nat ..."
465
491
/usr/bin/dump_nat_entries.py
@@ -544,8 +570,12 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; t
544
570
# Send USR1 signal to all teamd instances to stop them
545
571
# It will prepare teamd for warm-reboot
546
572
# Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
547
- docker exec -i teamd pkill -USR1 teamd > /dev/null || [ $? == 1 ]
548
- debug " Stopped teamd ..."
573
+ STATE=$( timeout 5s docker exec teamd pkill -USR1 teamd; if [[ $? == 124 ]]; then echo " timed out" ; fi)
574
+ if [[ x" ${STATE} " == x" timed out" ]]; then
575
+ error " Timedout while attempting to stop teamd instances"
576
+ else
577
+ debug " Stopped teamd ..."
578
+ fi
549
579
fi
550
580
551
581
debug " Stopping syncd ..."
@@ -573,7 +603,7 @@ systemctl stop docker.service || debug "Ignore stopping docker service error $?"
573
603
# Stop kernel modules for Nephos platform
574
604
if [[ " $sonic_asic_type " = ' nephos' ]];
575
605
then
576
- systemctl stop nps-modules-` uname -r` .service || debug " Ignore stopping nps service error $? "
606
+ systemctl stop nps-modules-$( uname -r) .service || debug " Ignore stopping nps service error $? "
577
607
fi
578
608
579
609
if [[ " $REBOOT_TYPE " = " fast-reboot" ]]; then
@@ -617,6 +647,10 @@ if [ -x ${DEVPATH}/${PLATFORM}/${SSD_FW_UPDATE} ]; then
617
647
${DEVPATH} /${PLATFORM} /${SSD_FW_UPDATE} ${REBOOT_TYPE}
618
648
fi
619
649
650
# Restore the restart configuration for systemctl services.
# NOTE(review): this rewrites every unit file containing Restart=no, so a unit
# that already had Restart=no before this script ran is switched to
# Restart=always as well -- confirm that is acceptable.
debug "Reset service auto restart ..."
# xargs -r (--no-run-if-empty): when grep lists no unit files, do not invoke
# sed at all instead of running it without any file operand.
grep -l "Restart=no" /usr/lib/systemd/system/*.service | xargs -r sed -i -e 's/\<Restart=no\>/Restart=always/'
653
+
620
654
# Reboot: explicitly call Linux native reboot under sbin
621
655
debug " Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
622
656
exec ${REBOOT_METHOD}
0 commit comments