@@ -11,13 +11,24 @@ VERBOSE=no
11
11
FORCE=no
12
12
REBOOT_METHOD=" /sbin/reboot"
13
13
14
+ EXIT_SUCCESS=0
15
+ EXIT_FAILURE=1
16
+ EXIT_NOT_SUPPORTED=2
17
+ EXIT_ORCHAGENT_SHUTDOWN=10
18
+ EXIT_SYNCD_SHUTDOWN=11
19
+
14
20
# Check root privileges
15
21
if [[ " $EUID " -ne 0 ]]
16
22
then
17
23
echo " This command must be run as root" >&2
18
- exit 1
24
+ exit " ${EXIT_FAILURE} "
19
25
fi
20
26
27
+ function error()
28
+ {
29
+ echo $@ >&2
30
+ }
31
+
21
32
function debug()
22
33
{
23
34
if [[ x" ${VERBOSE} " == x" yes" ]]; then
@@ -36,7 +47,7 @@ function showHelpAndExit()
36
47
echo " -k : reboot with /sbin/kexec -e"
37
48
echo " -x : execute script with -x flag"
38
49
39
- exit 0
50
+ exit " ${EXIT_SUCCESS} "
40
51
}
41
52
42
53
function parseOptions()
@@ -83,34 +94,24 @@ function clear_warm_boot()
83
94
fi
84
95
}
85
96
86
- function cleanup_except_table()
87
- {
88
- local REDIS_DB_NUMBER=" $1 "
89
- local TABLE_PREFIX=" $2 "
90
- redis-cli -n " ${REDIS_DB_NUMBER} " eval "
91
- for _, k in ipairs(redis.call('keys', '*')) do
92
- if not string.match(k, '${TABLE_PREFIX} ') then
93
- redis.call('del', k)
94
- end
95
- end
96
- " 0
97
- }
98
-
99
97
function initialize_pre_shutdown()
100
98
{
101
99
debug " Initialize pre-shutdown ..."
102
100
TABLE=" WARM_RESTART_TABLE|warm-shutdown"
103
101
RESTORE_COUNT=` /usr/bin/redis-cli -n 6 hget " ${TABLE} " restore_count`
104
102
if [[ -z " $RESTORE_COUNT " ]]; then
105
- /usr/bin/redis-cli -n 6 hset " ${TABLE} " restore_count 0
103
+ /usr/bin/redis-cli -n 6 hset " ${TABLE} " " restore_count" " 0 " > /dev/null
106
104
fi
107
- /usr/bin/redis-cli -n 6 hset " ${TABLE} " state requesting
105
+ /usr/bin/redis-cli -n 6 hset " ${TABLE} " " state" " requesting" > /dev/null
108
106
}
109
107
110
108
function request_pre_shutdown()
111
109
{
112
110
debug " Requesting pre-shutdown ..."
113
- /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --pre
111
+ /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --pre & > /dev/null || {
112
+ error " Failed to request pre-shutdown"
113
+ exit " ${EXIT_SYNCD_SHUTDOWN} "
114
+ }
114
115
}
115
116
116
117
function wait_for_pre_shutdown_complete_or_fail()
@@ -145,12 +146,12 @@ function wait_for_pre_shutdown_complete_or_fail()
145
146
146
147
if [[ x" ${STATE} " != x" pre-shutdown-succeeded" ]]; then
147
148
debug " Syncd pre-shutdown failed: ${STATE} ..."
148
- exit 10
149
+ exit " ${EXIT_SYNCD_SHUTDOWN} "
149
150
fi
150
151
debug " Pre-shutdown succeeded ..."
151
152
}
152
153
153
- function backup_datebase ()
154
+ function backup_database ()
154
155
{
155
156
debug " Backing up database ..."
156
157
# Dump redis content to a file 'dump.rdb' in warmboot directory
@@ -162,8 +163,8 @@ function backup_datebase()
162
163
redis.call('del', k)
163
164
end
164
165
end
165
- " 0
166
- redis-cli save
166
+ " 0 > /dev/null
167
+ redis-cli save > /dev/null
167
168
docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
168
169
docker exec -i database rm /var/lib/redis/$REDIS_FILE
169
170
}
@@ -181,27 +182,17 @@ case "$REBOOT_TYPE" in
181
182
REBOOT_TYPE=" fastfast-reboot"
182
183
BOOT_TYPE_ARG=" fastfast"
183
184
# source mlnx-ffb.sh file with
184
- # functions to check ISSU upgrade/do ISSU start
185
+ # functions to check ISSU upgrade possibility
185
186
source mlnx-ffb.sh
186
-
187
- trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
188
-
189
- # Set warm reboot flag for some components.
190
- # In fastfast boot flow, only APPL layer dockers
191
- # are enabled to perform warm restart
192
- config warm_restart disable system
193
- config warm_restart disable swss
194
- config warm_restart enable bgp
195
- config warm_restart enable teamd
196
187
else
197
188
BOOT_TYPE_ARG=" warm"
198
- trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
199
- config warm_restart enable system
200
189
fi
190
+ trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
191
+ config warm_restart enable system
201
192
;;
202
193
* )
203
- echo " Not supported reboot type: $REBOOT_TYPE " >&2
204
- exit 1
194
+ error " Not supported reboot type: $REBOOT_TYPE "
195
+ exit " ${EXIT_NOT_SUPPORTED} "
205
196
;;
206
197
esac
207
198
@@ -222,75 +213,63 @@ elif grep -q onie_platform= /host/machine.conf; then
222
213
KERNEL_IMAGE=" /host$( echo $KERNEL_OPTIONS | cut -d ' ' -f 2) "
223
214
BOOT_OPTIONS=" $( echo $KERNEL_OPTIONS | sed -e ' s/\s*linux\s*/BOOT_IMAGE=/' ) SONIC_BOOT_TYPE=${BOOT_TYPE_ARG} "
224
215
else
225
- echo " Unknown bootloader. ${REBOOT_TYPE} is not supported."
226
- exit 1
216
+ error " Unknown bootloader. ${REBOOT_TYPE} is not supported."
217
+ exit " ${EXIT_NOT_SUPPORTED} "
227
218
fi
228
219
INITRD=$( echo $KERNEL_IMAGE | sed ' s/vmlinuz/initrd.img/g' )
229
220
230
221
# Install new FW for mellanox platforms before control plane goes down
231
222
# So on boot switch will not spend time to upgrade FW increasing the CP downtime
232
223
if [[ " $sonic_asic_type " == " mellanox" ]]; then
224
+ MLNX_EXIT_SUCCESS=0
225
+ MLNX_EXIT_FW_ERROR=100
226
+ MLNX_EXIT_FFB_FAILURE=101
233
227
234
- if [[ " $REBOOT_TYPE " = " fastfast-reboot" ]]; then
235
- check_issu_enabled || {
236
- echo " Warm reboot is not supported by this HWSKU"
237
- exit 1
238
- }
228
+ MLNX_FW_UPGRADE_SCRIPT=" /usr/bin/mlnx-fw-upgrade.sh"
239
229
240
- check_sdk_upgrade || {
241
- echo " Warm reboot is not supported"
242
- exit 1
230
+
231
+ if [[ " $REBOOT_TYPE " = " fastfast-reboot" ]]; then
232
+ check_ffb || {
233
+ error " Warm reboot is not supported"
234
+ exit " ${MLNX_EXIT_FFB_FAILURE} "
243
235
}
244
236
fi
245
237
246
- echo " Prepare MLNX ASIC to ${REBOOT_TYPE} : install new FW if required"
247
-
248
- MLNX_EXIT_SUCCESS=" 0"
249
- MLNX_EXIT_ERROR=" 1"
250
-
251
- MLNX_FW_UPGRADE_SCRIPT=" /usr/bin/mlnx-fw-upgrade.sh"
238
+ debug " Prepare MLNX ASIC to ${REBOOT_TYPE} : install new FW if required"
252
239
253
240
${MLNX_FW_UPGRADE_SCRIPT} --upgrade
254
241
MLNX_EXIT_CODE=" $? "
255
242
if [[ " ${MLNX_EXIT_CODE} " != " ${MLNX_EXIT_SUCCESS} " ]]; then
256
- echo " Failed to burn MLNX FW: errno=${MLNX_EXIT_CODE} "
257
- exit " ${MLNX_EXIT_ERROR} "
258
- fi
259
-
260
- if [[ " $REBOOT_TYPE " = " fastfast-reboot" ]]; then
261
- issu_start || {
262
- echo " ISSU start failed"
263
- echo " Cold reboot may be requiered to recover"
264
- exit 1
265
- }
243
+ error " Failed to burn MLNX FW: errno=${MLNX_EXIT_CODE} "
244
+ exit " ${MLNX_EXIT_FW_ERROR} "
266
245
fi
267
246
fi
268
247
269
248
# Load kernel into the memory
270
249
/sbin/kexec -l " $KERNEL_IMAGE " --initrd=" $INITRD " --append=" $BOOT_OPTIONS "
271
250
272
- if [[ " $REBOOT_TYPE " = " fast-reboot" || " $REBOOT_TYPE " = " fastfast-reboot " ]]; then
251
+ if [[ " $REBOOT_TYPE " = " fast-reboot" ]]; then
273
252
# Dump the ARP and FDB tables to files also as default routes for both IPv4 and IPv6
274
253
# into /host/fast-reboot
275
254
mkdir -p /host/fast-reboot
276
255
/usr/bin/fast-reboot-dump.py -t /host/fast-reboot
277
256
fi
278
257
279
- if [[ " $REBOOT_TYPE " = " warm-reboot" ]]; then
258
+ if [[ " $REBOOT_TYPE " = " warm-reboot" || " $REBOOT_TYPE " = " fastfast-reboot " ]]; then
280
259
# Freeze orchagent for warm restart
281
260
# Try freeze 5 times, it is possible that the orchagent is in transient state and no opportunity to be freezed
282
261
# Note: assume that 1 second is enough for orchagent to process the request and respone freeze or not
283
262
debug " Pausing orchagent ..."
284
263
for i in ` seq 4 -1 0` ; do
285
- docker exec -i swss /usr/bin/orchagent_restart_check -w 1000 && break
286
- echo " RESTARTCHECK failed $i " >&2
264
+ docker exec -i swss /usr/bin/orchagent_restart_check -w 1000 > /dev/null && break
265
+ error " RESTARTCHECK failed $i "
287
266
if [[ " $i " = " 0" ]]; then
288
- echo " RESTARTCHECK failed finally" >&2
267
+ error " RESTARTCHECK failed finally"
289
268
if [[ x" ${FORCE} " == x" yes" ]]; then
290
269
debug " Ignoring orchagent pausing failure ..."
291
270
break ;
292
271
fi
293
- exit 10
272
+ exit " ${EXIT_ORCHAGENT_SHUTDOWN} "
294
273
fi
295
274
sleep 1
296
275
done
@@ -313,38 +292,26 @@ if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
313
292
fi
314
293
315
294
# Kill swss dockers
316
- docker kill swss
317
-
318
-
319
- # Warm reboot: dump state to host disk
320
- if [[ " $REBOOT_TYPE " = " fastfast-reboot" ]]; then
321
- mkdir -p $WARM_DIR
322
-
323
- # Dump route table form APPL DB.
324
- # This route table will be used by fpmsyncd
325
- # reconcialtion logic
326
- cleanup_except_table 0 ' ROUTE_TABLE'
327
- cleanup_except_table 4 ' WARM_RESTART_TABLE'
328
- cleanup_except_table 6 ' WARM_RESTART_TABLE'
329
-
330
- redis-cli -n 1 FLUSHDB
331
- redis-cli -n 2 FLUSHDB
332
- redis-cli -n 5 FLUSHDB
333
-
334
- redis-cli save
335
- docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
336
- docker exec -i database rm /var/lib/redis/$REDIS_FILE
337
- fi
295
+ docker kill swss > /dev/null
338
296
339
297
# Pre-shutdown syncd
340
- if [[ " $REBOOT_TYPE " = " warm-reboot" ]]; then
298
+ if [[ " $REBOOT_TYPE " = " warm-reboot" || " $REBOOT_TYPE " = " fastfast-reboot " ]]; then
341
299
initialize_pre_shutdown
342
300
343
301
request_pre_shutdown
344
302
345
303
wait_for_pre_shutdown_complete_or_fail
346
304
347
- backup_datebase
305
+ # Warm reboot: dump state to host disk
306
+ if [[ " $REBOOT_TYPE " = " fastfast-reboot" ]]; then
307
+ redis-cli -n 1 FLUSHDB > /dev/null
308
+ redis-cli -n 2 FLUSHDB > /dev/null
309
+ redis-cli -n 5 FLUSHDB > /dev/null
310
+ fi
311
+
312
+ # TODO: backup_database preserves FDB_TABLE
313
+ # need to cleanup as well for fastfast boot case
314
+ backup_database
348
315
fi
349
316
350
317
# Stop teamd gracefully
@@ -353,18 +320,12 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; t
353
320
# Send USR1 signal to all teamd instances to stop them
354
321
# It will prepare teamd for warm-reboot
355
322
# Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
356
- docker exec -i teamd pkill -USR1 teamd > /dev/null
323
+ docker exec -i teamd pkill -USR1 teamd || [ $? == 1 ] > /dev/null
357
324
debug " Stopped teamd ..."
358
325
fi
359
326
360
327
debug " Stopping syncd ..."
361
- # syncd service stop is capable of handling both warm/fast/cold shutdown
362
- if [[ " $sonic_asic_type " = " mellanox" ]]; then
363
- docker kill syncd
364
- else
365
- # syncd service stop is capable of handling both warm/fast/cold shutdown
366
- systemctl stop syncd
367
- fi
328
+ systemctl stop syncd
368
329
debug " Stopped syncd ..."
369
330
370
331
# Kill other containers to make the reboot faster
@@ -403,5 +364,5 @@ debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
403
364
exec ${REBOOT_METHOD}
404
365
405
366
# Should never reach here
406
- echo " ${REBOOT_TYPE} failed!" >&2
407
- exit 1
367
+ error " ${REBOOT_TYPE} failed!"
368
+ exit " ${EXIT_FAILURE} "
0 commit comments