Skip to content

Commit 7099fff

Browse files
authored
[fastboot] fastboot enhancement: Use warm-boot infrastructure for fast-boot (sonic-net#2286)
This PR should be merged together with the sonic-sairedis PR (sonic-net/sonic-sairedis#1100) and the sonic-buildimage PR (sonic-net#11594). It improves the fast-reboot flow by (1) using the warm-reboot infrastructure and (2) clearing all routes except the default routes, for faster reconciliation time.
1 parent 09026ed commit 7099fff

File tree

1 file changed

+39
-55
lines changed

1 file changed

+39
-55
lines changed

scripts/fast-reboot

+39-55
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@ EXIT_FILE_SYSTEM_FULL=3
4141
EXIT_NEXT_IMAGE_NOT_EXISTS=4
4242
EXIT_ORCHAGENT_SHUTDOWN=10
4343
EXIT_SYNCD_SHUTDOWN=11
44-
EXIT_FAST_REBOOT_DUMP_FAILURE=12
45-
EXIT_FILTER_FDB_ENTRIES_FAILURE=13
4644
EXIT_COUNTERPOLL_DELAY_FAILURE=14
4745
EXIT_DB_INTEGRITY_FAILURE=15
4846
EXIT_NO_CONTROL_PLANE_ASSISTANT=20
@@ -130,41 +128,36 @@ function parseOptions()
130128
done
131129
}
132130

133-
function common_clear()
131+
function clear_boot()
134132
{
133+
# common_clear
135134
debug "${REBOOT_TYPE} failure ($?) cleanup ..."
136135

137136
/sbin/kexec -u || /bin/true
138137

139138
teardown_control_plane_assistant
140-
}
141-
142-
function clear_fast_boot()
143-
{
144-
common_clear
145-
146-
sonic-db-cli STATE_DB DEL "FAST_REBOOT|system" &>/dev/null || /bin/true
147-
}
148-
149-
function clear_warm_boot()
150-
{
151-
common_clear
152139

140+
#clear_warm_boot
153141
result=$(timeout 10s config warm_restart disable; res=$?; if [[ $res == 124 ]]; then echo timeout; else echo "code ($res)"; fi) || /bin/true
154142
debug "Cancel warm-reboot: ${result}"
155143
156144
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
157145
if [[ -f ${WARM_DIR}/${REDIS_FILE} ]]; then
158146
mv -f ${WARM_DIR}/${REDIS_FILE} ${WARM_DIR}/${REDIS_FILE}.${TIMESTAMP} || /bin/true
159147
fi
148+
149+
#clear_fast_boot
150+
if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
151+
sonic-db-cli STATE_DB DEL "FAST_REBOOT|system" &>/dev/null || /bin/true
152+
fi
160153
}
161154
162155
function init_warm_reboot_states()
163156
{
164157
# If the current running instance was booted up with warm reboot. Then
165158
# the current DB contents will likely mark warm reboot is done.
166159
# Clear these states so that the next boot up image won't get confused.
167-
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
160+
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" || "$REBOOT_TYPE" = "fast-reboot" ]]; then
168161
sonic-db-cli STATE_DB eval "
169162
for _, key in ipairs(redis.call('keys', 'WARM_RESTART_TABLE|*')) do
170163
redis.call('hdel', key, 'state')
@@ -271,7 +264,8 @@ function backup_database()
271264
and not string.match(k, 'FG_ROUTE_TABLE|') \
272265
and not string.match(k, 'WARM_RESTART_ENABLE_TABLE|') \
273266
and not string.match(k, 'VXLAN_TUNNEL_TABLE|') \
274-
and not string.match(k, 'BUFFER_MAX_PARAM_TABLE|') then
267+
and not string.match(k, 'BUFFER_MAX_PARAM_TABLE|') \
268+
and not string.match(k, 'FAST_REBOOT|') then
275269
redis.call('del', k)
276270
end
277271
end
@@ -381,7 +375,7 @@ function check_docker_exec()
381375
382376
function check_db_integrity()
383377
{
384-
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
378+
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" || "$REBOOT_TYPE" = "fast-reboot" ]]; then
385379
CHECK_DB_INTEGRITY=0
386380
/usr/local/bin/check_db_integrity.py || CHECK_DB_INTEGRITY=$?
387381
if [[ CHECK_DB_INTEGRITY -ne 0 ]]; then
@@ -464,7 +458,6 @@ function unload_kernel()
464458
function save_counters_folder() {
465459
if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
466460
debug "Saving counters folder before warmboot..."
467-
468461
counters_folder="/host/counters"
469462
counters_cache="/tmp/cache"
470463
if [[ ! -d $counters_folder ]]; then
@@ -536,9 +529,11 @@ sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)
536529
BOOT_TYPE_ARG="cold"
537530
case "$REBOOT_TYPE" in
538531
"fast-reboot")
532+
check_warm_restart_in_progress
539533
BOOT_TYPE_ARG=$REBOOT_TYPE
540-
trap clear_fast_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
534+
trap clear_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
541535
sonic-db-cli STATE_DB SET "FAST_REBOOT|system" "1" "EX" "180" &>/dev/null
536+
config warm_restart enable system
542537
;;
543538
"warm-reboot")
544539
check_warm_restart_in_progress
@@ -551,7 +546,7 @@ case "$REBOOT_TYPE" in
551546
else
552547
BOOT_TYPE_ARG="warm"
553548
fi
554-
trap clear_warm_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
549+
trap clear_boot EXIT HUP INT QUIT TERM KILL ABRT ALRM
555550
config warm_restart enable system
556551
;;
557552
*)
@@ -609,34 +604,11 @@ else
609604
load_kernel
610605
fi
611606
612-
if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
613-
# Dump the ARP and FDB tables to files also as default routes for both IPv4 and IPv6
614-
# into /host/fast-reboot
615-
DUMP_DIR=/host/fast-reboot
616-
mkdir -p $DUMP_DIR
617-
FAST_REBOOT_DUMP_RC=0
618-
/usr/local/bin/fast-reboot-dump.py -t $DUMP_DIR || FAST_REBOOT_DUMP_RC=$?
619-
if [[ FAST_REBOOT_DUMP_RC -ne 0 ]]; then
620-
error "Failed to run fast-reboot-dump.py. Exit code: $FAST_REBOOT_DUMP_RC"
621-
unload_kernel
622-
exit "${EXIT_FAST_REBOOT_DUMP_FAILURE}"
623-
fi
624-
625-
FILTER_FDB_ENTRIES_RC=0
626-
# Filter FDB entries using MAC addresses from ARP table
627-
/usr/local/bin/filter_fdb_entries -f $DUMP_DIR/fdb.json -a $DUMP_DIR/arp.json -c $CONFIG_DB_FILE || FILTER_FDB_ENTRIES_RC=$?
628-
if [[ FILTER_FDB_ENTRIES_RC -ne 0 ]]; then
629-
error "Failed to filter FDb entries. Exit code: $FILTER_FDB_ENTRIES_RC"
630-
unload_kernel
631-
exit "${EXIT_FILTER_FDB_ENTRIES_FAILURE}"
632-
fi
633-
fi
634-
635607
init_warm_reboot_states
636608
637609
setup_control_plane_assistant
638610
639-
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
611+
if [[ "$REBOOT_TYPE" = "warm-reboot" || "$REBOOT_TYPE" = "fastfast-reboot" || "$REBOOT_TYPE" = "fast-reboot" ]]; then
640612
# Freeze orchagent for warm restart
641613
# Ask orchagent_restart_check to try freeze 5 times with interval of 2 seconds,
642614
# it is possible that the orchagent is in transient state and no opportunity to freeze
@@ -668,6 +640,17 @@ fi
668640
# service will go down and we cannot recover from it.
669641
set +e
670642
643+
if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
644+
# Clear all routes except of default routes for faster reconciliation time.
645+
sonic-db-cli APPL_DB eval "
646+
for _, k in ipairs(redis.call('keys', '*')) do
647+
if string.match(k, 'ROUTE_TABLE:') and not string.match(k, 'ROUTE_TABLE:0.0.0.0/0') and not string.match(k, 'ROUTE_TABLE:::/0') then \
648+
redis.call('del', k)
649+
end
650+
end
651+
" 0 > /dev/null
652+
fi
653+
671654
# disable trap-handlers which were set before
672655
trap '' EXIT HUP INT QUIT TERM KILL ABRT ALRM
673656
@@ -735,18 +718,19 @@ for service in ${SERVICES_TO_STOP}; do
735718
if [[ "x$sonic_asic_type" == x"mellanox" ]]; then
736719
check_issu_bank_file
737720
fi
721+
fi
738722
739-
# Warm reboot: dump state to host disk
740-
if [[ "$REBOOT_TYPE" = "fastfast-reboot" ]]; then
741-
sonic-db-cli ASIC_DB FLUSHDB > /dev/null
742-
sonic-db-cli COUNTERS_DB FLUSHDB > /dev/null
743-
sonic-db-cli FLEX_COUNTER_DB FLUSHDB > /dev/null
744-
fi
745-
746-
# TODO: backup_database preserves FDB_TABLE
747-
# need to cleanup as well for fastfast boot case
748-
backup_database
723+
if [[ "$REBOOT_TYPE" = "fastfast-reboot" || "$REBOOT_TYPE" = "fast-reboot" ]]; then
724+
# Advanced reboot: dump state to host disk
725+
sonic-db-cli ASIC_DB FLUSHDB > /dev/null
726+
sonic-db-cli COUNTERS_DB FLUSHDB > /dev/null
727+
sonic-db-cli FLEX_COUNTER_DB FLUSHDB > /dev/null
749728
fi
729+
730+
# TODO: backup_database preserves FDB_TABLE
731+
# need to cleanup as well for fastfast boot case
732+
backup_database
733+
750734
fi
751735
done
752736

0 commit comments

Comments
 (0)