Skip to content

Commit 6e0e1da

Browse files
authored
[sai_failure_dump]Invoking dump during SAI failure (#2633)
* Added logic in techsupport script to collect SAI failure dump
1 parent f9130d1 commit 6e0e1da

File tree

1 file changed

+51
-13
lines changed

1 file changed

+51
-13
lines changed

scripts/generate_dump

+51-13
Original file line numberDiff line numberDiff line change
@@ -1053,21 +1053,26 @@ collect_mellanox() {
10531053
local sai_dump_folder="/tmp/saisdkdump"
10541054
local sai_dump_filename="${sai_dump_folder}/sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")"
10551055

1056-
${CMD_PREFIX}docker exec syncd mkdir -p $sai_dump_folder
1057-
${CMD_PREFIX}docker exec syncd saisdkdump -f $sai_dump_filename
1058-
1059-
if [ $? != 0 ]; then
1060-
echo "Failed to collect saisdkdump."
1061-
fi
1056+
if [[ "$( docker container inspect -f '{{.State.Running}}' syncd )" == "true" ]]; then
1057+
if [[ x"$(sonic-db-cli APPL_DB EXISTS PORT_TABLE:PortInitDone)" == x"1" ]]; then
1058+
# Run saisdkdump only after the create_switch is known to be successful
1059+
${CMD_PREFIX}docker exec syncd mkdir -p $sai_dump_folder
1060+
${CMD_PREFIX}docker exec syncd saisdkdump -f $sai_dump_filename
1061+
1062+
if [ $? != 0 ]; then
1063+
echo "Failed to collect saisdkdump."
1064+
fi
10621065

1063-
copy_from_docker syncd $sai_dump_folder $sai_dump_folder
1064-
echo "$sai_dump_folder"
1065-
for file in `ls $sai_dump_folder`; do
1066-
save_file ${sai_dump_folder}/${file} sai_sdk_dump true
1067-
done
1066+
copy_from_docker syncd $sai_dump_folder $sai_dump_folder
1067+
echo "$sai_dump_folder"
1068+
for file in `ls $sai_dump_folder`; do
1069+
save_file ${sai_dump_folder}/${file} sai_sdk_dump true
1070+
done
10681071

1069-
${CMD_PREFIX}rm -rf $sai_dump_folder
1070-
${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder
1072+
${CMD_PREFIX}rm -rf $sai_dump_folder
1073+
${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder
1074+
fi
1075+
fi
10711076

10721077
# run 'hw-management-generate-dump.sh' script and save the result file
10731078
HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
@@ -1429,6 +1434,38 @@ save_crash_files() {
14291434
fi
14301435
}
14311436

1437+
###############################################################################
# Collect SAI failure dump files under /var/log/sai_failure_dump/ and remove
# them from disk afterwards. These files are created because of the orchagent
# abort triggered by SAI programming failure.
#
# A file whose name already appears under log/ in the archive is not stored a
# second time; a symbolic link pointing into log/ is saved instead. Any other
# file is saved under sai_failure_dump/, gzipped unless it already ends in .gz.
# Globals:
#  TAR, TARFILE, BASE, CMD_PREFIX (all read)
# Arguments:
#  None
# Returns:
#  None
###############################################################################
save_sai_failure_dump() {
    local dump_file
    local archived_entry
    local link_name

    # NOTE(review): the list is still word-split as in the original loop, so
    # paths containing whitespace are not supported - confirm the output
    # format of find_files before switching to a read loop.
    for dump_file in $(find_files "/var/log/sai_failure_dump/"); do
        archived_entry="$BASE/log/$(basename "$dump_file")"

        # $TAR is intentionally left unquoted in case it carries more than one
        # word. The name is matched as a fixed string (dots are literal) and
        # grep output is discarded so nothing leaks to the script's stdout.
        # Redirecting instead of using -q lets grep drain the whole listing,
        # so the producer never sees a closed pipe.
        if $TAR -tf "$TARFILE" | grep -F -- "$archived_entry" > /dev/null; then
            # if the files are already collected under the log/ dir
            # just add a symbolic link
            link_name="$dump_file"
            if [[ "$dump_file" != *.gz ]]; then
                # files saved under log/ are zipped with gz
                link_name="${dump_file}.gz"
            fi
            ${CMD_PREFIX}save_symlink "$link_name" sai_failure_dump log
        else
            if [[ "$dump_file" != *.gz ]]; then
                ${CMD_PREFIX}save_file "$dump_file" sai_failure_dump true
            else
                ${CMD_PREFIX}save_file "$dump_file" sai_failure_dump false
            fi
        fi

        # Clean up the file once it's part of tech support. The original path
        # is removed, not the ".gz" link name computed above, which does not
        # name the file on disk.
        ${CMD_PREFIX}rm -f -- "$dump_file"
    done
}
1468+
14321469
###############################################################################
14331470
# Get number of ASICs in the platform
14341471
# Globals:
@@ -1709,6 +1746,7 @@ main() {
17091746
save_log_files
17101747
save_crash_files
17111748
save_warmboot_files
1749+
save_sai_failure_dump
17121750

17131751
if [[ "$asic" = "mellanox" ]]; then
17141752
collect_mellanox_dfw_dumps

0 commit comments

Comments
 (0)