@@ -1053,21 +1053,26 @@ collect_mellanox() {
1053
1053
local sai_dump_folder=" /tmp/saisdkdump"
1054
1054
local sai_dump_filename=" ${sai_dump_folder} /sai_sdk_dump_$( date +" %m_%d_%Y_%I_%M_%p" ) "
1055
1055
1056
- ${CMD_PREFIX} docker exec syncd mkdir -p $sai_dump_folder
1057
- ${CMD_PREFIX} docker exec syncd saisdkdump -f $sai_dump_filename
1058
-
1059
- if [ $? != 0 ]; then
1060
- echo " Failed to collect saisdkdump."
1061
- fi
1056
+ if [[ " $( docker container inspect -f ' {{.State.Running}}' syncd ) " == " true" ]]; then
1057
+ if [[ x" $( sonic-db-cli APPL_DB EXISTS PORT_TABLE:PortInitDone) " == x" 1" ]]; then
1058
+ # Run saisdkdump only after the create_switch is known to be successful
1059
+ ${CMD_PREFIX} docker exec syncd mkdir -p $sai_dump_folder
1060
+ ${CMD_PREFIX} docker exec syncd saisdkdump -f $sai_dump_filename
1061
+
1062
+ if [ $? != 0 ]; then
1063
+ echo " Failed to collect saisdkdump."
1064
+ fi
1062
1065
1063
- copy_from_docker syncd $sai_dump_folder $sai_dump_folder
1064
- echo " $sai_dump_folder "
1065
- for file in ` ls $sai_dump_folder ` ; do
1066
- save_file ${sai_dump_folder} /${file} sai_sdk_dump true
1067
- done
1066
+ copy_from_docker syncd $sai_dump_folder $sai_dump_folder
1067
+ echo " $sai_dump_folder "
1068
+ for file in ` ls $sai_dump_folder ` ; do
1069
+ save_file ${sai_dump_folder} /${file} sai_sdk_dump true
1070
+ done
1068
1071
1069
- ${CMD_PREFIX} rm -rf $sai_dump_folder
1070
- ${CMD_PREFIX} docker exec syncd rm -rf $sai_dump_folder
1072
+ ${CMD_PREFIX} rm -rf $sai_dump_folder
1073
+ ${CMD_PREFIX} docker exec syncd rm -rf $sai_dump_folder
1074
+ fi
1075
+ fi
1071
1076
1072
1077
# run 'hw-management-generate-dump.sh' script and save the result file
1073
1078
HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
@@ -1429,6 +1434,38 @@ save_crash_files() {
1429
1434
fi
1430
1435
}
1431
1436
1437
###############################################################################
# Collect SAI failure dump files found under /var/log/sai_failure_dump/.
# These files are created because of the orchagent abort triggered by a SAI
# programming failure.
#
# A file whose name already appears under log/ in the archive is not stored a
# second time; a symbolic link to the log/ copy is added instead. Every
# processed source file is removed afterwards.
# Globals:
#  TAR, TARFILE, BASE, CMD_PREFIX (all read)
# Arguments:
#  None
# Returns:
#  None
###############################################################################
save_sai_failure_dump() {
    # Keep the loop state private so a caller's variables are not clobbered.
    local file name link_target

    # Word splitting of the helper's output is deliberate: find_files is not
    # visible from here, so its output is consumed exactly as before.
    for file in $(find_files "/var/log/sai_failure_dump/"); do
        name=$(basename -- "$file")

        # Fixed-string match with the output discarded, so file names are not
        # interpreted as regular expressions and matches do not leak into the
        # tool's own stdout. grep still reads the whole listing, which keeps
        # the exit status of the pipeline independent of SIGPIPE.
        if $TAR -tf "$TARFILE" | grep -F -- "$BASE/log/$name" > /dev/null; then
            # The file was already collected under the log/ dir:
            # just add a symbolic link.
            link_target=$file
            if [[ "$file" != *.gz ]]; then
                # files saved under log/ are zipped with gz
                link_target=$file.gz
            fi
            ${CMD_PREFIX} save_symlink "$link_target" sai_failure_dump log
        elif [[ "$file" != *.gz ]]; then
            # NOTE(review): the last argument presumably asks save_file to
            # gzip the file -- confirm against save_file.
            ${CMD_PREFIX} save_file "$file" sai_failure_dump true
        else
            ${CMD_PREFIX} save_file "$file" sai_failure_dump false
        fi

        # Clean up the file once it is part of the tech support. Always remove
        # the original path, never the derived "<file>.gz" link target, and
        # honour CMD_PREFIX like the other cleanup commands in this script.
        ${CMD_PREFIX} rm -f -- "$file"
    done
}
1468
+
1432
1469
# ##############################################################################
1433
1470
# Get number of ASICs in the platform
1434
1471
# Globals:
@@ -1709,6 +1746,7 @@ main() {
1709
1746
save_log_files
1710
1747
save_crash_files
1711
1748
save_warmboot_files
1749
+ save_sai_failure_dump
1712
1750
1713
1751
if [[ " $asic " = " mellanox" ]]; then
1714
1752
collect_mellanox_dfw_dumps
0 commit comments