@@ -13,8 +13,12 @@ EXT_RECVSIG=3
13
13
EXT_RETRY=4
14
14
EXT_TAR_FAILED=5
15
15
EXT_PROCFS_SAVE_FAILED=6
16
+ EXT_INTERRUPTED=7
17
+ EXT_TERMINATED=8
16
18
EXT_INVALID_ARGUMENT=10
17
19
20
+ TIMEOUT_EXIT_CODE=124
21
+
18
22
TAR=tar
19
23
MKDIR=mkdir
20
24
RM=rm
@@ -61,6 +65,8 @@ rm_lock_and_exit()
61
65
handle_exit ()
62
66
{
63
67
ECODE=$?
68
+ echo " Cleaning up working directory $TARDIR "
69
+ $RM -rf $TARDIR
64
70
echo " Removing lock. Exit: $ECODE " >&2
65
71
$RM $V -rf ${LOCKDIR}
66
72
# Echo the filename as the last statement if the generation succeeds
@@ -69,11 +75,16 @@ handle_exit()
69
75
fi
70
76
}
71
77
72
###############################################################################
# SIGINT handler: report the interrupt on stderr and leave the script with
# the dedicated "interrupted" exit code.
# Globals:
#  EXT_INTERRUPTED
# Arguments:
#  None
# Returns:
#  Does not return; exits with $EXT_INTERRUPTED.
# NOTE(review): no cleanup is done here; presumably the EXIT trap handler
# removes the working directory and the lock - confirm against the trap setup.
###############################################################################
handle_sigint() {
    printf '%s\n' " Generate Dump received interrupt" >&2
    exit ${EXT_INTERRUPTED}
}
83
+
84
###############################################################################
# Termination handler (installed for SIGHUP, SIGQUIT and SIGTERM): report the
# termination on stderr, finalize whatever has been collected so far, then
# exit with the dedicated "terminated" exit code.
# Globals:
#  EXT_TERMINATED
# Arguments:
#  None
# Returns:
#  Does not return; exits the script.
# NOTE(review): finalize itself ends with "exit $RETURN_CODE" when
# SAVE_STDERR is false, so in that mode the exit below is never reached and
# the script leaves with RETURN_CODE instead of EXT_TERMINATED - confirm that
# this is intended.
###############################################################################
handle_sigterm() {
    printf '%s\n' " Dump generation terminated" >&2
    finalize
    exit ${EXT_TERMINATED}
}
78
89
79
90
handle_error () {
@@ -83,6 +94,10 @@ handle_error() {
83
94
fi
84
95
}
85
96
97
###############################################################################
# Escape every double quote in a string with a backslash, so that the string
# can be embedded inside a double-quoted `bash -c "..."` argument.
# Globals:
#  None
# Arguments:
#  $1 - text to escape (may be empty)
# Outputs:
#  The escaped text on stdout, followed by a newline
# Returns:
#  Exit status of the printf | sed pipeline
###############################################################################
escape_quotes() {
    # The argument must be quoted: an unquoted expansion is word-split and
    # glob-expanded, which collapses whitespace runs and turns e.g. '*' into
    # file names. printf is used instead of echo so that values such as
    # "-n" or "-e" are printed literally rather than parsed as options.
    printf '%s\n' "${1-}" | sed 's/"/\\"/g'
}
100
+
86
101
save_bcmcmd () {
87
102
trap ' handle_error $? $LINENO' ERR
88
103
local start_t=$( date +%s%3N)
@@ -93,6 +108,7 @@ save_bcmcmd() {
93
108
local do_gzip=${3:- false}
94
109
local tarpath=" ${BASE} /dump/$filename "
95
110
local timeout_cmd=" timeout --foreground ${TIMEOUT_MIN} m"
111
+ local cmd=$( escape_quotes " $cmd " )
96
112
if [ ! -d $LOGDIR ]; then
97
113
$MKDIR $V -p $LOGDIR
98
114
fi
@@ -106,12 +122,12 @@ save_bcmcmd() {
106
122
# as one argument, e.g. vtysh -c "COMMAND HERE" needs to have
107
123
# "COMMAND HERE" bunched together as 1 arg to vtysh -c
108
124
if $NOOP ; then
109
- echo " ${timeout_cmd} $ cmd &> '${filepath} '"
125
+ echo " ${timeout_cmd} bash -c \" ${ cmd} \" &> '${filepath} '"
110
126
else
111
127
ret=0
112
- eval " ${timeout_cmd} $ cmd" & > " ${filepath} " || ret=$?
128
+ eval " ${timeout_cmd} bash -c \" ${ cmd} \ " &> ' ${filepath} ' " || ret=$?
113
129
if [ $ret -ne 0 ]; then
114
- if [ $ret -eq 124 ]; then
130
+ if [ $ret -eq $TIMEOUT_EXIT_CODE ]; then
115
131
echo " Command: $cmd timedout after ${TIMEOUT_MIN} minutes."
116
132
else
117
133
RC=0
@@ -207,6 +223,8 @@ save_cmd() {
207
223
redirect_eval=" "
208
224
fi
209
225
226
+ local cmd=$( escape_quotes " $cmd " )
227
+ local cleanup_method_declration=$( declare -f $cleanup_method )
210
228
# eval required here to re-evaluate the $cmd properly at runtime
211
229
# This is required if $cmd has quoted strings that should be bunched
212
230
# as one argument, e.g. vtysh -c "COMMAND HERE" needs to have
@@ -215,25 +233,29 @@ save_cmd() {
215
233
tarpath=" ${tarpath} .gz"
216
234
filepath=" ${filepath} .gz"
217
235
# cleanup_method runs inside a 'bash -c' sub-shell, so its declaration must be prepended to the command
218
- local cleanup_method_declration=$( declare -f $cleanup_method )
219
236
local cmds=" $cleanup_method_declration ; $cmd $redirect_eval | $cleanup_method | gzip -c > '${filepath} '"
220
237
if $NOOP ; then
221
238
echo " ${timeout_cmd} bash -c \" ${cmds} \" "
222
239
else
223
240
RC=0
224
241
eval " ${timeout_cmd} bash -c \" ${cmds} \" " || RC=$?
225
- if [ $RC -ne 0 ]; then
242
+ if [ $RC -eq $TIMEOUT_EXIT_CODE ]; then
226
243
echo " Command: $cmds timedout after ${TIMEOUT_MIN} minutes."
244
+ elif [ $RC -ne 0 ]; then
245
+ echo " Command: $cmds failed with RC $RC "
227
246
fi
228
247
fi
229
248
else
249
+ local cmds=" $cleanup_method_declration ; $cmd | $cleanup_method $redirect '$filepath '"
230
250
if $NOOP ; then
231
- echo " ${timeout_cmd} $cmd | $cleanup_method $redirect ' $filepath ' "
251
+ echo " ${timeout_cmd} bash -c \" ${cmds} \" "
232
252
else
233
253
RC=0
234
- eval " ${timeout_cmd} $cmd | $cleanup_method " " $redirect " " $filepath " || RC=$?
235
- if [ $RC -ne 0 ]; then
236
- echo " Command: $cmd timedout after ${TIMEOUT_MIN} minutes."
254
+ eval " ${timeout_cmd} bash -c \" ${cmds} \" " || RC=$?
255
+ if [ $RC -eq $TIMEOUT_EXIT_CODE ]; then
256
+ echo " Command: $cmds timedout after ${TIMEOUT_MIN} minutes."
257
+ elif [ $RC -ne 0 ]; then
258
+ echo " Command: $cmds failed with RC $RC "
237
259
fi
238
260
fi
239
261
fi
@@ -484,20 +506,20 @@ save_bgp_neighbor() {
484
506
local asic_id=${1:- " " }
485
507
local ns=$( get_vtysh_namespace $asic_id )
486
508
487
- neighbor_list_v4=$( ${timeout_cmd} vtysh $ns -c " show ip bgp neighbors" | grep " BGP neighbor is" | awk -F ' [, ]' ' {print $4}' )
509
+ neighbor_list_v4=$( ${timeout_cmd} bash -c " vtysh $ns -c ' show ip bgp neighbors' | grep ' BGP neighbor is' | awk -F '[, ]' '{print \ $ 4}'" )
488
510
for word in $neighbor_list_v4 ; do
489
511
save_cmd " vtysh $ns -c \" show ip bgp neighbors $word advertised-routes\" " " ip.bgp.neighbor.$word .adv$asic_id "
490
512
save_cmd " vtysh $ns -c \" show ip bgp neighbors $word routes\" " " ip.bgp.neighbor.$word .rcv$asic_id "
491
513
done
492
- neighbor_list_v6=$( vtysh $ns -c " show bgp ipv6 neighbors" | grep " BGP neighbor is" | awk -F ' [, ]' ' {print $4}' | fgrep ' :' )
514
+ neighbor_list_v6=$( ${timeout_cmd} bash -c " vtysh $ns -c ' show bgp ipv6 neighbors' | grep ' BGP neighbor is' | awk -F '[, ]' '{print \ $ 4}' | fgrep ':'" )
493
515
for word in $neighbor_list_v6 ; do
494
516
save_cmd " vtysh $ns -c \" show bgp ipv6 neighbors $word advertised-routes\" " " ipv6.bgp.neighbor.$word .adv$asic_id "
495
517
save_cmd " vtysh $ns -c \" show bgp ipv6 neighbors $word routes\" " " ipv6.bgp.neighbor.$word .rcv$asic_id "
496
518
done
497
519
498
- vrf_list=` ${timeout_cmd} vtysh $ns -c " show vrf" | awk -F" " ' {print $2}' `
520
+ vrf_list=` ${timeout_cmd} bash -c " vtysh $ns -c ' show vrf' | awk -F" " '{print \ $ 2}'" `
499
521
for vrf in $vrf_list ; do
500
- neighbor_list=` ${timeout_cmd} vtysh $ns -c " show ip bgp vrf $vrf neighbors" | grep " BGP neighbor is" | awk -F ' [, ]' ' {print $4}' `
522
+ neighbor_list=` ${timeout_cmd} bash -c " vtysh $ns -c ' show ip bgp vrf $vrf neighbors' | grep ' BGP neighbor is' | awk -F '[, ]' '{print \ $ 4}'" `
501
523
for word in $neighbor_list ; do
502
524
save_cmd " vtysh $ns -c \" show ip bgp vrf $vrf neighbors $word advertised-routes\" " " ip.bgp.neighbor.$vrf .$word .adv$asic_id "
503
525
save_cmd " vtysh $ns -c \" show ip bgp vrf $vrf neighbors $word routes\" " " ip.bgp.neighbor.$vrf .$word .rcv$asic_id "
@@ -737,7 +759,7 @@ save_platform_info() {
737
759
save_cmd " show platform psustatus" " psustatus"
738
760
save_cmd " show platform ssdhealth" " ssdhealth"
739
761
save_cmd " show platform temperature" " temperature"
740
- save_cmd " show platform fan" " fan"
762
+ save_cmd " show platform fan" " fan"
741
763
fi
742
764
}
743
765
@@ -856,6 +878,7 @@ enable_logrotate() {
856
878
# ##############################################################################
857
879
collect_mellanox () {
858
880
trap ' handle_error $? $LINENO' ERR
881
+ local timeout_cmd=" timeout --foreground ${TIMEOUT_MIN} m"
859
882
local sai_dump_folder=" /tmp/saisdkdump"
860
883
local sai_dump_filename=" ${sai_dump_folder} /sai_sdk_dump_$( date +" %m_%d_%Y_%I_%M_%p" ) "
861
884
@@ -865,12 +888,12 @@ collect_mellanox() {
865
888
copy_from_docker syncd $sai_dump_folder $sai_dump_folder
866
889
echo " $sai_dump_folder "
867
890
for file in ` ls $sai_dump_folder ` ; do
868
- save_file ${sai_dump_folder} /${file} sai_sdk_dump true
891
+ save_file ${sai_dump_folder} /${file} sai_sdk_dump true
869
892
done
870
893
871
894
${CMD_PREFIX} rm -rf $sai_dump_folder
872
895
${CMD_PREFIX} docker exec syncd rm -rf $sai_dump_folder
873
-
896
+
874
897
# Save SDK error dumps
875
898
local sdk_dump_path=` ${CMD_PREFIX} docker exec syncd cat /tmp/sai.profile| grep " SAI_DUMP_STORE_PATH" | cut -d = -f2`
876
899
if [[ -d $sdk_dump_path ]]; then
@@ -880,6 +903,26 @@ collect_mellanox() {
880
903
done
881
904
rm -rf /tmp/sdk-dumps
882
905
fi
906
+
907
+ # run 'hw-management-generate-dump.sh' script and save the result file
908
+ HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
909
+ if [ -f " $HW_DUMP_FILE " ]; then
910
+ ${CMD_PREFIX}${timeout_cmd} /usr/bin/hw-management-generate-dump.sh $ALLOW_PROCESS_STOP
911
+ ret=$?
912
+ if [ $ret -ne 0 ]; then
913
+ if [ $ret -eq $TIMEOUT_EXIT_CODE ]; then
914
+ echo " hw-management dump timedout after ${TIMEOUT_MIN} minutes."
915
+ else
916
+ echo " hw-management dump failed ..."
917
+ fi
918
+ else
919
+ save_file " /tmp/hw-mgmt-dump*" " hw-mgmt" false
920
+ rm -f /tmp/hw-mgmt-dump*
921
+ fi
922
+ else
923
+ echo " HW Mgmt dump script $HW_DUMP_FILE does not exist"
924
+ fi
925
+
883
926
}
884
927
885
928
# ##############################################################################
@@ -1087,12 +1130,11 @@ save_crash_files() {
1087
1130
###############################################################################
# Print the number of ASICs as reported by
# sonic_py_common.multi_asic.get_num_asics().
# Globals:
#  SAVE_STDERR
# Arguments:
#  None
# Outputs:
#  The output of the python one-liner on stdout. When SAVE_STDERR is true its
#  stderr is merged into that output; whitespace is collapsed because the
#  command substitution is deliberately left unquoted.
# Returns:
#  Exit status of echo (a failure of the python command is not propagated)
###############################################################################
get_asic_count() {
    trap ' handle_error $? $LINENO' ERR
    local redirect_eval=" 2>&1"
    # Drop the stderr merge when SAVE_STDERR is false.
    $SAVE_STDERR || redirect_eval=" "
    local cmd=" python -c 'from sonic_py_common.multi_asic import get_num_asics; print(get_num_asics())'"
    # Kept as a single echo of an unquoted substitution so that a failing
    # python command does not fire the ERR trap and the output stays on one
    # line.
    echo $(eval ${cmd} ${redirect_eval})
}
1098
1140
@@ -1199,6 +1241,11 @@ main() {
1199
1241
end_t=$( date +%s%3N)
1200
1242
echo " [ Capture Proc State ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
1201
1243
1244
+ # Save logs and cores early
1245
+ save_log_files
1246
+ save_crash_files
1247
+ save_warmboot_files
1248
+
1202
1249
# Save all the processes within each docker
1203
1250
save_cmd " show services" services.summary
1204
1251
@@ -1265,14 +1312,14 @@ main() {
1265
1312
save_bfd_info
1266
1313
save_redis_info
1267
1314
1268
- if $DEBUG_DUMP
1315
+ if $DEBUG_DUMP
1269
1316
then
1270
1317
save_dump_state_all_ns
1271
1318
fi
1272
1319
1273
1320
save_cmd " docker ps -a" " docker.ps"
1274
1321
save_cmd " docker top pmon" " docker.pmon"
1275
-
1322
+
1276
1323
if [[ -d ${PLUGINS_DIR} ]]; then
1277
1324
local -r dump_plugins=" $( find ${PLUGINS_DIR} -type f -executable) "
1278
1325
for plugin in $dump_plugins ; do
@@ -1333,25 +1380,16 @@ main() {
1333
1380
end_t=$( date +%s%3N)
1334
1381
echo " [ TAR /etc Files ] : $(( $end_t - $start_t )) msec" >> $TECHSUPPORT_TIME_INFO
1335
1382
1336
- save_log_files
1337
- save_warmboot_files
1338
- save_crash_files
1383
+ finalize
1384
+ }
1339
1385
1340
- # run 'hw-management-generate-dump.sh' script and save the result file
1341
- HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
1342
- if [ -f " $HW_DUMP_FILE " ]; then
1343
- /usr/bin/hw-management-generate-dump.sh $ALLOW_PROCESS_STOP
1344
- save_file " /tmp/hw-mgmt-dump*" " hw-mgmt" false
1345
- rm -f /tmp/hw-mgmt-dump*
1346
- else
1347
- echo " HW Mgmt dump script $HW_DUMP_FILE does not exist"
1348
- fi
1386
+ # ##############################################################################
1387
+ # Finalize dump generation
1388
+ # ##############################################################################
1389
+ finalize () {
1349
1390
# Save techsupport timing profile info
1350
1391
save_file $TECHSUPPORT_TIME_INFO log false
1351
1392
1352
- # clean up working tar dir before compressing
1353
- $RM $V -rf $TARDIR
1354
-
1355
1393
if $DO_COMPRESS ; then
1356
1394
RC=0
1357
1395
$GZIP $V $TARFILE || RC=$?
@@ -1364,13 +1402,14 @@ main() {
1364
1402
1365
1403
# Invoke the TechSupport Cleanup Hook
1366
1404
setsid python3 /usr/local/bin/techsupport_cleanup.py ${TARFILE} & > /tmp/techsupport_cleanup.log &
1367
-
1405
+
1368
1406
if ! $SAVE_STDERR
1369
1407
then
1370
1408
exit $RETURN_CODE
1371
1409
fi
1372
1410
}
1373
1411
1412
+
1374
1413
# ##############################################################################
1375
1414
# Remove secret from pipeline input and output result to pipeline.
1376
1415
# Globals:
@@ -1416,7 +1455,7 @@ remove_secret_from_etc_files() {
1416
1455
1417
1456
# Remove snmp community string from snmp.yml
1418
1457
sed -i -E ' s/(\s*snmp_\S*community\s*:\s*)(\S*)/\1****/g' $dumppath /etc/sonic/snmp.yml
1419
-
1458
+
1420
1459
# Remove secret from /etc/sonic/config_db.json
1421
1460
cat $dumppath /etc/sonic/config_db.json | remove_secret_from_config_db_dump > $dumppath /etc/sonic/config_db.json.temp
1422
1461
mv $dumppath /etc/sonic/config_db.json.temp $dumppath /etc/sonic/config_db.json
@@ -1475,9 +1514,9 @@ OPTIONS
1475
1514
"24 March", "yesterday", etc.
1476
1515
-t TIMEOUT_MINS
1477
1516
Command level timeout in minutes
1478
- -r
1517
+ -r
1479
1518
Redirect any intermediate errors to STDERR
1480
- -d
1519
+ -d
1481
1520
Collect the output of debug dump cli
1482
1521
EOF
1483
1522
}
@@ -1527,7 +1566,7 @@ while getopts ":xnvhzas:t:r:d" opt; do
1527
1566
r)
1528
1567
SAVE_STDERR=false
1529
1568
;;
1530
- d)
1569
+ d)
1531
1570
DEBUG_DUMP=true
1532
1571
;;
1533
1572
/? )
@@ -1553,7 +1592,8 @@ if $MKDIR "${LOCKDIR}" &>/dev/null; then
1553
1592
echo " $$ " > " ${PIDFILE} "
1554
1593
# This handler will exit the script upon receiving these interrupts
1555
1594
# Trap configured on EXIT will be triggered by the exit from the handle_sigterm / handle_sigint functions
1556
- trap ' handle_signal' SIGINT SIGHUP SIGQUIT SIGTERM
1595
+ trap ' handle_sigterm' SIGHUP SIGQUIT SIGTERM
1596
+ trap ' handle_sigint' SIGINT
1557
1597
echo " Lock succesfully accquired and installed signal handlers"
1558
1598
# Proceed with the actual code
1559
1599
if [[ ! -z " ${V} " ]]; then
0 commit comments