Skip to content

Commit f45dcfb

Browse files
[generate_dump] Optimize the execution time of 'show techsupport' CLI by parallel function execution (#2565)
- What I did Optimize the execution time of the 'show techsupport' script. - How I did it The show techsupport CLI command calls the generate_dump bash script. In the script, there are many functions that follow the same sequence: 1. Run some CLI command 2. Save the output from step 1 to a temporary file 3. Append the temporary file from step 2 to the `/var/dump/sonic_dump_XXXX.tar` file 4. Delete the temporary file from step 2 This PR runs these functions in parallel. It also limits how many processes are spawned at once, so that the script does not consume all of the CPU time. - How to verify it First test scenario Run the `time show techsupport` CLI command and compare the execution time to the original script (with no parallelism); the execution time will be decreased by 10-20%. Second test scenario 1. Hang the FW by using the following commands a. mcra /dev/mst/mt52100_pci_cr0 0xa01e4 0x10 b. mcra /dev/mst/mt52100_pci_cr0 0xa05e4 0x10 c. mcra /dev/mst/mt52100_pci_cr0 0xa07e4 0x10 d. mcra /dev/mst/mt52100_pci_cr0 0xa09e4 0x10 e. mcra /dev/mst/mt52100_pci_cr0 0xa0be4 0x10 f. mcra /dev/mst/mt52100_pci_cr0 0xa0de4 0x10 g. mcra /dev/mst/mt52100_pci_cr0 0xa0fe4 0x10 2. Run the `time show techsupport` CLI command and compare the execution time to the original script (with no parallelism); the execution time will be decreased by up to 50%, because inside the script we launch CLI commands with `timeout --foreground 5m`. Signed-off-by: Vadym Hlushko <[email protected]>
1 parent 67cbb15 commit f45dcfb

File tree

1 file changed

+74
-53
lines changed

1 file changed

+74
-53
lines changed

scripts/generate_dump

+74-53
Original file line numberDiff line numberDiff line change
@@ -1469,92 +1469,108 @@ main() {
14691469
/proc/pagetypeinfo /proc/partitions /proc/sched_debug /proc/slabinfo \
14701470
/proc/softirqs /proc/stat /proc/swaps /proc/sysvipc /proc/timer_list \
14711471
/proc/uptime /proc/version /proc/vmallocinfo /proc/vmstat \
1472-
/proc/zoneinfo
1472+
/proc/zoneinfo &
14731473
end_t=$(date +%s%3N)
14741474
echo "[ Capture Proc State ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO
14751475

14761476
# Save all the processes within each docker
1477-
save_cmd "show services" services.summary
1477+
save_cmd "show services" services.summary &
14781478

14791479
# Save reboot cause information
1480-
save_cmd "show reboot-cause" reboot.cause
1480+
save_cmd "show reboot-cause" reboot.cause &
1481+
wait
14811482

14821483
local asic="$(/usr/local/bin/sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)"
14831484
# 1st counter snapshot early. Need 2 snapshots to make sense of counters trend.
14841485
save_counter_snapshot $asic 1
14851486

1486-
save_cmd "systemd-analyze blame" "systemd.analyze.blame"
1487-
save_cmd "systemd-analyze dump" "systemd.analyze.dump"
1488-
save_cmd "systemd-analyze plot" "systemd.analyze.plot.svg"
1487+
save_cmd "systemd-analyze blame" "systemd.analyze.blame" &
1488+
save_cmd "systemd-analyze dump" "systemd.analyze.dump" &
1489+
save_cmd "systemd-analyze plot" "systemd.analyze.plot.svg" &
1490+
wait
14891491

1490-
save_platform_info
1492+
save_platform_info &
14911493

1492-
save_cmd "show vlan brief" "vlan.summary"
1493-
save_cmd "show version" "version"
1494-
save_cmd "show platform summary" "platform.summary"
1495-
save_cmd "cat /host/machine.conf" "machine.conf"
1496-
save_cmd "docker stats --no-stream" "docker.stats"
1494+
save_cmd "show vlan brief" "vlan.summary" &
1495+
save_cmd "show version" "version" &
1496+
save_cmd "show platform summary" "platform.summary" &
1497+
wait
14971498

1498-
save_cmd "sensors" "sensors"
1499-
save_cmd "lspci -vvv -xx" "lspci"
1500-
save_cmd "lsusb -v" "lsusb"
1501-
save_cmd "sysctl -a" "sysctl"
1499+
save_cmd "cat /host/machine.conf" "machine.conf" &
1500+
save_cmd "docker stats --no-stream" "docker.stats" &
1501+
save_cmd "sensors" "sensors" &
1502+
wait
15021503

1503-
save_ip_info
1504-
save_bridge_info
1504+
save_cmd "lspci -vvv -xx" "lspci" &
1505+
save_cmd "lsusb -v" "lsusb" &
1506+
save_cmd "sysctl -a" "sysctl" &
1507+
wait
15051508

1506-
save_frr_info
1507-
save_bgp_info
1508-
save_evpn_info
1509+
save_ip_info &
1510+
save_bridge_info &
1511+
wait
15091512

1510-
save_cmd "show interface status -d all" "interface.status"
1511-
save_cmd "show interface transceiver presence" "interface.xcvrs.presence"
1512-
save_cmd "show interface transceiver eeprom --dom" "interface.xcvrs.eeprom"
1513-
save_cmd "show ip interface -d all" "ip.interface"
1513+
save_frr_info &
1514+
save_bgp_info &
1515+
save_evpn_info &
1516+
wait
15141517

1515-
save_cmd "lldpctl" "lldpctl"
1518+
save_cmd "show interface status -d all" "interface.status" &
1519+
save_cmd "show interface transceiver presence" "interface.xcvrs.presence" &
1520+
save_cmd "show interface transceiver eeprom --dom" "interface.xcvrs.eeprom" &
1521+
save_cmd "show ip interface -d all" "ip.interface" &
1522+
wait
1523+
1524+
save_cmd "lldpctl" "lldpctl" &
15161525
if [[ ( "$NUM_ASICS" > 1 ) ]]; then
15171526
for (( i=0; i<$NUM_ASICS; i++ ))
15181527
do
1519-
save_cmd "docker exec lldp$i lldpcli show statistics" "lldp$i.statistics"
1520-
save_cmd "docker logs bgp$i" "docker.bgp$i.log"
1521-
save_cmd "docker logs swss$i" "docker.swss$i.log"
1528+
save_cmd "docker exec lldp$i lldpcli show statistics" "lldp$i.statistics" &
1529+
save_cmd "docker logs bgp$i" "docker.bgp$i.log" &
1530+
save_cmd "docker logs swss$i" "docker.swss$i.log" &
15221531
done
15231532
else
1524-
save_cmd "docker exec lldp lldpcli show statistics" "lldp.statistics"
1525-
save_cmd "docker logs bgp" "docker.bgp.log"
1526-
save_cmd "docker logs swss" "docker.swss.log"
1533+
save_cmd "docker exec lldp lldpcli show statistics" "lldp.statistics" &
1534+
save_cmd "docker logs bgp" "docker.bgp.log" &
1535+
save_cmd "docker logs swss" "docker.swss.log" &
15271536
fi
1528-
1529-
save_cmd "ps aux" "ps.aux"
1530-
save_cmd "top -b -n 1" "top"
1531-
save_cmd "free" "free"
1532-
save_cmd "vmstat 1 5" "vmstat"
1533-
save_cmd "vmstat -m" "vmstat.m"
1534-
save_cmd "vmstat -s" "vmstat.s"
1535-
save_cmd "mount" "mount"
1536-
save_cmd "df" "df"
1537-
save_cmd "dmesg" "dmesg"
1538-
1539-
save_nat_info
1540-
save_bfd_info
1541-
save_redis_info
1537+
wait
1538+
1539+
save_cmd "ps aux" "ps.aux" &
1540+
save_cmd "top -b -n 1" "top" &
1541+
save_cmd "free" "free" &
1542+
wait
1543+
save_cmd "vmstat 1 5" "vmstat" &
1544+
save_cmd "vmstat -m" "vmstat.m" &
1545+
save_cmd "vmstat -s" "vmstat.s" &
1546+
wait
1547+
save_cmd "mount" "mount" &
1548+
save_cmd "df" "df" &
1549+
save_cmd "dmesg" "dmesg" &
1550+
wait
1551+
1552+
save_nat_info &
1553+
save_bfd_info &
1554+
wait
1555+
save_redis_info &
15421556

15431557
if $DEBUG_DUMP
15441558
then
1545-
save_dump_state_all_ns
1559+
save_dump_state_all_ns &
15461560
fi
1561+
wait
15471562

1548-
save_cmd "docker ps -a" "docker.ps"
1549-
save_cmd "docker top pmon" "docker.pmon"
1563+
save_cmd "docker ps -a" "docker.ps" &
1564+
save_cmd "docker top pmon" "docker.pmon" &
15501565

15511566
if [[ -d ${PLUGINS_DIR} ]]; then
15521567
local -r dump_plugins="$(find ${PLUGINS_DIR} -type f -executable)"
15531568
for plugin in $dump_plugins; do
15541569
# save stdout output of plugin and gzip it
1555-
save_cmd "$plugin" "$(basename $plugin)" true
1570+
save_cmd "$plugin" "$(basename $plugin)" true &
15561571
done
15571572
fi
1573+
wait
15581574

15591575
save_saidump
15601576

@@ -1573,6 +1589,10 @@ main() {
15731589
# 2nd counter snapshot late. Need 2 snapshots to make sense of counters trend.
15741590
save_counter_snapshot $asic 2
15751591

1592+
$RM $V -rf $TARDIR
1593+
$MKDIR $V -p $TARDIR
1594+
$MKDIR $V -p $LOGDIR
1595+
15761596
# Copying the /etc files to a directory and then tar it
15771597
$CP -r /etc $TARDIR/etc
15781598
rm_list=$(find -L $TARDIR/etc -maxdepth 5 -type l)
@@ -1592,9 +1612,10 @@ main() {
15921612
$TARDIR/etc/sonic/*.crt $TARDIR/etc/sonic/*.pem $TARDIR/etc/sonic/*.key \
15931613
$TARDIR/etc/ssl/*.pem $TARDIR/etc/ssl/certs/ $TARDIR/etc/ssl/private/*
15941614

1595-
save_log_files
1596-
save_crash_files
1597-
save_warmboot_files
1615+
save_log_files &
1616+
save_crash_files &
1617+
save_warmboot_files &
1618+
wait
15981619

15991620
if [[ "$asic" = "mellanox" ]]; then
16001621
collect_mellanox_dfw_dumps

0 commit comments

Comments
 (0)