Skip to content

Commit 258ffa0

Browse files
[generate_dump] Optimize the execution time of 'show techsupport' CLI by parallel function execution (sonic-net#2512)
- What I did: Optimized the execution time of the 'show techsupport' script. - How I did it: The `show techsupport` CLI command calls the generate_dump bash script. In the script, there are many functions that follow the same sequence: 1. Run a CLI command 2. Save the output from step 1 to a temporary file 3. Append the temporary file from step 2 to the `/var/dump/sonic_dump_XXXX.tar` file 4. Delete the temporary file from step 2. This PR runs these functions in parallel. It also avoids spawning too many processes at once, so that it does not consume all CPU time. - How to verify it: First test scenario: Run the `time show techsupport` CLI command and compare the execution time to the original script (with no parallelism); the execution time decreases by 10-20%. Second test scenario: 1. Cause the FW to get stuck by using the following commands: a. mcra /dev/mst/mt52100_pci_cr0 0xa01e4 0x10 b. mcra /dev/mst/mt52100_pci_cr0 0xa05e4 0x10 c. mcra /dev/mst/mt52100_pci_cr0 0xa07e4 0x10 d. mcra /dev/mst/mt52100_pci_cr0 0xa09e4 0x10 e. mcra /dev/mst/mt52100_pci_cr0 0xa0be4 0x10 f. mcra /dev/mst/mt52100_pci_cr0 0xa0de4 0x10 g. mcra /dev/mst/mt52100_pci_cr0 0xa0fe4 0x10 2. Run the `time show techsupport` CLI command and compare the execution time to the original script (with no parallelism); the execution time decreases by up to 50%, because inside the script the CLI commands are launched with `timeout --foreground 5m`. Signed-off-by: Vadym Hlushko <[email protected]>
1 parent 739e4d7 commit 258ffa0

File tree

1 file changed

+92
-67
lines changed

1 file changed

+92
-67
lines changed

scripts/generate_dump

+92-67
Original file line numberDiff line numberDiff line change
@@ -1544,101 +1544,121 @@ main() {
15441544
/proc/pagetypeinfo /proc/partitions /proc/sched_debug /proc/slabinfo \
15451545
/proc/softirqs /proc/stat /proc/swaps /proc/sysvipc /proc/timer_list \
15461546
/proc/uptime /proc/version /proc/vmallocinfo /proc/vmstat \
1547-
/proc/zoneinfo
1548-
save_proc_stats
1547+
/proc/zoneinfo &
1548+
save_proc_stats &
15491549
end_t=$(date +%s%3N)
15501550
echo "[ Capture Proc State ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO
1551+
wait
15511552

15521553
# Save all the processes within each docker
1553-
save_cmd "show services" services.summary
1554+
save_cmd "show services" services.summary &
15541555

15551556
# Save reboot cause information
1556-
save_cmd "show reboot-cause" reboot.cause
1557+
save_cmd "show reboot-cause" reboot.cause &
1558+
wait
15571559

15581560
local asic="$(/usr/local/bin/sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)"
15591561
# 1st counter snapshot early. Need 2 snapshots to make sense of counters trend.
15601562
save_counter_snapshot $asic 1
15611563

1562-
save_cmd "systemd-analyze blame" "systemd.analyze.blame"
1563-
save_cmd "systemd-analyze dump" "systemd.analyze.dump"
1564-
save_cmd "systemd-analyze plot" "systemd.analyze.plot.svg"
1565-
1566-
save_platform_info
1567-
save_cmd "show vlan brief" "vlan.summary"
1568-
save_cmd "show version" "version"
1569-
save_cmd "show platform summary" "platform.summary"
1570-
save_cmd "cat /host/machine.conf" "machine.conf"
1571-
save_cmd "cat /boot/config-$(uname -r)" "boot.conf"
1572-
save_cmd "docker stats --no-stream" "docker.stats"
1573-
1574-
save_cmd "sensors" "sensors"
1575-
save_cmd "lspci -vvv -xx" "lspci"
1576-
save_cmd "lsusb -v" "lsusb"
1577-
save_cmd "sysctl -a" "sysctl"
1578-
1579-
save_ip_info
1580-
save_bridge_info
1581-
save_frr_info
1582-
1583-
save_bgp_info
1584-
save_evpn_info
1585-
1586-
save_cmd "show interface status -d all" "interface.status"
1587-
save_cmd "show interface transceiver presence" "interface.xcvrs.presence"
1588-
save_cmd "show interface transceiver eeprom --dom" "interface.xcvrs.eeprom"
1589-
save_cmd "show ip interface -d all" "ip.interface"
1590-
1591-
save_cmd "lldpctl" "lldpctl"
1564+
save_cmd "systemd-analyze blame" "systemd.analyze.blame" &
1565+
save_cmd "systemd-analyze dump" "systemd.analyze.dump" &
1566+
save_cmd "systemd-analyze plot" "systemd.analyze.plot.svg" &
1567+
wait
1568+
1569+
save_platform_info &
1570+
save_cmd "show vlan brief" "vlan.summary" &
1571+
save_cmd "show version" "version" &
1572+
save_cmd "show platform summary" "platform.summary" &
1573+
wait
1574+
1575+
save_cmd "cat /host/machine.conf" "machine.conf" &
1576+
save_cmd "cat /boot/config-$(uname -r)" "boot.conf" &
1577+
save_cmd "docker stats --no-stream" "docker.stats" &
1578+
wait
1579+
1580+
save_cmd "sensors" "sensors" &
1581+
save_cmd "lspci -vvv -xx" "lspci" &
1582+
save_cmd "lsusb -v" "lsusb" &
1583+
save_cmd "sysctl -a" "sysctl" &
1584+
wait
1585+
1586+
save_ip_info &
1587+
save_bridge_info &
1588+
wait
1589+
1590+
save_frr_info &
1591+
1592+
save_bgp_info &
1593+
save_evpn_info &
1594+
wait
1595+
1596+
save_cmd "show interface status -d all" "interface.status" &
1597+
save_cmd "show interface transceiver presence" "interface.xcvrs.presence" &
1598+
save_cmd "show interface transceiver eeprom --dom" "interface.xcvrs.eeprom" &
1599+
save_cmd "show ip interface -d all" "ip.interface" &
1600+
wait
1601+
1602+
save_cmd "lldpctl" "lldpctl" &
15921603
if [[ ( "$NUM_ASICS" > 1 ) ]]; then
15931604
for (( i=0; i<$NUM_ASICS; i++ ))
15941605
do
1595-
save_cmd "docker exec lldp$i lldpcli show statistics" "lldp$i.statistics"
1596-
save_cmd "docker logs bgp$i" "docker.bgp$i.log"
1597-
save_cmd "docker logs swss$i" "docker.swss$i.log"
1606+
save_cmd "docker exec lldp$i lldpcli show statistics" "lldp$i.statistics" &
1607+
save_cmd "docker logs bgp$i" "docker.bgp$i.log" &
1608+
save_cmd "docker logs swss$i" "docker.swss$i.log" &
15981609
done
15991610
else
1600-
save_cmd "docker exec lldp lldpcli show statistics" "lldp.statistics"
1601-
save_cmd "docker logs bgp" "docker.bgp.log"
1602-
save_cmd "docker logs swss" "docker.swss.log"
1611+
save_cmd "docker exec lldp lldpcli show statistics" "lldp.statistics" &
1612+
save_cmd "docker logs bgp" "docker.bgp.log" &
1613+
save_cmd "docker logs swss" "docker.swss.log" &
16031614
fi
1604-
1605-
save_cmd "ps aux" "ps.aux"
1606-
save_cmd "top -b -n 1" "top"
1607-
save_cmd "free" "free"
1608-
save_cmd "vmstat 1 5" "vmstat"
1609-
save_cmd "vmstat -m" "vmstat.m"
1610-
save_cmd "vmstat -s" "vmstat.s"
1611-
save_cmd "mount" "mount"
1612-
save_cmd "df" "df"
1613-
save_cmd "dmesg" "dmesg"
1614-
1615-
save_nat_info
1616-
save_bfd_info
1617-
save_redis_info
1615+
wait
1616+
1617+
save_cmd "ps aux" "ps.aux" &
1618+
save_cmd "top -b -n 1" "top" &
1619+
save_cmd "free" "free" &
1620+
wait
1621+
save_cmd "vmstat 1 5" "vmstat" &
1622+
save_cmd "vmstat -m" "vmstat.m" &
1623+
save_cmd "vmstat -s" "vmstat.s" &
1624+
wait
1625+
save_cmd "mount" "mount" &
1626+
save_cmd "df" "df" &
1627+
save_cmd "dmesg" "dmesg" &
1628+
wait
1629+
1630+
save_nat_info &
1631+
save_bfd_info &
1632+
wait
1633+
save_redis_info &
16181634

16191635
if $DEBUG_DUMP
16201636
then
1621-
save_dump_state_all_ns
1637+
save_dump_state_all_ns &
16221638
fi
1639+
wait
16231640

1624-
save_cmd "docker ps -a" "docker.ps"
1625-
save_cmd "docker top pmon" "docker.pmon"
1641+
save_cmd "docker ps -a" "docker.ps" &
1642+
save_cmd "docker top pmon" "docker.pmon" &
16261643

16271644
if [[ -d ${PLUGINS_DIR} ]]; then
16281645
local -r dump_plugins="$(find ${PLUGINS_DIR} -type f -executable)"
16291646
for plugin in $dump_plugins; do
16301647
# save stdout output of plugin and gzip it
1631-
save_cmd "$plugin" "$(basename $plugin)" true
1648+
save_cmd "$plugin" "$(basename $plugin)" true &
16321649
done
16331650
fi
1651+
wait
16341652

1635-
save_cmd "dpkg -l" "dpkg"
1636-
save_cmd "who -a" "who"
1637-
save_cmd "swapon -s" "swapon"
1638-
save_cmd "hdparm -i /dev/sda" "hdparm"
1639-
save_cmd "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command" "ps.extended"
1653+
save_cmd "dpkg -l" "dpkg" &
1654+
save_cmd "who -a" "who" &
1655+
save_cmd "swapon -s" "swapon" &
1656+
wait
1657+
save_cmd "hdparm -i /dev/sda" "hdparm" &
1658+
save_cmd "ps -AwwL -o user,pid,lwp,ppid,nlwp,pcpu,pri,nice,vsize,rss,tty,stat,wchan:12,start,bsdtime,command" "ps.extended" &
16401659

1641-
save_saidump
1660+
save_saidump &
1661+
wait
16421662

16431663
if [ "$asic" = "barefoot" ]; then
16441664
collect_barefoot
@@ -1659,6 +1679,10 @@ main() {
16591679
# 2nd counter snapshot late. Need 2 snapshots to make sense of counters trend.
16601680
save_counter_snapshot $asic 2
16611681

1682+
$RM $V -rf $TARDIR
1683+
$MKDIR $V -p $TARDIR
1684+
$MKDIR $V -p $LOGDIR
1685+
16621686
# Copying the /etc files to a directory and then tar it
16631687
$CP -r /etc $TARDIR/etc
16641688
rm_list=$(find -L $TARDIR/etc -maxdepth 5 -type l)
@@ -1678,9 +1702,10 @@ main() {
16781702
$TARDIR/etc/sonic/*.crt $TARDIR/etc/sonic/*.pem $TARDIR/etc/sonic/*.key \
16791703
$TARDIR/etc/ssl/*.pem $TARDIR/etc/ssl/certs/ $TARDIR/etc/ssl/private/*
16801704

1681-
save_log_files
1682-
save_crash_files
1683-
save_warmboot_files
1705+
save_log_files &
1706+
save_crash_files &
1707+
save_warmboot_files &
1708+
wait
16841709

16851710
if [[ "$asic" = "mellanox" ]]; then
16861711
collect_mellanox_dfw_dumps

0 commit comments

Comments
 (0)