Skip to content

Commit e6d7f52

Browse files
stepanblyschak authored and liat-grozovik committed
[generate_dump] system dump improvements (sonic-net#503)
* [techsupport] add option to collect logs since given date. Allow the user to dump only logs newer than a specific date to reduce the dump archive size, e.g.: admin@sonic:~$ show techsupport --since=yesterday Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] add specific error codes Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] exclude mellanox folders in /etc/ Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] disable logrotate during log collection Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] add +w for procfs dump files Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] SINCE_DATE is epoch by default Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] change find_logs to find_files Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] add mstdump Signed-off-by: Stepan Blyschak <[email protected]> * [generate_dump] fix noop mode when generating sai/sdk/fw dump Signed-off-by: Stepan Blyschak <[email protected]>
1 parent eb63da8 commit e6d7f52

File tree

2 files changed

+91
-12
lines changed

2 files changed

+91
-12
lines changed

scripts/generate_dump

+87-11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
set -u
88

9+
ERROR_TAR_FAILED=5
10+
ERROR_PROCFS_SAVE_FAILED=6
11+
ERROR_INVALID_ARGUMENT=10
12+
913
TAR=tar
1014
MKDIR=mkdir
1115
RM=rm
@@ -14,10 +18,13 @@ GZIP=gzip
1418
CP=cp
1519
MV=mv
1620
GREP=grep
21+
TOUCH=touch
1722
V=
1823
NOOP=false
1924
DO_COMPRESS=true
2025
CMD_PREFIX=
26+
SINCE_DATE="@0" # default is set to January 1, 1970 at 00:00:00 GMT
27+
REFERENCE_FILE=/tmp/reference
2128
BASE=sonic_dump_`hostname`_`date +%Y%m%d_%H%M%S`
2229
DUMPDIR=/var/dump
2330
TARDIR=$DUMPDIR/$BASE
@@ -72,7 +79,7 @@ save_cmd() {
7279
fi
7380
fi
7481
($TAR $V -rhf $TARFILE -C $DUMPDIR "$tarpath" \
75-
|| abort 5 "tar append operation failed. Aborting to prevent data loss.") \
82+
|| abort "${ERROR_TAR_FAILED}" "tar append operation failed. Aborting to prevent data loss.") \
7683
&& $RM $V -rf "$filepath"
7784
}
7885

@@ -151,7 +158,7 @@ save_proc() {
151158
local procfiles="$@"
152159
$MKDIR $V -p $TARDIR/proc \
153160
&& $CP $V -r $procfiles $TARDIR/proc \
154-
&& $TAR $V -rhf $TARFILE -C $DUMPDIR --mode=+r $BASE/proc \
161+
&& $TAR $V -rhf $TARFILE -C $DUMPDIR --mode=+rw $BASE/proc \
155162
&& $RM $V -rf $TARDIR/proc
156163
}
157164

@@ -212,10 +219,54 @@ save_file() {
212219
fi
213220
fi
214221
($TAR $V -rhf $TARFILE -C $DUMPDIR "$tar_path" \
215-
|| abort 5 "tar append operation failed. Aborting to prevent data loss.") \
222+
|| abort "${ERROR_PROCFS_SAVE_FAILED}" "tar append operation failed. Aborting to prevent data loss.") \
216223
&& $RM $V -f "$gz_path"
217224
}
218225

226+
###############################################################################
# find_files routine
# Lists regular files (symlinks are followed) under a directory whose
# modification time is newer than SINCE_DATE.
# Globals:
#  SINCE_DATE: list files only newer than given date; any expression accepted
#              by GNU date, e.g. "@0" or "yesterday"
# Arguments:
#  directory: directory to search files in
# Outputs:
#  Matching file paths on stdout, one per line
# Returns:
#  Exit status of find
###############################################################################
find_files() {
    local -r directory=$1

    # Compare against the timestamp itself (-newermt) instead of touching a
    # reference file first. In noop mode TOUCH is "echo touch", so the old
    # approach printed the touch command into the captured file list and then
    # ran find against a reference file that was never created. It also relied
    # on a fixed, predictable path under /tmp.
    find -L "$directory" -type f -newermt "${SINCE_DATE}"
}
243+
244+
###############################################################################
# disable_logrotate routine
# Comments out every line mentioning logrotate in /etc/cron.d/logrotate by
# prefixing it with '#'. Pair each call with enable_logrotate.
# Globals:
#  CMD_PREFIX: when set to "echo " (noop mode) the sed command is printed
#              instead of executed, so the cron file is left untouched
# Arguments:
#  None
# Returns:
#  None
###############################################################################
disable_logrotate() {
    # CMD_PREFIX is intentionally unquoted: "echo " must split into its own word.
    ${CMD_PREFIX:-}sed -i '/logrotate/s/^/#/g' /etc/cron.d/logrotate
}

###############################################################################
# enable_logrotate routine
# Strips all leading '#' characters from every line mentioning logrotate in
# /etc/cron.d/logrotate, undoing disable_logrotate.
# NOTE(review): this also uncomments matching lines that were commented out
# before disable_logrotate ran - confirm that is acceptable.
# Globals:
#  CMD_PREFIX: when set to "echo " (noop mode) the sed command is printed
#              instead of executed, so the cron file is left untouched
# Arguments:
#  None
# Returns:
#  None
###############################################################################
enable_logrotate() {
    # CMD_PREFIX is intentionally unquoted: "echo " must split into its own word.
    ${CMD_PREFIX:-}sed -i '/logrotate/s/^#*//g' /etc/cron.d/logrotate
}
269+
219270
###############################################################################
220271
# Main generate_dump routine
221272
# Globals:
@@ -251,7 +302,7 @@ main() {
251302
/proc/softirqs /proc/stat /proc/swaps /proc/sysvipc /proc/timer_list \
252303
/proc/uptime /proc/version /proc/vmallocinfo /proc/vmstat \
253304
/proc/zoneinfo \
254-
|| abort 6 "Proc saving operation failed. Aborting for safety."
305+
|| abort "${ERROR_PROCFS_SAVE_FAILED}" "Proc saving operation failed. Aborting for safety."
255306

256307
save_cmd "show version" "version"
257308
save_cmd "show platform summary" "platform.summary"
@@ -303,9 +354,16 @@ main() {
303354
local platform="$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)"
304355
if [[ $platform == *"mlnx"* ]]; then
305356
local sai_dump_filename="/tmp/sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")"
306-
docker exec -it syncd saisdkdump -f $sai_dump_filename
307-
docker exec syncd tar Ccf $(dirname $sai_dump_filename) - $(basename $sai_dump_filename) | tar Cxf /tmp/ -
357+
${CMD_PREFIX}docker exec -it syncd saisdkdump -f $sai_dump_filename
358+
${CMD_PREFIX}docker exec syncd tar Ccf $(dirname $sai_dump_filename) - $(basename $sai_dump_filename) | tar Cxf /tmp/ -
308359
save_file $sai_dump_filename sai_sdk_dump true
360+
361+
local mst_dump_filename="/tmp/mstdump"
362+
local max_dump_count="3"
363+
for i in $(seq 1 $max_dump_count); do
364+
${CMD_PREFIX}/usr/bin/mstdump /dev/mst/mt*conf0 > "${mst_dump_filename}${i}"
365+
save_file "${mst_dump_filename}${i}" mstdump true
366+
done
309367
fi
310368

311369
local asic="$(/usr/local/bin/sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)"
@@ -326,7 +384,7 @@ main() {
326384
$MKDIR $V -p $LOGDIR
327385
$LN $V -s /etc $TARDIR/etc
328386

329-
($TAR $V -rhf $TARFILE -C $DUMPDIR --mode=+r \
387+
($TAR $V -rhf $TARFILE -C $DUMPDIR --mode=+rw \
330388
--exclude="etc/alternatives" \
331389
--exclude="*/etc/passwd*" \
332390
--exclude="*/etc/shadow*" \
@@ -335,12 +393,17 @@ main() {
335393
--exclude="*/etc/ssh*" \
336394
--exclude="*get_creds*" \
337395
--exclude="*snmpd.conf*" \
396+
--exclude="/etc/mlnx" \
397+
--exclude="/etc/mft" \
338398
$BASE/etc \
339-
|| abort 5 "Tar append operation failed. Aborting for safety.") \
399+
|| abort "${ERROR_TAR_FAILED}" "Tar append operation failed. Aborting for safety.") \
340400
&& $RM $V -rf $TARDIR
341401

402+
disable_logrotate
403+
trap enable_logrotate HUP INT QUIT TERM KILL ABRT ALRM
404+
342405
# gzip up all log files individually before placing them in the incremental tarball
343-
for file in $(find -L /var/log -type f); do
406+
for file in $(find_files "/var/log/"); do
344407
# ignore the sparse file lastlog
345408
if [ "$file" = "/var/log/lastlog" ]; then
346409
continue
@@ -353,8 +416,10 @@ main() {
353416
fi
354417
done
355418

419+
enable_logrotate
420+
356421
# archive core dump files
357-
for file in $(find -L /var/core -type f); do
422+
for file in $(find_files "/var/core/"); do
358423
# don't gzip already-gzipped log files :)
359424
if [ -z "${file##*.gz}" ]; then
360425
save_file $file core false
@@ -422,10 +487,15 @@ OPTIONS
422487
Noop mode. Don't actually create anything, just echo what would happen
423488
-z
424489
Don't compress the tar at the end.
490+
-s DATE
491+
Collect logs since DATE;
492+
The argument is a mostly free format human readable string such as
493+
"24 March", "yesterday", etc.
494+
425495
EOF
426496
}
427497

428-
while getopts ":xnvhz" opt; do
498+
while getopts ":xnvhzs:" opt; do
429499
case $opt in
430500
x)
431501
# enable bash debugging
@@ -450,11 +520,17 @@ while getopts ":xnvhz" opt; do
450520
CMD_PREFIX="echo "
451521
MV="echo mv"
452522
CP="echo cp"
523+
TOUCH="echo touch"
453524
NOOP=true
454525
;;
455526
z)
456527
DO_COMPRESS=false
457528
;;
529+
s)
530+
SINCE_DATE="${OPTARG}"
531+
# validate date expression
532+
date --date="${SINCE_DATE}" &> /dev/null || abort "${ERROR_INVALID_ARGUMENT}" "Invalid date expression passed: '${SINCE_DATE}'"
533+
;;
458534
/?)
459535
echo "Invalid option: -$OPTARG" >&2
460536
exit 1

show/main.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1282,10 +1282,13 @@ def users(verbose):
12821282
#
12831283

12841284
@cli.command()
@click.option('--since', required=False, help="Collect logs and core files since given date")
@click.option('--verbose', is_flag=True, help="Enable verbose output")
def techsupport(since, verbose):
    """Gather information for troubleshooting"""
    # Imported locally so the block stays self-contained; pipes.quote is the
    # Python 2 spelling of shlex.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    cmd = "sudo generate_dump -v"
    if since:
        # Quote the date so multi-word expressions such as "24 March" reach
        # generate_dump as a single -s argument, and so shell metacharacters
        # in user input are not interpreted on a sudo command line.
        # NOTE(review): assumes run_command passes cmd through a shell - confirm.
        cmd += " -s {}".format(quote(since))
    run_command(cmd, display_cmd=verbose)
12901293

12911294

0 commit comments

Comments
 (0)