Skip to content

Commit 430cd65

Browse files
authored
[202205] [generate dump] Move the core/log collection to the end of process execution and remove the default timeout (#2230)
Thus, moved the core/log collection to the end. But there is a catch with this change. For example, if the system is in an unstable state and most of the individual commands start to time out, the techsupport dump eventually times out at 30m (because of the global timeout), and the dump is then pretty useless, since it might not have any useful information at all. Thus, I've removed the default global timeout. Clients can/should knowingly provide a value using the -g option if the execution time has to be capped. A global timeout of 60 mins is used for Auto-techsupport invocation. Co-authored-by: Vivek Reddy Karri <[email protected]>
1 parent 785508d commit 430cd65

File tree

6 files changed

+40
-16
lines changed

6 files changed

+40
-16
lines changed

scripts/coredump_gen_handler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def parse_ts_dump_name(self, ts_stdout):
111111
return ""
112112

113113
def invoke_ts_cmd(self, since_cfg, num_retry=0):
114-
cmd_opts = ["show", "techsupport", "--silent", "--since", since_cfg]
114+
cmd_opts = ["show", "techsupport", "--silent", "--global-timeout", TS_GLOBAL_TIMEOUT, "--since", since_cfg]
115115
cmd = " ".join(cmd_opts)
116116
rc, stdout, stderr = subprocess_exec(cmd_opts, env=ENV_VAR)
117117
new_dump = ""

scripts/generate_dump

+4-5
Original file line numberDiff line numberDiff line change
@@ -1287,11 +1287,6 @@ main() {
12871287
end_t=$(date +%s%3N)
12881288
echo "[ Capture Proc State ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO
12891289

1290-
# Save logs and cores early
1291-
save_log_files
1292-
save_crash_files
1293-
save_warmboot_files
1294-
12951290
# Save all the processes within each docker
12961291
save_cmd "show services" services.summary
12971292

@@ -1426,6 +1421,10 @@ main() {
14261421
end_t=$(date +%s%3N)
14271422
echo "[ TAR /etc Files ] : $(($end_t-$start_t)) msec" >> $TECHSUPPORT_TIME_INFO
14281423

1424+
save_log_files
1425+
save_crash_files
1426+
save_warmboot_files
1427+
14291428
finalize
14301429
}
14311430

show/main.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ def users(verbose):
11381138

11391139
@cli.command()
11401140
@click.option('--since', required=False, help="Collect logs and core files since given date")
1141-
@click.option('-g', '--global-timeout', default=30, type=int, help="Global timeout in minutes. Default 30 mins")
1141+
@click.option('-g', '--global-timeout', required=False, type=int, help="Global timeout in minutes. WARN: Dump might be incomplete if enforced")
11421142
@click.option('-c', '--cmd-timeout', default=5, type=int, help="Individual command timeout in minutes. Default 5 mins")
11431143
@click.option('--verbose', is_flag=True, help="Enable verbose output")
11441144
@click.option('--allow-process-stop', is_flag=True, help="Dump additional data which may require system interruption")
@@ -1147,7 +1147,10 @@ def users(verbose):
11471147
@click.option('--redirect-stderr', '-r', is_flag=True, help="Redirect an intermediate errors to STDERR")
11481148
def techsupport(since, global_timeout, cmd_timeout, verbose, allow_process_stop, silent, debug_dump, redirect_stderr):
11491149
"""Gather information for troubleshooting"""
1150-
cmd = "sudo timeout --kill-after={}s -s SIGTERM --foreground {}m".format(COMMAND_TIMEOUT, global_timeout)
1150+
cmd = "sudo"
1151+
1152+
if global_timeout:
1153+
cmd += " timeout --kill-after={}s -s SIGTERM --foreground {}m".format(COMMAND_TIMEOUT, global_timeout)
11511154

11521155
if allow_process_stop:
11531156
cmd += " -a"

tests/coredump_gen_handler_test.py

+21
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
/tmp/saisdkdump
2121
"""
2222

23+
TS_DEFAULT_CMD = "show techsupport --silent --global-timeout 60 --since 2 days ago"
24+
2325
def signal_handler(signum, frame):
2426
raise Exception("Timed out!")
2527

@@ -427,4 +429,23 @@ def mock_cmd(cmd, env):
427429
assert False, "Method should not time out"
428430
finally:
429431
signal.alarm(0)
432+
433+
def test_auto_ts_options(self):
434+
"""
435+
Scenario: Check if the techsupport is called as expected
436+
"""
437+
db_wrap = Db()
438+
redis_mock = db_wrap.db
439+
set_auto_ts_cfg(redis_mock, state="enabled", since_cfg="2 days ago")
440+
set_feature_table_cfg(redis_mock, state="enabled")
441+
with Patcher() as patcher:
442+
def mock_cmd(cmd, env):
443+
cmd_str = " ".join(cmd)
444+
if "show techsupport" in cmd_str and cmd_str != TS_DEFAULT_CMD:
445+
assert False, "Expected TS_CMD: {}, Recieved: {}".format(TS_DEFAULT_CMD, cmd_str)
446+
return 0, AUTO_TS_STDOUT, ""
447+
cdump_mod.subprocess_exec = mock_cmd
448+
patcher.fs.create_file("/var/core/orchagent.12345.123.core.gz")
449+
cls = cdump_mod.CriticalProcCoreDumpHandle("orchagent.12345.123.core.gz", "swss", redis_mock)
450+
cls.handle_core_dump_creation_event()
430451

tests/techsupport_test.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@
33
from unittest.mock import patch, Mock
44
from click.testing import CliRunner
55

6-
EXPECTED_BASE_COMMAND = 'sudo timeout --kill-after=300s -s SIGTERM --foreground '
6+
EXPECTED_BASE_COMMAND = 'sudo '
77

88
@patch("show.main.run_command")
99
@pytest.mark.parametrize(
1010
"cli_arguments,expected",
1111
[
12-
([], '30m generate_dump -v -t 5'),
13-
(['--since', '2 days ago'], "30m generate_dump -v -s '2 days ago' -t 5"),
14-
(['-g', '50'], '50m generate_dump -v -t 5'),
15-
(['--allow-process-stop'], '30m -a generate_dump -v -t 5'),
16-
(['--silent'], '30m generate_dump -t 5'),
17-
(['--debug-dump', '--redirect-stderr'], '30m generate_dump -v -d -t 5 -r'),
12+
([], 'generate_dump -v -t 5'),
13+
(['--since', '2 days ago'], "generate_dump -v -s '2 days ago' -t 5"),
14+
(['-g', '50'], 'timeout --kill-after=300s -s SIGTERM --foreground 50m generate_dump -v -t 5'),
15+
(['--allow-process-stop'], '-a generate_dump -v -t 5'),
16+
(['--silent'], 'generate_dump -t 5'),
17+
(['--debug-dump', '--redirect-stderr'], 'generate_dump -v -d -t 5 -r'),
1818
]
1919
)
2020
def test_techsupport(run_command, cli_arguments, expected):

utilities_common/auto_techsupport_helper.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"CFG_CORE_USAGE", "CFG_SINCE", "FEATURE", "STATE_DB",
1414
"TS_MAP", "CORE_DUMP", "TIMESTAMP", "CONTAINER", "TIME_BUF",
1515
"SINCE_DEFAULT", "TS_PTRN_GLOB", "EXT_LOCKFAIL", "EXT_RETRY",
16-
"EXT_SUCCESS", "MAX_RETRY_LIMIT"
16+
"EXT_SUCCESS", "MAX_RETRY_LIMIT", "TS_GLOBAL_TIMEOUT"
1717
] + [ # Methods
1818
"verify_recent_file_creation",
1919
"get_ts_dumps",
@@ -60,6 +60,7 @@
6060

6161
TIME_BUF = 20
6262
SINCE_DEFAULT = "2 days ago"
63+
TS_GLOBAL_TIMEOUT = "60"
6364

6465
# Techsupport Exit Codes
6566
EXT_LOCKFAIL = 2

0 commit comments

Comments
 (0)