Skip to content

Commit d85e1db

Browse files
authored
Disable monit monitoring of routeCheck when services are stopped during config reload/minigraph, and re-enable it when services are started. (sonic-net#3682)
What I did: During config reload/minigraph, disable monit monitoring of routeCheck when services are stopped and re-enable it when services are started, because at a large route scale (70K+ routes) routeCheck can transiently log a monit error, which can cause sonic-mgmt test cases to fail because of loganalyzer. Why I did it: This transient issue can generate a monit ERR log, which can result in sonic-mgmt test case failures. How I verified it: Manual verification via `sudo monit status routeCheck`, and the unit tests were updated.
1 parent 139983a commit d85e1db

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed

config/main.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -889,8 +889,9 @@ def _get_disabled_services_list(config_db):
889889
def _stop_services():
890890
try:
891891
subprocess.check_call(['sudo', 'monit', 'status'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
892-
click.echo("Disabling container monitoring ...")
892+
click.echo("Disabling container and routeCheck monitoring ...")
893893
clicommon.run_command(['sudo', 'monit', 'unmonitor', 'container_checker'])
894+
clicommon.run_command(['sudo', 'monit', 'unmonitor', 'routeCheck'])
894895
except subprocess.CalledProcessError as err:
895896
pass
896897

@@ -949,17 +950,18 @@ def _restart_services():
949950
wait_service_restart_finish('interfaces-config', last_interface_config_timestamp)
950951
wait_service_restart_finish('networking', last_networking_timestamp)
951952

953+
# Reload Monit configuration to pick up new hostname in case it changed
954+
click.echo("Reloading Monit configuration ...")
955+
clicommon.run_command(['sudo', 'monit', 'reload'])
956+
952957
try:
953958
subprocess.check_call(['sudo', 'monit', 'status'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
954-
click.echo("Enabling container monitoring ...")
959+
click.echo("Enabling container and routeCheck monitoring ...")
955960
clicommon.run_command(['sudo', 'monit', 'monitor', 'container_checker'])
961+
clicommon.run_command(['sudo', 'monit', 'monitor', 'routeCheck'])
956962
except subprocess.CalledProcessError as err:
957963
pass
958964

959-
# Reload Monit configuration to pick up new hostname in case it changed
960-
click.echo("Reloading Monit configuration ...")
961-
clicommon.run_command(['sudo', 'monit', 'reload'])
962-
963965
def _per_namespace_swss_ready(service_name):
964966
out, _ = clicommon.run_command(['systemctl', 'show', str(service_name), '--property', 'ActiveState', '--value'], return_cmd=True)
965967
if out.strip() != "active":

tests/config_test.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,14 @@
5050

5151
load_minigraph_command_output="""\
5252
Acquired lock on {0}
53+
Disabling container and routeCheck monitoring ...
5354
Stopping SONiC target ...
5455
Running command: /usr/local/bin/sonic-cfggen -H -m --write-to-db
5556
Running command: config qos reload --no-dynamic-buffer --no-delay
5657
Running command: pfcwd start_default
5758
Restarting SONiC target ...
5859
Reloading Monit configuration ...
60+
Enabling container and routeCheck monitoring ...
5961
Please note setting loaded from minigraph will be lost after system reboot. To preserve setting, run `config save`.
6062
Released lock on {0}
6163
"""
@@ -965,7 +967,8 @@ def setup_class(cls):
965967
importlib.reload(config.main)
966968

967969
@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=("dummy_path", None)))
968-
def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
970+
@mock.patch('config.main.subprocess.check_call')
971+
def test_load_minigraph(self, mock_check_call, get_cmd_module, setup_single_broadcom_asic):
969972
with mock.patch("utilities_common.cli.run_command", mock.MagicMock(side_effect=mock_run_command_side_effect)) as mock_run_command:
970973
(config, show) = get_cmd_module
971974
runner = CliRunner()
@@ -978,7 +981,7 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
978981
(load_minigraph_command_output.format(config.SYSTEM_RELOAD_LOCK))
979982
# Verify "systemctl reset-failed" is called for services under sonic.target
980983
mock_run_command.assert_any_call(['systemctl', 'reset-failed', 'swss'])
981-
assert mock_run_command.call_count == 12
984+
assert mock_run_command.call_count == 16
982985

983986
@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs',
984987
mock.MagicMock(return_value=("dummy_path", None)))

0 commit comments

Comments
 (0)