Skip to content

Add script to periodically update oper status of management interface #21245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 9, 2025
2 changes: 2 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ sudo cp $IMAGE_CONFIGS/monit/arp_update_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/arp_update_checker
sudo cp $IMAGE_CONFIGS/monit/control_plane_drop_check $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/control_plane_drop_check
sudo cp $IMAGE_CONFIGS/monit/mgmt_oper_status $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mgmt_oper_status

# Installed smartmontools version should match installed smartmontools in docker-platform-monitor Dockerfile
# TODO: are mismatching versions fine for bookworm?
Expand Down
5 changes: 5 additions & 0 deletions files/image_config/monit/conf.d/sonic-host
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,8 @@ check program arp_update_checker with path "/usr/bin/arp_update_checker" every 1
check program controlPlaneDropCheck with path "/usr/bin/control_plane_drop_check"
every 5 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles

# Periodically update oper status of mgmt interface in STATE_DB
check program mgmtOperStatus with path "/usr/bin/mgmt_oper_status"
every 1 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we tested the config reload/minigraph scenario? Are we sure we are not getting a monit error there, as that can impact the nightly runs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verified reload/reboot, did not see the monit log message, is there any other specific concern on why this error might get logged?

42 changes: 42 additions & 0 deletions files/image_config/monit/mgmt_oper_status
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3

"""
"""

import docker
import sys
import subprocess
import syslog

from sonic_py_common import multi_asic, device_info
from swsscommon.swsscommon import SonicV2Connector


def main():
    """Sync the oper status of every configured management port into STATE_DB.

    For each ``MGMT_PORT|<name>`` key in CONFIG_DB, read
    ``/sys/class/net/<name>/operstate`` and, when the value differs from the
    ``oper_status`` field stored under ``MGMT_PORT_TABLE|<name>`` in STATE_DB,
    write the new value and log the transition.

    Any exception raised while processing the ports is logged at LOG_ERR and
    swallowed, so the function returns normally in that case.
    """
    db = SonicV2Connector(use_unix_socket_path=True)
    db.connect('CONFIG_DB')
    db.connect('STATE_DB')

    mgmt_ports_keys = db.keys(db.CONFIG_DB, 'MGMT_PORT|*')
    if not mgmt_ports_keys:
        syslog.syslog(syslog.LOG_DEBUG, 'No management interface found')
        return

    try:
        # The set of existing STATE_DB entries does not depend on the port
        # being processed, so query it once instead of once per port.
        state_db_mgmt_keys = db.keys(db.STATE_DB, 'MGMT_PORT_TABLE|*') or []

        for config_key in mgmt_ports_keys:
            port = config_key.split('MGMT_PORT|')[-1]
            state_db_key = "MGMT_PORT_TABLE|{}".format(port)

            # A port that has never been recorded is treated as 'unknown'.
            prev_oper_status = 'unknown'
            if state_db_key in state_db_mgmt_keys:
                prev_oper_status = db.get(db.STATE_DB, state_db_key, 'oper_status')

            port_operstate_path = '/sys/class/net/{}/operstate'.format(port)
            result = subprocess.run(['cat', port_operstate_path], capture_output=True, text=True)

            # If the interface does not exist, `cat` fails and prints nothing
            # on stdout; record 'unknown' rather than an empty oper_status.
            current_oper_status = result.stdout.strip() if result.returncode == 0 else ''
            if not current_oper_status:
                current_oper_status = 'unknown'

            if current_oper_status == prev_oper_status:
                continue

            db.set(db.STATE_DB, state_db_key, 'oper_status', current_oper_status)
            # Review request: WARNING when the link is not up, INFO when it is.
            log_level = syslog.LOG_INFO if current_oper_status == 'up' else syslog.LOG_WARNING
            syslog.syslog(log_level, "mgmt_oper_status_check: {}".format(current_oper_status))
    except Exception as e:
        syslog.syslog(syslog.LOG_ERR, "mgmt_oper_status_check exception : {}".format(str(e)))


if __name__ == "__main__":
    # Run a single sync pass. main() logs and swallows per-port errors itself,
    # so reaching the next line means the pass completed; exit with status 0.
    main()
    raise SystemExit(0)
60 changes: 60 additions & 0 deletions files/image_config/monit/tests/test_mgmt_oper_status_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest
from unittest.mock import patch, MagicMock
import subprocess
import syslog
import sys
import mgmt_oper_status

class TestMgmtOperStatusCheck(unittest.TestCase):
    """Unit tests for ``mgmt_oper_status.main``.

    The DB connector, ``subprocess.run`` and ``syslog.syslog`` are patched in
    the ``mgmt_oper_status`` module namespace so no real database, sysfs file
    or system log is touched.
    """

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_no_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """With no MGMT_PORT keys, only a debug message is logged."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mock_db.keys.return_value = []

        mgmt_oper_status.main()

        mock_syslog.assert_called_with(syslog.LOG_DEBUG, 'No management interface found')
        # Nothing should be read or written when there are no ports.
        mock_subprocess.assert_not_called()
        mock_db.set.assert_not_called()

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_with_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """Each port whose status changed to 'up' is written and logged at INFO."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0', 'MGMT_PORT|eth1']
        # The same list is returned for the STATE_DB query; it contains no
        # 'MGMT_PORT_TABLE|...' key, so both ports start out as 'unknown'.
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        mock_subprocess.return_value = subprocess.CompletedProcess(
            args=['cat', '/sys/class/net/eth0/operstate'], returncode=0, stdout='up', stderr='')

        mgmt_oper_status.main()

        # One INFO message per port, using the prefix the script really emits.
        mock_syslog.assert_any_call(syslog.LOG_INFO, 'mgmt_oper_status_check: up')
        self.assertEqual(mock_syslog.call_count, 2)

        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'up')
        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth1', 'oper_status', 'up')

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_exception_handling(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """An exception while reading the status is logged at ERR and swallowed."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        mock_subprocess.side_effect = Exception("File not found")

        mgmt_oper_status.main()

        mock_syslog.assert_called_with(
            syslog.LOG_ERR, "mgmt_oper_status_check exception : File not found")
        # The script only logs on failure; it does not write to STATE_DB.
        mock_db.set.assert_not_called()

if __name__ == '__main__':
unittest.main()
Loading