Skip to content

Add script to periodically update oper status of management interface #502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ sudo cp $IMAGE_CONFIGS/monit/memory_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/memory_checker
sudo cp $IMAGE_CONFIGS/monit/restart_service $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/restart_service
sudo cp $IMAGE_CONFIGS/monit/mgmt_oper_status.py $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mgmt_oper_status.py

# Installed smartmontools version should match installed smartmontools in docker-platform-monitor Dockerfile
# TODO: are mismatching versions fine for bookworm?
Expand Down
5 changes: 5 additions & 0 deletions files/image_config/monit/conf.d/sonic-host
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,8 @@ check program vnetRouteCheck with path "/usr/local/bin/vnet_route_check.py"
# memory_check tool that verifies that memory usage does not cross the threshold or invokes techsupport.
check program memory_check with path "/usr/local/bin/memory_threshold_check.py"
if status == 2 for 10 times within 20 cycles then exec "/usr/local/bin/memory_threshold_check_handler.py"

# Periodically update oper status of mgmt interface in STATE_DB
check program mgmtOperStatus with path "/usr/bin/mgmt_oper_status.py"
every 1 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles
44 changes: 44 additions & 0 deletions files/image_config/monit/mgmt_oper_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3

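"""
Update the oper_status of every configured management interface in STATE_DB.

For each MGMT_PORT entry in CONFIG_DB the script reads
/sys/class/net/<port>/operstate and stores the value in the matching
MGMT_PORT_TABLE entry of STATE_DB. It is meant to be run periodically by monit.
"""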
import sys
import subprocess
import syslog

from sonic_py_common import multi_asic, device_info
from swsscommon.swsscommon import SonicV2Connector


def main():
    """Copy the kernel oper state of each management port into STATE_DB.

    The management ports are taken from the ``MGMT_PORT|*`` keys of
    CONFIG_DB. For every port the content of
    ``/sys/class/net/<port>/operstate`` is compared with the ``oper_status``
    field currently stored under ``MGMT_PORT_TABLE|<port>`` in STATE_DB
    (treated as ``'unknown'`` when the entry does not exist); the field is
    rewritten and a syslog message emitted only when the value changed.

    Returns:
        None. When no management port is configured only a debug message
        is logged.

    Raises:
        SystemExit: with code 1 when any step fails. The error is logged
            and, if a port was being processed, its ``oper_status`` is set
            to ``'unknown'`` first.
    """
    db = SonicV2Connector(use_unix_socket_path=True)
    db.connect('CONFIG_DB')
    db.connect('STATE_DB')

    mgmt_ports_keys = db.keys(db.CONFIG_DB, 'MGMT_PORT|*')
    if not mgmt_ports_keys:
        syslog.syslog(syslog.LOG_DEBUG, 'No management interface found')
        return

    # Bound before the try block so the handler can tell whether a port was
    # being processed at the time of the failure; otherwise an early error
    # would surface as a NameError and hide the real cause.
    state_db_key = None
    try:
        mgmt_ports = [key.split('MGMT_PORT|')[-1] for key in mgmt_ports_keys]
        # The set of existing STATE_DB entries does not depend on the port
        # being processed, so it is fetched once. `or []` keeps the
        # membership test valid if the lookup yields an empty/None result.
        state_db_mgmt_port = db.keys(db.STATE_DB, 'MGMT_PORT_TABLE|*') or []
        for port in mgmt_ports:
            state_db_key = "MGMT_PORT_TABLE|{}".format(port)
            prev_oper_status = 'unknown'
            if state_db_key in state_db_mgmt_port:
                prev_oper_status = db.get(db.STATE_DB, state_db_key, 'oper_status')
            port_operstate_path = '/sys/class/net/{}/operstate'.format(port)
            # check=True turns a failing `cat` (e.g. the interface does not
            # exist) into an exception instead of silently yielding an
            # empty status string.
            oper_status = subprocess.run(['cat', port_operstate_path],
                                         capture_output=True, text=True, check=True)
            current_oper_status = oper_status.stdout.strip()
            if current_oper_status != prev_oper_status:
                db.set(db.STATE_DB, state_db_key, 'oper_status', current_oper_status)
                log_level = syslog.LOG_INFO if current_oper_status == 'up' else syslog.LOG_WARNING
                syslog.syslog(log_level, "mgmt_oper_status: {}".format(current_oper_status))
    except Exception as e:
        syslog.syslog(syslog.LOG_ERR, "mgmt_oper_status exception : {}".format(str(e)))
        if state_db_key is not None:
            db.set(db.STATE_DB, state_db_key, 'oper_status', 'unknown')
        # Non-zero exit status is what the monit check reacts to.
        sys.exit(1)


if __name__ == "__main__":
    # main() terminates the process itself with status 1 on failure;
    # reaching the line below means the run succeeded.
    main()
    raise SystemExit(0)
Empty file.
79 changes: 79 additions & 0 deletions files/image_config/monit/tests/test_mgmt_oper_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import unittest
from unittest.mock import patch, MagicMock
import subprocess
import syslog
import sys
import mgmt_oper_status

class TestMgmtOperStatusCheck(unittest.TestCase):
    """Tests for mgmt_oper_status.main().

    SonicV2Connector, subprocess.run and syslog.syslog are patched in every
    test, so no database, sysfs file or system log is touched.
    """

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_no_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """With no MGMT_PORT keys only a debug message is logged."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mock_db.keys.return_value = []

        mgmt_oper_status.main()

        mock_syslog.assert_called_with(syslog.LOG_DEBUG, 'No management interface found')

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_with_mgmt_ports(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """Every configured port that reads 'up' is written to STATE_DB."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0', 'MGMT_PORT|eth1']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        # The same canned result is returned for both ports.
        mock_subprocess.return_value = subprocess.CompletedProcess(
            args=['cat', '/sys/class/net/eth0/operstate'], returncode=0, stdout='up', stderr='')

        mgmt_oper_status.main()

        mock_syslog.assert_any_call(syslog.LOG_INFO, 'mgmt_oper_status: up')

        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'up')
        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth1', 'oper_status', 'up')

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_with_mgmt_port_down(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """A port that reads 'down' is logged at warning level and stored."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        mock_subprocess.return_value = subprocess.CompletedProcess(
            args=['cat', '/sys/class/net/eth0/operstate'], returncode=0, stdout='down', stderr='')

        mgmt_oper_status.main()

        mock_syslog.assert_any_call(syslog.LOG_WARNING, 'mgmt_oper_status: down')

        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'down')

    @patch('mgmt_oper_status.SonicV2Connector')
    @patch('mgmt_oper_status.subprocess.run')
    @patch('mgmt_oper_status.syslog.syslog')
    def test_main_exception_handling(self, mock_syslog, mock_subprocess, mock_SonicV2Connector):
        """A failure while reading the state logs an error, stores 'unknown' and exits 1."""
        mock_db = MagicMock()
        mock_SonicV2Connector.return_value = mock_db
        mgmt_ports_keys = ['MGMT_PORT|eth0']
        mock_db.keys.return_value = mgmt_ports_keys
        mock_db.set.return_value = None

        mock_subprocess.side_effect = Exception("File not found")

        # main() ends its error path with sys.exit(1); without catching the
        # resulting SystemExit the assertions below would never run.
        with self.assertRaises(SystemExit) as exit_ctx:
            mgmt_oper_status.main()
        self.assertEqual(exit_ctx.exception.code, 1)

        mock_syslog.assert_called_with(syslog.LOG_ERR, "mgmt_oper_status exception : File not found")
        mock_db.set.assert_any_call(mock_db.STATE_DB, 'MGMT_PORT_TABLE|eth0', 'oper_status', 'unknown')

if __name__ == '__main__':
    # Lets the test module be executed directly as a script.
    unittest.main()