Skip to content

Commit c1cb292

Browse files
[Mellanox] implement platform wait in python code (#17398)
- Why I did it: A new implementation of the Nvidia platform_wait is needed because: 1. the sysfs nodes are deprecated by hw-mgmt; 2. there are new dependencies on the SDK; 3. CMIS host management mode must be supported. - How I did it: wait for hw-management to be ready; wait for the SDK sysfs nodes to be ready. - How to verify it: manual test, unit test, sonic-mgmt regression.
1 parent f373a16 commit c1cb292

File tree

5 files changed

+111
-72
lines changed

5 files changed

+111
-72
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,32 @@
1-
#!/bin/bash

# platform_wait: poll the hw-management config files until the Mellanox ASIC
# reports that initialization is done and every ASIC has completed chip-up.
# Exits with EXIT_SUCCESS when ready, EXIT_TIMEOUT when the watchdog expires.

declare -r SYSLOG_LOGGER="/usr/bin/logger"
declare -r SYSLOG_IDENTIFIER="platform_wait"
declare -r SYSLOG_ERROR="error"
declare -r SYSLOG_NOTICE="notice"
declare -r SYSLOG_INFO="info"

declare -r HW_MGMT_CONFIG="/var/run/hw-management/config"

declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done"
declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num"
declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed"

declare -r EXIT_SUCCESS="0"
declare -r EXIT_TIMEOUT="1"

# Send all arguments as one syslog message with error priority.
# The logger is invoked directly (no eval) so the message text is never
# re-parsed by the shell.
function log_error() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_ERROR}" -- "$*"
}

# Send all arguments as one syslog message with notice priority.
function log_notice() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_NOTICE}" -- "$*"
}

# Send all arguments as one syslog message with info priority.
function log_info() {
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_INFO}" -- "$*"
}

# Poll once per second, up to _WDOG_MAX attempts, until asics_init_done is 1
# and asic_num equals asic_chipup_completed.
# Returns EXIT_SUCCESS when ready, EXIT_TIMEOUT (after logging an error) otherwise.
function wait_for_asic_chipup() {

    local _ASIC_INIT="0"
    local _ASIC_COUNT="0"
    local _ASICS_CHIPUP="0"

    local -i _WDOG_CNT="1"
    local -ir _WDOG_MAX="300"

    local -r _TIMEOUT="1s"

    while [[ "${_WDOG_CNT}" -le "${_WDOG_MAX}" ]]; do
        # stderr is captured as well, so when a file cannot be read the
        # variable holds cat's error text and that text appears in the
        # timeout log message below.
        # NOTE(review): a non-numeric value is expected to make the numeric
        # test below fail, so the loop keeps polling - confirm.
        _ASIC_INIT="$(cat "${ASIC_INIT_DONE}" 2>&1)"
        _ASIC_COUNT="$(cat "${NUM_ASICS}" 2>&1)"
        _ASICS_CHIPUP="$(cat "${ASIC_CHIPUP_COMPLETED}" 2>&1)"

        if [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then
            return "${EXIT_SUCCESS}"
        fi

        (( _WDOG_CNT += 1 ))
        sleep "${_TIMEOUT}"
    done

    log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...."
    return "${EXIT_TIMEOUT}"
}

log_info "Wait for Mellanox ASIC to be ready"

wait_for_asic_chipup
EXIT_CODE="$?"
if [[ "${EXIT_CODE}" != "${EXIT_SUCCESS}" ]]; then
    exit "${EXIT_CODE}"
fi

log_notice "Mellanox ASIC is ready"

exit "${EXIT_SUCCESS}"
1+
#!/usr/bin/python3
2+
3+
#
4+
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
5+
# Apache-2.0
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
import sys
21+
from sonic_platform.device_data import DeviceDataManager
22+
from sonic_py_common.logger import Logger
23+
24+
25+
# Script body: block until the platform dependencies of PMON are ready and
# report the outcome through syslog and the process exit status.
logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')

platform_ready = DeviceDataManager.wait_platform_ready()
if not platform_ready:
    # wait_platform_ready() returned a false value: report it as a timeout
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)

logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)

platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import glob
1919
import os
20+
import time
2021

2122
from . import utils
2223

@@ -167,8 +168,11 @@ def is_psu_hotswapable(cls):
167168
@classmethod
168169
@utils.read_only_cache()
169170
def get_sfp_count(cls):
    """
    Get the number of SFPs listed in the platform.json of the current platform

    Returns:
        int: length of the 'sfps' list under 'chassis' in platform.json
    """
    # Function-scope import, kept where the original code placed it
    from sonic_py_common import device_info
    platform_json_path = os.path.join(device_info.get_path_to_platform_dir(), 'platform.json')
    sfp_entries = utils.load_json_file(platform_json_path)['chassis']['sfps']
    return len(sfp_entries)
172176

173177
@classmethod
174178
def get_linecard_sfp_count(cls, lc_index):
@@ -244,3 +248,23 @@ def is_independent_mode(cls):
244248
sai_profile_file = os.path.join(hwsku_dir, 'sai.profile')
245249
data = utils.read_key_value_file(sai_profile_file, delimeter='=')
246250
return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1'
251+
252+
@classmethod
253+
def wait_platform_ready(cls):
    """
    Wait for Nvidia platform related services (SDK, hw-management) to be ready

    The platform is considered ready once every expected sysfs node exists for
    every SFP module and, when not in independent module mode, the
    hw-management 'asics_init_done' file reads 1. Conditions are polled every
    1 second with a timeout of 300 seconds.

    Returns:
        bool: True if wait success else timeout
    """
    conditions = []
    sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror']
    if cls.is_independent_mode():
        sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset',
                            'power_good', 'power_limit', 'power_on', 'temperature/input'])
    else:
        conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1)
    sfp_count = cls.get_sfp_count()
    for sfp_index in range(sfp_count):
        for sysfs_node in sysfs_nodes:
            node_path = f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}'
            # Bind the path as a default argument: a plain closure would look up
            # the loop variables when called, so every condition would check
            # only the last module's last node.
            conditions.append(lambda path=node_path: os.path.exists(path))
    return utils.wait_until_conditions(conditions, 300, 1)

platform/mellanox/mlnx-platform-api/sonic_platform/utils.py

+24
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,30 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs):
290290
return False
291291

292292

293+
def wait_until_conditions(conditions, timeout, interval=1):
    """
    Wait until all the conditions become true

    A condition that has returned a true value once is treated as satisfied
    and is not evaluated again. The conditions are always evaluated at least
    once, even when timeout is 0 or negative, and no sleep is performed after
    the final failing check.

    Args:
        conditions (list): a list of callable which generate True|False
        timeout (int): wait time in seconds
        interval (int, optional): interval to check the conditions. Defaults to 1.
            Expected to be positive: the remaining timeout is reduced by
            interval after each failing check.

    Returns:
        bool: True if wait success else False
    """
    # Work on a copy so the caller's list is left untouched
    pending_conditions = list(conditions)
    while True:
        # Keep only the conditions that are still not satisfied
        pending_conditions = [condition for condition in pending_conditions if not condition()]
        if not pending_conditions:
            return True
        timeout -= interval
        if timeout <= 0:
            return False
        time.sleep(interval)
315+
316+
293317
class TimerEvent:
294318
def __init__(self, interval, cb, repeat):
295319
self.interval = interval

platform/mellanox/mlnx-platform-api/tests/test_device_data.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,26 @@ def test_is_independent_mode(self, mock_read):
6060
mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'}
6161
assert DeviceDataManager.is_independent_mode()
6262

63+
@mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp'))
64+
@mock.patch('sonic_platform.device_data.utils.load_json_file')
65+
def test_get_sfp_count(self, mock_load_json):
    """get_sfp_count returns the number of entries in the chassis 'sfps' list"""
    fake_platform_data = {'chassis': {'sfps': [1, 2, 3]}}
    mock_load_json.return_value = fake_platform_data
    assert DeviceDataManager.get_sfp_count() == 3
6372

64-
65-
73+
@mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock())
74+
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3))
75+
@mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1))
76+
@mock.patch('sonic_platform.device_data.os.path.exists')
77+
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
78+
def test_wait_platform_ready(self, mock_is_indep, mock_exists):
    """wait_platform_ready succeeds when the sysfs nodes exist and fails when they do not"""
    # Every sysfs node exists: ready in independent mode, then in non-independent mode
    mock_exists.return_value = True
    for independent_mode in (True, False):
        mock_is_indep.return_value = independent_mode
        assert DeviceDataManager.wait_platform_ready()

    # No sysfs node exists (still non-independent mode): the wait must fail
    mock_exists.return_value = False
    assert not DeviceDataManager.wait_platform_ready()

platform/mellanox/mlnx-platform-api/tests/test_utils.py

+7
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ def test_read_key_value_file(self):
195195
mock_os_open = mock.mock_open(read_data='a=b')
196196
with mock.patch('sonic_platform.utils.open', mock_os_open):
197197
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}
198+
199+
@mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock())
200+
def test_wait_until_conditions(self):
    """wait_until_conditions returns True for a met condition and False for an unmet one"""
    always_met = [lambda: True]
    assert utils.wait_until_conditions(always_met, 1)

    never_met = [lambda: False]
    assert not utils.wait_until_conditions(never_met, 1)
198205

199206
def test_timer(self):
200207
timer = utils.Timer()

0 commit comments

Comments
 (0)