Skip to content

Commit f7b5949

Browse files
Merge pull request #26 from stephenxs/sfp-bit-map-error-status
Enhanced - Handle the error status returned by platform APIs
2 parents 1ad32df + 08ab761 commit f7b5949

File tree

4 files changed

+79
-100
lines changed

4 files changed

+79
-100
lines changed

sonic-xcvrd/tests/test_xcvrd.py

+5-44
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from sonic_py_common import daemon_base
1111
from swsscommon import swsscommon
12+
from sonic_platform_base.sfp_base import SfpBase
1213
from .mock_swsscommon import Table
1314

1415

@@ -315,46 +316,6 @@ def test_get_media_settings_key(self):
315316
assert result == ['MOLEX-1064141421', 'QSFP+']
316317
# TODO: Ensure that error message was logged
317318

318-
def test_update_port_transceiver_status_table(self):
319-
logical_port_name = "Ethernet0"
320-
status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
321-
update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_INSERTED)
322-
entry = status_tbl.get(logical_port_name)
323-
print(entry[1])
324-
print(entry[0][0])
325-
assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED
326-
assert status_tbl.get(logical_port_name)[1][1] == 'N/A'
327-
328-
update_port_transceiver_status_table(logical_port_name, status_tbl, SFP_STATUS_REMOVED)
329-
assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_REMOVED
330-
assert status_tbl.get(logical_port_name)[1][1] == 'N/A'
331-
332-
error_dict = {
333-
'3': 'SFP_STATUS_ERR_I2C_STUCK',
334-
'5': 'SFP_STATUS_ERR_BAD_EEPROM',
335-
'9': 'SFP_STATUS_ERR_UNSUPPORTED_CABLE',
336-
'17': 'SFP_STATUS_ERR_HIGH_TEMP',
337-
'33': 'SFP_STATUS_ERR_BAD_CABLE'
338-
}
339-
340-
# Test single errors
341-
for error_value, error_msg in error_dict.items():
342-
update_port_transceiver_status_table(logical_port_name, status_tbl, error_value, True)
343-
assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED
344-
assert status_tbl.get(logical_port_name)[1][1] == error_msg
345-
346-
# Test multiple errors
347-
update_port_transceiver_status_table(logical_port_name, status_tbl, '63', True)
348-
assert status_tbl.get(logical_port_name)[0][1] == SFP_STATUS_INSERTED
349-
error = status_tbl.get(logical_port_name)[1][1]
350-
for error_msg in error_dict.values():
351-
assert error_msg in error
352-
353-
# Test unsupported errors
354-
status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
355-
update_port_transceiver_status_table(logical_port_name, status_tbl, '1024', True)
356-
assert status_tbl.get(logical_port_name) is None
357-
358319
def test_detect_port_in_error_status(self):
359320
class MockTable:
360321
def get(self, key):
@@ -364,13 +325,13 @@ def get(self, key):
364325
status_tbl.get = MagicMock(return_value=(True, {'error': 'N/A'}))
365326
assert not detect_port_in_error_status(None, status_tbl)
366327

367-
status_tbl.get = MagicMock(return_value=(True, {'error': 'SFP_STATUS_ERR_I2C_STUCK'}))
328+
status_tbl.get = MagicMock(return_value=(True, {'error': SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING}))
368329
assert detect_port_in_error_status(None, status_tbl)
369330

370331
def test_is_error_sfp_status(self):
371-
error_values = ['3', '5', '9', '17', '33']
332+
error_values = [7, 11, 19, 35]
372333
for error_value in error_values:
373334
assert is_error_block_eeprom_reading(error_value)
374335

375-
assert not is_error_block_eeprom_reading(SFP_STATUS_INSERTED)
376-
assert not is_error_block_eeprom_reading(SFP_STATUS_REMOVED)
336+
assert not is_error_block_eeprom_reading(int(SFP_STATUS_INSERTED))
337+
assert not is_error_block_eeprom_reading(int(SFP_STATUS_REMOVED))

sonic-xcvrd/xcvrd/xcvrd.py

+41-33
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,13 @@ def _wrapper_get_transceiver_change_event(timeout):
176176
if platform_chassis is not None:
177177
try:
178178
status, events = platform_chassis.get_change_event(timeout)
179-
sfp_events = events['sfp']
180-
return status, sfp_events
179+
sfp_events = events.get('sfp')
180+
sfp_errors = events.get('sfp_error')
181+
return status, sfp_events, sfp_errors
181182
except NotImplementedError:
182183
pass
183-
return platform_sfputil.get_transceiver_change_event(timeout)
184+
status, events = platform_sfputil.get_transceiver_change_event(timeout)
185+
return status, events, None
184186

185187

186188
def _wrapper_get_sfp_type(physical_port):
@@ -191,6 +193,14 @@ def _wrapper_get_sfp_type(physical_port):
191193
pass
192194
return None
193195

196+
197+
def _wrapper_get_sfp_error_description(physical_port):
198+
if platform_chassis:
199+
try:
200+
return platform_chassis.get_sfp(physical_port).get_error_description()
201+
except NotImplementedError:
202+
pass
203+
return None
194204
# Remove unnecessary unit from the raw data
195205

196206

@@ -759,22 +769,9 @@ def waiting_time_compensation_with_sleep(time_start, time_to_wait):
759769
# Update port SFP status table on receiving SFP change event
760770

761771

762-
def update_port_transceiver_status_table(logical_port_name, status_tbl, status, has_error=False):
763-
if not has_error:
764-
fvs = swsscommon.FieldValuePairs([('status', status), ('error', 'N/A')])
765-
status_tbl.set(logical_port_name, fvs)
766-
else:
767-
error_list = []
768-
int_status = int(status)
769-
for error_code, error_msg in sfp_status_helper.SFP_STATUS_ERR_DICT.items():
770-
if error_code & int_status:
771-
error_list.append(error_msg)
772-
if error_list:
773-
fvs = swsscommon.FieldValuePairs([('status', str(int_status & 1)), ('error', '|'.join(error_list))])
774-
status_tbl.set(logical_port_name, fvs)
775-
else:
776-
# SFP return unkown event, just ignore for now.
777-
helper_logger.log_warning("Got unknown event {}, ignored".format(status))
772+
def update_port_transceiver_status_table(logical_port_name, status_tbl, status, error_descriptions='N/A'):
773+
fvs = swsscommon.FieldValuePairs([('status', status), ('error', error_descriptions)])
774+
status_tbl.set(logical_port_name, fvs)
778775

779776

780777
# Delete port from SFP status table
@@ -1003,7 +1000,7 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence):
10031000
while not stopping_event.is_set():
10041001
next_state = state
10051002
time_start = time.time()
1006-
status, port_dict = _wrapper_get_transceiver_change_event(timeout)
1003+
status, port_dict, error_dict = _wrapper_get_transceiver_change_event(timeout)
10071004
if not port_dict:
10081005
continue
10091006
helper_logger.log_debug("Got event {} {} in state {}".format(status, port_dict, state))
@@ -1083,21 +1080,32 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence):
10831080
helper_logger.log_info("Got SFP removed event")
10841081
update_port_transceiver_status_table(
10851082
logical_port, status_tbl[asic_index], sfp_status_helper.SFP_STATUS_REMOVED)
1086-
helper_logger.log_info("receive plug out and pdate port sfp status table.")
1083+
helper_logger.log_info("receive plug out and update port sfp status table.")
10871084
del_port_sfp_dom_info_from_db(logical_port, int_tbl[asic_index], dom_tbl[asic_index])
10881085
else:
1089-
helper_logger.log_info("Got SFP Error event")
1090-
# Add port to error table to stop accessing eeprom of it
1091-
# If the port already in the error table, the stored error code will
1092-
# be updated to the new one.
1093-
update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, True)
1094-
helper_logger.log_info("receive error update port sfp status table.")
1095-
# In this case EEPROM is not accessible, so remove the DOM info
1096-
# since it will be outdated if long time no update.
1097-
# but will keep the interface info in the DB since it static.
1098-
if sfp_status_helper.is_error_block_eeprom_reading(value):
1099-
del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index])
1100-
1086+
try:
1087+
error_bits = int(value)
1088+
helper_logger.log_info("Got SFP error event {}".format(value))
1089+
1090+
error_descriptions = sfp_status_helper.fetch_generic_error_description(error_bits)
1091+
1092+
if sfp_status_helper.has_vendor_specific_error(error_bits):
1093+
if error_dict:
1094+
vendor_specific_error_description = error_dict.get(key)
1095+
else:
1096+
vendor_specific_error_description = _wrapper_get_sfp_error_description(key)
1097+
error_descriptions.append(vendor_specific_error_description)
1098+
1099+
# Add error info to database
1100+
# Any existing error will be replaced by the new one.
1101+
update_port_transceiver_status_table(logical_port, status_tbl[asic_index], value, '|'.join(error_descriptions))
1102+
helper_logger.log_info("Receive error update port sfp status table.")
1103+
# In this case EEPROM is not accessible. The DOM info will be removed since it can be out-of-date.
1104+
# The interface info remains in the DB since it is static.
1105+
if sfp_status_helper.is_error_block_eeprom_reading(error_bits):
1106+
del_port_sfp_dom_info_from_db(logical_port, None, dom_tbl[asic_index])
1107+
except (TypeError, ValueError) as e:
1108+
logger.log_error("Got unrecognized event {}, ignored".format(value))
11011109

11021110
# Since ports could be connected to a mux cable, process any change event for ports that are on a Y cable
11031111
y_cable_helper.change_ports_status_for_y_cable_change_event(
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,37 @@
1+
from sonic_platform_base.sfp_base import SfpBase
2+
13
# SFP status definition, shall be aligned with the definition in get_change_event() of ChassisBase
24
SFP_STATUS_REMOVED = '0'
35
SFP_STATUS_INSERTED = '1'
46

57
# SFP error code dictionary, new elements can be added if new errors need to be supported.
6-
SFP_STATUS_ERR_DICT = {
7-
2: 'SFP_STATUS_ERR_I2C_STUCK',
8-
4: 'SFP_STATUS_ERR_BAD_EEPROM',
9-
8: 'SFP_STATUS_ERR_UNSUPPORTED_CABLE',
10-
16: 'SFP_STATUS_ERR_HIGH_TEMP',
11-
32: 'SFP_STATUS_ERR_BAD_CABLE'
12-
}
13-
14-
error_code_block_eeprom_reading = set((error_code for error_code in SFP_STATUS_ERR_DICT.keys()))
15-
error_str_block_eeprom_reading = set((error for error in SFP_STATUS_ERR_DICT.values()))
16-
17-
18-
def is_error_block_eeprom_reading(status):
19-
int_status = int(status)
20-
for error_code in error_code_block_eeprom_reading:
21-
if int_status & error_code:
22-
return True
23-
return False
8+
SFP_ERRORS_BLOCKING_MASK = 0x02
9+
SFP_ERRORS_GENERIC_MASK = 0x0000FFFE
10+
SFP_ERRORS_VENDOR_SPECIFIC_MASK = 0xFFFF0000
11+
12+
def is_error_block_eeprom_reading(error_bits):
13+
return 0 != (error_bits & SFP_ERRORS_BLOCKING_MASK)
14+
15+
16+
def has_vendor_specific_error(error_bits):
17+
return 0 != (error_bits & SFP_ERRORS_VENDOR_SPECIFIC_MASK)
18+
19+
20+
def fetch_generic_error_description(error_bits):
21+
generic_error_bits = (error_bits & SFP_ERRORS_GENERIC_MASK)
22+
error_descriptions = []
23+
if generic_error_bits:
24+
for error_bit, error_description in SfpBase.SFP_ERROR_BIT_TO_DESCRIPTION_DICT.items():
25+
if error_bit & generic_error_bits:
26+
error_descriptions.append(error_description)
27+
return error_descriptions
2428

2529

2630
def detect_port_in_error_status(logical_port_name, status_tbl):
2731
rec, fvp = status_tbl.get(logical_port_name)
2832
if rec:
2933
status_dict = dict(fvp)
30-
if 'error' in status_dict:
31-
for error in error_str_block_eeprom_reading:
32-
if error in status_dict['error']:
33-
return True
34+
error = status_dict.get('error')
35+
return SfpBase.SFP_ERROR_DESCRIPTION_BLOCKING in error
3436
return False
3537

sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -419,11 +419,19 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st
419419
helper_logger.log_info("Got SFP inserted event")
420420
check_identifier_presence_and_update_mux_table_entry(
421421
state_db, port_tbl, y_cable_tbl, static_tbl, mux_tbl, asic_index, logical_port_name, y_cable_presence)
422-
elif value == sfp_status_helper.SFP_STATUS_REMOVED or sfp_status_helper.is_error_block_eeprom_reading(value):
422+
elif value == sfp_status_helper.SFP_STATUS_REMOVED:
423423
check_identifier_presence_and_delete_mux_table_entry(
424424
state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event)
425425

426426
else:
427+
try:
428+
# Now that the value is in bitmap format, convert it to a number
429+
event_bits = int(value)
430+
if sfp_status_helper.is_error_block_eeprom_reading(event_bits):
431+
check_identifier_presence_and_delete_mux_table_entry(
432+
state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event)
433+
except:
434+
pass
427435
# SFP returned an unknown event, just ignore it for now.
428436
helper_logger.log_warning("Got unknown event {}, ignored".format(value))
429437
continue

0 commit comments

Comments
 (0)