Skip to content

Commit 6e4cf6f

Browse files
authored
[xcvrd][y_cable] refactor xcvrd to listen to port probe without locks; fix the get_firmware_version API to sync with download_firmware (sonic-net#216)
Signed-off-by: vaibhav-dahiya [email protected] Description This PR refactors the listening of port probes from linkmgr to xcvrd as so that xcvrd does not see timeout messages like these below Sep 27 22:24:37.490921 SONIC WARNING mux#linkmgrd: link_manager/LinkManagerStateMachine.cpp:815 handleMuxWaitTimeout: Ethernet120: xcvrd timed out responding to linkmgrd, current state: (P: Unknown, M: Wait, L: Down) Refactoring to a separate listener thread mode as well as removing the lock(safe because read/write transaction via eeprom for port probe) seems to handle this, and does not have the logs like this one above. as the read write eeprom also has a mutex lock in the platform api call, optoe implementation. https://github.com/Azure/sonic-linux-kernel/blob/889d76e36b3f012d3782a1c5e1587c32e4d1ed11/patch/driver-support-optoe.patch#L717 Motivation and Context refactor xcvrd for listening to port probes, disable calling the get_firmware_api() while a download is in progress. Signed-off-by: vaibhav-dahiya <[email protected]>
1 parent 84386e6 commit 6e4cf6f

File tree

1 file changed

+36
-31
lines changed

1 file changed

+36
-31
lines changed

sonic-xcvrd/xcvrd/xcvrd_utilities/y_cable_helper.py

+36-31
Original file line numberDiff line numberDiff line change
@@ -238,17 +238,16 @@ def y_cable_toggle_mux_torA(physical_port):
238238
port_instance = y_cable_port_instances.get(physical_port)
239239
if port_instance is None:
240240
helper_logger.log_error(
241-
"Error: Could not get port instance for read side for Y cable port {}".format(physical_port))
241+
"Error: Could not get port instance for read side for Y cable port {} {}".format(physical_port, threading.currentThread().getName()))
242242
return -1
243243

244-
with y_cable_port_locks[physical_port]:
245-
try:
246-
update_status = port_instance.toggle_mux_to_tor_a()
247-
except Exception as e:
248-
update_status = -1
249-
helper_logger.log_warning("Failed to execute the toggle mux ToR A API for port {} due to {}".format(physical_port,repr(e)))
244+
try:
245+
update_status = port_instance.toggle_mux_to_tor_a()
246+
except Exception as e:
247+
update_status = -1
248+
helper_logger.log_warning("Failed to execute the toggle mux ToR A API for port {} due to {} {}".format(physical_port, repr(e) , threading.currentThread().getName()))
250249

251-
helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR A for port {} {}".format(physical_port, update_status))
250+
helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR A for port {} status {} {}".format(physical_port, update_status, threading.currentThread().getName()))
252251
if update_status is True:
253252
return 1
254253
else:
@@ -260,17 +259,16 @@ def y_cable_toggle_mux_torA(physical_port):
260259
def y_cable_toggle_mux_torB(physical_port):
261260
port_instance = y_cable_port_instances.get(physical_port)
262261
if port_instance is None:
263-
helper_logger.log_error("Error: Could not get port instance for read side for Y cable port {}".format(physical_port))
262+
helper_logger.log_error("Error: Could not get port instance for read side for Y cable port {} {}".format(physical_port, threading.currentThread().getName()))
264263
return -1
265264

266-
with y_cable_port_locks[physical_port]:
267-
try:
268-
update_status = port_instance.toggle_mux_to_tor_b()
269-
except Exception as e:
270-
update_status = -1
271-
helper_logger.log_warning("Failed to execute the toggle mux ToR B API for port {} due to {}".format(physical_port,repr(e)))
265+
try:
266+
update_status = port_instance.toggle_mux_to_tor_b()
267+
except Exception as e:
268+
update_status = -1
269+
helper_logger.log_warning("Failed to execute the toggle mux ToR B API for port {} due to {} {}".format(physical_port,repr(e), threading.currentThread().getName()))
272270

273-
helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR B for port {} {}".format(physical_port, update_status))
271+
helper_logger.log_debug("Y_CABLE_DEBUG: Status of toggling mux to ToR B for port {} {} {}".format(physical_port, update_status, threading.currentThread().getName()))
274272
if update_status is True:
275273
return 2
276274
else:
@@ -346,12 +344,11 @@ def update_appdb_port_mux_cable_response_table(logical_port_name, asic_index, ap
346344
return
347345

348346
active_side = None
349-
with y_cable_port_locks[physical_port]:
350-
try:
351-
active_side = port_instance.get_mux_direction()
352-
except Exception as e:
353-
active_side = -1
354-
helper_logger.log_warning("Failed to execute the get_mux_direction for port {} due to {}".format(physical_port,repr(e)))
347+
try:
348+
active_side = port_instance.get_mux_direction()
349+
except Exception as e:
350+
active_side = -1
351+
helper_logger.log_warning("Failed to execute the get_mux_direction for port {} due to {}".format(physical_port,repr(e)))
355352

356353
if active_side is None or active_side == port_instance.EEPROM_ERROR or active_side < 0 :
357354

@@ -778,7 +775,6 @@ def change_ports_status_for_y_cable_change_event(port_dict, y_cable_presence, st
778775
elif value == sfp_status_helper.SFP_STATUS_REMOVED:
779776
check_identifier_presence_and_delete_mux_table_entry(
780777
state_db, port_tbl, asic_index, logical_port_name, y_cable_presence, delete_change_event)
781-
782778
else:
783779
try:
784780
# Now that the value is in bitmap format, let's convert it to number
@@ -847,8 +843,9 @@ def delete_ports_status_for_y_cable():
847843
if len(physical_port_list) == 1:
848844

849845
physical_port = physical_port_list[0]
850-
y_cable_port_instances.pop(physical_port)
851-
y_cable_port_locks.pop(physical_port)
846+
if y_cable_port_instances.get(physical_port) is not None:
847+
y_cable_port_instances.pop(physical_port)
848+
y_cable_port_locks.pop(physical_port)
852849
else:
853850
helper_logger.log_warning(
854851
"Error: Retreived multiple ports for a Y cable port {} while deleting entries".format(logical_port_name))
@@ -897,6 +894,15 @@ def check_identifier_presence_and_update_mux_info_entry(state_db, mux_tbl, asic_
897894
def get_firmware_dict(physical_port, port_instance, target, side, mux_info_dict):
898895

899896
result = {}
897+
if port_instance.download_firmware_status == port_instance.FIRMWARE_DOWNLOAD_STATUS_INPROGRESS or port_instance.download_firmware_status == port_instance.FIRMWARE_DOWNLOAD_STATUS_FAILED:
898+
mux_info_dict[("version_{}_active".format(side))] = "N/A"
899+
mux_info_dict[("version_{}_inactive".format(side))] = "N/A"
900+
mux_info_dict[("version_{}_next".format(side))] = "N/A"
901+
helper_logger.log_warning(
902+
"trying to post firmware info while download in progress returning without execute {}".format(physical_port))
903+
904+
return
905+
900906
with y_cable_port_locks[physical_port]:
901907
try:
902908
result = port_instance.get_firmware_version(target)
@@ -1455,7 +1461,6 @@ def task_download_firmware_worker(port, physical_port, port_instance, file_full_
14551461
rc[0] = status
14561462
helper_logger.log_debug("download thread finished port {} physical_port {}".format(port, physical_port))
14571463

1458-
14591464
# Thread wrapper class to update y_cable status periodically
14601465
class YCableTableUpdateTask(object):
14611466
def __init__(self):
@@ -1530,7 +1535,7 @@ def task_worker(self):
15301535
if not port:
15311536
break
15321537

1533-
helper_logger.log_debug("Y_CABLE_DEBUG: received an event for port transition {}".format(port))
1538+
helper_logger.log_debug("Y_CABLE_DEBUG: received an event for port transition {} {}".format(port, threading.currentThread().getName()))
15341539

15351540
# entering this section signifies a start for xcvrd state
15361541
# change request from swss so initiate recording in mux_metrics table
@@ -1563,9 +1568,9 @@ def task_worker(self):
15631568
port, old_status, new_status))
15641569
new_status = 'unknown'
15651570

1566-
helper_logger.log_debug("Y_CABLE_DEBUG: xcvrd successful to transition port {} from {} to {} and write back to the DB".format(port, old_status, new_status))
1567-
helper_logger.log_info("Got a change event for toggle the mux-direction active side for port {} state from {} to {}".format(
1568-
port, old_status, new_status))
1571+
helper_logger.log_debug("Y_CABLE_DEBUG: xcvrd successful to transition port {} from {} to {} and write back to the DB {}".format(port, old_status, new_status, threading.currentThread().getName()))
1572+
helper_logger.log_notice("Got a change event for toggle the mux-direction active side for port {} state from {} to {} {}".format(
1573+
port, old_status, new_status, threading.currentThread().getName()))
15691574
time_end = datetime.datetime.utcnow().strftime("%Y-%b-%d %H:%M:%S.%f")
15701575
fvs_metrics = swsscommon.FieldValuePairs([('xcvrd_switch_{}_start'.format(new_status), str(time_start)),
15711576
('xcvrd_switch_{}_end'.format(new_status), str(time_end))])
@@ -1584,7 +1589,7 @@ def task_worker(self):
15841589

15851590
if not port_m:
15861591
break
1587-
helper_logger.log_debug("Y_CABLE_DEBUG: received a probe for port status {}".format(port_m))
1592+
helper_logger.log_debug("Y_CABLE_DEBUG: received a probe for port status {} {}".format(port_m, threading.currentThread().getName()))
15881593

15891594
if fvp_m:
15901595

0 commit comments

Comments
 (0)