Skip to content

Commit c71f82e

Browse files
committed
Changes as per previous discussion
1 parent 39a6830 commit c71f82e

File tree

3 files changed

+228
-28
lines changed

3 files changed

+228
-28
lines changed

sonic-chassisd/scripts/chassisd

+25-18
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ MODULE_ADMIN_UP = 1
104104
MODULE_REBOOT_CAUSE_DIR = "/host/reboot-cause/module/"
105105
MAX_HISTORY_FILES = 10
106106

107+
DP_STATE = 'dpu_data_plane_state'
108+
DP_UPDATE_TIME = 'dpu_data_plane_time'
109+
CP_STATE = 'dpu_control_plane_state'
110+
CP_UPDATE_TIME = 'dpu_control_plane_time'
111+
112+
107113
# This daemon should return non-zero exit code so that supervisord will
108114
# restart it automatically.
109115
exit_code = 0
@@ -819,16 +825,19 @@ class SmartSwitchModuleUpdater(ModuleUpdater):
819825
if not self.chassis_state_db:
820826
self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
821827

822-
# If state is down, delete the table first
823-
if state == "down":
824-
self.chassis_state_db.delete(key)
825828

826829
# Prepare the fields to update
827830
updates = {
828831
"dpu_midplane_link_state": state,
829832
"dpu_midplane_link_reason": "",
830833
"dpu_midplane_link_time": get_formatted_time(),
831834
}
835+
# If midplane state is down, set control plane, data plane states to down as well
836+
if state == "down":
837+
updates[CP_STATE] = "down"
838+
updates[CP_UPDATE_TIME] = get_formatted_time()
839+
updates[DP_STATE] = "down"
840+
updates[DP_UPDATE_TIME] = get_formatted_time()
832841

833842
# Update each field directly
834843
for field, value in updates.items():
@@ -1175,11 +1184,6 @@ class SmartSwitchConfigManagerTask(ProcessTaskBase):
11751184

11761185
class DpuStateUpdater(logger.Logger):
11771186

1178-
DP_STATE = 'dpu_data_plane_state'
1179-
DP_UPDATE_TIME = 'dpu_data_plane_time'
1180-
CP_STATE = 'dpu_control_plane_state'
1181-
CP_UPDATE_TIME = 'dpu_control_plane_time'
1182-
11831187
def __init__(self, log_identifier, chassis):
11841188
super(DpuStateUpdater, self).__init__(log_identifier)
11851189

@@ -1234,12 +1238,12 @@ class DpuStateUpdater(logger.Logger):
12341238
return get_formatted_time()
12351239

12361240
def _update_dp_dpu_state(self, state):
1237-
self.dpu_state_table.hset(self.name, self.DP_STATE, state)
1238-
self.dpu_state_table.hset(self.name, self.DP_UPDATE_TIME, self._time_now())
1241+
self.dpu_state_table.hset(self.name, DP_STATE, state)
1242+
self.dpu_state_table.hset(self.name, DP_UPDATE_TIME, self._time_now())
12391243

12401244
def _update_cp_dpu_state(self, state):
1241-
self.dpu_state_table.hset(self.name, self.CP_STATE, state)
1242-
self.dpu_state_table.hset(self.name, self.CP_UPDATE_TIME, self._time_now())
1245+
self.dpu_state_table.hset(self.name, CP_STATE, state)
1246+
self.dpu_state_table.hset(self.name, CP_UPDATE_TIME, self._time_now())
12431247

12441248
def get_dp_state(self):
12451249
return 'up' if self._get_dp_state() else 'down'
@@ -1250,16 +1254,17 @@ class DpuStateUpdater(logger.Logger):
12501254
def update_state(self):
12511255

12521256
dp_current_state = self.get_dp_state()
1253-
_, dp_prev_state = self.dpu_state_table.hget(self.name, self.DP_STATE)
1257+
_, dp_prev_state = self.dpu_state_table.hget(self.name, DP_STATE)
12541258

12551259
if dp_current_state != dp_prev_state:
12561260
self._update_dp_dpu_state(dp_current_state)
12571261

12581262
cp_current_state = self.get_cp_state()
1259-
_, cp_prev_state = self.dpu_state_table.hget(self.name, self.CP_STATE)
1263+
_, cp_prev_state = self.dpu_state_table.hget(self.name, CP_STATE)
12601264

12611265
if cp_current_state != cp_prev_state:
12621266
self._update_cp_dpu_state(cp_current_state)
1267+
return [dp_current_state, cp_current_state]
12631268

12641269
def deinit(self):
12651270
self._update_dp_dpu_state('down')
@@ -1409,6 +1414,8 @@ class DpuStateManagerTask(ProcessTaskBase):
14091414
self.state_db = daemon_base.db_connect('STATE_DB')
14101415
self.app_db = daemon_base.db_connect('APPL_DB')
14111416
self.chassis_state_db = daemon_base.db_connect('CHASSIS_STATE_DB')
1417+
self.current_dp_state = None
1418+
self.current_cp_state = None
14121419

14131420
def task_worker(self):
14141421
sel = swsscommon.Select()
@@ -1437,7 +1444,6 @@ class DpuStateManagerTask(ProcessTaskBase):
14371444
result = s.pop()
14381445
update_required = True # If there is any selectable object, we need to update the state
14391446
if result is None:
1440-
print("Here with None for {}".format(type(s)))
14411447
continue
14421448
key, op, fvp = result # Changed from _ to fvp to match what we use below
14431449
# Check if this is the DPU_STATE table
@@ -1446,16 +1452,17 @@ class DpuStateManagerTask(ProcessTaskBase):
14461452
if key != self.dpu_state_updater.name:
14471453
update_required = False
14481454
continue
1449-
# Don't update if this is our own dataplane/control plane state update to avoid recursion
14501455
if op == 'SET' and isinstance(fvp, tuple):
14511456
fvs = dict(fvp)
1452-
if 'dpu_data_plane_state' in fvs and 'dpu_control_plane_state' in fvs:
1457+
# No need to update if the state is the same as the current state
1458+
if ('dpu_data_plane_state' in fvs and fvs['dpu_data_plane_state'] == self.current_dp_state) and \
1459+
('dpu_control_plane_state' in fvs and fvs['dpu_control_plane_state'] == self.current_cp_state):
14531460
update_required = False
14541461
continue
14551462
self.logger.log_info(f"DPU_STATE change detected: operation={op}, key={key}")
14561463

14571464
if update_required:
1458-
self.dpu_state_updater.update_state()
1465+
[self.current_dp_state, self.current_cp_state] = self.dpu_state_updater.update_state()
14591466

14601467
except KeyboardInterrupt:
14611468
pass

sonic-chassisd/tests/test_chassisd.py

+68
Original file line numberDiff line numberDiff line change
@@ -1765,3 +1765,71 @@ def is_valid_date(date_str):
17651765
if not date_value:
17661766
AssertionError("Date is not set!")
17671767
assert is_valid_date(date_value)
1768+
1769+
def test_smartswitch_moduleupdater_midplane_state_change():
1770+
"""Test that when midplane goes down, control plane and data plane states are set to down"""
1771+
chassis = MockSmartSwitchChassis()
1772+
index = 0
1773+
name = "DPU0"
1774+
desc = "DPU Module 0"
1775+
slot = 0
1776+
serial = "DPU0-0000"
1777+
module_type = ModuleBase.MODULE_TYPE_DPU
1778+
module = MockModule(index, name, desc, module_type, slot, serial)
1779+
module.set_midplane_ip()
1780+
chassis.module_list.append(module)
1781+
1782+
# Create the updater
1783+
module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis)
1784+
module_updater.midplane_initialized = True
1785+
1786+
# Mock chassis_state_db
1787+
chassis_state_db = {}
1788+
def mock_hset(key, field, value):
1789+
if key not in chassis_state_db:
1790+
chassis_state_db[key] = {}
1791+
chassis_state_db[key][field] = value
1792+
1793+
def mock_hget(key, field):
1794+
if key in chassis_state_db and field in chassis_state_db[key]:
1795+
return chassis_state_db[key][field]
1796+
return None
1797+
1798+
with patch.object(module_updater, 'chassis_state_db') as mock_db:
1799+
mock_db.hset = MagicMock(side_effect=mock_hset)
1800+
mock_db.hget = MagicMock(side_effect=mock_hget)
1801+
1802+
# Initially set midplane as up
1803+
module.set_midplane_reachable(True)
1804+
module_updater.check_midplane_reachability()
1805+
1806+
# Verify initial state
1807+
key = "DPU_STATE|" + name
1808+
assert chassis_state_db[key]["dpu_midplane_link_state"] == "up"
1809+
1810+
# Now set midplane as down
1811+
module.set_midplane_reachable(False)
1812+
module_updater.check_midplane_reachability()
1813+
1814+
# Verify all states are set to down
1815+
assert chassis_state_db[key]["dpu_midplane_link_state"] == "down"
1816+
assert chassis_state_db[key]["dpu_control_plane_state"] == "down"
1817+
assert chassis_state_db[key]["dpu_data_plane_state"] == "down"
1818+
1819+
# Verify timestamps are set
1820+
assert "dpu_midplane_link_time" in chassis_state_db[key]
1821+
assert "dpu_control_plane_time" in chassis_state_db[key]
1822+
assert "dpu_data_plane_time" in chassis_state_db[key]
1823+
1824+
# Verify time format
1825+
date_format = "%a %b %d %I:%M:%S %p UTC %Y"
1826+
def is_valid_date(date_str):
1827+
try:
1828+
datetime.strptime(date_str, date_format)
1829+
return True
1830+
except ValueError:
1831+
return False
1832+
1833+
assert is_valid_date(chassis_state_db[key]["dpu_midplane_link_time"])
1834+
assert is_valid_date(chassis_state_db[key]["dpu_control_plane_time"])
1835+
assert is_valid_date(chassis_state_db[key]["dpu_data_plane_time"])

sonic-chassisd/tests/test_dpu_chassisd.py

+135-10
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def hset(key, field, value):
268268
}}
269269

270270

271-
def test_dpu_state_manager_specific_key_update():
271+
def test_dpu_state_manager_none_result():
272272
chassis = MockDpuChassis()
273273
chassis.get_dpu_id = MagicMock(return_value=0)
274274
chassis.get_dataplane_state = MagicMock(return_value=True)
@@ -281,8 +281,11 @@ def hset(key, field, value):
281281
chassis_state_db[key] = {}
282282
chassis_state_db[key][field] = value
283283

284+
def mock_pop():
285+
return None
286+
284287
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
285-
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop', return_value=('DPU0', 'SET', (('dpu_control_plane_state', 'up'), ('dpu_data_plane_state', 'up')))):
288+
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop', side_effect=mock_pop):
286289
with mock.patch.object(swsscommon.Select, 'select',
287290
side_effect=[(swsscommon.Select.OBJECT, None), KeyboardInterrupt]):
288291

@@ -292,11 +295,17 @@ def hset(key, field, value):
292295
dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)
293296
dpu_state_mng.task_worker()
294297

295-
# Verify no state update occurred since this was our own state update
296-
assert chassis_state_db == {}
298+
# Verify state was updated even with None result
299+
assert chassis_state_db == {'DPU0': {
300+
'dpu_data_plane_state': 'up',
301+
'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
302+
'dpu_control_plane_state': 'up',
303+
'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'
304+
}}
297305

298306

299-
def test_dpu_state_manager_none_result():
307+
def test_dpu_state_manager_state_tracking():
308+
"""Test that DpuStateManagerTask correctly tracks current states and avoids unnecessary updates"""
300309
chassis = MockDpuChassis()
301310
chassis.get_dpu_id = MagicMock(return_value=0)
302311
chassis.get_dataplane_state = MagicMock(return_value=True)
@@ -309,11 +318,9 @@ def hset(key, field, value):
309318
chassis_state_db[key] = {}
310319
chassis_state_db[key][field] = value
311320

312-
def mock_pop():
313-
return None
314-
321+
# First update - should set initial states
315322
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
316-
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop', side_effect=mock_pop):
323+
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop', return_value=('DPU0', 'SET', None)):
317324
with mock.patch.object(swsscommon.Select, 'select',
318325
side_effect=[(swsscommon.Select.OBJECT, None), KeyboardInterrupt]):
319326

@@ -323,10 +330,128 @@ def mock_pop():
323330
dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)
324331
dpu_state_mng.task_worker()
325332

326-
# Verify state was updated even with None result
333+
# Verify initial state was set
327334
assert chassis_state_db == {'DPU0': {
328335
'dpu_data_plane_state': 'up',
329336
'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
330337
'dpu_control_plane_state': 'up',
331338
'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'
332339
}}
340+
341+
# Verify current states are tracked
342+
assert dpu_state_mng.current_dp_state == 'up'
343+
assert dpu_state_mng.current_cp_state == 'up'
344+
345+
346+
def test_dpu_state_manager_avoid_duplicate_updates():
347+
"""Test that DpuStateManagerTask avoids duplicate state updates"""
348+
chassis = MockDpuChassis()
349+
chassis.get_dpu_id = MagicMock(return_value=0)
350+
chassis.get_dataplane_state = MagicMock(return_value=True)
351+
chassis.get_controlplane_state = MagicMock(return_value=True)
352+
353+
chassis_state_db = {}
354+
update_count = 0
355+
356+
def hset(key, field, value):
357+
nonlocal update_count
358+
update_count += 1
359+
if key not in chassis_state_db:
360+
chassis_state_db[key] = {}
361+
chassis_state_db[key][field] = value
362+
363+
# Test with same state update
364+
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
365+
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop',
366+
return_value=('DPU0', 'SET', (('dpu_data_plane_state', 'up'), ('dpu_control_plane_state', 'up')))):
367+
with mock.patch.object(swsscommon.Select, 'select',
368+
side_effect=[(swsscommon.Select.OBJECT, None), (swsscommon.Select.OBJECT, None), KeyboardInterrupt]):
369+
370+
dpu_updater = DpuStateUpdater(SYSLOG_IDENTIFIER, chassis)
371+
dpu_updater._time_now = MagicMock(return_value='Sat Jan 01 12:00:00 AM UTC 2000')
372+
373+
dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)
374+
dpu_state_mng.current_dp_state = 'up'
375+
dpu_state_mng.current_cp_state = 'up'
376+
dpu_state_mng.task_worker()
377+
378+
# Verify no updates occurred since states were unchanged
379+
assert update_count == 0
380+
381+
382+
def test_dpu_state_manager_different_dpu():
383+
"""Test that DpuStateManagerTask handles updates for different DPUs correctly"""
384+
chassis = MockDpuChassis()
385+
chassis.get_dpu_id = MagicMock(return_value=0)
386+
chassis.get_dataplane_state = MagicMock(return_value=True)
387+
chassis.get_controlplane_state = MagicMock(return_value=True)
388+
389+
chassis_state_db = {}
390+
update_count = 0
391+
392+
def hset(key, field, value):
393+
nonlocal update_count
394+
update_count += 1
395+
if key not in chassis_state_db:
396+
chassis_state_db[key] = {}
397+
chassis_state_db[key][field] = value
398+
399+
# Test with update for different DPU
400+
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
401+
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop',
402+
return_value=('DPU1', 'SET', (('dpu_data_plane_state', 'down'), ('dpu_control_plane_state', 'down')))):
403+
with mock.patch.object(swsscommon.Select, 'select',
404+
side_effect=[(swsscommon.Select.OBJECT, None), KeyboardInterrupt]):
405+
406+
dpu_updater = DpuStateUpdater(SYSLOG_IDENTIFIER, chassis)
407+
dpu_updater._time_now = MagicMock(return_value='Sat Jan 01 12:00:00 AM UTC 2000')
408+
409+
dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)
410+
dpu_state_mng.current_dp_state = 'up'
411+
dpu_state_mng.current_cp_state = 'up'
412+
dpu_state_mng.task_worker()
413+
414+
# Verify no updates occurred since it was for a different DPU
415+
assert update_count == 0
416+
417+
418+
def test_dpu_state_manager_state_change():
419+
"""Test that DpuStateManagerTask handles state changes correctly"""
420+
chassis = MockDpuChassis()
421+
chassis.get_dpu_id = MagicMock(return_value=0)
422+
chassis.get_dataplane_state = MagicMock(return_value=True)
423+
chassis.get_controlplane_state = MagicMock(return_value=False) # CP state changed to False
424+
425+
chassis_state_db = {}
426+
427+
def hset(key, field, value):
428+
if key not in chassis_state_db:
429+
chassis_state_db[key] = {}
430+
chassis_state_db[key][field] = value
431+
432+
# Test with state change
433+
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
434+
with mock.patch.object(swsscommon.SubscriberStateTable, 'pop',
435+
return_value=('DPU0', 'SET', None)):
436+
with mock.patch.object(swsscommon.Select, 'select',
437+
side_effect=[(swsscommon.Select.OBJECT, None), KeyboardInterrupt]):
438+
439+
dpu_updater = DpuStateUpdater(SYSLOG_IDENTIFIER, chassis)
440+
dpu_updater._time_now = MagicMock(return_value='Sat Jan 01 12:00:00 AM UTC 2000')
441+
442+
dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)
443+
dpu_state_mng.current_dp_state = 'up'
444+
dpu_state_mng.current_cp_state = 'up' # Previous state was up
445+
dpu_state_mng.task_worker()
446+
447+
# Verify state was updated since control plane state changed
448+
assert chassis_state_db == {'DPU0': {
449+
'dpu_data_plane_state': 'up',
450+
'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
451+
'dpu_control_plane_state': 'down', # Changed to down
452+
'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'
453+
}}
454+
455+
# Verify current states were updated
456+
assert dpu_state_mng.current_dp_state == 'up'
457+
assert dpu_state_mng.current_cp_state == 'down'

0 commit comments

Comments
 (0)