Skip to content

Commit 41e61bd

Browse files
jipanyang authored and qiluo-msft committed
Warm reboot: port state sync up (sonic-net#557)
* Warm start: port state sync up * Use Table::hget() to simplify the oper_status retrieval processing. * Add more comments for port state sync up * Use m_oper_status field of Port class instead of reading port oper status from appDB. * Add common function for port oper status update * Throw exception upon port oper status get error * Add VS test for port state sync up
1 parent bf38bec commit 41e61bd

File tree

5 files changed

+167
-5
lines changed

5 files changed

+167
-5
lines changed

orchagent/main.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,14 @@ int main(int argc, char **argv)
281281
exit(EXIT_FAILURE);
282282
}
283283

284-
syncd_apply_view();
284+
/*
285+
* In syncd view comparison solution, apply view has been sent
286+
* immediately after restore is done
287+
*/
288+
if (!WarmStart::isWarmStart())
289+
{
290+
syncd_apply_view();
291+
}
285292

286293
orchDaemon->start();
287294
}

orchagent/orchdaemon.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ using namespace swss;
1717

1818
extern sai_switch_api_t* sai_switch_api;
1919
extern sai_object_id_t gSwitchId;
20+
21+
extern void syncd_apply_view();
2022
/*
2123
* Global orch daemon variables
2224
*/
@@ -387,7 +389,10 @@ bool OrchDaemon::warmRestoreAndSyncUp()
387389

388390
SWSS_LOG_NOTICE("Orchagent state restore done");
389391

390-
/* TODO: perform port and fdb state sync up*/
392+
syncd_apply_view();
393+
394+
/* Start dynamic state sync up */
395+
gPortsOrch->refreshPortStatus();
391396

392397
/*
393398
* Note. Arp sync up is handled in neighsyncd.

orchagent/portsorch.cpp

+56-2
Original file line numberDiff line numberDiff line change
@@ -2866,10 +2866,64 @@ void PortsOrch::doTask(NotificationConsumer &consumer)
28662866

28672867
SWSS_LOG_NOTICE("Get port state change notification id:%lx status:%d", id, status);
28682868

2869-
this->updateDbPortOperStatus(id, status);
2870-
this->setHostIntfsOperStatus(id, status == SAI_PORT_OPER_STATUS_UP);
2869+
Port p;
2870+
if (!getPort(id, p))
2871+
{
2872+
SWSS_LOG_ERROR("Failed to get port object for port id 0x%lx", id);
2873+
continue;
2874+
}
2875+
updatePortOperStatus(p, status);
28712876
}
28722877

28732878
sai_deserialize_free_port_oper_status_ntf(count, portoperstatus);
28742879
}
28752880
}
2881+
2882+
/*
 * Apply a newly observed oper status to a port.
 *
 * Nothing happens when the reported status equals port.m_oper_status.
 * Otherwise the transition is logged and the DB entry is updated through
 * updateDbPortOperStatus(). The host interface oper status is only touched
 * when the transition enters or leaves SAI_PORT_OPER_STATUS_UP.
 *
 * oper_status_strings.at() is used for the log text, so a status value
 * missing from that table raises whatever exception at() raises.
 */
void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status)
{
    if (status == port.m_oper_status)
    {
        /* No transition, nothing to propagate. */
        return;
    }

    SWSS_LOG_NOTICE("Port state changed for %s from %s to %s", port.m_alias.c_str(),
            oper_status_strings.at(port.m_oper_status).c_str(), oper_status_strings.at(status).c_str());

    this->updateDbPortOperStatus(port.m_port_id, status);

    /* Evaluated after the DB update, exactly where the original condition was evaluated. */
    const bool nowUp = (status == SAI_PORT_OPER_STATUS_UP);
    const bool wasUp = (port.m_oper_status == SAI_PORT_OPER_STATUS_UP);
    if (nowUp || wasUp)
    {
        this->setHostIntfsOperStatus(port.m_port_id, nowUp);
    }
}
2895+
/*
2896+
* sync up orchagent with libsai/ASIC for port state.
2897+
*
2898+
* Currently NotificationProducer is used by syncd to inform port state change,
2899+
* which means orchagent will miss the signal if it happens between orchagent shutdown and startup.
2900+
* Syncd doesn't know whether the signal has been lost or not.
2901+
* Also the source of notification event is from libsai/SDK.
2902+
*
2903+
* Latest oper status for each port is retrieved via SAI_PORT_ATTR_OPER_STATUS sai API,
2904+
* the hostif and db are updated accordingly.
2905+
*/
2906+
void PortsOrch::refreshPortStatus()
2907+
{
2908+
SWSS_LOG_ENTER();
2909+
2910+
for (auto &it: m_portList)
2911+
{
2912+
auto &p = it.second;
2913+
if (p.m_type == Port::PHY)
2914+
{
2915+
sai_attribute_t attr;
2916+
attr.id = SAI_PORT_ATTR_OPER_STATUS;
2917+
2918+
sai_status_t ret = sai_port_api->get_port_attribute(p.m_port_id, 1, &attr);
2919+
if (ret != SAI_STATUS_SUCCESS)
2920+
{
2921+
SWSS_LOG_ERROR("Failed to get oper status for %s", p.m_alias.c_str());
2922+
throw "PortsOrch get port oper status failure";
2923+
}
2924+
sai_port_oper_status_t status = (sai_port_oper_status_t)attr.value.u32;
2925+
SWSS_LOG_INFO("%s oper status is %s", p.m_alias.c_str(), oper_status_strings.at(status).c_str());
2926+
updatePortOperStatus(p, status);
2927+
}
2928+
}
2929+
}

orchagent/portsorch.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class PortsOrch : public Orch, public Subject
7575
bool setPortPfc(sai_object_id_t portId, uint8_t pfc_bitmask);
7676

7777
void generateQueueMap();
78-
78+
void refreshPortStatus();
7979
private:
8080
unique_ptr<Table> m_counterTable;
8181
unique_ptr<Table> m_portTable;
@@ -167,6 +167,8 @@ class PortsOrch : public Orch, public Subject
167167

168168
bool setPortAutoNeg(sai_object_id_t id, int an);
169169
bool setPortFecMode(sai_object_id_t id, int fec);
170+
171+
void updatePortOperStatus(Port &port, sai_port_oper_status_t status);
170172
};
171173
#endif /* SWSS_PORTSORCH_H */
172174

tests/test_warm_reboot.py

+94
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@
44
import time
55
import json
66

7+
# Start the SWSS processes through supervisorctl.
def start_swss(dvs):
    """Start every SWSS process with one `sh -c` invocation run via dvs.runcmd()."""
    start_all = (
        'supervisorctl start orchagent; supervisorctl start portsyncd; supervisorctl start intfsyncd; '
        'supervisorctl start neighsyncd; supervisorctl start intfmgrd; supervisorctl start vlanmgrd; '
        'supervisorctl start buffermgrd; supervisorctl start arp_update'
    )
    dvs.runcmd(['sh', '-c', start_all])
12+
13+
# Stop the SWSS processes through supervisorctl.
def stop_swss(dvs):
    """Stop every SWSS process with one `sh -c` invocation run via dvs.runcmd()."""
    stop_all = (
        'supervisorctl stop orchagent; supervisorctl stop portsyncd; supervisorctl stop intfsyncd; '
        'supervisorctl stop neighsyncd; supervisorctl stop intfmgrd; supervisorctl stop vlanmgrd; '
        'supervisorctl stop buffermgrd; supervisorctl stop arp_update'
    )
    dvs.runcmd(['sh', '-c', stop_all])
18+
19+
720
# Get restart count of all processes supporting warm restart
821
def swss_get_RestartCount(state_db):
922
restart_count = {}
@@ -683,3 +696,84 @@ def test_swss_neighbor_syncup(dvs):
683696
check_sairedis_for_neighbor_entry(dvs, 4, 4, 4)
684697
# check restart Count
685698
swss_app_check_RestartCount_single(state_db, restart_count, "neighsyncd")
699+
700+
def test_swss_port_state_syncup(dvs):
    """Check that port oper status is re-synchronized after an SWSS warm restart.

    The test enables warm restart for "swss", brings three ports into a known
    oper state, stops SWSS, changes the link state of the peer servers while
    SWSS is down, restarts SWSS and then verifies that APP_PORT_TABLE reflects
    the link state that was set during the outage.
    """

    def get_oper_status(port_table, port_name):
        # Return the "oper_status" field of the port entry, or "unknown" when
        # the entry has no such field. The entry itself must exist.
        (found, fvs) = port_table.get(port_name)
        assert found, "%s is missing from the port table" % port_name
        return next((fv[1] for fv in fvs if fv[0] == "oper_status"), "unknown")

    def check_oper_status(port_table, expected):
        # expected is an ordered list of (port name, expected oper_status).
        for port_name, wanted in expected:
            assert get_oper_status(port_table, port_name) == wanted

    def set_server_link(server_index, state):
        # The return value of runcmd() is not checked; the oper_status
        # assertions that follow each phase verify the effect instead.
        dvs.servers[server_index].runcmd("ip link set %s dev eth0" % state)

    appl_db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
    conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0)
    state_db = swsscommon.DBConnector(swsscommon.STATE_DB, dvs.redis_sock, 0)

    # enable warm restart
    # TODO: use cfg command to config it
    create_entry_tbl(
        conf_db,
        swsscommon.CFG_WARM_RESTART_TABLE_NAME, "swss",
        [
            ("enable", "true"),
        ]
    )

    tbl = swsscommon.Table(appl_db, swsscommon.APP_PORT_TABLE_NAME)

    restart_count = swss_get_RestartCount(state_db)

    # update port admin state
    dvs.runcmd("ifconfig Ethernet0 10.0.0.0/31 up")
    dvs.runcmd("ifconfig Ethernet4 10.0.0.2/31 up")
    dvs.runcmd("ifconfig Ethernet8 10.0.0.4/31 up")

    dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01")
    dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02")
    dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03")

    # Before the restart: only the link behind Ethernet8 is brought back up.
    for index in (0, 1, 2):
        set_server_link(index, "down")

    set_server_link(2, "up")

    time.sleep(3)

    check_oper_status(tbl, [
        ("Ethernet0", "down"),
        ("Ethernet4", "down"),
        ("Ethernet8", "up"),
    ])

    stop_swss(dvs)
    time.sleep(3)

    # flap the port oper status for Ethernet0, Ethernet4 and Ethernet8
    # while SWSS is down, so the resulting state is the inverse of the above
    for index in (0, 1, 2):
        set_server_link(index, "down")

    for index in (0, 1):
        set_server_link(index, "up")

    time.sleep(5)
    start_swss(dvs)
    time.sleep(10)

    swss_check_RestartCount(state_db, restart_count)

    check_oper_status(tbl, [
        ("Ethernet0", "up"),
        ("Ethernet4", "up"),
        ("Ethernet8", "down"),
    ])
779+

0 commit comments

Comments
 (0)