Skip to content

Commit d8a1cb7

Browse files
authored
[dualtor] Fix neighbor miss when mux is not ready (sonic-net#2676)
What I did: The issue is that NeighOrch::m_syncdNeighbors assumes every cached zero-MAC neighbor has a tunnel route installed, so the check performed before adding tunnel routes ignores all subsequent zero-MAC neighbor events for the same neighbor. To guarantee that a zero-MAC neighbor present in NeighOrch::m_syncdNeighbors really has a tunnel route installed, this change verifies that the tunnel route was installed successfully before adding the neighbor to NeighOrch::m_syncdNeighbors. Why I did it: To fix sonic-net#2675. If MuxOrch is not fully initialized and a FAILED neighbor is added to the kernel, tunnel route creation fails. However, subsequent FAILED neighbor events cannot trigger tunnel route creation, because NeighOrch::m_syncdNeighbors caches the first event and regards the tunnel route as already installed. How I verified it: Unit tests, and verification on a testbed. Signed-off-by: Longxiang Lyu <[email protected]>
1 parent 1531dff commit d8a1cb7

File tree

5 files changed

+79
-15
lines changed

5 files changed

+79
-15
lines changed

orchagent/muxorch.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
11041104
return;
11051105
}
11061106

1107-
auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
1107+
bool is_tunnel_route_installed = isStandaloneTunnelRouteInstalled(update.entry.ip_address);
11081108
// Handling zero MAC neighbor updates
11091109
if (!update.mac)
11101110
{
@@ -1115,7 +1115,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
11151115

11161116
if (update.add)
11171117
{
1118-
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
1118+
if (!is_tunnel_route_installed)
11191119
{
11201120
createStandaloneTunnelRoute(update.entry.ip_address);
11211121
}
@@ -1130,7 +1130,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
11301130
* make sure to remove any existing tunnel routes to prevent conflicts.
11311131
* This block also covers the case of neighbor deletion.
11321132
*/
1133-
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
1133+
if (is_tunnel_route_installed)
11341134
{
11351135
removeStandaloneTunnelRoute(update.entry.ip_address);
11361136
}
@@ -1474,6 +1474,11 @@ void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
14741474
standalone_tunnel_neighbors_.erase(neighborIp);
14751475
}
14761476

1477+
bool MuxOrch::isStandaloneTunnelRouteInstalled(const IpAddress& neighborIp)
1478+
{
1479+
return standalone_tunnel_neighbors_.find(neighborIp) != standalone_tunnel_neighbors_.end();
1480+
}
1481+
14771482
MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
14781483
Orch2(db, tableName, request_),
14791484
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),

orchagent/muxorch.h

+2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ class MuxOrch : public Orch2, public Observer, public Subject
202202
bool removeNextHopTunnel(std::string tunnelKey, IpAddress& ipAddr);
203203
sai_object_id_t getNextHopTunnelId(std::string tunnelKey, IpAddress& ipAddr);
204204

205+
bool isStandaloneTunnelRouteInstalled(const IpAddress& neighborIp);
206+
205207
private:
206208
virtual bool addOperation(const Request& request);
207209
virtual bool delOperation(const Request& request);

orchagent/neighorch.cpp

+32-10
Original file line numberDiff line numberDiff line change
@@ -739,17 +739,33 @@ void NeighOrch::doTask(Consumer &consumer)
739739
mac_address = MacAddress(fvValue(*i));
740740
}
741741

742-
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
743-
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
742+
bool nbr_not_found = (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end());
743+
if (nbr_not_found || m_syncdNeighbors[neighbor_entry].mac != mac_address)
744744
{
745-
// only for unresolvable neighbors that are new
746-
if (!mac_address)
745+
if (!mac_address)
747746
{
748-
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
747+
if (nbr_not_found)
749748
{
750-
addZeroMacTunnelRoute(neighbor_entry, mac_address);
749+
// only for unresolvable neighbors that are new
750+
if (addZeroMacTunnelRoute(neighbor_entry, mac_address))
751+
{
752+
it = consumer.m_toSync.erase(it);
753+
}
754+
else
755+
{
756+
it++;
757+
continue;
758+
}
759+
}
760+
else
761+
{
762+
/*
763+
* For neighbors that were previously resolvable but are now unresolvable,
764+
* we expect such neighbor entries to be deleted prior to a zero MAC update
765+
* arriving for that same neighbor.
766+
*/
767+
it = consumer.m_toSync.erase(it);
751768
}
752-
it = consumer.m_toSync.erase(it);
753769
}
754770
else if (addNeighbor(neighbor_entry, mac_address))
755771
{
@@ -1755,12 +1771,18 @@ void NeighOrch::updateSrv6Nexthop(const NextHopKey &nh, const sai_object_id_t &n
17551771
m_syncdNextHops.erase(nh);
17561772
}
17571773
}
1758-
void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
1774+
1775+
bool NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
17591776
{
17601777
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
17611778
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
17621779
NeighborUpdate update = {entry, mac, true};
17631780
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
1764-
m_syncdNeighbors[entry] = { mac, false };
1765-
}
1781+
if (mux_orch->isStandaloneTunnelRouteInstalled(entry.ip_address))
1782+
{
1783+
m_syncdNeighbors[entry] = { mac, false };
1784+
return true;
1785+
}
17661786

1787+
return false;
1788+
}

orchagent/neighorch.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class NeighOrch : public Orch, public Subject, public Observer
116116
bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
117117
void clearResolvedNeighborEntry(const NeighborEntry &);
118118

119-
void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
119+
bool addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
120120
};
121121

122122
#endif /* SWSS_NEIGHORCH_H */

tests/test_mux.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -1173,12 +1173,16 @@ def test_Route(self, dvs, dvs_route, testlog):
11731173

11741174
self.create_and_test_route(appdb, asicdb, dvs, dvs_route)
11751175

1176-
def test_NH(self, dvs, dvs_route, intf_fdb_map, setup_peer_switch, setup_tunnel, testlog):
1176+
def test_NH(self, dvs, dvs_route, intf_fdb_map, setup, setup_mux_cable,
1177+
setup_peer_switch, setup_tunnel, testlog):
11771178
""" test NH routes and mux state change """
11781179
appdb = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
11791180
asicdb = dvs.get_asic_db()
11801181
mac = intf_fdb_map["Ethernet0"]
11811182

1183+
# get tunnel nexthop
1184+
self.check_tnl_nexthop_in_asic_db(asicdb)
1185+
11821186
self.create_and_test_NH_routes(appdb, asicdb, dvs, dvs_route, mac)
11831187

11841188
def test_acl(self, dvs, dvs_acl, testlog):
@@ -1226,6 +1230,37 @@ def test_neighbor_miss(
12261230
expected_mac=mac if exp_result[REAL_MAC] else '00:00:00:00:00:00'
12271231
)
12281232

1233+
def test_neighbor_miss_no_mux(
1234+
self, dvs, dvs_route, setup_vlan, setup_tunnel, setup,
1235+
setup_peer_switch, neighbor_cleanup, testlog
1236+
):
1237+
config_db = dvs.get_config_db()
1238+
appdb = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
1239+
1240+
test_ip = self.SERV1_SOC_IPV4
1241+
self.ping_ip(dvs, test_ip)
1242+
1243+
# no mux present, no standalone tunnel route installed
1244+
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=False)
1245+
1246+
# setup the mux
1247+
config_db = dvs.get_config_db()
1248+
self.create_mux_cable(config_db)
1249+
# tunnel route should be installed immediately after mux setup
1250+
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)
1251+
1252+
# set port state as standby
1253+
self.set_mux_state(appdb, "Ethernet0", "standby")
1254+
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)
1255+
1256+
# set port state as active
1257+
self.set_mux_state(appdb, "Ethernet0", "active")
1258+
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)
1259+
1260+
# clear the FAILED neighbor
1261+
self.clear_neighbors(dvs)
1262+
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=False)
1263+
12291264
def test_neighbor_miss_no_peer(
12301265
self, dvs, dvs_route, setup_vlan, setup_mux_cable, setup_tunnel,
12311266
remove_peer_switch, neighbor_cleanup, testlog

0 commit comments

Comments
 (0)