Skip to content

Commit 081dd01

Browse files
theasianpianistSumukha Tumkur Vani
and
Sumukha Tumkur Vani
authored
Handle dual ToR neighbor miss scenario (#2137)
- When orchagent receives a neighbor update with a zero MAC: - If the neighbor IP is configured for a specific mux cable port in the MUX_CABLE table in CONFIG_DB, handle the neighbor normally (if active for the port, no action is needed. if standby, a tunnel route is created for the neighbor IP) - If the neighbor IP is not configured for a specific port, create a tunnel route for the IP to the peer switch. - When these neighbor IPs are eventually resolved, remove the tunnel route and handle the neighbor normally. - When creating/initializing a mux cable object, set the internal state to standby to match the constructor behavior. - Various formatting fixes inside test_mux.py - Remove references to deprecated `@pytest.yield_fixture` - Add dual ToR neighbor miss test cases: - Test cases and expected results are described in `mux_neigh_miss_tests.py`. These descriptions are used by the generic test runner `test_neighbor_miss` function to execute the test actions and verify expected results - Various setup fixtures and test info fixtures were added - Existing test cases were changed to use these setup fixtures for consistency Signed-off-by: Lawrence Lee <[email protected]> Co-authored-by: Sumukha Tumkur Vani <[email protected]>
1 parent 4bff5c6 commit 081dd01

14 files changed

+756
-138
lines changed

neighsyncd/neighsync.cpp

+31-5
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717
using namespace std;
1818
using namespace swss;
1919

20-
NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) :
20+
NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb) :
2121
m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME),
22-
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME)
22+
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
23+
m_cfgPeerSwitchTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME)
2324
{
2425
m_AppRestartAssist = new AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", DEFAULT_NEIGHSYNC_WARMSTART_TIMER);
2526
if (m_AppRestartAssist)
@@ -87,14 +88,39 @@ void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
8788
return;
8889
}
8990

91+
std::vector<std::string> peerSwitchKeys;
9092
bool delete_key = false;
91-
if ((nlmsg_type == RTM_DELNEIGH) || (state == NUD_INCOMPLETE) ||
92-
(state == NUD_FAILED))
93+
bool use_zero_mac = false;
94+
m_cfgPeerSwitchTable.getKeys(peerSwitchKeys);
95+
bool is_dualtor = peerSwitchKeys.size() > 0;
96+
if (is_dualtor && (state == NUD_INCOMPLETE || state == NUD_FAILED))
97+
{
98+
SWSS_LOG_INFO("Unable to resolve %s, setting zero MAC", key.c_str());
99+
use_zero_mac = true;
100+
101+
// Unresolved neighbor deletion on dual ToR devices must be handled
102+
// separately, otherwise delete_key is never set to true
103+
// and neighorch is never able to remove the neighbor
104+
if (nlmsg_type == RTM_DELNEIGH)
105+
{
106+
delete_key = true;
107+
}
108+
}
109+
else if ((nlmsg_type == RTM_DELNEIGH) ||
110+
(state == NUD_INCOMPLETE) || (state == NUD_FAILED))
93111
{
94112
delete_key = true;
95113
}
96114

97-
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
115+
if (use_zero_mac)
116+
{
117+
std::string zero_mac = "00:00:00:00:00:00";
118+
strncpy(macStr, zero_mac.c_str(), zero_mac.length());
119+
}
120+
else
121+
{
122+
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
123+
}
98124

99125
/* Ignore neighbor entries with Broadcast Mac - Trigger for directed broadcast */
100126
if (!delete_key && (MacAddress(macStr) == MacAddress("ff:ff:ff:ff:ff:ff")))

neighsyncd/neighsync.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class NeighSync : public NetMsg
2323
public:
2424
enum { MAX_ADDR_SIZE = 64 };
2525

26-
NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb);
26+
NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb);
2727
~NeighSync();
2828

2929
virtual void onMsg(int nlmsg_type, struct nl_object *obj);
@@ -36,7 +36,7 @@ class NeighSync : public NetMsg
3636
}
3737

3838
private:
39-
Table m_stateNeighRestoreTable;
39+
Table m_stateNeighRestoreTable, m_cfgPeerSwitchTable;
4040
ProducerStateTable m_neighTable;
4141
AppRestartAssist *m_AppRestartAssist;
4242
};

neighsyncd/neighsyncd.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ int main(int argc, char **argv)
1818
DBConnector appDb("APPL_DB", 0);
1919
RedisPipeline pipelineAppDB(&appDb);
2020
DBConnector stateDb("STATE_DB", 0);
21+
DBConnector cfgDb("CONFIG_DB", 0);
2122

22-
NeighSync sync(&pipelineAppDB, &stateDb);
23+
NeighSync sync(&pipelineAppDB, &stateDb, &cfgDb);
2324

2425
NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync);
2526
NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync);

orchagent/muxorch.cpp

+53
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ MuxCable::MuxCable(string name, IpPrefix& srv_ip4, IpPrefix& srv_ip6, IpAddress
329329

330330
/* Set initial state to "standby" */
331331
stateStandby();
332+
state_ = MuxState::MUX_STATE_STANDBY;
332333
}
333334

334335
bool MuxCable::stateInitActive()
@@ -1025,6 +1026,37 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
10251026
return;
10261027
}
10271028

1029+
auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
1030+
// Handling zero MAC neighbor updates
1031+
if (!update.mac)
1032+
{
1033+
/* For neighbors that were previously resolvable but are now unresolvable,
1034+
* we expect such neighbor entries to be deleted prior to a zero MAC update
1035+
* arriving for that same neighbor.
1036+
*/
1037+
1038+
if (update.add)
1039+
{
1040+
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
1041+
{
1042+
createStandaloneTunnelRoute(update.entry.ip_address);
1043+
}
1044+
/* If the MAC address in the neighbor entry is zero but the neighbor IP
1045+
* is already present in standalone_tunnel_neighbors_, assume we have already
1046+
* added a tunnel route for it and exit early
1047+
*/
1048+
return;
1049+
}
1050+
}
1051+
/* If the update operation for a neighbor contains a non-zero MAC, we must
1052+
* make sure to remove any existing tunnel routes to prevent conflicts.
1053+
* This block also covers the case of neighbor deletion.
1054+
*/
1055+
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
1056+
{
1057+
removeStandaloneTunnelRoute(update.entry.ip_address);
1058+
}
1059+
10281060
for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
10291061
{
10301062
MuxCable* ptr = it->second.get();
@@ -1292,6 +1324,27 @@ bool MuxOrch::delOperation(const Request& request)
12921324
return true;
12931325
}
12941326

1327+
void MuxOrch::createStandaloneTunnelRoute(IpAddress neighborIp)
1328+
{
1329+
SWSS_LOG_INFO("Creating standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
1330+
sai_object_id_t tunnel_nexthop = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
1331+
if (tunnel_nexthop == SAI_NULL_OBJECT_ID) {
1332+
SWSS_LOG_NOTICE("%s nexthop not created yet, ignoring tunnel route creation for %s", MUX_TUNNEL, neighborIp.to_string().c_str());
1333+
return;
1334+
}
1335+
IpPrefix pfx = neighborIp.to_string();
1336+
create_route(pfx, tunnel_nexthop);
1337+
standalone_tunnel_neighbors_.insert(neighborIp);
1338+
}
1339+
1340+
void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
1341+
{
1342+
SWSS_LOG_INFO("Removing standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
1343+
IpPrefix pfx = neighborIp.to_string();
1344+
remove_route(pfx);
1345+
standalone_tunnel_neighbors_.erase(neighborIp);
1346+
}
1347+
12951348
MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
12961349
Orch2(db, tableName, request_),
12971350
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),

orchagent/muxorch.h

+8
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ class MuxOrch : public Orch2, public Observer, public Subject
196196

197197
bool getMuxPort(const MacAddress&, const string&, string&);
198198

199+
/***
200+
* Methods for managing tunnel routes for neighbor IPs not associated
201+
* with a specific mux cable
202+
***/
203+
void createStandaloneTunnelRoute(IpAddress neighborIp);
204+
void removeStandaloneTunnelRoute(IpAddress neighborIp);
205+
199206
IpAddress mux_peer_switch_ = 0x0;
200207
sai_object_id_t mux_tunnel_id_ = SAI_NULL_OBJECT_ID;
201208

@@ -210,6 +217,7 @@ class MuxOrch : public Orch2, public Observer, public Subject
210217
FdbOrch *fdb_orch_;
211218

212219
MuxCfgRequest request_;
220+
std::set<IpAddress> standalone_tunnel_neighbors_;
213221
};
214222

215223
const request_description_t mux_cable_request_description = {

orchagent/neighorch.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,16 @@ void NeighOrch::doTask(Consumer &consumer)
547547
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
548548
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
549549
{
550-
if (addNeighbor(neighbor_entry, mac_address))
550+
// only for unresolvable neighbors that are new
551+
if (!mac_address)
552+
{
553+
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
554+
{
555+
addZeroMacTunnelRoute(neighbor_entry, mac_address);
556+
}
557+
it = consumer.m_toSync.erase(it);
558+
}
559+
else if (addNeighbor(neighbor_entry, mac_address))
551560
{
552561
it = consumer.m_toSync.erase(it);
553562
}
@@ -954,3 +963,12 @@ bool NeighOrch::removeTunnelNextHop(const NextHopKey& nh)
954963
return true;
955964
}
956965

966+
void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
967+
{
968+
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
969+
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
970+
NeighborUpdate update = {entry, mac, true};
971+
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
972+
m_syncdNeighbors[entry] = { mac, false };
973+
}
974+

orchagent/neighorch.h

+2
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ class NeighOrch : public Orch, public Subject, public Observer
9999

100100
bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
101101
void clearResolvedNeighborEntry(const NeighborEntry &);
102+
103+
void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
102104
};
103105

104106
#endif /* SWSS_NEIGHORCH_H */

tests/conftest.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -1529,7 +1529,7 @@ def verify_vct(self):
15291529
return ret1 and ret2
15301530

15311531

1532-
@pytest.yield_fixture(scope="module")
1532+
@pytest.fixture(scope="module")
15331533
def dvs(request) -> DockerVirtualSwitch:
15341534
if sys.version_info[0] < 3:
15351535
raise NameError("Python 2 is not supported, please install python 3")
@@ -1559,7 +1559,7 @@ def dvs(request) -> DockerVirtualSwitch:
15591559
dvs.ctn_restart()
15601560

15611561

1562-
@pytest.yield_fixture(scope="module")
1562+
@pytest.fixture(scope="module")
15631563
def vct(request):
15641564
vctns = request.config.getoption("--vctns")
15651565
topo = request.config.getoption("--topo")
@@ -1579,7 +1579,7 @@ def vct(request):
15791579
vct.destroy()
15801580

15811581

1582-
@pytest.yield_fixture
1582+
@pytest.fixture
15831583
def testlog(request, dvs):
15841584
dvs.runcmd(f"logger === start test {request.node.name} ===")
15851585
yield testlog
@@ -1602,13 +1602,13 @@ def dvs_route(request, dvs) -> DVSRoute:
16021602

16031603
# FIXME: The rest of these also need to be reverted back to normal fixtures to
16041604
# appease the linter.
1605-
@pytest.yield_fixture(scope="class")
1605+
@pytest.fixture(scope="class")
16061606
def dvs_lag_manager(request, dvs):
16071607
request.cls.dvs_lag = dvs_lag.DVSLag(dvs.get_asic_db(),
16081608
dvs.get_config_db())
16091609

16101610

1611-
@pytest.yield_fixture(scope="class")
1611+
@pytest.fixture(scope="class")
16121612
def dvs_vlan_manager(request, dvs):
16131613
request.cls.dvs_vlan = dvs_vlan.DVSVlan(dvs.get_asic_db(),
16141614
dvs.get_config_db(),
@@ -1617,7 +1617,7 @@ def dvs_vlan_manager(request, dvs):
16171617
dvs.get_app_db())
16181618

16191619

1620-
@pytest.yield_fixture(scope="class")
1620+
@pytest.fixture(scope="class")
16211621
def dvs_mirror_manager(request, dvs):
16221622
request.cls.dvs_mirror = dvs_mirror.DVSMirror(dvs.get_asic_db(),
16231623
dvs.get_config_db(),
@@ -1626,7 +1626,7 @@ def dvs_mirror_manager(request, dvs):
16261626
dvs.get_app_db())
16271627

16281628

1629-
@pytest.yield_fixture(scope="class")
1629+
@pytest.fixture(scope="class")
16301630
def dvs_policer_manager(request, dvs):
16311631
request.cls.dvs_policer = dvs_policer.DVSPolicer(dvs.get_asic_db(),
16321632
dvs.get_config_db())
@@ -1647,7 +1647,7 @@ def remove_dpb_config_file(dvs):
16471647
dvs.runcmd(cmd)
16481648

16491649

1650-
@pytest.yield_fixture(scope="module")
1650+
@pytest.fixture(scope="module")
16511651
def dpb_setup_fixture(dvs):
16521652
create_dpb_config_file(dvs)
16531653
if dvs.vct is None:

0 commit comments

Comments
 (0)