Skip to content

Commit 840fe1d

Browse files
[fpmsyncd] Implement pending route suppression feature (#2551)
DEPENDS: #2512. What I did: I implemented support for enabling the pending route suppression feature. When this feature is enabled, fpmsyncd waits for a reply from orchagent before sending the offload status message to zebra. Why I did it: To avoid announcing routes which are not yet offloaded in HW. How I verified it: UT and manual tests.
1 parent a2bd92f commit 840fe1d

13 files changed

+864
-10
lines changed

fpmsyncd/fpminterface.h

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#pragma once
2+
3+
#include <swss/selectable.h>
4+
#include <libnl3/netlink/netlink.h>
5+
6+
#include "fpm/fpm.h"
7+
8+
namespace swss
9+
{
10+
11+
/**
12+
* @brief FPM zebra communication interface
*
* Abstract interface: send() is pure virtual and must be provided by the
* implementing class. The interface itself derives from Selectable.
13+
*/
14+
class FpmInterface : public Selectable
15+
{
16+
public:
17+
virtual ~FpmInterface() = default;
18+
19+
/**
20+
* @brief Send netlink message through FPM socket
21+
* @param nl_hdr Netlink message
22+
* @return True on success, otherwise false is returned
23+
*/
24+
virtual bool send(nlmsghdr* nl_hdr) = 0;
25+
};
26+
27+
}

fpmsyncd/fpmlink.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,17 @@ FpmLink::FpmLink(RouteSync *rsync, unsigned short port) :
158158

159159
m_server_up = true;
160160
m_messageBuffer = new char[m_bufSize];
161+
m_sendBuffer = new char[m_bufSize];
162+
163+
m_routesync->onFpmConnected(*this);
161164
}
162165

163166
FpmLink::~FpmLink()
164167
{
168+
m_routesync->onFpmDisconnected();
169+
165170
delete[] m_messageBuffer;
171+
delete[] m_sendBuffer;
166172
if (m_connected)
167173
close(m_connection_socket);
168174
if (m_server_up)
@@ -277,3 +283,36 @@ void FpmLink::processFpmMessage(fpm_msg_hdr_t* hdr)
277283
nlmsg_free(msg);
278284
}
279285
}
286+
287+
/**
 * @brief Send a netlink message through the FPM socket.
 *
 * The message is framed with an fpm_msg_hdr_t, staged in m_sendBuffer and
 * written to m_connection_socket. The frame length is rounded up with
 * fpm_msg_align(); any padding bytes are zeroed so that stale data from a
 * previously sent frame is never put on the wire.
 *
 * @param nl_hdr Netlink message; nl_hdr->nlmsg_len bytes are copied from it.
 * @return True once the whole frame has been written, false if ::send() fails.
 *
 * Raises through SWSS_LOG_THROW when the frame does not fit into the send
 * buffer or into the 16-bit length that is written to the FPM header.
 */
bool FpmLink::send(nlmsghdr* nl_hdr)
{
    fpm_msg_hdr_t hdr{};

    const size_t payloadLen = sizeof(hdr) + nl_hdr->nlmsg_len;
    const size_t len = fpm_msg_align(payloadLen);

    if (len > m_bufSize)
    {
        SWSS_LOG_THROW("Message length %zu is greater than the send buffer size %u", len, m_bufSize);
    }

    /* The length is stored as a 16-bit value below; refuse frames that would
     * be silently truncated by the cast and produce a corrupted header. */
    if (len > UINT16_MAX)
    {
        SWSS_LOG_THROW("Message length %zu does not fit into the 16-bit FPM message length", len);
    }

    hdr.version = FPM_PROTO_VERSION;
    hdr.msg_type = FPM_MSG_TYPE_NETLINK;
    hdr.msg_len = htons(static_cast<uint16_t>(len));

    memcpy(m_sendBuffer, &hdr, sizeof(hdr));
    memcpy(m_sendBuffer + sizeof(hdr), nl_hdr, nl_hdr->nlmsg_len);

    /* Alignment padding is part of the transmitted frame: clear it instead of
     * sending whatever was left in the buffer by an earlier message. */
    if (len > payloadLen)
    {
        memset(m_sendBuffer + payloadLen, 0, len - payloadLen);
    }

    size_t sent = 0;
    while (sent != len)
    {
        auto rc = ::send(m_connection_socket, m_sendBuffer + sent, len - sent, 0);
        if (rc == -1)
        {
            /* An interrupted call transferred nothing; retry it. */
            if (errno == EINTR)
            {
                continue;
            }

            SWSS_LOG_ERROR("Failed to send FPM message: %s", strerror(errno));
            return false;
        }
        sent += static_cast<size_t>(rc);
    }

    return true;
}

fpmsyncd/fpmlink.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111
#include <unistd.h>
1212
#include <exception>
1313

14-
#include "selectable.h"
1514
#include "fpm/fpm.h"
15+
#include "fpmsyncd/fpminterface.h"
1616
#include "fpmsyncd/routesync.h"
1717

1818
namespace swss {
1919

20-
class FpmLink : public Selectable {
20+
class FpmLink : public FpmInterface {
2121
public:
2222
const int MSG_BATCH_SIZE;
2323
FpmLink(RouteSync *rsync, unsigned short port = FPM_DEFAULT_PORT);
@@ -41,10 +41,13 @@ class FpmLink : public Selectable {
4141

4242
void processFpmMessage(fpm_msg_hdr_t* hdr);
4343

44+
bool send(nlmsghdr* nl_hdr) override;
45+
4446
private:
4547
RouteSync *m_routesync;
4648
unsigned int m_bufSize;
4749
char *m_messageBuffer;
50+
char *m_sendBuffer;
4851
unsigned int m_pos;
4952

5053
bool m_connected;

fpmsyncd/fpmsyncd.cpp

+107-5
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
#include "select.h"
55
#include "selectabletimer.h"
66
#include "netdispatcher.h"
7+
#include "netlink.h"
8+
#include "notificationconsumer.h"
9+
#include "subscriberstatetable.h"
710
#include "warmRestartHelper.h"
811
#include "fpmsyncd/fpmlink.h"
912
#include "fpmsyncd/routesync.h"
1013

14+
#include <netlink/route/route.h>
1115

1216
using namespace std;
1317
using namespace swss;
@@ -47,21 +51,47 @@ static bool eoiuFlagsSet(Table &bgpStateTable)
4751
int main(int argc, char **argv)
4852
{
4953
swss::Logger::linkToDbNative("fpmsyncd");
54+
55+
const auto routeResponseChannelName = std::string("APPL_DB_") + APP_ROUTE_TABLE_NAME + "_RESPONSE_CHANNEL";
56+
5057
DBConnector db("APPL_DB", 0);
58+
DBConnector cfgDb("CONFIG_DB", 0);
59+
SubscriberStateTable deviceMetadataTableSubscriber(&cfgDb, CFG_DEVICE_METADATA_TABLE_NAME);
60+
Table deviceMetadataTable(&cfgDb, CFG_DEVICE_METADATA_TABLE_NAME);
61+
DBConnector applStateDb("APPL_STATE_DB", 0);
62+
std::unique_ptr<NotificationConsumer> routeResponseChannel;
63+
5164
RedisPipeline pipeline(&db);
5265
RouteSync sync(&pipeline);
5366

5467
DBConnector stateDb("STATE_DB", 0);
5568
Table bgpStateTable(&stateDb, STATE_BGP_TABLE_NAME);
5669

70+
NetLink netlink;
71+
72+
netlink.registerGroup(RTNLGRP_LINK);
73+
5774
NetDispatcher::getInstance().registerMessageHandler(RTM_NEWROUTE, &sync);
5875
NetDispatcher::getInstance().registerMessageHandler(RTM_DELROUTE, &sync);
76+
NetDispatcher::getInstance().registerMessageHandler(RTM_NEWLINK, &sync);
77+
NetDispatcher::getInstance().registerMessageHandler(RTM_DELLINK, &sync);
78+
79+
rtnl_route_read_protocol_names(DefaultRtProtoPath);
80+
81+
std::string suppressionEnabledStr;
82+
deviceMetadataTable.hget("localhost", "suppress-fib-pending", suppressionEnabledStr);
83+
if (suppressionEnabledStr == "enabled")
84+
{
85+
routeResponseChannel = std::make_unique<NotificationConsumer>(&applStateDb, routeResponseChannelName);
86+
sync.setSuppressionEnabled(true);
87+
}
5988

6089
while (true)
6190
{
6291
try
6392
{
6493
FpmLink fpm(&sync);
94+
6595
Select s;
6696
SelectableTimer warmStartTimer(timespec{0, 0});
6797
// Before the EOIU flags are detected, check them periodically. The check also stops once reconciliation is detected as done.
@@ -80,6 +110,13 @@ int main(int argc, char **argv)
80110
cout << "Connected!" << endl;
81111

82112
s.addSelectable(&fpm);
113+
s.addSelectable(&netlink);
114+
s.addSelectable(&deviceMetadataTableSubscriber);
115+
116+
if (sync.isSuppressionEnabled())
117+
{
118+
s.addSelectable(routeResponseChannel.get());
119+
}
83120

84121
/* If warm-restart feature is enabled, execute 'restoration' logic */
85122
bool warmStartEnabled = sync.m_warmStartHelper.checkAndStart();
@@ -139,11 +176,8 @@ int main(int argc, char **argv)
139176
SWSS_LOG_NOTICE("Warm-Restart EOIU hold timer expired.");
140177
}
141178

142-
if (sync.m_warmStartHelper.inProgress())
143-
{
144-
sync.m_warmStartHelper.reconcile();
145-
SWSS_LOG_NOTICE("Warm-Restart reconciliation processed.");
146-
}
179+
sync.onWarmStartEnd(applStateDb);
180+
147181
// remove the one-shot timer.
148182
s.removeSelectable(temps);
149183
pipeline.flush();
@@ -182,6 +216,74 @@ int main(int argc, char **argv)
182216
s.removeSelectable(&eoiuCheckTimer);
183217
}
184218
}
219+
else if (temps == &deviceMetadataTableSubscriber)
220+
{
221+
std::deque<KeyOpFieldsValuesTuple> keyOpFvsQueue;
222+
deviceMetadataTableSubscriber.pops(keyOpFvsQueue);
223+
224+
for (const auto& keyOpFvs: keyOpFvsQueue)
225+
{
226+
const auto& key = kfvKey(keyOpFvs);
227+
const auto& op = kfvOp(keyOpFvs);
228+
const auto& fvs = kfvFieldsValues(keyOpFvs);
229+
230+
if (op != SET_COMMAND)
231+
{
232+
continue;
233+
}
234+
235+
if (key != "localhost")
236+
{
237+
continue;
238+
}
239+
240+
for (const auto& fv: fvs)
241+
{
242+
const auto& field = fvField(fv);
243+
const auto& value = fvValue(fv);
244+
245+
if (field != "suppress-fib-pending")
246+
{
247+
continue;
248+
}
249+
250+
bool shouldEnable = (value == "enabled");
251+
252+
if (shouldEnable && !sync.isSuppressionEnabled())
253+
{
254+
routeResponseChannel = std::make_unique<NotificationConsumer>(&applStateDb, routeResponseChannelName);
255+
sync.setSuppressionEnabled(true);
256+
s.addSelectable(routeResponseChannel.get());
257+
}
258+
else if (!shouldEnable && sync.isSuppressionEnabled())
259+
{
260+
/* When disabling suppression we mark all existing routes offloaded in zebra
261+
* as there could be some transient routes which are pending response from
262+
* orchagent, thus such updates might be missing. Since we are disabling suppression
263+
* we no longer care about real HW offload status and can mark all routes as offloaded
264+
* to avoid routes stuck in suppressed state after transition. */
265+
sync.markRoutesOffloaded(db);
266+
267+
sync.setSuppressionEnabled(false);
268+
s.removeSelectable(routeResponseChannel.get());
269+
routeResponseChannel.reset();
270+
}
271+
} // end for fvs
272+
} // end for keyOpFvsQueue
273+
}
274+
else if (routeResponseChannel && (temps == routeResponseChannel.get()))
275+
{
276+
std::deque<KeyOpFieldsValuesTuple> notifications;
277+
routeResponseChannel->pops(notifications);
278+
279+
for (const auto& notification: notifications)
280+
{
281+
const auto& key = kfvKey(notification);
282+
const auto& fieldValues = kfvFieldsValues(notification);
283+
284+
sync.onRouteResponse(key, fieldValues);
285+
}
286+
}
185287
else if (!warmStartEnabled || sync.m_warmStartHelper.isReconciled())
186288
{
187289
pipeline.flush();

0 commit comments

Comments
 (0)