Skip to content

Commit 1fdf0e6

Browse files
authored
Add support for reconciliation after warm restart (sonic-net#76) (sonic-net#97)
Picking up commit below from master branch: 9044962 Jing Zhang Mon Jul 18 15:38:04 2022 -0700 Add support for reconciliation after warm restart (sonic-net#76) Description of PR Summary: Fixes # (issue) This PR adds support for linkmgrd process reconciliation after warm restart. sign-off: Jing Zhang [email protected] What is the motivation for this PR? One step of the warm reboot procedure for dual ToR is to configure the switch into manual mode. Before the warm reboot finalizer executes config save, we want to configure the switch back into auto mode, so that config_db.json is consistent before and after the reboot. How did you do it? When linkmgrd is initializing, get the systemwide warm reboot flag from WARM_RESTART_ENABLE_TABLE. If flag == true, start a reconciliation timer. Maintain a mux port count based on the MUX_CABLE|PORTNAME count. When one port completes reconciliation, if warm restart flag == true, configure it back into auto mode and reduce the reconciliation port count by 1. If the reconciliation timer expires or port count == 0, set state to reconciled in WARM_RESTART_TABLE|linkmgrd. How did you verify/test it? Unit tests. Tested on a dual ToR testbed. Ports were in auto mode after warm restart completed. Entry WARM_RESTART_TABLE|linkmgrd was added as expected.
1 parent a2367d0 commit 1fdf0e6

13 files changed

+352
-2
lines changed

src/DbInterface.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -581,9 +581,58 @@ void DbInterface::getServerIpAddress(std::shared_ptr<swss::DBConnector> configDb
581581
std::vector<swss::KeyOpFieldsValuesTuple> entries;
582582

583583
configDbMuxCableTable.getContent(entries);
584+
mMuxManagerPtr->updateWarmRestartReconciliationCount(entries.size());
584585
processServerIpAddress(entries);
585586
}
586587

588+
// ---> warmRestartReconciliation(const std::string &portName);
589+
//
590+
// port warm restart reconciliation procedure
591+
//
592+
void DbInterface::warmRestartReconciliation(const std::string &portName)
593+
{
594+
MUXLOGDEBUG(portName);
595+
596+
if (isWarmStart()) {
597+
setMuxMode(portName, "auto");
598+
mMuxManagerPtr->updateWarmRestartReconciliationCount(-1);
599+
}
600+
}
601+
602+
//
603+
// ---> setMuxMode
604+
//
605+
// set config db mux mode
606+
//
607+
void DbInterface::setMuxMode(const std::string &portName, const std::string state)
608+
{
609+
MUXLOGDEBUG(portName);
610+
611+
boost::asio::io_service &ioService = mStrand.context();
612+
ioService.post(mStrand.wrap(boost::bind(
613+
&DbInterface::handleSetMuxMode,
614+
this,
615+
portName,
616+
state
617+
)));
618+
}
619+
620+
//
621+
// ---> handleSetMuxMode
622+
//
623+
// handle set mux mode
624+
//
625+
void DbInterface::handleSetMuxMode(const std::string &portName, const std::string state)
626+
{
627+
MUXLOGWARNING(boost::format("%s: configuring mux mode to %s after warm restart") % portName % state);
628+
629+
std::shared_ptr<swss::DBConnector> configDbPtr = std::make_shared<swss::DBConnector> ("CONFIG_DB", 0);
630+
std::shared_ptr<swss::Table> configDbMuxCableTablePtr = std::make_shared<swss::Table> (
631+
configDbPtr.get(), CFG_MUX_CABLE_TABLE_NAME
632+
);
633+
configDbMuxCableTablePtr->hset(portName, "state", state);
634+
}
635+
587636
//
588637
// ---> processMuxPortConfigNotifiction(std::deque<swss::KeyOpFieldsValuesTuple> &entries);
589638
//

src/DbInterface.h

+63
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "swss/dbconnector.h"
3535
#include "swss/producerstatetable.h"
3636
#include "swss/subscriberstatetable.h"
37+
#include "swss/warm_restart.h"
3738

3839
#include "link_manager/LinkManagerStateMachine.h"
3940
#include "mux_state/MuxState.h"
@@ -245,6 +246,56 @@ class DbInterface
245246
*/
246247
void stopSwssNotificationPoll()
{
    // Clear the polling flag.
    mPollSwssNotifcation = false;
}
247248

249+
/**
250+
* @method setMuxMode
251+
*
252+
* @brief set config db mux mode
253+
*
254+
* @param portName (in) MUX port name
255+
* @param state (in) MUX mode state
256+
*
257+
* @return none
258+
*/
259+
void setMuxMode(const std::string &portName, const std::string state);
260+
261+
/**
262+
* @method warmRestartReconciliation
263+
*
264+
* @brief port warm restart reconciliation procedure
265+
*
266+
* @param portName(in) Mux port name
267+
*
268+
* @return none
269+
*/
270+
void warmRestartReconciliation(const std::string &portName);
271+
272+
/**
273+
* @method isWarmStart
274+
*
275+
* @brief is warm start or not
276+
*
277+
* @return system flag for warm start context
278+
*/
279+
virtual bool isWarmStart()
{
    // Thin virtual wrapper around the static swss::WarmStart query.
    return swss::WarmStart::isWarmStart();
}
280+
281+
/**
282+
* @method getWarmStartTimer
283+
*
284+
* @brief get warm start time out in sec
285+
*
286+
* @return timeout in sec
287+
*/
288+
virtual uint32_t getWarmStartTimer()
{
    // Look up the warm start timer registered for application "linkmgrd"
    // in docker "mux".
    return swss::WarmStart::getWarmStartTimer("linkmgrd", "mux");
}
289+
290+
/**
291+
* @method setWarmStartStateReconciled
292+
*
293+
* @brief set warm start state reconciled
294+
*
295+
* @return none
296+
*/
297+
virtual void setWarmStartStateReconciled()
{
    // Report the RECONCILED warm start state for "linkmgrd".
    swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::RECONCILED);
}
298+
248299
private:
249300
friend class test::MuxManagerTest;
250301

@@ -347,6 +398,18 @@ class DbInterface
347398
const uint64_t expectedPacketCount
348399
);
349400

401+
/**
402+
* @method handleSetMuxMode
403+
*
404+
* @brief handle set mux mode
405+
*
406+
* @param portName (in) MUX port name
407+
* @param state (in) MUX mode state
408+
*
409+
* @return none
410+
*/
411+
virtual void handleSetMuxMode(const std::string &portName, const std::string state);
412+
350413
/**
351414
*@method processTorMacAddress
352415
*

src/LinkMgrdMain.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <boost/lexical_cast.hpp>
2727
#include <boost/program_options.hpp>
2828

29+
#include "swss/warm_restart.h"
30+
2931
#include "MuxManager.h"
3032
#include "MuxPort.h"
3133
#include "common/MuxConfig.h"
@@ -123,6 +125,13 @@ int main(int argc, const char* argv[])
123125
link_prober::IcmpPayload::generateGuid();
124126
link_manager::LinkManagerStateMachine::initializeTransitionFunctionTable();
125127

128+
// warm restart static
129+
swss::WarmStart::initialize("linkmgrd", "mux");
130+
swss::WarmStart::checkWarmStart("linkmgrd", "mux");
131+
if (swss::WarmStart::isWarmStart()) {
132+
swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::INITIALIZED);
133+
}
134+
126135
std::shared_ptr<mux::MuxManager> muxManagerPtr = std::make_shared<mux::MuxManager> ();
127136
muxManagerPtr->initialize(measureSwitchover, defaultRoute);
128137
muxManagerPtr->run();

src/MuxManager.cpp

+69-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ MuxManager::MuxManager() :
4343
mMuxConfig(),
4444
mWork(mIoService),
4545
mSignalSet(boost::asio::signal_set(mIoService, SIGINT, SIGTERM)),
46-
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService))
46+
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService)),
47+
mStrand(mIoService),
48+
mReconciliationTimer(mIoService)
4749
{
4850
mSignalSet.add(SIGUSR1);
4951
mSignalSet.add(SIGUSR2);
@@ -70,6 +72,11 @@ void MuxManager::initialize(bool enable_feature_measurement, bool enable_feature
7072

7173
mDbInterfacePtr->initialize();
7274

75+
if (mDbInterfacePtr->isWarmStart()) {
76+
MUXLOGINFO("Detected warm restart context, starting reconciliation timer.");
77+
startWarmRestartReconciliationTimer(mDbInterfacePtr->getWarmStartTimer());
78+
}
79+
7380
mMuxConfig.enableSwitchoverMeasurement(enable_feature_measurement);
7481
mMuxConfig.enableDefaultRouteFeature(enable_feature_default_route);
7582
}
@@ -363,4 +370,65 @@ void MuxManager::handleProcessTerminate()
363370
mDbInterfacePtr->getBarrier().wait();
364371
}
365372

373+
// ---> updateWarmRestartReconciliationCount(int increment);
374+
//
375+
// update warm restart reconciliation count
376+
//
377+
void MuxManager::updateWarmRestartReconciliationCount(int increment)
378+
{
379+
MUXLOGDEBUG(increment);
380+
381+
boost::asio::io_service &ioService = mStrand.context();
382+
383+
ioService.post(mStrand.wrap(boost::bind(
384+
&MuxManager::handleUpdateReconciliationCount,
385+
this,
386+
increment
387+
)));
388+
}
389+
390+
// ---> handleUpdateReconciliationCount(int increment);
391+
//
392+
// handler of updating reconciliation port count
393+
//
394+
void MuxManager::handleUpdateReconciliationCount(int increment)
395+
{
396+
MUXLOGDEBUG(mPortReconciliationCount);
397+
398+
mPortReconciliationCount += increment;
399+
400+
if(mPortReconciliationCount == 0) {
401+
mReconciliationTimer.cancel();
402+
}
403+
}
404+
405+
// ---> startWarmRestartReconciliationTimer
406+
//
407+
// start warm restart reconciliation timer
408+
//
409+
void MuxManager::startWarmRestartReconciliationTimer(uint32_t timeout)
410+
{
411+
mReconciliationTimer.expires_from_now(boost::posix_time::seconds(
412+
timeout == 0? mMuxConfig.getMuxReconciliationTimeout_sec():timeout
413+
));
414+
mReconciliationTimer.async_wait(mStrand.wrap(boost::bind(
415+
&MuxManager::handleWarmRestartReconciliationTimeout,
416+
this,
417+
boost::asio::placeholders::error
418+
)));
419+
}
420+
421+
// ---> handleWarmRestartReconciliationTimeout
422+
//
423+
// handle warm restart reconciliationTimeout
424+
//
425+
void MuxManager::handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode)
{
    // A success code means the wait ran to completion; for any other code the
    // warning below is skipped.
    const bool timerExpired = (errorCode == boost::system::errc::success);
    if (timerExpired) {
        MUXLOGWARNING("Reconciliation timed out after warm restart, set service to reconciled now.");
    }

    // The reconciled state is reported regardless of the error code.
    mDbInterfacePtr->setWarmStartStateReconciled();
}
433+
366434
} /* namespace mux */

src/MuxManager.h

+47
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,17 @@ class MuxManager
353353
*/
354354
void addOrUpdateDefaultRouteState(bool is_v4, const std::string &routeState);
355355

356+
/**
357+
* @method updateWarmRestartReconciliationCount
358+
*
359+
* @brief update warm restart reconciliation count
360+
*
361+
* @param increment
362+
*
363+
* @return none
364+
*/
365+
void updateWarmRestartReconciliationCount(int increment);
366+
356367
private:
357368
/**
358369
*@method getMuxPortPtrOrThrow
@@ -397,6 +408,38 @@ class MuxManager
397408
*/
398409
void setDbInterfacePtr(std::shared_ptr<mux::DbInterface> dbInterfacePtr)
{
    // Replace the DbInterface instance held by this manager.
    mDbInterfacePtr = dbInterfacePtr;
}
399410

411+
private:
412+
/**
413+
* @method startWarmRestartReconciliationTimer
414+
*
415+
* @brief start warm restart reconciliation timer
416+
*
417+
* @return none
418+
*/
419+
void startWarmRestartReconciliationTimer(uint32_t timeout=0);
420+
421+
/**
422+
* @method handleWarmRestartReconciliationTimeout
423+
*
424+
* @brief handle warm restart reconciliationTimeout
425+
*
426+
* @param errorCode (in) Boost error code
427+
*
428+
* @return none
429+
*/
430+
void handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode);
431+
432+
/**
433+
* @method handleUpdateReconciliationCount
434+
*
435+
* @brief handler of updating reconciliation port count
436+
*
437+
* @param increment
438+
*
439+
* @return none
440+
*/
441+
void handleUpdateReconciliationCount(int increment);
442+
400443
private:
401444
common::MuxConfig mMuxConfig;
402445

@@ -405,6 +448,10 @@ class MuxManager
405448
boost::thread_group mThreadGroup;
406449
boost::asio::signal_set mSignalSet;
407450

451+
boost::asio::io_service::strand mStrand;
452+
boost::asio::deadline_timer mReconciliationTimer;
453+
uint16_t mPortReconciliationCount = 0;
454+
408455
std::shared_ptr<mux::DbInterface> mDbInterfacePtr;
409456

410457
PortMap mPortMap;

src/MuxPort.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -288,4 +288,16 @@ void MuxPort::resetPckLossCount()
288288
)));
289289
}
290290

291+
//
292+
// ---> warmRestartReconciliation();
293+
//
294+
// port warm restart reconciliation procedure
295+
//
296+
void MuxPort::warmRestartReconciliation()
297+
{
298+
if (mMuxPortConfig.getMode() != common::MuxPortConfig::Mode::Auto) {
299+
mDbInterfacePtr->warmRestartReconciliation(mMuxPortConfig.getPortName());
300+
}
301+
}
302+
291303
} /* namespace mux */

src/MuxPort.h

+9
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,15 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
310310
*/
311311
void resetPckLossCount();
312312

313+
/**
314+
* @method warmRestartReconciliation
315+
*
316+
* @brief port warm restart reconciliation procedure
317+
*
318+
* @return none
319+
*/
320+
void warmRestartReconciliation();
321+
313322
protected:
314323
friend class test::MuxManagerTest;
315324
friend class test::FakeMuxPort;

src/common/MuxConfig.h

+11
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,15 @@ class MuxConfig
316316
*/
317317
inline bool getIfEnableDefaultRouteFeature()
{
    // Current value of the default route feature flag.
    return mEnableDefaultRouteFeature;
}
318318

319+
/**
320+
* @method getMuxReconciliationTimeout
321+
*
322+
* @brief getter of mux reconciliation time out
323+
*
324+
* @return timeout in sec
325+
*/
326+
inline uint32_t getMuxReconciliationTimeout_sec()
{
    // Configured mux reconciliation timeout, in seconds.
    return mMuxReconciliationTimeout_sec;
}
327+
319328
private:
320329
uint8_t mNumberOfThreads = 5;
321330
uint32_t mTimeoutIpv4_msec = 100;
@@ -329,6 +338,8 @@ class MuxConfig
329338
bool mEnableSwitchoverMeasurement = false;
330339
uint32_t mDecreasedTimeoutIpv4_msec = 10;
331340

341+
uint32_t mMuxReconciliationTimeout_sec = 10;
342+
332343
bool mEnableDefaultRouteFeature = false;
333344

334345
std::array<uint8_t, ETHER_ADDR_LEN> mTorMacAddress;

src/link_manager/LinkManagerStateMachine.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ void LinkManagerStateMachine::activateStateMachine()
482482
mStartProbingFnPtr();
483483

484484
updateMuxLinkmgrState();
485+
486+
mMuxPortPtr->warmRestartReconciliation();
485487
}
486488
}
487489

0 commit comments

Comments
 (0)