Skip to content

Commit 9044962

Browse files
authored
Add support for reconciliation after warm restart (#76)
Description of PR. Summary: Fixes # (issue). This PR adds support for linkmgrd process reconciliation after warm restart. sign-off: Jing Zhang [email protected] Type of change: Bug fix, New feature, Doc/Design, Unit test. Approach. What is the motivation for this PR? One step of the warm reboot procedure for dual ToR is to configure the switch into manual mode. Before the warm reboot finalizer executes config save, we want to configure the switch back into auto mode, so that config_db.json is consistent before and after the reboot. How did you do it? When linkmgrd is initializing, get the system-wide warm reboot flag from WARM_RESTART_ENABLE_TABLE. If flag == true, start a reconciliation timer. Maintain a mux port count based on the MUX_CABLE|PORTNAME count. When one port completes reconciliation, if warm restart flag == true, configure it back into auto mode and reduce the reconciliation port count by 1. If the reconciliation timer expires or port count == 0, set state to reconciled in WARM_RESTART_TABLE|linkmgrd. How did you verify/test it? Unit tests. Tested on a dual ToR testbed. Ports were in auto mode after warm restart completed. Entry WARM_RESTART_TABLE|linkmgrd was added as expected.
1 parent 58d8aae commit 9044962

13 files changed

+352
-2
lines changed

src/DbInterface.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,7 @@ void DbInterface::getServerIpAddress(std::shared_ptr<swss::DBConnector> configDb
715715
std::vector<swss::KeyOpFieldsValuesTuple> entries;
716716

717717
configDbMuxCableTable.getContent(entries);
718+
mMuxManagerPtr->updateWarmRestartReconciliationCount(entries.size());
718719
processServerIpAddress(entries);
719720
}
720721

@@ -817,6 +818,54 @@ void DbInterface::getSoCIpAddress(std::shared_ptr<swss::DBConnector> configDbCon
817818
processSoCIpAddress(entries);
818819
}
819820

821+
// ---> warmRestartReconciliation(const std::string &portName);
822+
//
823+
// port warm restart reconciliation procedure
824+
//
825+
void DbInterface::warmRestartReconciliation(const std::string &portName)
826+
{
827+
MUXLOGDEBUG(portName);
828+
829+
if (isWarmStart()) {
830+
setMuxMode(portName, "auto");
831+
mMuxManagerPtr->updateWarmRestartReconciliationCount(-1);
832+
}
833+
}
834+
835+
//
836+
// ---> setMuxMode
837+
//
838+
// set config db mux mode
839+
//
840+
void DbInterface::setMuxMode(const std::string &portName, const std::string state)
841+
{
842+
MUXLOGDEBUG(portName);
843+
844+
boost::asio::io_service &ioService = mStrand.context();
845+
ioService.post(mStrand.wrap(boost::bind(
846+
&DbInterface::handleSetMuxMode,
847+
this,
848+
portName,
849+
state
850+
)));
851+
}
852+
853+
//
854+
// ---> handleSetMuxmode
855+
//
856+
// handle set mux mode
857+
//
858+
void DbInterface::handleSetMuxMode(const std::string &portName, const std::string state)
859+
{
860+
MUXLOGWARNING(boost::format("%s: configuring mux mode to %s after warm restart") % portName % state);
861+
862+
std::shared_ptr<swss::DBConnector> configDbPtr = std::make_shared<swss::DBConnector> ("CONFIG_DB", 0);
863+
std::shared_ptr<swss::Table> configDbMuxCableTablePtr = std::make_shared<swss::Table> (
864+
configDbPtr.get(), CFG_MUX_CABLE_TABLE_NAME
865+
);
866+
configDbMuxCableTablePtr->hset(portName, "state", state);
867+
}
868+
820869
//
821870
// ---> processMuxPortConfigNotifiction(std::deque<swss::KeyOpFieldsValuesTuple> &entries);
822871
//

src/DbInterface.h

+63
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "swss/dbconnector.h"
3535
#include "swss/producerstatetable.h"
3636
#include "swss/subscriberstatetable.h"
37+
#include "swss/warm_restart.h"
3738

3839
#include "link_manager/LinkManagerStateMachineActiveStandby.h"
3940
#include "mux_state/MuxState.h"
@@ -274,6 +275,56 @@ class DbInterface
274275
*/
275276
void stopSwssNotificationPoll() {mPollSwssNotifcation = false;};
276277

278+
/**
279+
* @method setMuxMode
280+
*
281+
* @brief set config db mux mode
282+
*
283+
* @param portName (in) MUX port name
284+
* @param state (in) MUX mode state
285+
*
286+
* @return none
287+
*/
288+
void setMuxMode(const std::string &portName, const std::string state);
289+
290+
/**
291+
* @method warmRestartReconciliation
292+
*
293+
* @brief port warm restart reconciliation procedure
294+
*
295+
* @param portName(in) Mux port name
296+
*
297+
* @return none
298+
*/
299+
void warmRestartReconciliation(const std::string &portName);
300+
301+
/**
302+
* @method isWarmStart
303+
*
304+
* @brief is warm start or not
305+
*
306+
* @return system flag for warm start context
307+
*/
308+
virtual bool isWarmStart(){return swss::WarmStart::isWarmStart();};
309+
310+
/**
311+
* @method getWarmStartTimer
312+
*
313+
* @brief get warm start time out in sec
314+
*
315+
* @return timeout in sec
316+
*/
317+
virtual uint32_t getWarmStartTimer(){return swss::WarmStart::getWarmStartTimer("linkmgrd", "mux");};
318+
319+
/**
320+
* @method setWarmStartStateReconciled
321+
*
322+
* @brief set warm start state reconciled
323+
*
324+
* @return none
325+
*/
326+
virtual void setWarmStartStateReconciled(){swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::RECONCILED);};
327+
277328
private:
278329
friend class test::MuxManagerTest;
279330

@@ -399,6 +450,18 @@ class DbInterface
399450
const uint64_t expectedPacketCount
400451
);
401452

453+
/**
454+
* @method handleSetMuxMode
455+
*
456+
* @brief handle set mux mode
457+
*
458+
* @param portName (in) MUX port name
459+
* @param state (in) MUX mode state
460+
*
461+
* @return none
462+
*/
463+
virtual void handleSetMuxMode(const std::string &portName, const std::string state);
464+
402465
/**
403466
*@method processTorMacAddress
404467
*

src/LinkMgrdMain.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <boost/lexical_cast.hpp>
2727
#include <boost/program_options.hpp>
2828

29+
#include "swss/warm_restart.h"
30+
2931
#include "MuxManager.h"
3032
#include "MuxPort.h"
3133
#include "common/MuxConfig.h"
@@ -123,6 +125,13 @@ int main(int argc, const char* argv[])
123125
// initialize static data
124126
link_prober::IcmpPayload::generateGuid();
125127

128+
// warm restart static
129+
swss::WarmStart::initialize("linkmgrd", "mux");
130+
swss::WarmStart::checkWarmStart("linkmgrd", "mux");
131+
if (swss::WarmStart::isWarmStart()) {
132+
swss::WarmStart::setWarmStartState("linkmgrd", swss::WarmStart::INITIALIZED);
133+
}
134+
126135
std::shared_ptr<mux::MuxManager> muxManagerPtr = std::make_shared<mux::MuxManager> ();
127136
muxManagerPtr->initialize(measureSwitchover, defaultRoute);
128137
muxManagerPtr->run();

src/MuxManager.cpp

+69-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ MuxManager::MuxManager() :
4343
mMuxConfig(),
4444
mWork(mIoService),
4545
mSignalSet(boost::asio::signal_set(mIoService, SIGINT, SIGTERM)),
46-
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService))
46+
mDbInterfacePtr(std::make_shared<mux::DbInterface> (this, &mIoService)),
47+
mStrand(mIoService),
48+
mReconciliationTimer(mIoService)
4749
{
4850
mSignalSet.add(SIGUSR1);
4951
mSignalSet.add(SIGUSR2);
@@ -85,6 +87,11 @@ void MuxManager::initialize(bool enable_feature_measurement, bool enable_feature
8587

8688
mDbInterfacePtr->initialize();
8789

90+
if (mDbInterfacePtr->isWarmStart()) {
91+
MUXLOGINFO("Detected warm restart context, starting reconciliation timer.");
92+
startWarmRestartReconciliationTimer(mDbInterfacePtr->getWarmStartTimer());
93+
}
94+
8895
mMuxConfig.enableSwitchoverMeasurement(enable_feature_measurement);
8996
mMuxConfig.enableDefaultRouteFeature(enable_feature_default_route);
9097
}
@@ -507,4 +514,65 @@ void MuxManager::generateServerMac(uint16_t serverId, std::array<uint8_t, ETHER_
507514
}
508515
}
509516

517+
// ---> updateWarmRestartReconciliationCount(int increment);
518+
//
519+
// update warm restart reconciliation count
520+
//
521+
void MuxManager::updateWarmRestartReconciliationCount(int increment)
522+
{
523+
MUXLOGDEBUG(increment);
524+
525+
boost::asio::io_service &ioService = mStrand.context();
526+
527+
ioService.post(mStrand.wrap(boost::bind(
528+
&MuxManager::handleUpdateReconciliationCount,
529+
this,
530+
increment
531+
)));
532+
}
533+
534+
// ---> handleUpdateReconciliationCount(int increment);
535+
//
536+
// handler of updating reconciliation port count
537+
//
538+
void MuxManager::handleUpdateReconciliationCount(int increment)
539+
{
540+
MUXLOGDEBUG(mPortReconciliationCount);
541+
542+
mPortReconciliationCount += increment;
543+
544+
if(mPortReconciliationCount == 0) {
545+
mReconciliationTimer.cancel();
546+
}
547+
}
548+
549+
// ---> startWarmRestartReconciliationTimer
550+
//
551+
// start warm restart reconciliation timer
552+
//
553+
void MuxManager::startWarmRestartReconciliationTimer(uint32_t timeout)
554+
{
555+
mReconciliationTimer.expires_from_now(boost::posix_time::seconds(
556+
timeout == 0? mMuxConfig.getMuxReconciliationTimeout_sec():timeout
557+
));
558+
mReconciliationTimer.async_wait(mStrand.wrap(boost::bind(
559+
&MuxManager::handleWarmRestartReconciliationTimeout,
560+
this,
561+
boost::asio::placeholders::error
562+
)));
563+
}
564+
565+
// ---> handleWarmRestartReconciliationTimeout
566+
//
567+
// handle warm restart reconciliationTimeout
568+
//
569+
void MuxManager::handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode)
570+
{
571+
if (errorCode == boost::system::errc::success) {
572+
MUXLOGWARNING("Reconciliation timed out after warm restart, set service to reconciled now.");
573+
}
574+
575+
mDbInterfacePtr->setWarmStartStateReconciled();
576+
}
577+
510578
} /* namespace mux */

src/MuxManager.h

+47
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,17 @@ class MuxManager
441441
*/
442442
void addOrUpdateDefaultRouteState(bool is_v4, const std::string &routeState);
443443

444+
/**
445+
* @method updateWarmRestartReconciliationCount
446+
*
447+
* @brief update warm restart reconciliation count
448+
*
449+
* @param increment
450+
*
451+
* @return none
452+
*/
453+
void updateWarmRestartReconciliationCount(int increment);
454+
444455
private:
445456
/**
446457
*@method getMuxPortCableType
@@ -505,6 +516,38 @@ class MuxManager
505516
*/
506517
void setDbInterfacePtr(std::shared_ptr<mux::DbInterface> dbInterfacePtr) {mDbInterfacePtr = dbInterfacePtr;};
507518

519+
private:
520+
/**
521+
* @method startWarmRestartReconciliationTimer
522+
*
523+
* @brief start warm restart reconciliation timer
524+
*
525+
* @return none
526+
*/
527+
void startWarmRestartReconciliationTimer(uint32_t timeout=0);
528+
529+
/**
530+
* @method handleWarmRestartReconciliationTimeout
531+
*
532+
* @brief handle warm restart reconciliationTimeout
533+
*
534+
* @param errorCode (in) Boost error code
535+
*
536+
* @return none
537+
*/
538+
void handleWarmRestartReconciliationTimeout(const boost::system::error_code errorCode);
539+
540+
/**
541+
* @method handleUpdateReconciliationCount
542+
*
543+
* @brief handler of updating reconciliation port count
544+
*
545+
* @param increment
546+
*
547+
* @return none
548+
*/
549+
void handleUpdateReconciliationCount(int increment);
550+
508551
private:
509552
common::MuxConfig mMuxConfig;
510553

@@ -513,6 +556,10 @@ class MuxManager
513556
boost::thread_group mThreadGroup;
514557
boost::asio::signal_set mSignalSet;
515558

559+
boost::asio::io_service::strand mStrand;
560+
boost::asio::deadline_timer mReconciliationTimer;
561+
uint16_t mPortReconciliationCount = 0;
562+
516563
std::shared_ptr<mux::DbInterface> mDbInterfacePtr;
517564

518565
PortMap mPortMap;

src/MuxPort.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -405,4 +405,16 @@ void MuxPort::probeMuxState()
405405
}
406406
}
407407

408+
//
409+
// ---> warmRestartReconciliation();
410+
//
411+
// brief port warm restart reconciliation procedure
412+
//
413+
void MuxPort::warmRestartReconciliation()
414+
{
415+
if (mMuxPortConfig.getMode() != common::MuxPortConfig::Mode::Auto) {
416+
mDbInterfacePtr->warmRestartReconciliation(mMuxPortConfig.getPortName());
417+
}
418+
}
419+
408420
} /* namespace mux */

src/MuxPort.h

+9
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,15 @@ class MuxPort: public std::enable_shared_from_this<MuxPort>
386386
*/
387387
void resetPckLossCount();
388388

389+
/**
390+
* @method warmRestartReconciliation
391+
*
392+
* @brief port warm restart reconciliation procedure
393+
*
394+
* @return none
395+
*/
396+
void warmRestartReconciliation();
397+
389398
protected:
390399
friend class test::MuxManagerTest;
391400
friend class test::FakeMuxPort;

src/common/MuxConfig.h

+11
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,15 @@ class MuxConfig
374374
*/
375375
inline bool getIfEnableUseTorMac() {return mEnableUseTorMac;};
376376

377+
/**
378+
* @method getMuxReconciliationTimeout
379+
*
380+
* @brief getter of mux reconciliation time out
381+
*
382+
* @return timeout in sec
383+
*/
384+
inline uint32_t getMuxReconciliationTimeout_sec(){return mMuxReconciliationTimeout_sec;};
385+
377386
private:
378387
uint8_t mNumberOfThreads = 5;
379388
uint32_t mTimeoutIpv4_msec = 100;
@@ -387,6 +396,8 @@ class MuxConfig
387396
bool mEnableSwitchoverMeasurement = false;
388397
uint32_t mDecreasedTimeoutIpv4_msec = 10;
389398

399+
uint32_t mMuxReconciliationTimeout_sec = 10;
400+
390401
bool mEnableDefaultRouteFeature = false;
391402
bool mUseWellKnownMacActiveActive = true;
392403

src/link_manager/LinkManagerStateMachineActiveStandby.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@ void ActiveStandbyStateMachine::activateStateMachine()
426426
mStartProbingFnPtr();
427427

428428
updateMuxLinkmgrState();
429+
430+
mMuxPortPtr->warmRestartReconciliation();
429431
}
430432
}
431433

0 commit comments

Comments
 (0)