Skip to content

Commit 65720c1

Browse files
authored
Send heartbeat during warm reboot freeze (#2923) (#2956)
Orchagent sends heartbeats during warm reboot to prevent the Orchagent stuck alert. Why I did it: Orchagent freezes during warm reboot, and supervisor-proc-exit-listener then generates a false alert during warm reboot: sonic-net/sonic-buildimage#16686. Work item tracking: Microsoft ADO: 25295846. How I did it: Send heartbeats during the warm-reboot freeze. How to verify it: Pass all UT. Manually verify the issue is fixed by checking syslog.
1 parent 9b9ac4f commit 65720c1

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

orchagent/orchdaemon.cpp

+14-1
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ void OrchDaemon::start()
824824
flush();
825825

826826
SWSS_LOG_WARN("Orchagent is frozen for warm restart!");
827-
sleep(UINT_MAX);
827+
freezeAndHeartBeat(UINT_MAX);
828828
}
829829
}
830830
}
@@ -993,6 +993,19 @@ void OrchDaemon::heartBeat(std::chrono::time_point<std::chrono::high_resolution_
993993
}
994994
}
995995

996+
// Blocks the caller in a loop of one-second sleeps, calling heartBeat() with
// the current high_resolution_clock time before each sleep. Used by start()
// in place of a single long sleep() once Orchagent is frozen for warm restart.
//
// duration: number of loop iterations to run (one sleep(1) per iteration, so
//           roughly that many seconds). A value of 0 returns immediately.
//
// Whether a heartbeat is actually emitted on a given iteration is decided
// inside heartBeat(), which is not shown in this hunk.
void OrchDaemon::freezeAndHeartBeat(unsigned int duration)
997+
{
998+
while (duration > 0)
999+
{
1000+
// Send heartbeat message to prevent Orchagent stuck alert.
1001+
auto tend = std::chrono::high_resolution_clock::now();
1002+
heartBeat(tend);
1003+
1004+
duration--;
1005+
// NOTE(review): POSIX sleep() can return early when a signal is delivered,
// so the total time spent here may be shorter than `duration` seconds —
// confirm whether that matters for the warm-restart freeze.
sleep(1);
1006+
}
1007+
}
1008+
9961009
FabricOrchDaemon::FabricOrchDaemon(DBConnector *applDb, DBConnector *configDb, DBConnector *stateDb, DBConnector *chassisAppDb) :
9971010
OrchDaemon(applDb, configDb, stateDb, chassisAppDb),
9981011
m_applDb(applDb),

orchagent/orchdaemon.h

+2
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ class OrchDaemon
9696
void flush();
9797

9898
void heartBeat(std::chrono::time_point<std::chrono::high_resolution_clock> tcurrent);
99+
100+
void freezeAndHeartBeat(unsigned int duration);
99101
};
100102

101103
class FabricOrchDaemon : public OrchDaemon

0 commit comments

Comments
 (0)