Skip to content

Commit 88a1982

Browse files
tahmed-dev and lguohan
authored and committed
[syncd] Use steady clock for TimerWatchdog (sonic-net#613)
The system clock can roll back in big jumps, which causes TimerWatchdog to throw and crash syncd. This change uses the steady clock instead. The steady clock is guaranteed to be monotonic, i.e. it never goes backwards. Signed-off-by: Tamer Ahmed <[email protected]>
1 parent e50cf66 commit 88a1982

File tree

2 files changed

+37
-7
lines changed

2 files changed

+37
-7
lines changed

syncd/TimerWatchdog.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ TimerWatchdog::TimerWatchdog(
88
_In_ int64_t warnTimespan):
99
m_run(true),
1010
m_warnTimespan(warnTimespan),
11-
m_callback(0)
11+
m_callback(nullptr)
1212
{
1313
SWSS_LOG_ENTER();
1414

@@ -68,11 +68,8 @@ void TimerWatchdog::threadFunction()
6868

6969
SWSS_LOG_NOTICE("starting timer watchdog thread");
7070

71-
int id = 0;
72-
7371
while (m_run)
7472
{
75-
id++;
7673
std::this_thread::sleep_for(std::chrono::seconds(1));
7774

7875
// we make local copies, since executing functions can be so fast that
@@ -94,11 +91,11 @@ void TimerWatchdog::threadFunction()
9491
// executing, this negative span can be arbitrary long even hours,
9592
// and that is fine, since we don't know when OA makes next
9693
// function call
97-
94+
9895
span = now - start; // this must be always non negative
9996

10097
SWSS_LOG_NOTICE(" new span = %ld", span);
101-
98+
10299
if (span < 0)
103100
SWSS_LOG_THROW("negative span 'now - start': %ld - %ld", now, start);
104101

@@ -129,5 +126,5 @@ int64_t TimerWatchdog::getTimeSinceEpoch()
129126
{
130127
SWSS_LOG_ENTER();
131128

132-
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
129+
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
133130
}

syncd/tests.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <arpa/inet.h>
22
#include <unistd.h>
3+
#include <time.h>
4+
#include <sys/time.h>
5+
#include <stdlib.h>
6+
#include <stdio.h>
37

48
extern "C" {
59
#include <sai.h>
@@ -10,6 +14,7 @@ extern "C" {
1014
#include "MetadataLogger.h"
1115
#include "sairedis.h"
1216
#include "sairediscommon.h"
17+
#include "TimerWatchdog.h"
1318

1419
#include "meta/sai_serialize.h"
1520
#include "meta/OidRefCounter.h"
@@ -775,6 +780,32 @@ void test_bulk_route_create()
775780
sleep(10000);
776781
}
777782

783+
void test_watchdog_timer_clock_rollback()
784+
{
785+
SWSS_LOG_ENTER();
786+
787+
const int64_t WARN_TIMESPAN_USEC = 30 * 1000000;
788+
const uint8_t ROLLBACK_TIME_SEC = 5;
789+
const uint8_t LONG_RUNNING_API_TIME_SEC = 3;
790+
791+
// take note of current time
792+
struct timeval currentTime;
793+
gettimeofday(&currentTime, NULL);
794+
795+
// start watchdog timer
796+
TimerWatchdog twd(WARN_TIMESPAN_USEC);
797+
twd.setStartTime();
798+
799+
// roll back time by ROLLBACK_TIME_SEC
800+
currentTime.tv_sec -= ROLLBACK_TIME_SEC;
801+
assert(settimeofday(&currentTime, NULL) == 0);
802+
803+
// Simulate long running API
804+
sleep(LONG_RUNNING_API_TIME_SEC);
805+
806+
twd.setEndTime();
807+
}
808+
778809
int main()
779810
{
780811
swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG);
@@ -800,6 +831,8 @@ int main()
800831
sai_api_uninitialize();
801832

802833
printf("\n[ %s ]\n\n", sai_serialize_status(SAI_STATUS_SUCCESS).c_str());
834+
835+
test_watchdog_timer_clock_rollback();
803836
}
804837
catch (const std::exception &e)
805838
{

0 commit comments

Comments
 (0)