Skip to content

Commit 1e48feb

Browse files
jlevequetiantianlv
authored andcommitted
[supervisor] Fix crash if system clock rolls back within startsecs; Refactor patch (sonic-net#2624)
1 parent 50a23a9 commit 1e48feb

3 files changed

+132
-95
lines changed

src/supervisor/patch/0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch renamed to src/supervisor/patch/0001-Prevent-excessive-hanging-and-crashes-if-system-cloc.patch

+130-10
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,91 @@
1-
From 43b6f3ee93593f666af2c6a7af897bf3b5ee762d Mon Sep 17 00:00:00 2001
1+
From 5b56519e68e866662d9fd9a6681b6b47d6b5fcdb Mon Sep 17 00:00:00 2001
22
From: Joe LeVeque <[email protected]>
3-
Date: Wed, 17 Jan 2018 22:27:56 +0000
4-
Subject: [PATCH 2/2] Add unit test cases to ensure state transitions work if
5-
system time has rolled backward
3+
Date: Thu, 28 Feb 2019 01:43:54 +0000
4+
Subject: [PATCH] Prevent excessive hanging and crashes if system clock rolls
5+
backward
66

77
---
8-
supervisor/tests/test_process.py | 146 +++++++++++++++++++++++++++++++++++++++
9-
1 file changed, 146 insertions(+)
8+
supervisor/process.py | 36 ++++++
9+
supervisor/tests/test_process.py | 183 +++++++++++++++++++++++++++++++
10+
2 files changed, 219 insertions(+)
1011

12+
diff --git a/supervisor/process.py b/supervisor/process.py
13+
index f9ddcd9..5f73e6b 100644
14+
--- a/supervisor/process.py
15+
+++ b/supervisor/process.py
16+
@@ -347,6 +347,28 @@ class Subprocess:
17+
options.write(2, "supervisor: child process was not spawned\n")
18+
options._exit(127) # exit process with code for spawn failure
19+
20+
+ def _check_and_adjust_for_system_clock_rollback(self, test_time):
21+
+ """
22+
+ Check if system clock has rolled backward beyond test_time. If so, set
23+
+ affected timestamps to test_time.
24+
+ """
25+
+ if self.state == ProcessStates.STARTING:
26+
+ if test_time < self.laststart:
27+
+ self.laststart = test_time;
28+
+ if self.delay > 0 and test_time < (self.delay - self.config.startsecs):
29+
+ self.delay = test_time + self.config.startsecs
30+
+ elif self.state == ProcessStates.RUNNING:
31+
+ if test_time > self.laststart and test_time < (self.laststart + self.config.startsecs):
32+
+ self.laststart = test_time - self.config.startsecs
33+
+ elif self.state == ProcessStates.STOPPING:
34+
+ if test_time < self.laststopreport:
35+
+ self.laststopreport = test_time;
36+
+ if self.delay > 0 and test_time < (self.delay - self.config.stopwaitsecs):
37+
+ self.delay = test_time + self.config.stopwaitsecs
38+
+ elif self.state == ProcessStates.BACKOFF:
39+
+ if self.delay > 0 and test_time < (self.delay - self.backoff):
40+
+ self.delay = test_time + self.backoff
41+
+
42+
def stop(self):
43+
""" Administrative stop """
44+
self.administrative_stop = True
45+
@@ -357,6 +379,9 @@ class Subprocess:
46+
""" Log a 'waiting for x to stop' message with throttling. """
47+
if self.state == ProcessStates.STOPPING:
48+
now = time.time()
49+
+
50+
+ self._check_and_adjust_for_system_clock_rollback(now)
51+
+
52+
if now > (self.laststopreport + 2): # every 2 seconds
53+
self.config.options.logger.info(
54+
'waiting for %s to stop' % self.config.name)
55+
@@ -487,6 +512,9 @@ class Subprocess:
56+
es, msg = decode_wait_status(sts)
57+
58+
now = time.time()
59+
+
60+
+ self._check_and_adjust_for_system_clock_rollback(now)
61+
+
62+
self.laststop = now
63+
processname = self.config.name
64+
65+
@@ -586,6 +614,8 @@ class Subprocess:
66+
now = time.time()
67+
state = self.state
68+
69+
+ self._check_and_adjust_for_system_clock_rollback(now)
70+
+
71+
logger = self.config.options.logger
72+
73+
if self.config.options.mood > SupervisorStates.RESTARTING:
74+
@@ -805,6 +835,12 @@ class EventListenerPool(ProcessGroupBase):
75+
if dispatch_capable:
76+
if self.dispatch_throttle:
77+
now = time.time()
78+
+
79+
+ if now < self.last_dispatch:
80+
+ # The system clock appears to have moved backward
81+
+ # Reset self.last_dispatch accordingly
82+
+ self.last_dispatch = now;
83+
+
84+
if now - self.last_dispatch < self.dispatch_throttle:
85+
return
86+
self.dispatch()
1187
diff --git a/supervisor/tests/test_process.py b/supervisor/tests/test_process.py
12-
index 7f9bab2..ea16315 100644
88+
index 7f9bab2..6808f60 100644
1389
--- a/supervisor/tests/test_process.py
1490
+++ b/supervisor/tests/test_process.py
1591
@@ -736,6 +736,40 @@ class SubprocessTests(unittest.TestCase):
@@ -53,7 +129,51 @@ index 7f9bab2..ea16315 100644
53129
def test_give_up(self):
54130
options = DummyOptions()
55131
config = DummyPConfig(options, 'test', '/test')
56-
@@ -1402,6 +1436,92 @@ class SubprocessTests(unittest.TestCase):
132+
@@ -1105,6 +1139,43 @@ class SubprocessTests(unittest.TestCase):
133+
self.assertEqual(event.__class__, events.ProcessStateBackoffEvent)
134+
self.assertEqual(event.from_state, ProcessStates.STARTING)
135+
136+
+ # This tests the case where the process has stayed alive longer than
137+
+ # startsecs (i.e., long enough to enter the RUNNING state), however the
138+
+ # system clock has since rolled backward such that the current time is
139+
+ # greater than laststart but less than startsecs.
140+
+ def test_finish_running_state_exited_too_quickly_due_to_clock_rollback(self):
141+
+ options = DummyOptions()
142+
+ config = DummyPConfig(options, 'notthere', '/notthere',
143+
+ stdout_logfile='/tmp/foo', startsecs=10)
144+
+ instance = self._makeOne(config)
145+
+ instance.config.options.pidhistory[123] = instance
146+
+ pipes = {'stdout':'','stderr':''}
147+
+ instance.pipes = pipes
148+
+ instance.config.exitcodes =[-1]
149+
+ instance.laststart = time.time()
150+
+ from supervisor.states import ProcessStates
151+
+ from supervisor import events
152+
+ instance.state = ProcessStates.RUNNING
153+
+ L = []
154+
+ events.subscribe(events.ProcessStateEvent, lambda x: L.append(x))
155+
+ instance.pid = 123
156+
+ instance.finish(123, 1)
157+
+ self.assertFalse(instance.killing)
158+
+ self.assertEqual(instance.pid, 0)
159+
+ self.assertEqual(options.parent_pipes_closed, pipes)
160+
+ self.assertEqual(instance.pipes, {})
161+
+ self.assertEqual(instance.dispatchers, {})
162+
+ self.assertEqual(options.logger.data[0],
163+
+ 'exited: notthere (terminated by SIGHUP; expected)')
164+
+ self.assertEqual(instance.exitstatus, -1)
165+
+ self.assertEqual(len(L), 1)
166+
+ event = L[0]
167+
+ self.assertEqual(event.__class__,
168+
+ events.ProcessStateExitedEvent)
169+
+ self.assertEqual(event.expected, True)
170+
+ self.assertEqual(event.extra_values, [('expected', True), ('pid', 123)])
171+
+ self.assertEqual(event.from_state, ProcessStates.RUNNING)
172+
+
173+
def test_finish_running_state_laststart_in_future(self):
174+
options = DummyOptions()
175+
config = DummyPConfig(options, 'notthere', '/notthere',
176+
@@ -1402,6 +1473,92 @@ class SubprocessTests(unittest.TestCase):
57177
event = L[0]
58178
self.assertEqual(event.__class__, events.ProcessStateRunningEvent)
59179

@@ -146,7 +266,7 @@ index 7f9bab2..ea16315 100644
146266
def test_transition_backoff_to_fatal(self):
147267
from supervisor import events
148268
L = []
149-
@@ -2033,6 +2153,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests):
269+
@@ -2033,6 +2190,32 @@ class EventListenerPoolTests(ProcessGroupBaseTests):
150270
self.assertEqual(process1.listener_state, EventListenerStates.BUSY)
151271
self.assertEqual(process1.event, event)
152272

@@ -180,5 +300,5 @@ index 7f9bab2..ea16315 100644
180300
options = DummyOptions()
181301
from supervisor.states import ProcessStates
182302
--
183-
2.1.4
303+
2.17.1
184304

src/supervisor/patch/0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch

-83
This file was deleted.

src/supervisor/patch/series

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
0001-Prevent-excessive-hanging-delays-in-process-state-tr.patch
2-
0002-Add-unit-test-cases-to-ensure-state-transitions-work.patch
1+
# This series applies on GIT commit 34b690aea8cf7572d73765c95c54a83261d69aad
2+
0001-Prevent-excessive-hanging-and-crashes-if-system-cloc.patch

0 commit comments

Comments
 (0)