Skip to content

Commit 4f2773c

Browse files
[generic-config-updater] Handle failed service restarts (#2020)
What I did: During a config update, updates to certain tables require a service restart. When multiple related updates are not grouped together, this can result in too many service restarts, which may then fail with "hitting start limit". When that happens, call reset-failed and try the restart again. If it fails again, pause and then try the restart once more. How I did it: When a service restart fails, call reset-failed and retry the restart; if that retry also fails, pause and then call service restart again.
1 parent 48b5e73 commit 4f2773c

File tree

2 files changed

+68
-10
lines changed

2 files changed

+68
-10
lines changed

generic_config_updater/services_validator.py

+32-3
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,42 @@ def set_verbose(verbose=False):
1717

1818
def _service_restart(svc_name):
    """Restart a systemd service, retrying when the restart fails.

    Up to three ``systemctl restart`` attempts are made:

    1. a plain restart;
    2. if that fails, ``systemctl reset-failed`` followed by a restart;
    3. if that fails too, a 10 second pause followed by a final restart.

    Args:
        svc_name: systemd unit name; it is interpolated into the shell
            command line as-is.

    Returns:
        True when the last restart attempt exited with status 0,
        False otherwise.
    """
    rc = os.system(f"systemctl restart {svc_name}")
    if rc != 0:
        # This failure is likely due to too many restarts
        # (systemd start limit); clear the failed state before retrying.
        # The rc of reset-failed itself is only logged, never acted upon.
        #
        rc = os.system(f"systemctl reset-failed {svc_name}")
        logger.log(logger.LOG_PRIORITY_ERROR,
                   f"Service {svc_name} has been reset. rc={rc}; Try restart again...",
                   print_to_console)

        rc = os.system(f"systemctl restart {svc_name}")
        if rc != 0:
            # Even with reset-failed, restart fails.
            # Give a pause before retry.
            #
            logger.log(logger.LOG_PRIORITY_ERROR,
                       f"Restart failed for {svc_name} rc={rc} after reset; Pause for 10s & retry",
                       print_to_console)
            # The pause is issued through os.system (its rc is ignored), so
            # it shows up as one more os.system call to anything scripting
            # or mocking that function.
            os.system("sleep 10s")
            rc = os.system(f"systemctl restart {svc_name}")

    if rc == 0:
        logger.log(logger.LOG_PRIORITY_NOTICE,
                   f"Restart succeeded for {svc_name}",
                   print_to_console)
    else:
        logger.log(logger.LOG_PRIORITY_ERROR,
                   f"Restart failed for {svc_name} rc={rc}",
                   print_to_console)
    return rc == 0
2348

2449

2550
def rsyslog_validator(old_config, upd_config, keys):
    """Apply the rsyslog configuration, restarting rsyslog if needed.

    Runs ``/usr/bin/rsyslog-config.sh``. When the script exits with a
    non-zero status, the ``rsyslog`` service is restarted through
    ``_service_restart`` and that outcome is returned.

    Args:
        old_config: not used by this validator.
        upd_config: not used by this validator.
        keys: not used by this validator.

    Returns:
        True when the config script exited with status 0; otherwise the
        boolean result of restarting ``rsyslog``.
    """
    script_rc = os.system("/usr/bin/rsyslog-config.sh")
    if script_rc == 0:
        return True
    return _service_restart("rsyslog")
2756

2857

2958
def dhcp_validator(old_config, upd_config, keys):

tests/generic_config_updater/service_validator_test.py

+36-7
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,25 @@
66
from collections import defaultdict
77
from unittest.mock import patch
88

9-
from generic_config_updater.services_validator import vlan_validator
9+
from generic_config_updater.services_validator import vlan_validator, rsyslog_validator
1010
import generic_config_updater.gu_common
1111

1212

1313
# Mimics os.system call
#
# os_system_calls is the script of expected invocations, in order; every
# entry is a dict {"cmd": <expected command string>, "rc": <status to return>}.
# os_system_call_index is the position of the next expected invocation.
# msg is an optional description of the case in progress; it is appended to
# assertion failures raised by the mock.
os_system_calls = []
os_system_call_index = 0
msg = ""


def mock_os_system_call(cmd):
    """Check *cmd* against the next scripted call and return its scripted rc.

    Args:
        cmd: command string that the code under test passed to os.system.

    Returns:
        The "rc" value of the matching entry in ``os_system_calls``.

    Raises:
        AssertionError: when more calls are made than were scripted, or when
            *cmd* differs from the command expected at this position.
    """
    global os_system_call_index

    assert os_system_call_index < len(os_system_calls), (
        "unexpected os.system call #{}: {!r}; only {} call(s) were expected. {}"
        .format(os_system_call_index, cmd, len(os_system_calls), msg))
    entry = os_system_calls[os_system_call_index]
    # Advance before comparing, so a mismatch still consumes its slot.
    os_system_call_index += 1

    assert cmd == entry["cmd"], (
        "os.system call #{}: expected {!r}, got {!r}. {}"
        .format(os_system_call_index - 1, entry["cmd"], cmd, msg))
    return entry["rc"]
2128

2229

2330
test_data = [
@@ -53,19 +60,41 @@ def os_system_cfggen(cmd):
5360
}
5461
]
5562

63+
# Scripted os.system calls for the rsyslog failure path: every step that can
# fail does fail, so the complete retry sequence gets exercised.
#
test_rsyslog_fail = [
    {"cmd": command, "rc": status}
    for command, status in (
        ("/usr/bin/rsyslog-config.sh", 1),      # config update; fails
        ("systemctl restart rsyslog", 1),       # rsyslog restart; fails
        ("systemctl reset-failed rsyslog", 1),  # reset; failure here just logs
        ("systemctl restart rsyslog", 1),       # restart again; fails
        ("sleep 10s", 0),                       # sleep; rc ignored
        ("systemctl restart rsyslog", 1),       # restart again; fails
    )
]
73+
74+
5675
class TestServiceValidator(unittest.TestCase):
    """Run the service validators against a scripted os.system."""

    # NOTE(review): the patch target is reached through change_applier's
    # `os` attribute; presumably that is the shared `os` module, so the
    # os.system calls made inside services_validator are intercepted as
    # well - confirm.
    @patch("generic_config_updater.change_applier.os.system")
    def test_change_apply(self, mock_os_sys):
        """Check the commands issued by vlan_validator and rsyslog_validator.

        For every test_data entry with a non-empty "cmd", that command is
        scripted with rc 0 before vlan_validator runs. Afterwards the
        rsyslog failure script is installed and rsyslog_validator is
        expected to report failure after consuming the whole script.
        """
        # msg must be declared global too; otherwise the assignments below
        # create a local and the mock never sees the case description.
        global os_system_calls, os_system_call_index, msg

        mock_os_sys.side_effect = mock_os_system_call

        # Start from a clean script so the test does not depend on state
        # left in the module-level variables.
        os_system_calls = []
        os_system_call_index = 0

        for entry in test_data:
            if entry["cmd"]:
                os_system_calls.append({"cmd": entry["cmd"], "rc": 0})
            msg = "case failed: {}".format(str(entry))

            vlan_validator(entry["old"], entry["upd"], None)

        # Test failure case
        #
        # Work on a copy so the shared fixture list is never aliased by the
        # mutable script variable.
        os_system_calls = list(test_rsyslog_fail)
        os_system_call_index = 0
        msg = "case failed: rsyslog restart failure path"

        rc = rsyslog_validator("", "", "")
        self.assertFalse(rc, "rsyslog_validator expected to fail")
        # Every scripted step of the failure path must have been executed.
        self.assertEqual(
            os_system_call_index, len(os_system_calls),
            "rsyslog failure path did not issue all expected os.system calls")
100+

0 commit comments

Comments
 (0)