Skip to content

Commit 47989f6

Browse files
yozhao101 authored and zhenggen-xu committed
[Services] Restart DHCP-Relay service upon unexpected critical process exit. (sonic-net#3667)
Signed-off-by: Yong Zhao <[email protected]>
1 parent ddc76e2 commit 47989f6

File tree

7 files changed

+22
-1
lines changed

7 files changed

+22
-1
lines changed

dockers/docker-dhcp-relay/Dockerfile.j2

+2
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,7 @@ RUN apt-get clean -y && \
2626

2727
COPY ["docker_init.sh", "start.sh", "/usr/bin/"]
2828
COPY ["docker-dhcp-relay.supervisord.conf.j2", "wait_for_intf.sh.j2", "/usr/share/sonic/templates/"]
29+
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
30+
COPY ["critical_processes", "/etc/supervisor"]
2931

3032
ENTRYPOINT ["/usr/bin/docker_init.sh"]
dockers/docker-dhcp-relay/critical_processes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
isc-dhcp-relay

dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
33
logfile_backups=2
44
nodaemon=true
55

6+
[eventlistener:supervisor-proc-exit-listener]
7+
command=/usr/bin/supervisor-proc-exit-listener
8+
events=PROCESS_STATE_EXITED
9+
autostart=true
10+
autorestart=unexpected
11+
612
[program:start.sh]
713
command=/usr/bin/start.sh
814
priority=1

files/build_templates/dhcp_relay.service.j2

+4
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@ Description=DHCP relay container
33
Requires=updategraph.service swss.service teamd.service
44
After=updategraph.service swss.service syncd.service teamd.service
55
Before=ntp-config.service
6+
StartLimitIntervalSec=1200
7+
StartLimitBurst=3
68

79
[Service]
810
User={{ sonicadmin_user }}
911
ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start
1012
ExecStart=/usr/bin/{{ docker_container_name }}.sh wait
1113
ExecStop=/usr/bin/{{ docker_container_name }}.sh stop
14+
Restart=always
15+
RestartSec=30
1216

1317
[Install]
1418
WantedBy=multi-user.target swss.service teamd.service

files/scripts/supervisor-proc-exit-listener

+2-1
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ def main():
3333

3434
expected = int(payload_headers['expected'])
3535
processname = payload_headers['processname']
36+
groupname = payload_headers['groupname']
3637

3738
# If a critical process exited unexpectedly, terminate supervisor
38-
if expected == 0 and processname in critical_processes:
39+
if expected == 0 and processname in critical_processes or groupname in critical_processes:
3940
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
4041
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
4142
syslog.syslog(syslog.LOG_INFO, msg)

rules/docker-dhcp-relay.mk

+1
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ SONIC_STRETCH_DBG_DOCKERS += $(DOCKER_DHCP_RELAY_DBG)
2525
$(DOCKER_DHCP_RELAY)_CONTAINER_NAME = dhcp_relay
2626
$(DOCKER_DHCP_RELAY)_RUN_OPT += --net=host --privileged -t
2727
$(DOCKER_DHCP_RELAY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
28+
$(DOCKER_DHCP_RELAY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)

src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
33
logfile_backups=2
44
nodaemon=true
55

6+
[eventlistener:supervisor-proc-exit-listener]
7+
command=/usr/bin/supervisor-proc-exit-listener
8+
events=PROCESS_STATE_EXITED
9+
autostart=true
10+
autorestart=unexpected
11+
612
[program:start.sh]
713
command=/usr/bin/start.sh
814
priority=1

0 commit comments

Comments
 (0)