From 221484219b5c359c01a9da095108344883dbcac5 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Nov 2019 16:08:59 -0800 Subject: [PATCH 1/7] [docker-telemetry] Create a file named critical_processes. Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/critical_processes | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 dockers/docker-sonic-telemetry/critical_processes diff --git a/dockers/docker-sonic-telemetry/critical_processes b/dockers/docker-sonic-telemetry/critical_processes new file mode 100644 index 000000000000..d6953dd0c883 --- /dev/null +++ b/dockers/docker-sonic-telemetry/critical_processes @@ -0,0 +1,2 @@ +telemetry +dialout From 3f87a21589507c7260710a7f28bf3f9f3632fead Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Nov 2019 16:15:17 -0800 Subject: [PATCH 2/7] [docker-telemetry] Add paths of supervisord listener script and critical processes file into dockerfile.j2. Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/Dockerfile.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dockers/docker-sonic-telemetry/Dockerfile.j2 b/dockers/docker-sonic-telemetry/Dockerfile.j2 index cfbe7c6f266c..3a5716001ca5 100644 --- a/dockers/docker-sonic-telemetry/Dockerfile.j2 +++ b/dockers/docker-sonic-telemetry/Dockerfile.j2 @@ -35,5 +35,7 @@ RUN apt-get clean -y && \ COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/bin/supervisord"] From 144fadac8b7160f65b335a2cc53fe03162d98365 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Nov 2019 16:18:48 -0800 Subject: [PATCH 3/7] [docker-telemetry] Make event listener autostart by adding option in supervisord conf file.
Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/supervisord.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index dcd8a9eb1e80..aff300b27140 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=true +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 From 178b24da3cab62ad0447674b5a4a9a9ceb15217e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Nov 2019 16:21:57 -0800 Subject: [PATCH 4/7] [docker-telemetry] Configure systemd to stop restarting telemetry if it wants to restart this container more than 3 times in 20 minutes. Signed-off-by: Yong Zhao --- files/build_templates/telemetry.service.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/files/build_templates/telemetry.service.j2 b/files/build_templates/telemetry.service.j2 index 8781ce7afb47..b94a3a71b868 100644 --- a/files/build_templates/telemetry.service.j2 +++ b/files/build_templates/telemetry.service.j2 @@ -3,12 +3,16 @@ Description=Telemetry container Requires=database.service After=database.service Before=ntp-config.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target From 0830a3fc3fae948c4a4c20990b370821da59a3e0 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Nov 2019 16:28:50 -0800 Subject: [PATCH 5/7] [docker-telemetry] Add macro $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) into shared Makefile telemetry.mk.
Signed-off-by: Yong Zhao --- rules/telemetry.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/telemetry.mk b/rules/telemetry.mk index 1d903e603251..af568fb5bd6f 100644 --- a/rules/telemetry.mk +++ b/rules/telemetry.mk @@ -3,3 +3,4 @@ SONIC_TELEMETRY = sonic-telemetry_0.1_$(CONFIGURED_ARCH).deb $(SONIC_TELEMETRY)_SRC_PATH = $(SRC_PATH)/telemetry SONIC_DPKG_DEBS += $(SONIC_TELEMETRY) +$(SONIC_TELEMETRY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) From 0d42d43e521e5ed80faf82c28d79f092009adbd1 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 15 Nov 2019 15:29:41 -0800 Subject: [PATCH 6/7] [docker-telemetry] Modify the supervisord conf file to enable the auto-restart of rsyslogd and disable auto-restart of the other two processes, telemetry and dialout. Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/supervisord.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index aff300b27140..4e25651452bf 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -20,7 +20,7 @@ stderr_logfile=syslog [program:rsyslogd] command=/usr/sbin/rsyslogd -n priority=2 -autostart=false +autostart=true autorestart=false stdout_logfile=syslog stderr_logfile=syslog @@ -29,7 +29,7 @@ stderr_logfile=syslog command=/usr/bin/telemetry.sh priority=3 autostart=false -autorestart=true +autorestart=false stdout_logfile=syslog stderr_logfile=syslog @@ -37,6 +37,6 @@ stderr_logfile=syslog command=/usr/bin/dialout.sh priority=4 autostart=false -autorestart=true +autorestart=false stdout_logfile=syslog stderr_logfile=syslog From cee94b70b5b52679ebb603cc132c01666effcb68 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 15 Nov 2019 16:37:42 -0800 Subject: [PATCH 7/7] [docker-telemetry] Modify the auto-restart option of rsyslogd from false to true (and restore its autostart option to false) in the supervisord conf file.
Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/supervisord.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index 4e25651452bf..b6a01de58a7b 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -20,8 +20,8 @@ stderr_logfile=syslog [program:rsyslogd] command=/usr/sbin/rsyslogd -n priority=2 -autostart=true -autorestart=false +autostart=false +autorestart=true stdout_logfile=syslog stderr_logfile=syslog