From df11b2b9f18394782f089a6d6bc4e28d3b6392f4 Mon Sep 17 00:00:00 2001 From: yozhao101 <56170650+yozhao101@users.noreply.github.com> Date: Mon, 18 Nov 2019 16:56:44 -0800 Subject: [PATCH] [Services] Restart Telemetry service upon unexpected critical process exit. (#3768) Signed-off-by: Yong Zhao --- dockers/docker-sonic-telemetry/Dockerfile.j2 | 2 ++ dockers/docker-sonic-telemetry/critical_processes | 2 ++ dockers/docker-sonic-telemetry/supervisord.conf | 12 +++++++++--- files/build_templates/telemetry.service.j2 | 4 ++++ rules/telemetry.mk | 1 + 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 dockers/docker-sonic-telemetry/critical_processes diff --git a/dockers/docker-sonic-telemetry/Dockerfile.j2 b/dockers/docker-sonic-telemetry/Dockerfile.j2 index cfbe7c6f266c..3a5716001ca5 100644 --- a/dockers/docker-sonic-telemetry/Dockerfile.j2 +++ b/dockers/docker-sonic-telemetry/Dockerfile.j2 @@ -35,5 +35,7 @@ RUN apt-get clean -y && \ COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/bin/supervisord"] diff --git a/dockers/docker-sonic-telemetry/critical_processes b/dockers/docker-sonic-telemetry/critical_processes new file mode 100644 index 000000000000..d6953dd0c883 --- /dev/null +++ b/dockers/docker-sonic-telemetry/critical_processes @@ -0,0 +1,2 @@ +telemetry +dialout diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index dcd8a9eb1e80..b6a01de58a7b 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -3,6 +3,12 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener +events=PROCESS_STATE_EXITED +autostart=always +autorestart=unexpected + [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -15,7 +21,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=false +autorestart=true stdout_logfile=syslog stderr_logfile=syslog @@ -23,7 +29,7 @@ stderr_logfile=syslog command=/usr/bin/telemetry.sh priority=3 autostart=false -autorestart=true +autorestart=false stdout_logfile=syslog stderr_logfile=syslog @@ -31,6 +37,6 @@ stderr_logfile=syslog command=/usr/bin/dialout.sh priority=4 autostart=false -autorestart=true +autorestart=false stdout_logfile=syslog stderr_logfile=syslog diff --git a/files/build_templates/telemetry.service.j2 b/files/build_templates/telemetry.service.j2 index 2e7e45218df2..98ae2871bf6f 100644 --- a/files/build_templates/telemetry.service.j2 +++ b/files/build_templates/telemetry.service.j2 @@ -3,12 +3,16 @@ Description=Telemetry container Requires=database.service After=database.service swss.service syncd.service Before=ntp-config.service +StartLimitIntervalSec=1200 +StartLimitBurst=3 [Service] User={{ sonicadmin_user }} ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh wait ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always +RestartSec=30 [Install] WantedBy=multi-user.target diff --git a/rules/telemetry.mk b/rules/telemetry.mk index 1d903e603251..af568fb5bd6f 100644 --- a/rules/telemetry.mk +++ b/rules/telemetry.mk @@ -3,3 +3,4 @@ SONIC_TELEMETRY = sonic-telemetry_0.1_$(CONFIGURED_ARCH).deb $(SONIC_TELEMETRY)_SRC_PATH = $(SRC_PATH)/telemetry SONIC_DPKG_DEBS += $(SONIC_TELEMETRY) +$(SONIC_TELEMETRY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)