diff --git a/dockers/docker-base/Dockerfile.j2 b/dockers/docker-base/Dockerfile.j2
index 2aef59241b13..8aab02ea2a19 100644
--- a/dockers/docker-base/Dockerfile.j2
+++ b/dockers/docker-base/Dockerfile.j2
@@ -45,6 +45,7 @@ RUN mkdir -p /etc/supervisor
 RUN mkdir -p /var/log/supervisor
 
 COPY ["etc/supervisor/supervisord.conf", "/etc/supervisor/"]
+COPY ["etc/supervisor/kill_supervisor.py", "/usr/bin/"]
 
 RUN apt-get -y purge \
     exim4 \
diff --git a/dockers/docker-base/etc/supervisor/kill_supervisor.py b/dockers/docker-base/etc/supervisor/kill_supervisor.py
new file mode 100755
index 000000000000..f3ca79e00bf8
--- /dev/null
+++ b/dockers/docker-base/etc/supervisor/kill_supervisor.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python
+
+# Supervisor event listener: sends SIGQUIT to supervisord when an event is
+# received for a process that is not listed in EXCEPTION_SERVICE_LIST.
+# Event listener protocol documentation: http://supervisord.org/events.html
+
+import sys
+import os
+import signal
+import subprocess
+
+from supervisor.childutils import listener
+
+SUPERVISORD_PID_FILE = '/var/run/supervisord.pid'
+
+# Programs excluded from the event listener: an event for one of these never
+# causes supervisord to be killed.
+EXCEPTION_SERVICE_LIST = [
+    "start.sh",
+    "enable_counters",
+    "swssconfig",
+    "arp_update",
+    "ledinit",
+    "fancontrol",
+    "lm-sensors",
+    "ledd",
+    "xcvrd",
+    "configdb-load.sh",
+    "snmpd-config-updater",
+]
+
+
+def write_stdout(s):
+    """Write s to stdout and flush.
+
+    Only eventlistener protocol messages may be sent to stdout.
+    """
+    sys.stdout.write(s)
+    sys.stdout.flush()
+
+
+def write_stderr(s):
+    """Write the diagnostic message s to stderr and flush."""
+    sys.stderr.write(s)
+    sys.stderr.flush()
+
+
+def _get_monitored_services():
+    """Return the names listed by `supervisorctl avail` minus the exceptions.
+
+    An empty list is returned when supervisorctl cannot be started.
+    """
+    try:
+        # universal_newlines=True yields text output on Python 2 and 3 alike.
+        proc = subprocess.Popen(["supervisorctl", "avail"],
+                                stdout=subprocess.PIPE,
+                                universal_newlines=True)
+    except OSError as e:
+        write_stderr('Could not run supervisorctl: ' + str(e) + '\n')
+        return []
+    out, _ = proc.communicate()
+    # Keep the first column of every non-empty row (replaces `cut -d' ' -f1`).
+    names = [row.split()[0] for row in out.splitlines() if row.strip()]
+    return [name for name in names if name not in EXCEPTION_SERVICE_LIST]
+
+
+def _parse_payload(payload):
+    """Convert a "key:value key:value ..." event payload into a dict."""
+    # maxsplit=1 keeps any further colon inside the value.
+    return dict(pair.split(":", 1) for pair in payload.split())
+
+
+def _kill_supervisord():
+    """Send SIGQUIT to the pid stored in SUPERVISORD_PID_FILE."""
+    try:
+        with open(SUPERVISORD_PID_FILE, 'r') as pidfile:
+            pid = int(pidfile.readline())
+        os.kill(pid, signal.SIGQUIT)
+    except (IOError, OSError, ValueError) as e:
+        # Report on stderr: stdout is reserved for protocol messages.
+        write_stderr('Could not kill supervisor: ' + str(e) + '\n')
+
+
+def main():
+    """Event loop: wait for an event, react to it, then acknowledge it."""
+    while True:
+        service_list = _get_monitored_services()
+
+        headers, payload = listener.wait(sys.stdin, sys.stdout)
+        body = _parse_payload(payload)
+
+        write_stderr("Headers: {!r}\n".format(headers))
+        write_stderr("Body: {!r}\n".format(body))
+
+        process = body["processname"]
+        state = headers["eventname"].split('_')[2]
+        if process in service_list:
+            write_stderr("Process {} got {} !!! Time to kill Supervisord !!!\n".format(process, state))
+            _kill_supervisord()
+        else:
+            write_stderr("Process {} got {} !!! But no need to kill Supervisor !!!\n".format(process, state))
+
+        # Acknowledge the event with an "OK" result (2 is the body length).
+        write_stdout("RESULT 2\nOK")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/dockers/docker-database/supervisord.conf b/dockers/docker-database/supervisord.conf
index f095e78bc144..dd18886ce8de 100644
--- a/dockers/docker-database/supervisord.conf
+++ b/dockers/docker-database/supervisord.conf
@@ -27,3 +27,9 @@ autorestart=false
 startsecs=0
 stdout_logfile=syslog
 stderr_logfile=syslog
+
+[eventlistener:kill_supervisor]
+command=/usr/bin/kill_supervisor.py
+events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL
+stdout_logfile=syslog
+stderr_logfile=syslog
diff --git a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2 b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2
index 747f65a3aaf6..3dda8608deff 100644
--- a/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2
+++ b/dockers/docker-dhcp-relay/docker-dhcp-relay.supervisord.conf.j2
@@ -65,3 +65,9 @@ stderr_logfile=syslog
{% endfor %} {% endif %} {% endif %} + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-fpm-quagga/supervisord.conf b/dockers/docker-fpm-quagga/supervisord.conf index 3e05215babaa..09574c09e54f 100644 --- a/dockers/docker-fpm-quagga/supervisord.conf +++ b/dockers/docker-fpm-quagga/supervisord.conf @@ -56,3 +56,9 @@ autorestart=false startsecs=0 stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-lldp-sv2/supervisord.conf b/dockers/docker-lldp-sv2/supervisord.conf index e8022a9a5fa6..25b69a5df9ba 100644 --- a/dockers/docker-lldp-sv2/supervisord.conf +++ b/dockers/docker-lldp-sv2/supervisord.conf @@ -47,3 +47,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 71748ad4fba6..574a9df6191f 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -116,3 +116,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-platform-monitor/supervisord.conf b/dockers/docker-platform-monitor/supervisord.conf index aa947ce2c9ae..452ca5c36957 100644 --- a/dockers/docker-platform-monitor/supervisord.conf 
+++ b/dockers/docker-platform-monitor/supervisord.conf @@ -54,3 +54,9 @@ autorestart=false stdout_logfile=syslog stderr_logfile=syslog startsecs=0 + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf index f0bb4d5b3bbd..a53b63cf10e3 100644 --- a/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf +++ b/dockers/docker-router-advertiser/docker-router-advertiser.supervisord.conf @@ -27,3 +27,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-snmp-sv2/supervisord.conf b/dockers/docker-snmp-sv2/supervisord.conf index d80579506100..745e51f4f144 100644 --- a/dockers/docker-snmp-sv2/supervisord.conf +++ b/dockers/docker-snmp-sv2/supervisord.conf @@ -34,3 +34,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-sonic-telemetry/supervisord.conf b/dockers/docker-sonic-telemetry/supervisord.conf index dcd8a9eb1e80..86efb1a7ee3d 100644 --- a/dockers/docker-sonic-telemetry/supervisord.conf +++ b/dockers/docker-sonic-telemetry/supervisord.conf @@ -34,3 +34,9 @@ autostart=false autorestart=true stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, 
PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/dockers/docker-teamd/supervisord.conf b/dockers/docker-teamd/supervisord.conf index 738751d0a59f..26a13c93522b 100644 --- a/dockers/docker-teamd/supervisord.conf +++ b/dockers/docker-teamd/supervisord.conf @@ -34,3 +34,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/files/build_templates/database.service.j2 b/files/build_templates/database.service.j2 index c353653e4562..fc43a8fc06c5 100644 --- a/files/build_templates/database.service.j2 +++ b/files/build_templates/database.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/dhcp_relay.service.j2 b/files/build_templates/dhcp_relay.service.j2 index ea53fa2deda4..4211d0081434 100644 --- a/files/build_templates/dhcp_relay.service.j2 +++ b/files/build_templates/dhcp_relay.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start ExecStart=/usr/bin/{{ docker_container_name }}.sh attach ExecStop=/usr/bin/{{ docker_container_name }}.sh stop +Restart=always + [Install] WantedBy=multi-user.target teamd.service diff --git a/files/build_templates/lldp.service.j2 b/files/build_templates/lldp.service.j2 index d35cdacca0f3..9ae8ff071363 100644 --- a/files/build_templates/lldp.service.j2 +++ b/files/build_templates/lldp.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff 
--git a/files/build_templates/pmon.service.j2 b/files/build_templates/pmon.service.j2 index 83dd8e4b648d..9b5ac0603e78 100644 --- a/files/build_templates/pmon.service.j2 +++ b/files/build_templates/pmon.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/radv.service.j2 b/files/build_templates/radv.service.j2 index 7d6da31e135f..30cba91a427b 100644 --- a/files/build_templates/radv.service.j2 +++ b/files/build_templates/radv.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{ docker_container_name }}.sh start ExecStart=/usr/bin/{{ docker_container_name }}.sh attach ExecStop=/usr/bin/{{ docker_container_name }}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/snmp.service.j2 b/files/build_templates/snmp.service.j2 index 6398477ab6a1..2238c0e58f35 100644 --- a/files/build_templates/snmp.service.j2 +++ b/files/build_templates/snmp.service.j2 @@ -7,3 +7,5 @@ After=updategraph.service swss.service ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop + +Restart=always diff --git a/files/build_templates/swss.service.j2 b/files/build_templates/swss.service.j2 index 8b8d7b42d1ee..74e8b62dea48 100644 --- a/files/build_templates/swss.service.j2 +++ b/files/build_templates/swss.service.j2 @@ -15,5 +15,7 @@ Environment=sonic_asic_platform={{ sonic_asic_platform }} ExecStart=/usr/local/bin/swss.sh start ExecStop=/usr/local/bin/swss.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/syncd.service.j2 b/files/build_templates/syncd.service.j2 index 38b208e74b0b..dfcb7817edd7 100644 --- a/files/build_templates/syncd.service.j2 +++ 
b/files/build_templates/syncd.service.j2 @@ -20,5 +20,7 @@ Environment=sonic_asic_platform={{ sonic_asic_platform }} ExecStart=/usr/local/bin/syncd.sh start ExecStop=/usr/local/bin/syncd.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/teamd.service.j2 b/files/build_templates/teamd.service.j2 index bbd04a81b05b..43f6871bbc31 100644 --- a/files/build_templates/teamd.service.j2 +++ b/files/build_templates/teamd.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/files/build_templates/telemetry.service.j2 b/files/build_templates/telemetry.service.j2 index 5f2e81ed95ea..587ba54a61d4 100644 --- a/files/build_templates/telemetry.service.j2 +++ b/files/build_templates/telemetry.service.j2 @@ -9,5 +9,7 @@ ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop +Restart=always + [Install] WantedBy=multi-user.target diff --git a/platform/barefoot/docker-syncd-bfn/supervisord.conf b/platform/barefoot/docker-syncd-bfn/supervisord.conf index 1e015fef931f..5b63cba18a71 100644 --- a/platform/barefoot/docker-syncd-bfn/supervisord.conf +++ b/platform/barefoot/docker-syncd-bfn/supervisord.conf @@ -27,3 +27,8 @@ autorestart=false stdout_logfile=syslog stderr_logfile=syslog +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/broadcom/docker-syncd-brcm/supervisord.conf b/platform/broadcom/docker-syncd-brcm/supervisord.conf index fe590ede18c5..857f20e12e0f 100644 --- a/platform/broadcom/docker-syncd-brcm/supervisord.conf +++ 
b/platform/broadcom/docker-syncd-brcm/supervisord.conf @@ -34,3 +34,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/cavium/docker-syncd-cavm/supervisord.conf b/platform/cavium/docker-syncd-cavm/supervisord.conf index 1af5d70a1d0c..5b63cba18a71 100644 --- a/platform/cavium/docker-syncd-cavm/supervisord.conf +++ b/platform/cavium/docker-syncd-cavm/supervisord.conf @@ -26,3 +26,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/centec/docker-syncd-centec/supervisord.conf b/platform/centec/docker-syncd-centec/supervisord.conf index 1af5d70a1d0c..5b63cba18a71 100644 --- a/platform/centec/docker-syncd-centec/supervisord.conf +++ b/platform/centec/docker-syncd-centec/supervisord.conf @@ -26,3 +26,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/marvell/docker-syncd-mrvl/supervisord.conf b/platform/marvell/docker-syncd-mrvl/supervisord.conf index 1e015fef931f..5b63cba18a71 100644 --- a/platform/marvell/docker-syncd-mrvl/supervisord.conf +++ b/platform/marvell/docker-syncd-mrvl/supervisord.conf @@ -27,3 +27,8 @@ autorestart=false stdout_logfile=syslog stderr_logfile=syslog +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog 
+stderr_logfile=syslog diff --git a/platform/mellanox/docker-syncd-mlnx/supervisord.conf b/platform/mellanox/docker-syncd-mlnx/supervisord.conf index 8860bd6c0205..4bfefb3d3d17 100644 --- a/platform/mellanox/docker-syncd-mlnx/supervisord.conf +++ b/platform/mellanox/docker-syncd-mlnx/supervisord.conf @@ -34,3 +34,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/nephos/docker-syncd-nephos/supervisord.conf b/platform/nephos/docker-syncd-nephos/supervisord.conf index 1af5d70a1d0c..5b63cba18a71 100644 --- a/platform/nephos/docker-syncd-nephos/supervisord.conf +++ b/platform/nephos/docker-syncd-nephos/supervisord.conf @@ -26,3 +26,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/platform/vs/docker-sonic-vs/supervisord.conf b/platform/vs/docker-sonic-vs/supervisord.conf index 088684293cd9..bc9144e1d093 100644 --- a/platform/vs/docker-sonic-vs/supervisord.conf +++ b/platform/vs/docker-sonic-vs/supervisord.conf @@ -154,3 +154,9 @@ autostart=false autorestart=false stdout_logfile=syslog stderr_logfile=syslog + +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog diff --git a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf index ed14f2ca0f8c..7bc2e6e97620 100644 --- a/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf +++ 
b/src/sonic-config-engine/tests/sample_output/docker-dhcp-relay.supervisord.conf @@ -31,3 +31,8 @@ stdout_logfile=syslog stderr_logfile=syslog +[eventlistener:kill_supervisor] +command=/usr/bin/kill_supervisor.py +events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL +stdout_logfile=syslog +stderr_logfile=syslog