Skip to content

System recovery when syncd crashes #3517

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions files/scripts/supervisor-proc-exit-listener
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from supervisor import childutils
# Contents of file should be the names of critical processes (as defined in
# supervisor.conf file), one per line
CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes'
CUSTOM_HANDLER = '/usr/bin/custom_handler'

def main():
# Read the list of critical processes from a file
Expand Down Expand Up @@ -39,6 +40,8 @@ def main():
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg)
if os.path.exists(CUSTOM_HANDLER):
os.system(CUSTOM_HANDLER)
os.kill(os.getppid(), signal.SIGTERM)

if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion platform/broadcom/docker-syncd-brcm/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ debs/{{ deb }}{{' '}}
## TODO: add kmod into Depends
RUN apt-get install -yf kmod

COPY ["files/dsserve", "files/bcmcmd", "start.sh", "bcmsh", "/usr/bin/"]
COPY ["files/dsserve", "files/bcmcmd", "start.sh", "bcmsh", "files/supervisor-proc-exit-listener", "custom_handler", "/usr/bin/"]
RUN chmod +x /usr/bin/dsserve /usr/bin/bcmcmd

COPY ["supervisord.conf", "/etc/supervisor/conf.d/"]
COPY ["critical_processes", "/etc/supervisor/"]

## Clean up
RUN apt-get clean -y; apt-get autoclean -y; apt-get autoremove -y
Expand Down
1 change: 1 addition & 0 deletions platform/broadcom/docker-syncd-brcm/critical_processes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
syncd
31 changes: 31 additions & 0 deletions platform/broadcom/docker-syncd-brcm/custom_handler
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# This is a custom handler for supervisor-proc-exit-listener.
# Docker-specific handling when critical processes exit can be added here.
import os
import sys
import syslog
import json

from swsssdk import SonicV2Connector

class OaNotif(object):
    """Publishes notifications on the ASIC_DB 'NOTIFICATIONS' channel.

    The name presumably stands for "orchagent notification" -- confirm
    against the consumer side of the channel.
    """

    def __init__(self):
        """Create a connector to the local database and connect to ASIC_DB."""
        super(OaNotif, self).__init__()
        self.db = SonicV2Connector(host="127.0.0.1")
        # NOTE(review): the second positional argument (False) presumably
        # disables connection retries -- confirm against swsssdk.
        asic_db = self.db.ASIC_DB
        self.db.connect(asic_db, False)

    def shutdown(self):
        """Publish a "switch_shutdown_request" message with an empty payload."""
        request = ["switch_shutdown_request", ""]
        # Compact separators keep the encoded JSON free of whitespace.
        payload = json.dumps(request, separators=(',', ':'))
        self.db.publish('ASIC_DB', 'NOTIFICATIONS', payload)

def main():
    """Connect to ASIC_DB, log the recovery action, and publish a shutdown request."""
    # The notifier is built first so that a connection failure raises before
    # anything is logged, matching the original statement order.
    notifier = OaNotif()
    syslog.syslog(syslog.LOG_INFO, "Restarting swss to recover the system")
    notifier.shutdown()


if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions platform/broadcom/docker-syncd-brcm/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ autorestart=false
stdout_logfile=syslog
stderr_logfile=syslog

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:rsyslogd]
command=/usr/sbin/rsyslogd -n
priority=2
Expand Down