# watchdogd.conf

# /etc/watchdogd.conf sample
# Commented out values are program defaults.
#
# The checker/monitor `warning` and `critical` levels are 0.00-1.00,
# i.e. 0-100%, except for load average which can vary a lot between
# systems and use-cases, not just because of the number of CPU cores.
# Use the `script = ...` setting to call a script when the `warning` or
# `critical` level is reached for a monitor.  Without a script, reaching
# `critical` triggers an unconditional reboot.
#
# NOTE: `critical` is optional, omitting it disables the reboot action.
#

###
# Do not set WDT timeout and kick interval too low, the daemon runs at
# SCHED_OTHER level with all other tasks, unless the process supervisor
# is enabled.  The monitor plugins (below) need CPU time as well.
#timeout   = 20
#interval  = 10

###
# With safe-exit enabled (true) the daemon will ask the driver to disable
# the WDT before exiting (SIGINT).  However, some WDT drivers (or HW)
# may not support this.
#safe-exit = false

### Supervisor
# Instrumented processes can have their main loop supervised.  Processes
# subscribe to this service using the libwdog API, see the docs for more
# on this.  When the supervisor is enabled watchdogd runs as a SCHED_RR
# process with the below elevated realtime priority.  When disabled it
# runs as a regular SCHED_OTHER process, this is the default.
#
# When a supervised process fails to meet its deadline, the daemon will
# perform an unconditional reset having saved the reset cause.  If a
# script is provided in this section it will be called instead.  The
# script is called as:
#
#    script.sh supervisor CAUSE PID LABEL
#
#supervisor {
#    enabled  = false
#    priority = 98
#    script = "/path/to/supervisor-script.sh"
#}

### Reset cause
# The following section controls if/how the reset cause & reset counter
# are tracked.  By default this is disabled, since not all systems allow
# writing to disk, e.g. embedded systems using MTD devices with a limited
# number of write cycles.  Another backend can be implemented and linked
# to the daemon, but first pass --disable-rcfile to the configure script.
#
# The default file setting is a non-volatile path, according to the FHS.
# It can be changed to another location, but make sure that location is
# writable first.
#reset-cause {
#    enabled = false
#    file    = "/var/lib/watchdogd.state"
#}

### Checkers/Monitors ##################################################
#
# Script or command to run instead of reboot when a monitor plugin
# reaches its warning or critical level.  Setting this will
# disable the built-in reboot on critical, it is therefore up to the
# script to perform reboot, if needed.  The script is called as:
#
#    script.sh {filenr, loadavg, meminfo} {crit, warn} VALUE
#
#script = "/path/to/reboot-action.sh"

# Monitors file descriptor leaks based on /proc/sys/fs/file-nr
#filenr {
#    interval = 300
#    logmark  = false
#    warning  = 0.9
#    critical = 1.0
#    script = "/path/to/alt-reboot-action.sh"
#}

# Monitors load average based on sysinfo() from /proc/loadavg
# The level is composed from the average of the 1 and 5 min marks.
#loadavg {
#    interval = 300
#    logmark  = false
#    warning  = 1.0
#    critical = 2.0
#    script = "/path/to/alt-reboot-action.sh"
#}

# Monitors free RAM based on data from /proc/meminfo
#meminfo {
#    interval = 300
#    logmark  = false
#    warning  = 0.9
#    critical = 0.95
#    script = "/path/to/alt-reboot-action.sh"
#}

# Monitor a generic script, executes 'monitor-script' every 'interval'
# seconds, with a max runtime of 'timeout' seconds.  When the exit code
# of the monitor script is above the critical level watchdogd either
# starts the reboot, or calls the alternate 'script' to determine the
# next course of action.
#generic {
#    interval = 300
#    timeout = 60
#    warning  = 1
#    critical = 10
#    monitor-script = "/path/to/monitor-script.sh"
#    script = "/path/to/alt-reboot-action.sh"
#}