forked from troglobit/watchdogd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
watchdogd.conf
113 lines (104 loc) · 3.87 KB
/
watchdogd.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# /etc/watchdogd.conf sample
# Commented out values are program defaults.
#
# The checker/monitor `warning` and `critical` levels are 0.00-1.00,
# i.e. 0-100%, except for load average which can vary a lot between
# systems and use-cases, not just because of the number of CPU cores.
# Use the `script = ...` setting to call a script when the `warning` or
# `critical` level is reached for a monitor. Without a script, reaching
# `critical` triggers an unconditional reboot.
#
# NOTE: `critical` is optional, omitting it disables the reboot action.
#
###
# Do not set WDT timeout and kick interval too low, the daemon runs at
# SCHED_OTHER level with all other tasks, unless the process supervisor
# is enabled. The monitor plugins (below) need CPU time as well.
#timeout = 20
#interval = 10
###
# With safe-exit enabled (true) the daemon will ask the driver to disable
# the WDT before exiting (SIGINT). However, some WDT drivers (or HW)
# may not support this.
#safe-exit = false
### Supervisor
# Instrumented processes can have their main loop supervised. Processes
# subscribe to this service using the libwdog API, see the docs for more
# on this. When the supervisor is enabled watchdogd runs as a SCHED_RR
# process with the below elevated realtime priority. When disabled it
# runs as a regular SCHED_OTHER process, this is the default.
#
# When a supervised process fails to meet its deadline, the daemon will
# perform an unconditional reset having saved the reset cause. If a
# script is provided in this section it will be called instead. The
# script is called as:
#
# script.sh supervisor CAUSE PID LABEL
#
#supervisor {
# enabled = false
# priority = 98
# script = "/path/to/supervisor-script.sh"
#}
### Reset cause
# The following section controls if/how the reset cause & reset counter
# are tracked. By default this is disabled, since not all systems allow
# writing to disk, e.g. embedded systems using MTD devices with limited
# number of write cycles. Another backend can be implemented and linked
# to the daemon, but run the configure script with --disable-rcfile first.
#
# The default file setting is a non-volatile path, according to the FHS.
# It can be changed to another location, but make sure that location is
# writable first.
#reset-cause {
# enabled = false
# file = "/var/lib/watchdogd.state"
#}
### Checkers/Monitors ##################################################
#
# Script or command to run instead of reboot when a monitor plugin
# reaches its critical or warning level. Setting this will
# disable the built-in reboot on critical, it is therefore up to the
# script to perform reboot, if needed. The script is called as:
#
# script.sh {filenr, loadavg, meminfo} {crit, warn} VALUE
#
#script = "/path/to/reboot-action.sh"
# Monitors file descriptor leaks based on /proc/sys/fs/file-nr
#filenr {
# interval = 300
# logmark = false
# warning = 0.9
# critical = 1.0
# script = "/path/to/alt-reboot-action.sh"
#}
# Monitors load average based on sysinfo() from /proc/loadavg
# The level is composed from the average of the 1 and 5 min marks.
#loadavg {
# interval = 300
# logmark = false
# warning = 1.0
# critical = 2.0
# script = "/path/to/alt-reboot-action.sh"
#}
# Monitors free RAM based on data from /proc/meminfo
#meminfo {
# interval = 300
# logmark = false
# warning = 0.9
# critical = 0.95
# script = "/path/to/alt-reboot-action.sh"
#}
# Monitor a generic script, executes 'monitor-script' every 'interval'
# seconds, with a max runtime of 'timeout' seconds. When the exit code
# of the monitor script is above the critical level watchdogd either
# starts the reboot, or calls the alternate 'script' to determine the
# next course of action.
#generic {
# interval = 300
# timeout = 60
# warning = 1
# critical = 10
# monitor-script = "/path/to/monitor-script.sh"
# script = "/path/to/alt-reboot-action.sh"
#}