Skip to content

Commit

Permalink
Nomis: healthcheck fix (#819)
Browse files Browse the repository at this point in the history
* exclude TAGSAR from healthcheck

* update healthcheck script

* improve keepalive service script

* fix

* tagsar monitoring
  • Loading branch information
drobinson-moj authored May 30, 2024
1 parent 9246a18 commit 4f11fda
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
3 changes: 3 additions & 0 deletions ansible/group_vars/server_type_nomis_web.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ collectd_monitored_services_servertype:
- metric_name: service_status_app
metric_dimension: weblogic-healthcheck
shell_cmd: "service weblogic-healthcheck status"
- metric_name: service_status_app
metric_dimension: WLS_TAGSAR
shell_cmd: "service WLS_TAGSAR status"
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,17 @@ do
then
echo "${output}"
/etc/init.d/weblogic-all status
echo "Removing keepalive"
rm -f /u01/tag/static/keepalive.htm
echo "Waiting 2 minutes before checking again"
sleep 120
output=$(/etc/init.d/weblogic-all healthcheck 2>&1)
status=$?
if [ $status -eq 1 ]
then
echo "${output}"
/etc/init.d/weblogic-all status
echo "Removing keepalive"
rm -f /u01/tag/static/keepalive.htm
fi
fi
else
if [ ! -f "/u01/tag/static/keepalive.htm" ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ get_unhealthy_services() {
unhealthy+=(weblogic-server)
fi
fi
managed_services=$(find /etc/init.d/ -name 'WLS*' | cut -d/ -f4)
managed_services=$(find /etc/init.d/ -name 'WLS*' | grep -v 'WLS_TAGSAR' | cut -d/ -f4)
for managed_service in $managed_services; do
ok=1
if ! /etc/init.d/"$managed_service" status | head -1 | grep OK > /dev/null; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,30 @@ stop() {
fi
}

# Same as stop, but keep the keepalive file: this function never touches it.
#
# Removes the subsys lock file for $prog, then asks the running healthcheck
# process(es) reported by get_healthcheck_pid to terminate and re-checks
# after two seconds.
#
# Globals:  prog (read), PIDS (written)
# Outputs:  init-style status line on stdout; kill attempts and failures are
#           sent to syslog through logger (facility local3, tag $prog)
# Returns:  0 if nothing was running, or nothing is found after the kill;
#           1 if get_healthcheck_pid still reports a process afterwards
pause() {
  echo -n $"Stopping $prog: "
  rm -f "/var/lock/subsys/$prog"

  # Nothing to do when no healthcheck process is found.
  if ! PIDS=$(get_healthcheck_pid); then
    echo -n "Already stopped"
    echo_success
    echo
    return 0
  fi

  echo "init.d killing $prog $PIDS" | logger -p local3.info -t "$prog"
  # Left unquoted on purpose so that, if more than one PID was returned,
  # each one is passed to kill as a separate argument.
  # shellcheck disable=SC2086
  kill $PIDS
  # Give the process a moment to exit before checking again.
  sleep 2

  if get_healthcheck_pid > /dev/null; then
    echo_failure
    # Terminate the status line, as the success paths do.
    echo
    echo "init.d failed to kill $prog $PIDS" | logger -p local3.info -t "$prog"
    return 1
  fi

  echo_success
  echo
  return 0
}

status() {
echo -n $"Status of $prog: "
if ! PIDS=$(get_healthcheck_pid); then
Expand Down Expand Up @@ -107,7 +131,7 @@ keepalive() {
}

# Restart the service: pause it, then start it again.
# pause is used instead of stop because pause is the variant that keeps the
# keepalive file in place; calling stop first would defeat that.
restart() {
  pause
  start
}

Expand All @@ -118,6 +142,9 @@ case "$1" in
stop)
stop
;;
pause)
# Dispatch to pause (not stop) so this action differs from "stop".
pause
;;
restart)
restart
;;
Expand All @@ -128,7 +155,7 @@ case "$1" in
keepalive
;;
*)
echo "Usage: $0 {start|stop|restart|status|keepalive}"
echo "Usage: $0 {start|stop|pause|restart|status|keepalive}"
exit 3
esac

Expand Down

0 comments on commit 4f11fda

Please sign in to comment.