1 |
msalle |
2454 |
#!/bin/dash |
2 |
msalle |
2451 |
# |
3 |
|
|
# Copyright (C) Nikhef 2011 |
4 |
|
|
# |
5 |
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
6 |
|
|
# you may not use this file except in compliance with the License. |
7 |
|
|
# You may obtain a copy of the License at |
8 |
|
|
# |
9 |
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10 |
|
|
# |
11 |
|
|
# Unless required by applicable law or agreed to in writing, software |
12 |
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13 |
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 |
|
|
# See the License for the specific language governing permissions and |
15 |
|
|
# limitations under the License. |
16 |
|
|
# |
17 |
|
|
# Author: |
18 |
|
|
# Mischa Sall\'e <msalle@nikhef.nl> |
19 |
|
|
# NIKHEF Amsterdam, the Netherlands |
20 |
|
|
# |
21 |
|
|
######################################################################## |
22 |
|
|
# |
23 |
|
|
# Nagios probe to test functioning of gLExec |
24 |
|
|
# |
25 |
|
|
# Nagios state can be one of the following: |
26 |
|
|
# - Missing glexec command: CRITICAL |
27 |
|
|
# - input proxies empty: UNKNOWN |
28 |
msalle |
2452 |
# - short timeout exceeded: WARNING |
29 |
|
|
# - timeout exceeded: CRITICAL |
30 |
msalle |
2451 |
# - gLExec exit codes: |
31 |
|
|
# 0 glexec succeeded: OK |
32 |
|
|
# 201 Client error: CRITICAL |
33 |
|
|
# 202 Internal error: CRITICAL |
34 |
|
|
# 203 Auth error: CRITICAL |
35 |
|
|
# 204 Overlap: CRITICAL |
36 |
|
|
# 126 execve failed: WARNING |
37 |
|
|
# 128+n signal: WARNING |
38 |
|
|
# !=0 rc of payload: WARNING |
39 |
|
|
# |
40 |
|
|
######################################################################## |
41 |
|
|
|
42 |
|
|
# Probe version, reported by -V/--version
VERSION=0.1

# Plugin name, used in usage and error messages.
# $0 is quoted so that a script path containing whitespace still works.
PROG=$(basename -- "$0")
# Default payload command and its arguments; the full command line is
# assembled once the arguments have been parsed
DEF_GLEXEC_CMD="id"
DEF_GLEXEC_CMD_ARGS="-a"
# Path of the glexec command itself, filled in by find_glexec
GLEXEC_EXE=""
# Default verbosity
VERBOSE=0
# Default timeout (seconds): a probe still running after this is sent SIGKILL
TIMEOUT=10
# Default critical timeout (seconds): a probe still running after this is
# sent SIGTERM
CRITTIMEOUT=8
# Default short timeout (seconds), longer than this results in warning
WARNTIMEOUT=5
# Default GLEXEC_CLIENT_CERT will be set after arguments are parsed
GLEXEC_CLIENT_CERT=""
62 |
|
|
|
63 |
|
|
######################################################################## |
64 |
|
|
# |
65 |
|
|
# general options |
66 |
|
|
# |
67 |
|
|
######################################################################## |
68 |
|
|
|
69 |
|
|
# Print the one-line synopsis and terminate the script with status 0.
shortusage() {
    printf '%s\n' "Usage: $PROG [options]"
    exit 0
}
74 |
|
|
|
75 |
|
|
# Print the full help text (synopsis plus one line per option, including the
# current default values) and terminate the script with status 0.
usage() {
    cat <<EOF
Usage: $PROG [options]

Options:
 -t|--timeout <timeout> maximum runtime for probe, default $TIMEOUT sec
 -w|--warning <timeout> runtime after which to warn, default $WARNTIMEOUT sec
 -c|--critical <timeout> runtime after which to probe is to be killed, default $CRITTIMEOUT sec
 -x|--x509-user-proxy <file> set X509_USER_PROXY to given file
 -g|--glexec-client-cert <file> set GLEXEC_CLIENT_CERT to given file
 default: value of variable X509_USER_PROXY
 -e|--execute <cmd> command to execute by gLExec
 default: "$DEF_GLEXEC_CMD_EXE"
 -v|--verbose be more verbose, more -v means more verbosity
 -V|--version print version
 -h|--help show this helptext
EOF
    exit 0
}
93 |
|
|
|
94 |
|
|
# Log function: log <level> <message>...
# Prints every message argument on its own line on stdout, but only when the
# current verbosity ($VERBOSE) is at least <level>.
log() {
    level=$1
    shift
    # Below the requested verbosity: stay silent
    [ $VERBOSE -ge $level ] || return 0
    while [ $# -gt 0 ]; do
        echo "$1"
        shift
    done
}
104 |
|
|
|
105 |
|
|
# Emits the nagios status line and terminates with the nagios code.
#   $1    nagios code: 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN)
#   $2... summary text; all remaining arguments are joined with spaces
# The line is written via log at level 0 as
#   NAGIOS_STATUS_LINE <code> <stat>: <summary>
# which is the form nagios_output looks for. Any other code is reported as
# UNKNOWN (3), with the offending value named in the status text.
nagios_status() {
    code=$1
    shift
    summary="$*"
    case "$code" in
        0)  stat='OK' ;;
        1)  stat='WARNING' ;;
        2)  stat='CRITICAL' ;;
        3)  stat='UNKNOWN' ;;
        *)  # Double quotes so the value is expanded, and $code rather than
            # $1, which after the shift above is already part of the summary.
            stat="INVALID NAGIOS CODE $code"
            code=3
            ;;
    esac
    log 0 "NAGIOS_STATUS_LINE $code $stat: $summary"
    exit $code
}
120 |
|
|
|
121 |
msalle |
2454 |
# Reorders the probe output read from stdin for nagios: the text of the
# (last) line starting with "NAGIOS_STATUS_LINE <exitcode> " is printed
# first, followed by all other lines in their original order. Exits with
# <exitcode>, or with 0 when no status line was seen.
#
# The captured text is always printed through a fixed '%s' format, so '%' or
# backslashes in the probe output cannot be misread as printf directives, and
# lines are read verbatim (IFS= read -r).
nagios_output() {
    logstring=""
    nagiosline=""
    code=0
    # A literal newline, used to join the collected log lines
    newline='
'
    # The second test keeps a final line that lacks a trailing newline
    while IFS= read -r line || [ -n "$line" ]; do
        case "$line" in
            "NAGIOS_STATUS_LINE "*)
                # <exitcode> is the first word, the status text is the rest
                line2=${line#NAGIOS_STATUS_LINE }
                code=${line2%% *}
                nagiosline=${line2#* }
                ;;
            *)
                if [ -z "$logstring" ]; then
                    logstring=$line
                else
                    logstring="${logstring}${newline}${line}"
                fi
                ;;
        esac
    done
    if [ -z "$nagiosline" ]; then
        printf '%s' "$logstring"
    else
        printf '%s\n%s' "$nagiosline" "$logstring"
    fi
    exit $code
}
147 |
|
|
|
148 |
msalle |
2451 |
# Reports a command line parsing problem and terminates as UNKNOWN (3).
# Every argument is logged at verbosity level 2; the first non-empty argument
# becomes the summary of the nagios status line.
parse_err() {
    code=3 # Unknown
    summary=""
    while [ $# -gt 0 ]; do
        if [ -z "$summary" ]; then
            summary="$1"
        fi
        log 2 "$1"
        shift
    done
    nagios_status $code "$summary"
}
158 |
|
|
|
159 |
|
|
# Minimal replacement for which(1), which might not exist.
#   $1  command name to look up
# Prints the full path of the first executable regular file named $1 found
# in $PATH and returns 0; prints nothing and returns 1 when there is none.
#
# $PATH is split on ':' with parameter expansion only, so directories with
# whitespace or glob characters are handled. Debug output goes to stderr,
# because stdout carries the result.
which_cmd() {
    wc_rest=$PATH
    while [ -n "$wc_rest" ]; do
        # Take the next PATH element off the front
        dir=${wc_rest%%:*}
        case "$wc_rest" in
            *:*)  wc_rest=${wc_rest#*:} ;;
            *)    wc_rest="" ;;
        esac
        [ -n "$dir" ] || continue
        cmd="${dir}/$1"
        log 3 "Looking for $1 in $dir" >&2
        if [ -f "$cmd" ] && [ -x "$cmd" ]; then
            printf '%s\n' "$cmd"
            return 0
        fi
    done
    return 1
}
170 |
|
|
|
171 |
|
|
# Assumes range as input, finds upperlimit, currently @ is not understood
# see http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
#   $1  range such as "10", "5:10" or "~:10"
# Prints the upper limit (may be empty, e.g. for "10:") and returns 0;
# returns 1 without output for more than one argument or an invalid range.
#
# Only shell pattern matching is used: feeding the value through an unquoted
# echo would subject it to filename globbing and echo option parsing.
parse_range() {
    [ $# -gt 1 ] && return 1
    # invalid chars?
    case "$1" in
        *[!~:0-9]*)  return 1 ;;
    esac
    # find upperlimit: everything after the first ':' if there is one
    case "$1" in
        *:*)  uplimit=${1#*:} ;;
        *)    uplimit=$1 ;;
    esac
    # invalid chars? (a leftover '~' or a second ':')
    case "$uplimit" in
        *[!0-9]*)  return 1 ;;
    esac
    # print limit, note: might be empty
    printf '%s\n' "$uplimit"
    return 0
}
187 |
|
|
|
188 |
|
|
# Parses the (short) command line options with getopts and stores the
# results in the global configuration variables. Afterwards fills in the
# defaults for GLEXEC_CLIENT_CERT (the value of X509_USER_PROXY) and
# GLEXEC_CMD_EXE (DEF_GLEXEC_CMD_EXE) when they were not given.
# Invalid options or timeouts end the script through parse_err.
parse_short_args() {
    while getopts ":g:x:t:c:w:e:hvVH:p:u:" i ; do
        case "$i" in
            # Informational options
            h)  shortusage ;;
            V)  nagios_status 0 "$PROG version $VERSION" ;;
            v)  VERBOSE=$((VERBOSE + 1)) ;;
            # Proxy files and payload command
            g)  GLEXEC_CLIENT_CERT="$OPTARG" ;;
            x)  X509_USER_PROXY="$OPTARG" ;;
            e)  GLEXEC_CMD_EXE="$OPTARG" ;;
            # Timeouts: parse_range prints nothing usable for a bad range,
            # in which case parse_err ends the script
            t)  TIMEOUT=$(parse_range "$OPTARG")
                [ -n "$TIMEOUT" ] || parse_err "Not a valid timeout: \"$OPTARG\""
                log 2 "TIMEOUT set to $TIMEOUT"
                ;;
            c)  CRITTIMEOUT=$(parse_range "$OPTARG")
                [ -n "$CRITTIMEOUT" ] || parse_err "Not a valid timeout: \"$OPTARG\""
                log 2 "CRITTIMEOUT set to $CRITTIMEOUT"
                ;;
            w)  WARNTIMEOUT=$(parse_range "$OPTARG")
                [ -n "$WARNTIMEOUT" ] || parse_err "Not a valid timeout: \"$OPTARG\""
                log 2 "WARNTIMEOUT set to $WARNTIMEOUT"
                ;;
            # Accepted but unused options
            H)  log 2 "$PROG: option -H/--hostname is not used" ;;
            p)  log 2 "$PROG: option -p/--port is not used" ;;
            u)  log 2 "$PROG: option -u/--url is not used" ;;
            # getopts error reporting (silent mode, leading ':' in optstring)
            :)  parse_err "Option requires an argument -- '$OPTARG'" \
                          "Try \`$PROG -h' for more information."
                ;;
            \?) parse_err "Invalid option -- '$OPTARG'" \
                          "Try \`$PROG -h' for more information."
                ;;
        esac
    done
    # Check if we specified GLEXEC_CLIENT_CERT
    [ -n "$GLEXEC_CLIENT_CERT" ] || {
        GLEXEC_CLIENT_CERT="$X509_USER_PROXY"
        log 2 "Using same proxy for GLEXEC_CLIENT_CERT and X509_USER_PROXY"
    }
    # Check if we specified a command
    [ -n "$GLEXEC_CMD_EXE" ] || {
        GLEXEC_CMD_EXE="$DEF_GLEXEC_CMD_EXE"
        log 2 "Using default payload command \"$GLEXEC_CMD_EXE\""
    }
}
241 |
|
|
|
242 |
|
|
# Converts long options into short options and passes the resulting argument
# list to parse_short_args. Also determines the default payload command line
# (DEF_GLEXEC_CMD_EXE) by looking up DEF_GLEXEC_CMD in $PATH.
#
# Every argument starting with "--" is treated as a long option; unknown ones
# end the script through parse_err, and --help prints the long usage text.
# The list is rebuilt in the positional parameters, so arguments containing
# whitespace, quotes or '$' are passed on unchanged and never re-evaluated by
# the shell.
parse_args() {
    # Find default executable
    cmd=$(which_cmd "$DEF_GLEXEC_CMD")
    if [ -n "$cmd" ]; then
        DEF_GLEXEC_CMD_EXE="$cmd $DEF_GLEXEC_CMD_ARGS"
    fi

    # "$@" is expanded once before the loop starts; each iteration drops one
    # original argument from the front and appends its translation at the end.
    for arg in "$@" ; do
        shift
        case "$arg" in
            --x509-user-proxy)     set -- "$@" -x ;;
            # -g, not -c: -c is the critical timeout
            --glexec-client-cert)  set -- "$@" -g ;;
            --execute)             set -- "$@" -e ;;
            --timeout)             set -- "$@" -t ;;
            --critical)            set -- "$@" -c ;;
            --warning)             set -- "$@" -w ;;
            --verbose)             set -- "$@" -v ;;
            --version)             set -- "$@" -V ;;
            --help)                usage ;;
            # Unused long options:
            --hostname)            set -- "$@" -H ;;
            --port)                set -- "$@" -p ;;
            --url)                 set -- "$@" -u ;;
            --*)
                parse_err "$PROG: invalid longoption -- '${arg#--}'" \
                          "Try \`$PROG -h' for more information."
                ;;
            *)
                set -- "$@" "$arg"
                ;;
        esac
    done

    # Now parse the resulting short options
    parse_short_args "$@"
}
279 |
|
|
|
280 |
msalle |
2454 |
# wait wrapper
#   $1  process id to wait for
# Returns the status of wait, or 1 when not called with exactly one argument.
# Messages from wait are merged into stdout when VERBOSE is at least 1 and
# discarded otherwise.
wait_func() {
    [ $# -eq 1 ] || {
        log 2 "wait_func needs exactly one argument"
        return 1
    }
    if [ $VERBOSE -ge 1 ];then
        wait $1 2>&1
        return
    fi
    wait $1 2> /dev/null
}
293 |
|
|
|
294 |
|
|
# kill wrapper
#   kill_func <pid>           sends SIGTERM to <pid>
#   kill_func <signal> <pid>  sends <signal> (e.g. -9) to <pid>
# A pid with a leading '-' is passed to /bin/kill unchanged.
# Returns the status of /bin/kill, or 1 when called without arguments.
#
# Only kf_* variables are written here, so a caller's own $pid is never
# overwritten. The process listing is debug output and is therefore only
# produced at verbosity 3 or higher, after the target is known.
kill_func() {
    if [ $# -eq 0 ];then
        log 2 "kill_func needs at least one argument"
        return 1
    fi
    if [ $# -eq 1 ];then
        kf_signo="-TERM"
        kf_target=$1
    else
        kf_signo=$1
        kf_target=$2
    fi
    if [ "$VERBOSE" -ge 3 ];then
        ps -fjA | grep -- "${kf_target#-}"
    fi
    log 3 "About to send $kf_signo to $kf_target"
    if [ "$VERBOSE" -gt 1 ];then
        /bin/kill $kf_signo $kf_target 2>&1
    else
        /bin/kill $kf_signo $kf_target 2> /dev/null
    fi
    return $?
}
316 |
|
|
|
317 |
msalle |
2451 |
# Watchdog: wait for background process to finish or timeout
#   $1  process id of the probe
# When CRITTIMEOUT is smaller than TIMEOUT, a probe still running after
# CRITTIMEOUT seconds is sent SIGTERM. A probe still running after TIMEOUT
# seconds in total is sent SIGKILL. Both signals are sent to -<pid>.
#
# The pid is kept in a variable of its own (waiter_target), so that helper
# functions which assign a global $pid cannot change which process the final
# check looks at.
waiter() {
    waiter_target=$1
    if [ "$CRITTIMEOUT" -lt "$TIMEOUT" ];then
        sleep "$CRITTIMEOUT"
        if [ -n "$(ps -opid= -p "$waiter_target")" ];then
            log 2 "Child process $waiter_target is running after critical range $CRITTIMEOUT sec, sending SIGTERM"
            kill_func "-$waiter_target"
        fi
        # Remaining time until the hard timeout
        sleep $((TIMEOUT-CRITTIMEOUT))
    else
        sleep "$TIMEOUT"
    fi

    # If process still running: kill it
    if [ -n "$(ps -opid= -p "$waiter_target")" ];then
        # TIMEOUT exceeded: kill it
        log 2 "Child process $waiter_target is running after timeout $TIMEOUT sec," \
              "sending SIGKILL"
        kill_func -9 "-$waiter_target"
    fi
}
340 |
|
|
|
341 |
|
|
######################################################################## |
342 |
|
|
# |
343 |
|
|
# gLExec specific functions |
344 |
|
|
# |
345 |
|
|
######################################################################## |
346 |
|
|
|
347 |
|
|
# Converts gLExec exit values to corresponding nagios codes
#   $1  exit value of the gLExec run
# Sets the globals rc, code (nagios code) and summary (status text), and
# returns the nagios code:
#   0                 -> 0 (OK)
#   201,202,203,204   -> 2 (CRITICAL)
#   126 and the rest  -> 1 (WARNING)
glexecrc_to_nagios() {
    rc=$1

    if [ "$rc" = 0 ]; then
        code=0 # Success
        summary='success'
    elif [ "$rc" = 126 ]; then
        code=1 # Warning
        summary="executable can't be executed ($rc)"
    elif [ "$rc" = 201 ]; then
        code=2 # Critical
        summary="client error ($rc)"
    elif [ "$rc" = 202 ]; then
        code=2 # Critical
        summary="system error ($rc)"
    elif [ "$rc" = 203 ]; then
        code=2 # Critical
        summary="authorization error ($rc)"
    elif [ "$rc" = 204 ]; then
        code=2 # Critical
        summary="exit code overlap ($rc)"
    else
        code=1 # Warning
        summary="executable failed with exit code $rc"
    fi

    return $code
}
384 |
|
|
|
385 |
|
|
# Searches for gLExec and sets global GLEXEC_EXE variable
# Search order: $GLEXEC_LOCATION/sbin/glexec (when GLEXEC_LOCATION is set),
# then every directory in $PATH, $GLITE_LOCATION/sbin (GLITE_LOCATION
# defaults to /opt/glite) and a fixed list of system directories.
# GLEXEC_EXE is left untouched when nothing is found.
find_glexec() {
    # First look at GLEXEC_LOCATION
    if [ -n "$GLEXEC_LOCATION" ];then
        log 3 "GLEXEC_LOCATION=$GLEXEC_LOCATION"
        glexloc="${GLEXEC_LOCATION}/sbin/glexec"
        if [ ! -f "$glexloc" ];then
            log 2 "gLExec not found at \$GLEXEC_LOCATION"
        else
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return
        fi
    fi

    # Set GLITE_LOCATION if unset
    if [ -n "$GLITE_LOCATION" ];then
        log 3 "GLITE_LOCATION=$GLITE_LOCATION"
    else
        GLITE_LOCATION=/opt/glite
    fi

    # Check in PATH, GLITE_LOCATION and extra dirs
    searchdirs="$(echo $PATH|tr : ' ') ${GLITE_LOCATION}/sbin"
    searchdirs="$searchdirs /usr/local/sbin /usr/sbin /sbin /usr/local/bin /usr/bin"
    for dir in $searchdirs ; do
        glexloc="${dir}/glexec"
        log 3 "Looking for gLExec at $glexloc"
        [ -f "$glexloc" ] || continue
        log 2 "gLExec found at $glexloc"
        GLEXEC_EXE=$glexloc
        return
    done
}
421 |
|
|
|
422 |
msalle |
2454 |
# Full gLExec run including finding the command and printing nagios status
# when the run ends by itself. In case of a timeout this function is killed
# and the nagios status is produced by the main part of the script instead.
#
# Steps: locate glexec (CRITICAL when missing), check that X509_USER_PROXY
# and GLEXEC_CLIENT_CERT each point to a nonempty file (UNKNOWN otherwise),
# run "$GLEXEC_EXE $GLEXEC_CMD_EXE", map its exit value with
# glexecrc_to_nagios and downgrade a success that took longer than
# WARNTIMEOUT seconds to WARNING. The runtime is added as performance data.
# Always ends through nagios_status, i.e. it exits.
run_glexec() {
    # Store start time
    t1=$(date +%s)

    # Find glexec executable
    find_glexec

    # Test executable
    if [ -z "$GLEXEC_EXE" ] ; then
        code=2 # Critical
        summary='glexec command not found.'
        nagios_status $code "$summary"
    fi
    # Test proxy variable
    if [ -z "$X509_USER_PROXY" ] ; then
        code=3 # Unknown
        summary="\$X509_USER_PROXY is unset."
        nagios_status $code "$summary"
    fi
    # Test proxy file
    if [ ! -f "$X509_USER_PROXY" ] || [ ! -s "$X509_USER_PROXY" ] ; then
        code=3 # Unknown
        summary="\$X509_USER_PROXY does not point to a nonempty file."
        nagios_status $code "$summary"
    fi

    # Test client certificate file: both tests look at GLEXEC_CLIENT_CERT
    if [ ! -f "$GLEXEC_CLIENT_CERT" ] || [ ! -s "$GLEXEC_CLIENT_CERT" ] ; then
        code=3 # Unknown
        summary="\$GLEXEC_CLIENT_CERT does not point to a nonempty file."
        nagios_status $code "$summary"
    fi

    log 2 "Running $GLEXEC_EXE $GLEXEC_CMD_EXE"
    export GLEXEC_CLIENT_CERT X509_USER_PROXY
    # eval so that a payload string such as "/usr/bin/id -a" is split into
    # command and arguments
    if [ "$VERBOSE" -gt 1 ];then
        eval $GLEXEC_EXE $GLEXEC_CMD_EXE 2>&1
    else
        eval $GLEXEC_EXE $GLEXEC_CMD_EXE > /dev/null 2>&1
    fi
    glexecrc_to_nagios $?

    # Store end time
    dt=$(( $(date +%s) - t1))

    if [ "$code" -eq 0 ] && [ "$dt" -gt "$WARNTIMEOUT" ];then
        code=1 # Warning
        summary="gLExec took long time to succeed"
    fi

    perfdata="time=${dt}s;$WARNTIMEOUT;$CRITTIMEOUT;0"
    nagios_status $code "$summary|$perfdata"
}
476 |
|
|
|
477 |
|
|
######################################################################## |
478 |
|
|
# |
479 |
msalle |
2454 |
# main |
480 |
msalle |
2451 |
# |
481 |
|
|
######################################################################## |
482 |
|
|
|
483 |
msalle |
2454 |
# Everything the probe prints is piped through nagios_output, which puts the
# status line first and exits with the nagios code found in it.
{
    # Turn on jobcontrol (separate process groups for subshells), such that we
    # can kill the process group for the background processes.
    set -m

    # Parse cmdline arguments (long ones are converted in corresponding short
    # ones)
    parse_args "$@"

    # Start run_glexec in background to have control over timeout
    run_glexec &
    probe_pid=$!

    # Start watch process in background: will kill probe after timeout
    waiter $probe_pid &
    waiter_pid=$!

    # Wait for run_glexec: it will either end by itself or by the waiter()
    log 3 "Waiting at most $TIMEOUT seconds for probe $probe_pid to finish"
    wait_func $probe_pid
    probe_rc=$?

    # Kill the waiter if it is still there
    [ -z "$(ps -opid= -p $waiter_pid)" ] || {
        log 3 "Cleaning up waiter process $waiter_pid"
        kill_func -$waiter_pid
    }
    # Call wait here to prevent logging of termination at end of script
    wait_func $waiter_pid

    # If the probe was killed, its exit value will be outside the valid nagios
    # range of 0-3; otherwise run_glexec has already printed the status line.
    case "$probe_rc" in
        143) # run_glexec ended via SIGTERM
            nagios_status 2 "probe critical range of $CRITTIMEOUT seconds exceeded"
            ;;
        137) # run_glexec ended via SIGKILL
            nagios_status 2 "probe TIMEOUT of $TIMEOUT seconds exceeded"
            ;;
        0|1|2|3) # Normal ending of run_glexec, which has called nagios_status
            exit $probe_rc
            ;;
        *) # run_glexec ended prematurely?!
            nagios_status 3 "background process died unexpectly with rc=$probe_rc"
            ;;
    esac
} | nagios_output

# Exit status of the pipeline, i.e. of nagios_output
exit $?