/[pdpsoft]/trunk/nagios/glexec/check_glexec
ViewVC logotype

Contents of /trunk/nagios/glexec/check_glexec

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2451 - (show annotations) (download)
Tue Nov 29 13:53:29 2011 UTC (10 years, 8 months ago) by msalle
File size: 11704 byte(s)
Creating new subtree nagios/
Creating new subtree nagios/glexec/
Adding gLExec probe to nagios/glexec/

1 #!/bin/dash
2 #
3 # Copyright (C) Nikhef 2011
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 # Author:
18 # Mischa Sall\'e <msalle@nikhef.nl>
19 # NIKHEF Amsterdam, the Netherlands
20 #
21 ########################################################################
22 #
23 # Nagios probe to test functioning of gLExec
24 #
25 # Nagios state can be one of the following:
26 # - Missing glexec command: CRITICAL
27 # - input proxies empty: UNKNOWN
28 # - timeout exceeded: CRITICAL
29 # - gLExec exit codes:
30 # 0 glexec succeeded: OK
31 # 201 Client error: CRITICAL
32 # 202 Internal error: CRITICAL
33 # 203 Auth error: CRITICAL
34 # 204 Overlap: CRITICAL
35 # 126 execve failed: WARNING
36 # 128+n signal: WARNING
37 # !=0 rc of payload: WARNING
38 #
39 ########################################################################
40
# Probe version, reported by -V/--version
VERSION=0.1

# Plugin name as invoked, used in usage and error messages.
# Derived with parameter expansion rather than an unquoted `basename $0`,
# so a script path containing whitespace no longer breaks it.
PROG=${0##*/}
# Default payload command and its arguments; the full path is resolved
# later, when the arguments are parsed
DEF_GLEXEC_CMD="id"
DEF_GLEXEC_CMD_ARGS="-a"
# Path of the glexec executable itself, filled in by find_glexec
GLEXEC_EXE=""
# Default verbosity
VERBOSE=0
# Default timeout (seconds) after which the probe is killed
TIMEOUT=10
# Default short timeout (seconds), a longer runtime results in a warning
SHORTTIMEOUT=5
# Default GLEXEC_CLIENT_CERT will be set after arguments are parsed
GLEXEC_CLIENT_CERT=""
59
60 ########################################################################
61 #
62 # general options
63 #
64 ########################################################################
65
# Print the one-line usage summary and terminate with exit status 0
shortusage() {
    usage_line="Usage: $PROG [options]"
    echo "$usage_line"
    exit 0
}
71
# Print the full help text (one line per option, with the current default
# values of TIMEOUT, SHORTTIMEOUT and DEF_GLEXEC_CMD_EXE filled in) and
# terminate with exit status 0
usage() {
    printf '%s\n' \
        "Usage: $PROG [options]" \
        "" \
        "Options:" \
        " -t|--timeout <timeout> maximum runtime for probe, default $TIMEOUT sec" \
        " -s|--shorttimeout <timeout> runtime after which to warn, default $SHORTTIMEOUT sec" \
        " -x|--x509-user-proxy <file> set X509_USER_PROXY to given file" \
        " -g|--glexec-client-cert <file> set GLEXEC_CLIENT_CERT to given file" \
        " default: value of variable X509_USER_PROXY" \
        " -e|--execute <cmd> command to execute by gLExec" \
        " default: \"$DEF_GLEXEC_CMD_EXE\"" \
        " -v|--verbose be more verbose, more -v means more verbosity" \
        " -V|--version print version" \
        " -h|--help show this helptext"
    exit 0
}
89
# Log function: log <level> <message>...
# Prints every message argument on its own line, but only when the global
# VERBOSE is at least <level>; otherwise prints nothing.
log() {
    min_verbosity=$1
    shift
    # Not verbose enough: stay silent
    [ $VERBOSE -ge $min_verbosity ] || return 0
    for msg in "$@" ; do
        echo "$msg"
    done
}
100
# Prints nagios status line "<stat>: <summary>" and exits.
# Arguments: $1     - nagios code (0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN)
#            $2...  - summary text, joined with spaces
# Exits with the given code. A code outside 0-3 is reported as
# "INVALID NAGIOS CODE <code>" and the exit status is forced to 3 (UNKNOWN),
# so the probe never hands an out-of-range status to nagios.
nagios_printout() {
    code=$1
    shift
    summary="$*"
    case "$code" in
        0) stat='OK' ;;
        1) stat='WARNING' ;;
        2) stat='CRITICAL' ;;
        3) stat='UNKNOWN' ;;
        *)
            # Use $code in double quotes: the positional parameters have
            # already been shifted, and single quotes would not expand.
            stat="INVALID NAGIOS CODE $code"
            code=3
            ;;
    esac
    log 0 "$stat: $summary"
    exit "$code"
}
116
# Command line parsing error: parse_err <line>...
# Every line is logged at verbosity 2; the first non-empty line becomes the
# summary of an UNKNOWN (3) nagios status, with which the probe terminates.
parse_err() {
    summary=""
    for msg in "$@" ; do
        if [ -z "$summary" ];then
            summary="$msg"
        fi
        log 2 "$msg"
    done
    code=3 # Unknown
    nagios_printout $code "$summary"
}
127
# Minimal replacement for which(1), which might not exist: which_cmd <name>
# Walks the directories in $PATH in order and prints the full path of the
# first regular, executable file called <name>.
# Returns 0 when found, 1 (printing nothing) otherwise.
which_cmd() {
    # Split PATH on ':' with parameter expansion, so directories containing
    # whitespace or glob characters are handled correctly.
    wc_rest="$PATH:"
    while [ -n "$wc_rest" ]; do
        dir=${wc_rest%%:*}
        wc_rest=${wc_rest#*:}
        # Empty PATH entries are skipped
        [ -n "$dir" ] || continue
        cmd="${dir}/$1"
        # Callers capture stdout as the result, so diagnostics go to stderr
        log 3 "Looking for $1 in $dir" >&2
        if [ -f "$cmd" ] && [ -x "$cmd" ]; then
            printf '%s\n' "$cmd"
            return 0
        fi
    done
    return 1
}
139
# Assumes a nagios range as input and finds its upper limit; the leading @
# (inverted range) is currently not understood.
# see http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
# Arguments: $1 - range, e.g. "10", "5:10" or "~:10"
# Outputs:   the upper limit on stdout; this may be an empty line, e.g. for
#            "10:", which has no explicit upper limit
# Returns:   0 on success, 1 (printing nothing) for more than one argument
#            or for input containing characters other than digits, ':' and '~'
# Implemented with shell pattern matching only: no word splitting, globbing
# or echo option parsing is applied to the input, and nothing is forked.
parse_range() {
    [ $# -gt 1 ] && return 1
    # invalid chars?
    case "$1" in
        *[!~:0-9]*) return 1 ;;
    esac
    # find upperlimit: everything after the first ':' if there is one
    case "$1" in
        *:*) uplimit=${1#*:} ;;
        *)   uplimit=$1 ;;
    esac
    # the limit itself must be purely numeric (or empty)
    case "$uplimit" in
        *[!0-9]*) return 1 ;;
    esac
    # print limit, note: might be empty
    printf '%s\n' "$uplimit"
    return 0
}
156
# Parses the (short) command line options and sets the corresponding globals:
#   -g <file>   GLEXEC_CLIENT_CERT
#   -x <file>   X509_USER_PROXY
#   -t|-c <n>   TIMEOUT (upper limit of a nagios range)
#   -s|-w <n>   SHORTTIMEOUT (upper limit of a nagios range)
#   -e <cmd>    GLEXEC_CMD_EXE
#   -v          increases VERBOSE by one
#   -h / -V     print short usage / version and exit
#   -H, -p, -u  accepted but unused
# Invalid options, missing arguments and invalid timeouts end the probe via
# parse_err. Afterwards GLEXEC_CLIENT_CERT defaults to X509_USER_PROXY and
# GLEXEC_CMD_EXE to DEF_GLEXEC_CMD_EXE when they were not specified.
parse_short_args() {
    # Always start scanning at the first argument
    OPTIND=1
    while getopts ":g:x:t:c:w:s:e:hvVH:p:u:" i ; do
        case "$i" in
            g) GLEXEC_CLIENT_CERT="$OPTARG" ;;
            x) X509_USER_PROXY="$OPTARG" ;;
            c|t)
                TIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$TIMEOUT" ];then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "TIMEOUT set to $TIMEOUT"
                fi
                ;;
            # -s is the documented short form of --shorttimeout, -w is the
            # conventional nagios --warning spelling of the same threshold
            w|s)
                SHORTTIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$SHORTTIMEOUT" ];then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "SHORTTIMEOUT set to $SHORTTIMEOUT"
                fi
                ;;
            e) GLEXEC_CMD_EXE="$OPTARG" ;;
            h) shortusage ;;
            V) nagios_printout 0 "$PROG version $VERSION" ;;
            v) VERBOSE=$((VERBOSE + 1)) ;;
            H) log 2 "$PROG: option -H/--hostname is not used" ;;
            p) log 2 "$PROG: option -p/--port is not used" ;;
            u) log 2 "$PROG: option -u/--url is not used" ;;
            :)
                parse_err "Option requires an argument -- '$OPTARG'" \
                    "Try \`$PROG -h' for more information."
                ;;
            \?)
                parse_err "Invalid option -- '$OPTARG'" \
                    "Try \`$PROG -h' for more information."
                ;;
        esac
    done
    # Check if we specified GLEXEC_CLIENT_CERT
    if [ -z "$GLEXEC_CLIENT_CERT" ];then
        GLEXEC_CLIENT_CERT="$X509_USER_PROXY"
        log 2 "Using same proxy for GLEXEC_CLIENT_CERT and X509_USER_PROXY"
    fi
    # Check if we specified a command
    if [ -z "$GLEXEC_CMD_EXE" ];then
        GLEXEC_CMD_EXE="$DEF_GLEXEC_CMD_EXE"
        log 2 "Using default payload command \"$GLEXEC_CMD_EXE\""
    fi
}
203
# Converts long options into short options and hands the result to
# parse_short_args: parse_args "$@"
# Also resolves the default payload command: when DEF_GLEXEC_CMD is found by
# which_cmd, DEF_GLEXEC_CMD_EXE is set to its full path plus
# DEF_GLEXEC_CMD_ARGS.
# --help prints the long usage text and exits; an unknown long option ends
# the probe via parse_err. All other arguments are passed on verbatim: the
# argument list is rebuilt with `set --` instead of being re-quoted and
# eval'ed, so quotes, '$', backticks and whitespace inside an argument are
# neither expanded nor mangled.
parse_args() {
    # Find default executable
    cmd=$(which_cmd "$DEF_GLEXEC_CMD")
    if [ -n "$cmd" ];then
        DEF_GLEXEC_CMD_EXE="$cmd $DEF_GLEXEC_CMD_ARGS"
    fi

    # The word list of the for loop is expanded once, before the first
    # iteration, so the positional parameters can safely be cleared on the
    # first pass and refilled with the translated arguments.
    pa_first=1
    for arg in "$@" ; do
        if [ "$pa_first" -eq 1 ];then
            set --
            pa_first=0
        fi
        case "$arg" in
            --*)
                subarg=${arg#--}
                case "$subarg" in
                    x509-user-proxy)    arg="-x" ;;
                    glexec-client-cert) arg="-g" ;;
                    execute)            arg="-e" ;;
                    timeout)            arg="-t" ;;
                    # -w is the short option every version of the short
                    # option parser understands for this threshold
                    shorttimeout)       arg="-w" ;;
                    verbose)            arg="-v" ;;
                    version)            arg="-V" ;;
                    help)               usage ;;
                    # Unused long options:
                    hostname)           arg="-H" ;;
                    port)               arg="-p" ;;
                    url)                arg="-u" ;;
                    warning)            arg="-w" ;;
                    critical)           arg="-c" ;;
                    *)
                        parse_err "$PROG: invalid longoption -- '$subarg'" \
                            "Try \`$PROG -h' for more information."
                        ;;
                esac
                ;;
        esac
        set -- "$@" "$arg"
    done
    # Now parse the resulting short options
    parse_short_args "$@"
}
242
# Watchdog for a background process: waiter <pid>
# Sleeps for TIMEOUT seconds and then checks whether <pid> still exists.
# If it does, it is killed with SIGKILL and the waiter exits with 3;
# if it has already gone, the waiter exits with 0.
waiter() {
    pid=$1
    sleep $TIMEOUT

    # Process already gone: nothing to do
    still_running=`ps -opid= -p $pid`
    if [ -z "$still_running" ];then
        exit 0
    fi

    # TIMEOUT exceeded: kill it
    log 2 "Child process $pid is still running after timeout $TIMEOUT"
    kill -9 $pid
    exit 3
}
258
259 ########################################################################
260 #
261 # gLExec specific functions
262 #
263 ########################################################################
264
# Converts a gLExec exit value into the corresponding nagios code:
# glexecrc_to_nagios <rc>
# Sets the globals `code` (nagios code) and `summary` (human readable text)
# and returns `code` as its own return value.
glexecrc_to_nagios() {
    rc=$1

    # Severity: 0 is OK, the gLExec-specific codes 201-204 are critical,
    # everything else (including 126 and payload failures) is a warning
    case "$rc" in
        0)               code=0 ;; # Success
        201|202|203|204) code=2 ;; # Critical
        *)               code=1 ;; # Warning
    esac

    # Matching summary text
    case "$rc" in
        0)   summary='success' ;;
        126) summary="executable can't be executed ($rc)" ;;
        201) summary="client error ($rc)" ;;
        202) summary="system error ($rc)" ;;
        203) summary="authorization error ($rc)" ;;
        204) summary="exit code overlap ($rc)" ;;
        *)   summary="executable failed with exit code $rc" ;;
    esac

    return $code
}
302
# Searches for gLExec and sets the global GLEXEC_EXE variable to the first
# regular file found; GLEXEC_EXE is left untouched when nothing is found.
# Search order:
#   1. $GLEXEC_LOCATION/sbin/glexec (when GLEXEC_LOCATION is set)
#   2. every directory in $PATH
#   3. $GLITE_LOCATION/sbin (GLITE_LOCATION is set to /opt/glite if unset)
#   4. /usr/local/sbin /usr/sbin /sbin /usr/local/bin /usr/bin
find_glexec() {
    # First look at GLEXEC_LOCATION
    if [ -n "$GLEXEC_LOCATION" ];then
        log 3 "GLEXEC_LOCATION=$GLEXEC_LOCATION"
        glexloc="${GLEXEC_LOCATION}/sbin/glexec"
        if [ -f "$glexloc" ];then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return
        else
            log 2 "gLExec not found at \$GLEXEC_LOCATION"
        fi
    fi

    # Set GLITE_LOCATION if unset
    if [ -z "$GLITE_LOCATION" ];then
        GLITE_LOCATION=/opt/glite
    else
        log 3 "GLITE_LOCATION=$GLITE_LOCATION"
    fi

    # Check in PATH, GLITE_LOCATION and extra dirs. The list is kept
    # ':'-separated and taken apart with parameter expansion, so directories
    # containing whitespace or glob characters are searched correctly.
    fg_rest="$PATH:${GLITE_LOCATION}/sbin"
    fg_rest="$fg_rest:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin:"
    while [ -n "$fg_rest" ]; do
        dir=${fg_rest%%:*}
        fg_rest=${fg_rest#*:}
        # Empty entries are skipped
        [ -n "$dir" ] || continue
        glexloc="${dir}/glexec"
        log 3 "Looking for gLExec at $glexloc"
        if [ -f "$glexloc" ];then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return
        fi
    done
}
339
# Full run:
# - search for glexec command (CRITICAL when not found)
# - check that X509_USER_PROXY and GLEXEC_CLIENT_CERT each point to a
#   nonempty regular file (UNKNOWN otherwise)
# - run glexec with the payload command GLEXEC_CMD_EXE; its output is shown
#   only when VERBOSE is greater than 1
# - print nagios status, including the runtime as performance data
# A successful run that took longer than SHORTTIMEOUT seconds is reported as
# WARNING. Never returns: always ends the (sub)shell via nagios_printout.
run_glexec() {
    # Store start time
    t1=$(date +%s)

    # Find glexec executable
    find_glexec

    # Test executable
    if [ -z "$GLEXEC_EXE" ] ; then
        code=2 # Critical
        summary='glexec command not found.'
        nagios_printout $code "$summary"
    fi
    # Test proxy variable
    if [ -z "$X509_USER_PROXY" ] ; then
        code=3 # Unknown
        summary="\$X509_USER_PROXY is unset."
        nagios_printout $code "$summary"
    fi
    # Test proxy file
    if [ ! -f "$X509_USER_PROXY" ] || [ ! -s "$X509_USER_PROXY" ] ; then
        code=3 # Unknown
        summary="\$X509_USER_PROXY does not point to a nonempty file."
        nagios_printout $code "$summary"
    fi
    # Test client certificate file: both tests must look at
    # GLEXEC_CLIENT_CERT itself, not at the proxy checked above
    if [ ! -f "$GLEXEC_CLIENT_CERT" ] || [ ! -s "$GLEXEC_CLIENT_CERT" ] ; then
        code=3 # Unknown
        summary="\$GLEXEC_CLIENT_CERT does not point to a nonempty file."
        nagios_printout $code "$summary"
    fi

    log 2 "Running $GLEXEC_EXE $GLEXEC_CMD_EXE"
    export GLEXEC_CLIENT_CERT X509_USER_PROXY
    # The payload is a command *string* and therefore needs eval; the path
    # of glexec itself is kept as one quoted word so that whitespace in it
    # does not split the command.
    if [ $VERBOSE -gt 1 ];then
        eval "\"\$GLEXEC_EXE\" $GLEXEC_CMD_EXE" 2>&1
        glexec_rc=$?
    else
        eval "\"\$GLEXEC_EXE\" $GLEXEC_CMD_EXE" > /dev/null 2>&1
        glexec_rc=$?
    fi
    # Sets the globals code and summary
    glexecrc_to_nagios $glexec_rc

    # Elapsed time in seconds
    dt=$(( $(date +%s) - t1 ))

    if [ "$code" -eq 0 ] && [ "$dt" -gt "$SHORTTIMEOUT" ];then
        code=1 # Warning
        summary="gLExec took long time to succeed"
    fi

    perfdata="time=${dt}s;$SHORTTIMEOUT;$TIMEOUT;0"
    nagios_printout $code "$summary|$perfdata"
}
396
397 ########################################################################
398 #
399 # main program
400 #
401 ########################################################################
402
# Parse cmdline arguments (long ones are converted in corresponding short ones)
parse_args "$@"

# Start run_glexec in background to have control over timeout
run_glexec &
probe_pid=$!

# Start watch process in background: will kill probe after timeout
waiter $probe_pid &
waiter_pid=$!

# Wait for run_glexec: it will either end by itself or by the waiter().
# Messages of the shell about the killed job are only shown when verbose.
log 3 "Waiting at most $TIMEOUT seconds for probe $probe_pid to finish"
if [ $VERBOSE -ge 1 ];then
    wait $probe_pid
    probe_rc=$?
else
    wait $probe_pid 2> /dev/null
    probe_rc=$?
fi

# Kill the waiter if it is still there
waiter_alive=`ps -opid= -p $waiter_pid`
if [ -n "$waiter_alive" ];then
    kill $waiter_pid 2> /dev/null
fi

# If the probe was killed, its exit value will be outside the valid nagios
# range of 0-3: report the timeout (nagios_printout does not return)
if [ $probe_rc -gt 3 ];then
    code=2 # Critical
    timeout_summary="probe TIMEOUT of $TIMEOUT seconds exceeded"
    # Only mention the raw exit value when running verbosely
    [ $VERBOSE -eq 0 ] || timeout_summary="$timeout_summary (rc=$probe_rc)"
    nagios_printout $code "$timeout_summary"
fi

# run_glexec has finished by itself: exit with its exit value
code=$probe_rc
exit $probe_rc

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28