/[pdpsoft]/trunk/nagios/glexec/check_glexec
ViewVC logotype

Annotation of /trunk/nagios/glexec/check_glexec

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2452 - (hide annotations) (download)
Tue Nov 29 13:58:12 2011 UTC (10 years, 10 months ago) by msalle
File size: 11739 byte(s)
Fix help text (in comments) and change shell to sh

1 msalle 2452 #!/bin/sh
2 msalle 2451 #
3     # Copyright (C) Nikhef 2011
4     #
5     # Licensed under the Apache License, Version 2.0 (the "License");
6     # you may not use this file except in compliance with the License.
7     # You may obtain a copy of the License at
8     #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     #
17     # Author:
18     # Mischa Sall\'e <msalle@nikhef.nl>
19     # NIKHEF Amsterdam, the Netherlands
20     #
21     ########################################################################
22     #
23     # Nagios probe to test functioning of gLExec
24     #
25     # Nagios state can be one of the following:
26     # - Missing glexec command: CRITICAL
27     # - input proxies empty: UNKNOWN
28 msalle 2452 # - short timeout exceeded: WARNING
29     # - timeout exceeded: CRITICAL
30 msalle 2451 # - gLExec exit codes:
31     # 0 glexec succeeded: OK
32     # 201 Client error: CRITICAL
33     # 202 Internal error: CRITICAL
34     # 203 Auth error: CRITICAL
35     # 204 Overlap: CRITICAL
36     # 126 execve failed: WARNING
37     # 128+n signal: WARNING
38     # !=0 rc of payload: WARNING
39     #
40     ########################################################################
41    
# version
VERSION=0.1

# Plugin name: last path component of the invoked script. Parameter expansion
# is used so a path containing whitespace cannot be word-split.
PROG=${0##*/}
# Default payload command and its arguments; parse_args() looks the command up
# in PATH and stores the combined result in DEF_GLEXEC_CMD_EXE
DEF_GLEXEC_CMD="id"
DEF_GLEXEC_CMD_ARGS="-a"
# Payload command strings start out empty, so that a value inherited from the
# environment can never end up being eval'ed by run_glexec()
DEF_GLEXEC_CMD_EXE=""
GLEXEC_CMD_EXE=""
# glexec command itself, filled in by find_glexec()
GLEXEC_EXE=""
# Default verbosity
VERBOSE=0
# Default timeout (seconds)
TIMEOUT=10
# Default short timeout (seconds), longer than this results in warning
SHORTTIMEOUT=5
# Default GLEXEC_CLIENT_CERT will be set after arguments are parsed
GLEXEC_CLIENT_CERT=""
60    
61     ########################################################################
62     #
63     # general options
64     #
65     ########################################################################
66    
# Prints the one-line usage summary on stdout and terminates with status 0.
# Reads the global PROG for the program name.
shortusage() {
    printf '%s\n' "Usage: $PROG [options]"
    exit 0
}
72    
# Prints the full help text on stdout and terminates with status 0.
# Reads the globals PROG, TIMEOUT, SHORTTIMEOUT and DEF_GLEXEC_CMD_EXE to fill
# in the program name and the defaults shown in the text.
usage() {
    # One argument per output line; '%s\n' is re-applied to every argument
    printf '%s\n' \
        "Usage: $PROG [options]" \
        "" \
        "Options:" \
        " -t|--timeout <timeout> maximum runtime for probe, default $TIMEOUT sec" \
        " -s|--shorttimeout <timeout> runtime after which to warn, default $SHORTTIMEOUT sec" \
        " -x|--x509-user-proxy <file> set X509_USER_PROXY to given file" \
        " -g|--glexec-client-cert <file> set GLEXEC_CLIENT_CERT to given file" \
        " default: value of variable X509_USER_PROXY" \
        " -e|--execute <cmd> command to execute by gLExec" \
        " default: \"$DEF_GLEXEC_CMD_EXE\"" \
        " -v|--verbose be more verbose, more -v means more verbosity" \
        " -V|--version print version" \
        " -h|--help show this helptext"
    exit 0
}
90    
# Log function: log <level> <message> [<message> ...]
# Writes every message on its own line to stdout, but only when the global
# VERBOSE is at least <level>. Always returns 0.
log() {
    level=$1
    shift
    # Not verbose enough (or the comparison is not possible): stay silent
    [ $VERBOSE -ge $level ] || return 0
    for msg in "$@" ; do
        echo "$msg"
    done
}
101    
# Prints nagios status line "<stat>: <summary>" and exits.
# Usage: nagios_printout <code> <summary words...>
#   <code>  nagios state: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN
#   the remaining arguments are joined with spaces to form the summary
# The line is printed via log at level 0. The function does not return: it
# exits with <code>, or with 3 (UNKNOWN) when <code> is not a valid state.
nagios_printout() {
    code=$1
    # Guard the shift: a bare 'shift' without arguments is an error
    [ $# -gt 0 ] && shift
    summary="$*"
    case "$code" in
        0) stat='OK' ;;
        1) stat='WARNING' ;;
        2) stat='CRITICAL' ;;
        3) stat='UNKNOWN' ;;
        *)
            # Report the offending value (must be expanded, hence the double
            # quotes) and fall back to UNKNOWN, since 'exit' needs a valid
            # numeric status.
            stat="INVALID NAGIOS CODE $code"
            code=3
            ;;
    esac
    log 0 "$stat: $summary"
    exit $code
}
117    
# Parsing error function: parse_err <message> [<message> ...]
# Logs every message at level 2 and then reports nagios state 3 (UNKNOWN)
# through nagios_printout, using the first non-empty message as summary.
parse_err() {
    summary=""
    for msg in "$@" ; do
        if [ -z "$summary" ];then
            summary="$msg"
        fi
        log 2 "$msg"
    done
    nagios_printout 3 "$summary"
}
128    
# don't use builtin which since it might not exist
# Usage: which_cmd <name>
# Walks the directories in PATH in order and prints the full path of the first
# regular file called <name>. Prints nothing when there is no such file.
# Returns 0 when found, 1 otherwise.
which_cmd() {
    wanted=$1
    # Walk PATH with parameter expansion instead of unquoted word splitting,
    # so directories containing whitespace or glob characters stay intact.
    remaining=$PATH
    while [ -n "$remaining" ]; do
        dir=${remaining%%:*}
        case "$remaining" in
            *:*) remaining=${remaining#*:} ;;
            *)   remaining="" ;;
        esac
        # Skip empty PATH components
        [ -n "$dir" ] || continue
        cmd="${dir}/${wanted}"
        # stdout carries the result, so debug output has to go to stderr
        log 3 "Looking for $wanted in $dir" >&2
        if [ -f "$cmd" ];then
            printf '%s\n' "$cmd"
            return 0
        fi
    done
    return 1
}
140    
# Assumes range as input, finds upperlimit, currently @ is not understood
# see http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
# Usage: parse_range <range>
# Prints the upper limit of <range> on stdout: the part after the first ':',
# or the whole value when there is no ':'. The printed limit is empty for
# ranges without an upper limit such as "10:".
# Returns 1, printing nothing, when more than one argument is given or when
# the range contains characters other than '~', ':' and digits, or when the
# upper limit is not purely numeric. Returns 0 otherwise.
parse_range() {
    if [ $# -gt 1 ];then
        return 1
    fi
    # invalid chars? Pattern matching keeps the value quoted, so globs and
    # echo options such as -n cannot slip through the check.
    case "$1" in
        *[!~:0-9]*) return 1 ;;
    esac
    # find upperlimit
    case "$1" in
        *:*) uplimit=${1#*:} ;;
        *)   uplimit=$1 ;;
    esac
    # invalid chars? (also rejects a second ':' or a '~' in the limit)
    case "$uplimit" in
        *[!0-9]*) return 1 ;;
    esac
    # print limit, note: might be empty
    printf '%s\n' "$uplimit"
    return 0
}
157    
# Parses command line options (short options only; parse_args converts the
# long ones before calling this function).
# Sets the globals GLEXEC_CLIENT_CERT, X509_USER_PROXY, TIMEOUT, SHORTTIMEOUT,
# GLEXEC_CMD_EXE and VERBOSE from the options. Afterwards GLEXEC_CLIENT_CERT
# defaults to X509_USER_PROXY and GLEXEC_CMD_EXE to DEF_GLEXEC_CMD_EXE.
# Invalid options, missing option arguments and invalid timeouts are reported
# via parse_err.
parse_short_args() {
    # getopts keeps its position in the global OPTIND: start from scratch
    OPTIND=1
    # -s is accepted next to -w, since the help text documents -s
    while getopts ":g:x:t:c:w:s:e:hvVH:p:u:" i ; do
        case "$i" in
            g) GLEXEC_CLIENT_CERT="$OPTARG" ;;
            x) X509_USER_PROXY="$OPTARG" ;;
            c|t)
                TIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$TIMEOUT" ];then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "TIMEOUT set to $TIMEOUT"
                fi
                ;;
            w|s)
                SHORTTIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$SHORTTIMEOUT" ];then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "SHORTTIMEOUT set to $SHORTTIMEOUT"
                fi
                ;;
            e) GLEXEC_CMD_EXE="$OPTARG" ;;
            h) shortusage ;;
            V) nagios_printout 0 "$PROG version $VERSION" ;;
            v) VERBOSE=$((VERBOSE + 1)) ;;
            H) log 2 "$PROG: option -H/--hostname is not used" ;;
            p) log 2 "$PROG: option -p/--port is not used" ;;
            u) log 2 "$PROG: option -u/--url is not used" ;;
            :)
                parse_err "Option requires an argument -- '$OPTARG'" \
                    "Try \`$PROG -h' for more information."
                ;;
            \?)
                parse_err "Invalid option -- '$OPTARG'" \
                    "Try \`$PROG -h' for more information."
                ;;
        esac
    done
    # Check if we specified GLEXEC_CLIENT_CERT
    if [ -z "$GLEXEC_CLIENT_CERT" ];then
        GLEXEC_CLIENT_CERT="$X509_USER_PROXY"
        log 2 "Using same proxy for GLEXEC_CLIENT_CERT and X509_USER_PROXY"
    fi
    # Check if we specified a command
    if [ -z "$GLEXEC_CMD_EXE" ];then
        GLEXEC_CMD_EXE="$DEF_GLEXEC_CMD_EXE"
        log 2 "Using default payload command \"$GLEXEC_CMD_EXE\""
    fi
}
204    
# Converts long options into short options and hands the resulting argument
# list to parse_short_args.
# Also determines the default payload: when DEF_GLEXEC_CMD is found by
# which_cmd, DEF_GLEXEC_CMD_EXE is set to its full path followed by
# DEF_GLEXEC_CMD_ARGS.
# Every argument starting with "--" is treated as a long option; unknown ones
# are reported via parse_err and --help calls usage. All other arguments are
# passed on unchanged.
parse_args() {
    # Find default executable
    cmd=$(which_cmd "$DEF_GLEXEC_CMD")
    if [ -n "$cmd" ];then
        DEF_GLEXEC_CMD_EXE="$cmd $DEF_GLEXEC_CMD_ARGS"
    fi
    # Append the converted arguments behind the original ones and drop the
    # originals afterwards. This keeps every argument intact (whitespace,
    # quotes, '$') without passing user input through eval.
    nargs=$#
    for arg in "$@" ; do
        case "$arg" in
            --*)
                case "${arg#--}" in
                    x509-user-proxy)    opt="-x" ;;
                    glexec-client-cert) opt="-g" ;;
                    execute)            opt="-e" ;;
                    timeout)            opt="-t" ;;
                    # -w is the short option parse_short_args uses for the
                    # short (warning) timeout
                    shorttimeout)       opt="-w" ;;
                    verbose)            opt="-v" ;;
                    version)            opt="-V" ;;
                    help)               usage ;;
                    # Unused long options:
                    hostname)           opt="-H" ;;
                    port)               opt="-p" ;;
                    url)                opt="-u" ;;
                    warning)            opt="-w" ;;
                    critical)           opt="-c" ;;
                    *)
                        parse_err "$PROG: invalid longoption -- '${arg#--}'" \
                            "Try \`$PROG -h' for more information."
                        ;;
                esac
                set -- "$@" "$opt"
                ;;
            *)
                set -- "$@" "$arg"
                ;;
        esac
    done
    shift "$nargs"
    # Now parse the resulting short options
    parse_short_args "$@"
}
243    
# wait for background process to finish or timeout
# Usage: waiter <pid>
# Sleeps for TIMEOUT seconds and then checks with ps whether <pid> still
# exists. If so the process is killed with SIGKILL and the function exits
# with 3, otherwise it exits with 0. Note that it exits rather than returns.
waiter() {
    pid=$1
    sleep $TIMEOUT

    # Process is gone already: nothing left to do
    if [ -z "$(ps -opid= -p $pid)" ];then
        exit 0
    fi

    # TIMEOUT exceeded: kill it
    log 2 "Child process $pid is still running after timeout $TIMEOUT"
    kill -9 $pid
    exit 3
}
259    
260     ########################################################################
261     #
262     # gLExec specific functions
263     #
264     ########################################################################
265    
# Converts gLExec exit values to corresponding nagios codes
# Usage: glexecrc_to_nagios <rc>
# Sets the globals rc, code (0 OK, 1 WARNING, 2 CRITICAL) and summary (short
# description of the result) and returns the value of code.
glexecrc_to_nagios() {
    rc=$1

    # Nagios state: the gLExec error codes 201-204 are critical, any other
    # non-zero value is a warning
    case "$rc" in
        0)               code=0 ;; # Success
        201|202|203|204) code=2 ;; # Critical
        *)               code=1 ;; # Warning
    esac

    # Matching summary text
    case "$rc" in
        0)   summary='success' ;;
        126) summary="executable can't be executed ($rc)" ;;
        201) summary="client error ($rc)" ;;
        202) summary="system error ($rc)" ;;
        203) summary="authorization error ($rc)" ;;
        204) summary="exit code overlap ($rc)" ;;
        *)   summary="executable failed with exit code $rc" ;;
    esac

    return $code
}
303    
# Searches for gLExec and sets global GLEXEC_EXE variable
# Search order:
#  1. $GLEXEC_LOCATION/sbin/glexec, when GLEXEC_LOCATION is set
#  2. every directory in PATH
#  3. $GLITE_LOCATION/sbin (GLITE_LOCATION is set to /opt/glite when empty)
#  4. /usr/local/sbin /usr/sbin /sbin /usr/local/bin /usr/bin
# The first regular file called glexec wins. GLEXEC_EXE is left untouched when
# nothing is found.
find_glexec() {
    # First look at GLEXEC_LOCATION
    if [ -n "$GLEXEC_LOCATION" ];then
        log 3 "GLEXEC_LOCATION=$GLEXEC_LOCATION"
        glexloc="${GLEXEC_LOCATION}/sbin/glexec"
        if [ -f "$glexloc" ];then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return 0
        fi
        log 2 "gLExec not found at \$GLEXEC_LOCATION"
    fi

    # Set GLITE_LOCATION if unset
    if [ -z "$GLITE_LOCATION" ];then
        GLITE_LOCATION=/opt/glite
    else
        log 3 "GLITE_LOCATION=$GLITE_LOCATION"
    fi

    # Check in PATH, GLITE_LOCATION and extra dirs. The list is walked with
    # parameter expansion instead of unquoted word splitting, so directories
    # containing whitespace or glob characters stay intact.
    searchpath="${PATH}:${GLITE_LOCATION}/sbin"
    searchpath="${searchpath}:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin"
    while [ -n "$searchpath" ]; do
        dir=${searchpath%%:*}
        case "$searchpath" in
            *:*) searchpath=${searchpath#*:} ;;
            *)   searchpath="" ;;
        esac
        # Skip empty components
        [ -n "$dir" ] || continue
        glexloc="${dir}/glexec"
        log 3 "Looking for gLExec at $glexloc"
        if [ -f "$glexloc" ];then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return 0
        fi
    done
    return 0
}
340    
# Full run:
# - search for glexec command
# - run glexec
# - print nagios status
# Reads the globals X509_USER_PROXY, GLEXEC_CLIENT_CERT, GLEXEC_CMD_EXE,
# VERBOSE, SHORTTIMEOUT and TIMEOUT. Every path ends in nagios_printout, which
# is expected to print the status line and exit with the nagios code:
# - no glexec found: CRITICAL
# - X509_USER_PROXY unset, or proxy/client cert not a nonempty file: UNKNOWN
# - otherwise the state derived by glexecrc_to_nagios from the glexec exit
#   value, turned into WARNING when a successful run took longer than
#   SHORTTIMEOUT seconds
run_glexec() {
    # Store start time
    t1=$(date +%s)

    # Find glexec executable
    find_glexec

    # Test executable
    if [ -z "$GLEXEC_EXE" ] ; then
        nagios_printout 2 'glexec command not found.' # Critical
    fi
    # Test proxy variable
    if [ -z "$X509_USER_PROXY" ] ; then
        nagios_printout 3 "\$X509_USER_PROXY is unset." # Unknown
    fi
    # Test proxy file
    if [ ! -f "$X509_USER_PROXY" ] || [ ! -s "$X509_USER_PROXY" ] ; then
        nagios_printout 3 \
            "\$X509_USER_PROXY does not point to a nonempty file." # Unknown
    fi
    # Test client cert file: both tests must look at GLEXEC_CLIENT_CERT itself
    if [ ! -f "$GLEXEC_CLIENT_CERT" ] || [ ! -s "$GLEXEC_CLIENT_CERT" ] ; then
        nagios_printout 3 \
            "\$GLEXEC_CLIENT_CERT does not point to a nonempty file." # Unknown
    fi

    log 2 "Running $GLEXEC_EXE $GLEXEC_CMD_EXE"
    export GLEXEC_CLIENT_CERT X509_USER_PROXY
    # The payload is a command line string and is therefore parsed by eval;
    # the glexec path itself is passed quoted so it is used as a single word.
    # Payload output is only shown at verbosity 2 and higher.
    if [ "$VERBOSE" -gt 1 ];then
        eval "\"\$GLEXEC_EXE\" $GLEXEC_CMD_EXE" 2>&1
    else
        eval "\"\$GLEXEC_EXE\" $GLEXEC_CMD_EXE" > /dev/null 2>&1
    fi
    # Sets code and summary from the glexec exit value
    glexecrc_to_nagios $?

    # Elapsed time in seconds
    dt=$(( $(date +%s) - t1 ))

    if [ "$code" -eq 0 ] && [ "$dt" -gt "$SHORTTIMEOUT" ];then
        code=1 # Warning
        summary="gLExec took long time to succeed"
    fi

    # Nagios performance data: value;warn;crit;min
    perfdata="time=${dt}s;$SHORTTIMEOUT;$TIMEOUT;0"
    nagios_printout $code "$summary|$perfdata"
}
397    
398     ########################################################################
399     #
400     # main program
401     #
402     ########################################################################
403    
# Parse cmdline arguments (long ones are converted in corresponding short ones)
parse_args "$@"

# Start run_glexec in background to have control over timeout; it runs in a
# subshell, so its exit there only ends the probe, not this script
run_glexec &
probe_pid=$!

# Start watch process in background: will kill probe after timeout
waiter $probe_pid &
waiter_pid=$!

# Wait for run_glexec: it will either end by itself or by the waiter()
log 3 "Waiting at most $TIMEOUT seconds for probe $probe_pid to finish"
# At verbosity 0 anything wait prints on stderr is discarded
# NOTE(review): presumably this hides the shell's notice about the killed
# probe - confirm
if [ $VERBOSE -ge 1 ];then
    wait $probe_pid
else
    wait $probe_pid 2> /dev/null
fi
# Exit value of the probe as reported by wait
probe_rc=$?

# Kill the waiter if it is still there
if [ -n "`ps -opid= -p $waiter_pid`" ];then
    kill $waiter_pid 2> /dev/null
fi

# If probe was killed, its exit value will be outside valid nagios range of 0-3
if [ $probe_rc -gt 3 ];then
    code=2 # Critical
    # The raw exit value is only added to the message when verbose
    if [ $VERBOSE -eq 0 ];then
        nagios_printout $code "probe TIMEOUT of $TIMEOUT seconds exceeded"
    else
        nagios_printout $code "probe TIMEOUT of $TIMEOUT seconds exceeded (rc=$probe_rc)"
    fi
else
    code=$probe_rc
fi

# run_glexec has finished and has already printed the status line: exit with
# its exit value
exit $probe_rc

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28