/[pdpsoft]/trunk/nagios/glexec/check_glexec
ViewVC logotype

Annotation of /trunk/nagios/glexec/check_glexec

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2451 - (hide annotations) (download)
Tue Nov 29 13:53:29 2011 UTC (10 years, 10 months ago) by msalle
File size: 11704 byte(s)
Creating new subtree nagios/
Creating new subtree nagios/glexec/
Adding gLExec probe to nagios/glexec/

#!/bin/dash
#
# Copyright (C) Nikhef 2011
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author:
# Mischa Sall\'e <msalle@nikhef.nl>
# NIKHEF Amsterdam, the Netherlands
#
########################################################################
#
# Nagios probe to test functioning of gLExec
#
# Nagios state can be one of the following:
# - Missing glexec command: CRITICAL
# - input proxies empty:    UNKNOWN
# - timeout exceeded:       CRITICAL
# - gLExec exit codes:
#     0     glexec succeeded: OK
#     201   Client error:     CRITICAL
#     202   Internal error:   CRITICAL
#     203   Auth error:       CRITICAL
#     204   Overlap:          CRITICAL
#     126   execve failed:    WARNING
#     128+n signal:           WARNING
#     !=0   rc of payload:    WARNING
#
########################################################################

# version
VERSION=0.1

# plugin name and version
# (parameter expansion instead of `basename $0`: no fork, and safe when the
# script path contains whitespace)
PROG=${0##*/}
# command will be set after arguments are parsed
DEF_GLEXEC_CMD="id"
DEF_GLEXEC_CMD_ARGS="-a"
# glexec command itself
GLEXEC_EXE=""
# Default verbosity
VERBOSE=0
# Default timeout
TIMEOUT=10
# Default short timeout, longer than this results in warning
SHORTTIMEOUT=5
# Default GLEXEC_CLIENT_CERT will be set after arguments are parsed
GLEXEC_CLIENT_CERT=""

########################################################################
#
# general options
#
########################################################################

# Short usage text: prints a one-line synopsis on stdout and exits with 0.
# Globals: PROG (read)
shortusage() {
    printf '%s\n' "Usage: $PROG [options]"
    exit 0
}

# Long usage text: prints the full option overview on stdout and exits with 0.
# Globals: PROG, TIMEOUT, SHORTTIMEOUT, DEF_GLEXEC_CMD_EXE (all read)
# The -w and -c lines document options that the option parser accepts.
usage() {
    printf '%s\n' \
        "Usage: $PROG [options]" \
        "" \
        "Options:" \
        " -t|--timeout <timeout> maximum runtime for probe, default $TIMEOUT sec" \
        " -s|--shorttimeout <timeout> runtime after which to warn, default $SHORTTIMEOUT sec" \
        " -c|--critical <range> like --timeout, upper limit of a nagios range is used" \
        " -w|--warning <range> runtime after which to warn, upper limit of a nagios range is used" \
        " -x|--x509-user-proxy <file> set X509_USER_PROXY to given file" \
        " -g|--glexec-client-cert <file> set GLEXEC_CLIENT_CERT to given file" \
        " default: value of variable X509_USER_PROXY" \
        " -e|--execute <cmd> command to execute by gLExec" \
        " default: \"$DEF_GLEXEC_CMD_EXE\"" \
        " -v|--verbose be more verbose, more -v means more verbosity" \
        " -V|--version print version" \
        " -h|--help show this helptext"
    exit 0
}

# Log function: log <level> <message> [<message> ...]
# Prints every message on its own line on stdout when the global VERBOSE is
# at least <level>; prints nothing otherwise.
# Globals: VERBOSE (read)
log() {
    log_level=$1
    shift
    if [ "$VERBOSE" -ge "$log_level" ] ; then
        # Private loop variable: callers such as parse_err iterate with
        # their own "line" variable while calling us.
        for log_msg in "$@" ; do
            # printf instead of echo: messages starting with '-n' or
            # containing backslashes are printed literally.
            printf '%s\n' "$log_msg"
        done
    fi
}

# Prints nagios status line "<stat>: <summary>" and exits.
# Usage: nagios_printout <code> <summary words...>
#   0 -> OK, 1 -> WARNING, 2 -> CRITICAL, 3 -> UNKNOWN
# Any other code is reported as invalid in the status text and the exit
# status is forced to 3, so the exit status always stays within 0-3 (the
# main program treats anything above 3 as a killed probe).
# Never returns to the caller.
nagios_printout() {
    code=$1
    shift
    summary="$*"
    case "$code" in
        0) stat='OK' ;;
        1) stat='WARNING' ;;
        2) stat='CRITICAL' ;;
        3) stat='UNKNOWN' ;;
        *)
            # Double quotes, and $code rather than $1: the original code has
            # already been shifted away.
            stat="INVALID NAGIOS CODE $code"
            code=3
            ;;
    esac
    log 0 "$stat: $summary"
    exit "$code"
}

# parsing error function: parse_err <message> [<message> ...]
# Logs every message at verbosity level 2 and then terminates the probe via
# nagios_printout with code 3 (UNKNOWN), using the first non-empty message
# as the summary. Does not return.
parse_err() {
    code=3 # Unknown
    summary=""
    for err_msg in "$@" ; do
        if [ -z "$summary" ] ; then
            summary="$err_msg"
        fi
        log 2 "$err_msg"
    done
    nagios_printout "$code" "$summary"
}

# don't use builtin which since it might not exist
# Usage: which_cmd <name>
# Walks the directories of $PATH in order and prints the full path of the
# first regular, executable file called <name>. Prints nothing and returns 1
# when there is no such file. Empty PATH components are skipped.
# The level-3 log line is sent to stderr because callers capture stdout.
which_cmd() {
    which_rest=$PATH
    # Peel off one PATH component per iteration with parameter expansion, so
    # directories containing blanks or glob characters survive unchanged.
    while [ -n "$which_rest" ] ; do
        dir=${which_rest%%:*}
        case "$which_rest" in
            *:*) which_rest=${which_rest#*:} ;;
            *)   which_rest="" ;;
        esac
        [ -n "$dir" ] || continue
        cmd="${dir}/$1"
        log 3 "Looking for $1 in $dir" >&2
        if [ -f "$cmd" ] && [ -x "$cmd" ] ; then
            printf '%s\n' "$cmd"
            return 0
        fi
    done
    return 1
}

# Assumes range as input, finds upperlimit, currently @ is not understood
# see http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
# Usage: parse_range <range>
# Prints the upper limit of <range>: the part after the first ':' when there
# is one, the whole value otherwise. The printed limit may be empty (e.g. for
# "10:"); callers treat an empty result as invalid.
# Returns 1 without printing anything when more than one argument is given or
# when the range or its upper limit contains characters that are not allowed.
parse_range() {
    [ $# -gt 1 ] && return 1
    # invalid chars? only '~', ':' and digits may appear in the range
    case "$1" in
        *[!~:0-9]*) return 1 ;;
    esac
    # find upperlimit
    case "$1" in
        *:*) uplimit=${1#*:} ;;
        *)   uplimit=$1 ;;
    esac
    # invalid chars? the limit itself must be purely numeric
    case "$uplimit" in
        *[!0-9]*) return 1 ;;
    esac
    # print limit, note: might be empty
    printf '%s\n' "$uplimit"
    return 0
}

# Parses command line options (short form only)
# Usage: parse_short_args <short options and their arguments...>
# Sets the globals GLEXEC_CLIENT_CERT, X509_USER_PROXY, TIMEOUT, SHORTTIMEOUT,
# GLEXEC_CMD_EXE and VERBOSE. Invalid input ends the probe through parse_err.
#   -c and -t both set TIMEOUT, -w and -s both set SHORTTIMEOUT; the values
#   go through parse_range, so nagios ranges are accepted.
#   -H, -p and -u take an argument but are otherwise ignored.
# After the loop GLEXEC_CLIENT_CERT defaults to X509_USER_PROXY and
# GLEXEC_CMD_EXE defaults to DEF_GLEXEC_CMD_EXE.
parse_short_args() {
    # Restart option parsing so the function can be called more than once.
    OPTIND=1
    while getopts ":g:x:t:c:w:s:e:hvVH:p:u:" opt ; do
        case "$opt" in
            g) GLEXEC_CLIENT_CERT="$OPTARG" ;;
            x) X509_USER_PROXY="$OPTARG" ;;
            c|t)
                TIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$TIMEOUT" ] ; then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "TIMEOUT set to $TIMEOUT"
                fi
                ;;
            w|s)
                SHORTTIMEOUT=$(parse_range "$OPTARG")
                if [ -z "$SHORTTIMEOUT" ] ; then
                    parse_err "Not a valid timeout: \"$OPTARG\""
                else
                    log 2 "SHORTTIMEOUT set to $SHORTTIMEOUT"
                fi
                ;;
            e) GLEXEC_CMD_EXE="$OPTARG" ;;
            h) shortusage ;;
            V) nagios_printout 0 "$PROG version $VERSION" ;;
            v) VERBOSE=$((VERBOSE + 1)) ;;
            H) log 2 "$PROG: option -H/--hostname is not used" ;;
            p) log 2 "$PROG: option -p/--port is not used" ;;
            u) log 2 "$PROG: option -u/--url is not used" ;;
            :)
                parse_err "Option requires an argument -- '$OPTARG'" \
                          "Try \`$PROG -h' for more information."
                ;;
            \?)
                parse_err "Invalid option -- '$OPTARG'" \
                          "Try \`$PROG -h' for more information."
                ;;
        esac
    done
    # Check if we specified GLEXEC_CLIENT_CERT
    if [ -z "$GLEXEC_CLIENT_CERT" ] ; then
        GLEXEC_CLIENT_CERT="$X509_USER_PROXY"
        log 2 "Using same proxy for GLEXEC_CLIENT_CERT and X509_USER_PROXY"
    fi
    # Check if we specified a command
    if [ -z "$GLEXEC_CMD_EXE" ] ; then
        GLEXEC_CMD_EXE="$DEF_GLEXEC_CMD_EXE"
        log 2 "Using default payload command \"$GLEXEC_CMD_EXE\""
    fi
}

# Converts long options into short options
# Usage: parse_args "$@"
# First determines the default payload command: when DEF_GLEXEC_CMD is found
# by which_cmd, DEF_GLEXEC_CMD_EXE becomes "<path> <DEF_GLEXEC_CMD_ARGS>".
# Then every argument starting with "--" is replaced by its short option and
# the resulting list is handed to parse_short_args. --help prints the long
# usage text; an unknown long option ends the probe through parse_err.
# The translated list is built in the positional parameters rather than in a
# string that is eval'ed, so option values reach parse_short_args unchanged
# (blanks, quotes and '$' included) and are never executed as shell code.
parse_args() {
    # Find default executable
    cmd=$(which_cmd "$DEF_GLEXEC_CMD")
    if [ -n "$cmd" ] ; then
        DEF_GLEXEC_CMD_EXE="$cmd $DEF_GLEXEC_CMD_ARGS"
    fi
    # The word list of the for loop is expanded once, up front, so it is safe
    # to append the translated arguments behind the original ones here and to
    # shift the originals away afterwards.
    pa_nargs=$#
    for arg in "$@" ; do
        subarg=${arg#--}
        if [ "$subarg" != "$arg" ] ; then
            case "$subarg" in
                x509-user-proxy)    set -- "$@" -x ;;
                glexec-client-cert) set -- "$@" -g ;;
                execute)            set -- "$@" -e ;;
                timeout)            set -- "$@" -t ;;
                # -w sets the same variable and is known to the short parser
                shorttimeout)       set -- "$@" -w ;;
                verbose)            set -- "$@" -v ;;
                version)            set -- "$@" -V ;;
                help)               usage ;;
                # Unused long options:
                hostname)           set -- "$@" -H ;;
                port)               set -- "$@" -p ;;
                url)                set -- "$@" -u ;;
                # Nagios threshold options:
                warning)            set -- "$@" -w ;;
                critical)           set -- "$@" -c ;;
                *)
                    parse_err "$PROG: invalid longoption -- '$subarg'" \
                              "Try \`$PROG -h' for more information."
                    ;;
            esac
        else
            set -- "$@" "$arg"
        fi
    done
    shift "$pa_nargs"
    # Now parse the resulting short options
    parse_short_args "$@"
}

# wait for background process to finish or timeout
# Usage: waiter <pid> &
# Sleeps for TIMEOUT seconds. When <pid> is still alive afterwards it is
# killed with SIGKILL and the waiter exits with 3, otherwise it exits with 0.
# The function always exits, so it must run in its own (background) subshell.
waiter() {
    pid=$1

    # Sleep in the background and wait for it, so a TERM sent to the waiter
    # interrupts the wait and the handler below can remove the sleep as well.
    # A sleep in the foreground would be left behind as an orphan that keeps
    # the probe's output descriptors open until the timeout has passed.
    waiter_sleep_pid=""
    trap '[ -n "$waiter_sleep_pid" ] && kill "$waiter_sleep_pid" 2> /dev/null; exit 0' TERM
    sleep "$TIMEOUT" &
    waiter_sleep_pid=$!
    wait "$waiter_sleep_pid"

    # If process still running: kill it
    # (kill -0 only tests whether the process can be signalled)
    if kill -0 "$pid" 2> /dev/null ; then
        # TIMEOUT exceeded: kill it
        log 2 "Child process $pid is still running after timeout $TIMEOUT"
        # SIGKILL on purpose: the main program recognises a killed probe by
        # an exit status above 3, and the probe must not be able to ignore it.
        kill -9 "$pid"
        exit 3
    else
        exit 0
    fi
}

########################################################################
#
# gLExec specific functions
#
########################################################################

# Converts gLExec exit values to corresponding nagios codes
# Usage: glexecrc_to_nagios <glexec exit status>
# Sets the globals "code" (nagios code) and "summary" (status text) and also
# returns the nagios code as its own exit status:
#   0             -> 0 (OK)
#   201,202,203,204 -> 2 (CRITICAL)
#   anything else -> 1 (WARNING), 126 with its own summary text
glexecrc_to_nagios() {
    rc=$1

    case "$rc" in
        0)   code=0 ; summary='success' ;;
        126) code=1 ; summary="executable can't be executed ($rc)" ;;
        201) code=2 ; summary="client error ($rc)" ;;
        202) code=2 ; summary="system error ($rc)" ;;
        203) code=2 ; summary="authorization error ($rc)" ;;
        204) code=2 ; summary="exit code overlap ($rc)" ;;
        *)   code=1 ; summary="executable failed with exit code $rc" ;;
    esac

    return "$code"
}

# Searches for gLExec and sets global GLEXEC_EXE variable
# Search order:
#   1. $GLEXEC_LOCATION/sbin/glexec, when GLEXEC_LOCATION is set
#   2. the directories of $PATH
#   3. $GLITE_LOCATION/sbin (GLITE_LOCATION is set to /opt/glite when empty)
#   4. /usr/local/sbin /usr/sbin /sbin /usr/local/bin /usr/bin
# The first existing regular file wins. GLEXEC_EXE is left untouched when
# nothing is found.
find_glexec() {
    # First look at GLEXEC_LOCATION
    if [ -n "$GLEXEC_LOCATION" ] ; then
        log 3 "GLEXEC_LOCATION=$GLEXEC_LOCATION"
        glexloc="${GLEXEC_LOCATION}/sbin/glexec"
        if [ -f "$glexloc" ] ; then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return
        else
            log 2 "gLExec not found at \$GLEXEC_LOCATION"
        fi
    fi

    # Set GLITE_LOCATION if unset
    if [ -z "$GLITE_LOCATION" ] ; then
        GLITE_LOCATION=/opt/glite
    else
        log 3 "GLITE_LOCATION=$GLITE_LOCATION"
    fi

    # Check in PATH, GLITE_LOCATION and extra dirs
    # The list is kept colon separated and taken apart with parameter
    # expansion, so directories with blanks or glob characters stay intact.
    fg_rest="${PATH}:${GLITE_LOCATION}/sbin"
    fg_rest="${fg_rest}:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/bin:/usr/bin"
    while [ -n "$fg_rest" ] ; do
        dir=${fg_rest%%:*}
        case "$fg_rest" in
            *:*) fg_rest=${fg_rest#*:} ;;
            *)   fg_rest="" ;;
        esac
        # skip empty PATH components
        [ -n "$dir" ] || continue
        glexloc="${dir}/glexec"
        log 3 "Looking for gLExec at $glexloc"
        if [ -f "$glexloc" ] ; then
            log 2 "gLExec found at $glexloc"
            GLEXEC_EXE=$glexloc
            return
        fi
    done
}

# Full run:
# - search for glexec command
# - run glexec
# - print nagios status
# Every path ends in nagios_printout, which exits, so this function is meant
# to run in its own (background) subshell.
# Globals read: X509_USER_PROXY, GLEXEC_CLIENT_CERT, GLEXEC_CMD_EXE, VERBOSE,
#               SHORTTIMEOUT, TIMEOUT
# GLEXEC_EXE is set through find_glexec.
run_glexec() {
    # Store start time
    t1=$(date +%s)

    # Find glexec executable
    find_glexec

    # Test executable
    if [ -z "$GLEXEC_EXE" ] ; then
        nagios_printout 2 'glexec command not found.' # Critical
    fi
    # Test proxy variable
    if [ -z "$X509_USER_PROXY" ] ; then
        nagios_printout 3 "\$X509_USER_PROXY is unset." # Unknown
    fi
    # Test proxy file
    if [ ! -f "$X509_USER_PROXY" ] || [ ! -s "$X509_USER_PROXY" ] ; then
        nagios_printout 3 \
            "\$X509_USER_PROXY does not point to a nonempty file." # Unknown
    fi
    # Test client certificate file (both tests look at GLEXEC_CLIENT_CERT)
    if [ ! -f "$GLEXEC_CLIENT_CERT" ] || [ ! -s "$GLEXEC_CLIENT_CERT" ] ; then
        nagios_printout 3 \
            "\$GLEXEC_CLIENT_CERT does not point to a nonempty file." # Unknown
    fi

    log 2 "Running $GLEXEC_EXE $GLEXEC_CMD_EXE"
    export GLEXEC_CLIENT_CERT X509_USER_PROXY
    # NOTE: GLEXEC_CMD_EXE is a command line (payload plus arguments, possibly
    # given with -e), so it is deliberately evaluated by the shell. The path
    # of glexec itself is expanded inside the eval and stays a single word.
    if [ "$VERBOSE" -gt 1 ] ; then
        eval '"$GLEXEC_EXE"' "$GLEXEC_CMD_EXE" 2>&1
    else
        eval '"$GLEXEC_EXE"' "$GLEXEC_CMD_EXE" > /dev/null 2>&1
    fi
    # sets $code and $summary
    glexecrc_to_nagios $?

    # Store end time
    dt=$(( $(date +%s) - t1 ))

    # Success that took longer than the short timeout is only a warning
    if [ "$code" -eq 0 ] && [ "$dt" -gt "$SHORTTIMEOUT" ] ; then
        code=1 # Warning
        summary="gLExec took long time to succeed"
    fi

    perfdata="time=${dt}s;$SHORTTIMEOUT;$TIMEOUT;0"
    nagios_printout "$code" "$summary|$perfdata"
}

########################################################################
#
# main program
#
########################################################################

# Parse cmdline arguments (long ones are converted in corresponding short ones)
parse_args "$@"

# Start glexec_run in background to have control over timeout
run_glexec &
probe_pid=$!

# Start watch process in background: will kill probe after timeout
waiter "$probe_pid" &
waiter_pid=$!

# Wait for run_glexec: it will either end by itself or by the waiter()
log 3 "Waiting at most $TIMEOUT seconds for probe $probe_pid to finish"
if [ "$VERBOSE" -ge 1 ] ; then
    wait "$probe_pid"
else
    # hide the shell's own message about a killed child
    wait "$probe_pid" 2> /dev/null
fi
probe_rc=$?

# Kill the waiter if it is still there (kill -0 only tests for existence)
if kill -0 "$waiter_pid" 2> /dev/null ; then
    kill "$waiter_pid" 2> /dev/null
fi

# If probe was killed, its exit value will be outside valid nagios range of
# 0-3: report the timeout as CRITICAL (nagios_printout exits).
if [ "$probe_rc" -gt 3 ] ; then
    if [ "$VERBOSE" -eq 0 ] ; then
        nagios_printout 2 "probe TIMEOUT of $TIMEOUT seconds exceeded"
    else
        nagios_printout 2 "probe TIMEOUT of $TIMEOUT seconds exceeded (rc=$probe_rc)"
    fi
fi

# run_glexec has finished by itself: it already printed the status line, so
# only its exit value has to be passed on
exit "$probe_rc"

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28