/[pdpsoft]/trunk/nagios/ees/check_ees
ViewVC logotype

Annotation of /trunk/nagios/ees/check_ees

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2461 - (hide annotations) (download)
Tue Dec 6 12:27:27 2011 UTC (10 years, 10 months ago) by msalle
File size: 12256 byte(s)
Allow floats as timeouts and add handling of invalid values.


1 msalle 2457 #!/usr/bin/perl
2     #
3     # Copyright (C) Nikhef 2011
4     #
5     # Licensed under the Apache License, Version 2.0 (the "License");
6     # you may not use this file except in compliance with the License.
7     # You may obtain a copy of the License at
8     #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     #
17     # Author:
18     # Mischa Sall\'e <msalle@nikhef.nl>
19     # NIKHEF Amsterdam, the Netherlands
20     #
21     ########################################################################
22     #
23     # Nagios probe to test functioning of EES
24     #
25     # Nagios state can be one of the following:
26 msalle 2459 # OK:
27     # EES is up and running and responds correctly to SAML2-XACML2-request
28     # WARNING:
29     # EES is running and responds, but either slowly or not with 200 OK
30     # CRITICAL:
31     # EES does not respond or not in time
32     # UNKNOWN
33 msalle 2457 #
34     ########################################################################
35    
# DEFAULTS
# Version number printed by the -V|--version option.
my $probeversion = 0.1;

# Fallback settings; each of them can be overridden on the command line.
my ($deftimeout, $defwarning, $defhost, $defport) = (
    10,             # overall timeout for the probe (seconds)
    1,              # runtime after which to warn about slowness (seconds)
    "localhost",    # hostname of the EES service
    6217,           # port number of the EES service
);
44 msalle 2457
45     ########################################################################
46     # Logging package
47     # keeps internal log trace which can be dumped with dump_log
48     ########################################################################
package logger;
use strict;
use warnings;
{
    # The log level and the collected log lines are shared by the whole
    # class: the probe invokes every method on the class name
    # (logger->log_func(...)), not on an instance.
    my $loglevel = 0;   # initialised so log_func() is safe before new()
    my @logstring;

    # Constructor: new([level])
    # Sets the class-wide log level to level, or to 0 when level is not
    # given/undefined.  Returns an object blessed into the invoking class.
    # Lines that were logged earlier are kept.
    sub new {
        my ($classname, $level) = @_;
        my $self = bless {}, (ref($classname) || $classname);
        $self->set_loglevel($level);
        return $self;
    }

    # Sets loglevel; an undefined level is treated as 0.
    sub set_loglevel {
        my ($self, $level) = @_;
        $loglevel = defined $level ? $level : 0;
        return $loglevel;
    }

    # Logging function: log_func(priority, "logstring\n", ...);
    # The strings are stored only when priority does not exceed the current
    # log level; otherwise the call is a no-op.
    sub log_func {
        my ($self, $prio, @lines) = @_;
        return if ($prio > $loglevel);
        push @logstring, @lines;
        return;
    }

    # Dumps log: prints every stored string, in the order it was logged, to
    # the currently selected output handle.
    sub get_log {
        my $self = shift;
        foreach my $myentry (@logstring) {
            print $myentry;
        }
        return;
    }
}
94    
95     ########################################################################
96     # Nagios status printing package
97     # Can set and dump nagios status output
98     ########################################################################
package nagstat;
{
    use strict;
    use warnings;

    # Status is kept per class, not per object: the probe calls all methods
    # on the class name (nagstat->set_status(...)).
    my @stat = ("OK", "WARNING", "CRITICAL", "UNKNOWN");
    my $code = 3;       # Default status unknown
    my $summary;
    my $perfdata;

    # Constructor: resets the status to UNKNOWN without summary or
    # performance data and returns an object blessed into the invoking class.
    sub new {
        my $classname = shift;
        my $self = bless {}, (ref($classname) || $classname);
        $code = 3;
        $summary = undef;
        $perfdata = undef;
        return $self;
    }

    # Set nagios code (0-3) plus summary: set_status(code, summary)
    # Only the first call that supplies a summary takes effect; later calls
    # are ignored.  A code that is not one of 0, 1, 2 or 3 is stored as
    # 3 (UNKNOWN), so get_status() always has a valid label and exit code.
    sub set_status {
        my ($self, $newcode, $newsummary) = @_;
        return if defined $summary;
        if (!defined $newcode || $newcode !~ /\A[0-3]\z/) {
            $newcode = 3;
        }
        $code = $newcode;
        $summary = $newsummary;
        return;
    }

    # Set internal performance data
    sub set_perfdata {
        my $self = shift;
        $perfdata = shift;
        return;
    }

    # Printout nagios status, summary and optionally performance data
    # return value is code (0-3)
    sub get_status {
        if (!defined $summary) {
            $summary = "unknown status";
        }
        # '|' separates the summary from the performance data in the output
        # line, so a literal one inside the summary is replaced by a space.
        (my $text = $summary) =~ tr/|/ /;
        my $line = $stat[$code].": ".$text;
        if (defined $perfdata) {
            $line .= "|".$perfdata;
        }
        print $line."\n";
        return $code;
    }
}
146    
147     ########################################################################
148     # Inter process communication package for nagios probes
149     # Starts alarm handler when receiving alarm which checks status of
150     # probe, and terminates or kills it.
151     ########################################################################
package probeipc;
{
    use strict;
    use warnings;

    # Timeout value remembered for the probe (class-wide).
    my $timeout;

    # Constructor: new(alarmhandler, inthandler, [timeout])
    #   alarmhandler  function to run on SIGALRM (required)
    #   inthandler    function to run on SIGINT and SIGTERM (required)
    #   timeout       value to remember as probe timeout, default 10; an
    #                 explicit 0 is kept and not replaced by the default
    # Dies when one of the handlers is missing.  Installing the handlers
    # changes the process-wide %SIG table.
    sub new {
        my $classname = shift;
        my $self = bless {}, (ref($classname) || $classname);
        my $alarmhandler = shift
            or die ($classname."::new() needs alarmhandler arg\n");
        my $inthandler = shift
            or die ($classname."::new() needs inthandler arg\n");
        my $probe_timeout = shift;
        $probe_timeout = 10 unless defined $probe_timeout;
        $self->set_alarmhandler($alarmhandler);
        $self->set_inthandler($inthandler);
        $self->set_timeout($probe_timeout);
        return $self;
    }

    # Stores the probe timeout.  It is only remembered here; arming the
    # actual alarm is left to the caller.
    sub set_timeout {
        my $self = shift;
        $timeout = shift;
        return;
    }

    # Returns the timeout stored by new() or set_timeout().
    sub get_timeout {
        return $timeout;
    }

    # Sets function to call when SIGALRM is caught
    sub set_alarmhandler {
        my $self = shift;
        my $alarmhandler = shift;
        # \& for function reference, $ for stringvar
        $SIG{'ALRM'} = \&$alarmhandler;
        return;
    }

    # Sets function to call when SIGINT or SIGTERM is caught
    sub set_inthandler {
        my $self = shift;
        my $inthandler = shift;
        $SIG{'INT'} = \&$inthandler;
        $SIG{'TERM'} = \&$inthandler;
        return;
    }

}
194    
195     ########################################################################
196     # Running main probe package
197     ########################################################################
198     package main;
199     use strict;
200     use warnings;
201    
202     use IO::Socket;
203     use Getopt::Long qw(:config no_ignore_case bundling);
204     use Time::HiRes qw(time alarm);
205    
# Settings filled in by getopts().
#   $timeout   total maximum runtime for the probe (set by -t and by -c)
#   $critical  declared but not referenced anywhere else in this script
#   $warning   runtime after which a slow EES is reported
#   $host      EES hostname
#   $port      EES portnumber
#   $verbose   verbosity level
my ($timeout, $critical, $warning, $host, $port, $verbose);

# Socket to EES
my $sock;

# The stages the probe passes through, numbered in the order in which they
# are reached, such that e.g. the sighandlers know where we are.
my %stages;
@stages{qw(presock sockopen datasent headrcvd resprcvd sockclosed)} = (0 .. 5);

# Which state the socket is in
my $stage = $stages{'presock'};
225    
# Prints the full help text, including the default timeout and warning
# values, and then terminates the program with exit status 0.
sub usage() {
    my $name = $0;
    $name =~ s{.*/}{};      # keep only the part after the last slash
    my $helptext = <<EOHELP;
Usage: $name [options]

Options:
-t|--timeout <timeout> maximum runtime for probe, default: $deftimeout sec
-c|--critical <timeout> idem
-w|--warning <time> runtime after which to warn, default: $defwarning sec
-H|--host <hostname> hostname
-p|--port <portnumber> port number
-v|--verbose be more verbose, more -v means more verbosity
-V|--version print version
-h|--help show this helptext
EOHELP
    print $helptext;
    exit 0;
}
244    
# Prints the one-line usage summary.  Does not exit; the result of print is
# the return value.
sub shortusage() {
    my $name = $0;
    $name =~ s{.*/}{};      # keep only the part after the last slash
    print "Usage: $name [options]\n";
}
252    
# Prints the script name followed by the probe version.  Does not exit; the
# result of print is the return value.
sub version() {
    my $name = $0;
    $name =~ s{.*/}{};      # keep only the part after the last slash
    print "$name version: $probeversion\n";
}
260    
# Parses command line options and sets global variables
#
# Starts from the built-in defaults and lets the command line override them.
# Exits with status 0 after printing the help (--help), the short usage (-h)
# or the version (-V).  When the options cannot be parsed, the short usage
# is printed and the probe exits with status 3 (UNKNOWN): usage() always
# exits with 0 itself, which Nagios would read as OK.
# Negative timeout and warning values are reset to 0.
sub getopts() {
    my $version;
    my $help;
    my $shorthelp;

    $timeout=$deftimeout;
    $warning=$defwarning;
    $host=$defhost;
    $port=$defport;
    my $parsed_ok = GetOptions(
        "t|timeout=f" => \$timeout,
        "c|critical=f" => \$timeout,    # same meaning as --timeout
        "w|warning=f" => \$warning,
        "H|host=s" => \$host,
        "p|port=i" => \$port,
        "u|url=s",                      # accepted; value not used by the probe
        "v|verbose+" => \$verbose,
        "help+" => \$help,
        "h+" => \$shorthelp,
        "V|version+" => \$version);
    if (!$parsed_ok) {
        shortusage();
        exit(3);
    }

    if ($help) {
        usage();
        exit(0);
    }
    if ($shorthelp) {
        shortusage();
        exit(0);
    }
    if ($version) {
        version();
        exit(0);
    }
    $timeout=0 if ($timeout<0);
    $warning=0 if ($warning<0);
}
289    
# Exit function: prints the nagios status line, then dumps the collected
# log, and finally exits with the nagios status code as exit status.
sub nagios_exit() {
    # Status line goes out first, its return value is the code (0-3)
    my $exitcode = nagstat->get_status();

    # Followed by whatever the logger has collected
    logger->get_log();

    exit $exitcode;
}
299    
# Signal handler for SIGALRM: closes the socket when the probe is somewhere
# between opening and closing it, sets status CRITICAL (2) and exits via
# nagios_exit.  The signal name handed to the handler is not used.
sub alarm_handler() {
    logger->log_func(2,"Timeout exceeded\n");
    my $sock_in_use = ($stage > $stages{'presock'})
                   && ($stage < $stages{'sockclosed'});
    if ($sock_in_use) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }
    nagstat->set_status(2,"probe timeout exceeded");
    nagios_exit();
}
311    
# Signal handler for SIGINT and SIGTERM: logs the signal name, closes the
# socket when the probe is somewhere between opening and closing it, sets
# status CRITICAL (2) naming the signal and exits via nagios_exit.
sub int_handler() {
    my $signame = $_[0];
    logger->log_func(2,"Caught signal ".$signame."\n");
    my $sock_in_use = ($stage > $stages{'presock'})
                   && ($stage < $stages{'sockclosed'});
    if ($sock_in_use) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }
    nagstat->set_status(2,"caught signal ".$signame);
    nagios_exit();
}
323    
# Actual probe: opens a socket to EES, sends the message, reads the response
# and turns the first response line into a nagios status.
#
# Argument: the message to send.
# Returns 0 when the first response line is exactly "HTTP/1.1 200 OK"
# (status OK, or WARNING when the measured time is not below $warning) and
# 1 in every other case (connect, write or read failure, or another
# response line).
#
# The alarm armed at the start is cancelled on every path out of this
# function, so that it cannot fire while the result is being printed.
# $stage is kept up to date for the signal handlers.
sub call_ees($) {
    my $msg=shift;

    # Make sure to have starttime
    my $t1=time();

    # Set alarm
    alarm($timeout);

    logger->log_func(2,"Opening connection to ".$host.":".$port."\n");
    # Open socket to $host:$port
    $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto => 'tcp'
    );
    if (!defined $sock) {
        # Build the summary first: later calls may overwrite $!
        my $summary="Failed to connect ($!)";
        alarm(0);
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'sockopen'};

    # Send soap message
    logger->log_func(3,"Socket opened, sending message\n");
    if (!print {$sock} $msg) {
        my $summary="cannot write to socket ($!)";
        close($sock);
        $stage=$stages{'sockclosed'};
        alarm(0);
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'datasent'};

    # Get header and response
    logger->log_func(3,"Message sent, waiting for response\n");
    my $header = <$sock>;
    if (!defined $header) {
        my $summary="cannot read from socket ($!)";
        close($sock);
        $stage=$stages{'sockclosed'};
        alarm(0);
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'headrcvd'};

    # Chop off any carriage-return or line-feed from header
    $header =~ s/[\r\n]+$//;
    logger->log_func(3,"Header ".$header." received\n");

    # Dump remaining response in log
    logger->log_func(3,"Reading remaining response\n");
    while (my $line=<$sock>) {
        logger->log_func(3,"$line");
    }
    logger->log_func(3,"\n");
    $stage=$stages{'resprcvd'};

    # Close socket
    logger->log_func(3,"Response finished, closing socket\n");
    close($sock);
    $stage=$stages{'sockclosed'};
    logger->log_func(3,"Socket closed\n");

    # The timed part is over: cancel the pending alarm
    alarm(0);

    # We are done with the socket, we have timing statistics
    my $t2=time();

    # Set performance data: elapsed time rounded to milliseconds, in the
    # label=value[UOM];warn;crit;min form
    my $dt=int(($t2-$t1)*1000+0.5)/1000;
    nagstat->set_perfdata("time=${dt}s;$warning;$timeout;0");

    # Check header
    if ($header eq "HTTP/1.1 200 OK") {
        if ($dt<$warning) {
            nagstat->set_status(0,"Success");
            return 0;
        }
        nagstat->set_status(1,"EES is slow in responding");
        return 0;
    }
    # There was a problem: a HTTP/1.1 status line other than 200 OK is a
    # warning, anything else is not a valid response
    if ($header =~ m{\AHTTP/1\.1 }) {
        nagstat->set_status(1,"unexpected answer from host ($header)");
    } else {
        nagstat->set_status(2,"not a valid response ($header)");
    }
    return 1;
}
407    
# SOAP envelope holding the XACML authorization decision query that is sent
# to EES.  The text has nothing to interpolate, hence the quoted here-doc.
my $msg = <<'END_OF_REQUEST';
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:dsig="http://www.w3.org/2000/09/xmldsig#"
xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
xmlns:XACMLcontext="urn:oasis:names:tc:xacml:2.0:context:schema:os"
xmlns:XACMLassertion="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:assertion"
xmlns:XACMLpolicy="urn:oasis:names:tc:xacml:2.0:policy:schema:os"
xmlns:xenc="http://www.w3.org/2001/04/xmlenc#"
xmlns:XACMLService="http://www.globus.org/security/XACMLAuthorization/bindings"
xmlns:XACMLsamlp="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:protocol"
xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol">
<SOAP-ENV:Body>
<XACMLsamlp:XACMLAuthzDecisionQuery CombinePolicies="true" ReturnContext="true"
InputContextOnly="false" IssueInstant="2010-03-25T14:55:01Z" Version="2.0"
ID="ID-1804289383">
<saml:Issuer xsi:type="saml:NameIDType"
Format="urn:oasis:names:tc:SAML:1.1:nameid-format:X509SubjectName">NetCat</saml:Issuer>
<XACMLcontext:Request xsi:type="XACMLcontext:RequestType">
<XACMLcontext:Action xsi:type="XACMLcontext:ActionType">
</XACMLcontext:Action>
</XACMLcontext:Request>
</XACMLsamlp:XACMLAuthzDecisionQuery>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
END_OF_REQUEST
437    
# Main flow of the probe.

# Command line options fill the global settings
getopts();

# Logger gets its loglevel from the verbosity, nagios status starts empty
logger->new($verbose);
nagstat->new();

# Install the handlers for SIGALRM and for SIGINT/SIGTERM
probeipc->new(\&alarm_handler, \&int_handler, $timeout);

# Run the actual EES probe with the SOAP message
call_ees($msg);

# Dump nagios status, log and exit
nagios_exit();
455    

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28