/[pdpsoft]/trunk/nagios/ees/check_ees.in
ViewVC logotype

Annotation of /trunk/nagios/ees/check_ees.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2721 - (hide annotations) (download)
Wed Jul 16 13:45:24 2014 UTC (8 years, 2 months ago) by msalle
File size: 12787 byte(s)
Produce proper 'unique' ID, at least much better than before.

1 msalle 2494 #!@PERL@
2 msalle 2457 #
3     # Copyright (C) Nikhef 2011
4     #
5     # Licensed under the Apache License, Version 2.0 (the "License");
6     # you may not use this file except in compliance with the License.
7     # You may obtain a copy of the License at
8     #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     #
17     # Author:
18     # Mischa Sall\'e <msalle@nikhef.nl>
19     # NIKHEF Amsterdam, the Netherlands
20     #
21     ########################################################################
22     #
23     # Nagios probe to test functioning of EES
24     #
25     # Nagios state can be one of the following:
26 msalle 2459 # OK:
27     # EES is up and running and responds correctly to SAML2-XACML2-request
28     # WARNING:
29     # EES is running and responds, but either slowly or not with 200 OK
30     # CRITICAL:
31     # EES does not respond or not in time
32     # UNKNOWN
33 msalle 2457 #
34     ########################################################################
35    
36     # DEFAULTS
# Probe version string; "@VERSION@" is a placeholder (presumably substituted
# when the .in template is processed -- confirm against the build setup).
my $probeversion="@VERSION@";

# Prefix of the saml:Issuer value; getissuer() appends the hostname to it.
my $issuername='ees-nagios@';

# Built-in defaults; each of them can be overridden on the command line.
my $deftimeout=10;              # Overall timeout for the probe (sec)
my $defwarning=1;               # Runtime after which to warn (sec)
my $defhost='localhost';        # Default hostname of the EES service
my $defport=6217;               # Default port number of the EES service
47 msalle 2457
48     ########################################################################
49     # Logging package
50     # keeps internal log trace which can be dumped with dump_log
51     ########################################################################
package logger;
use strict;
use warnings;
{
    # Class-wide state: the log level and the buffered lines live in these
    # two lexicals, not in the object, so every logger object and every
    # class-method call (logger->log_func(...)) shares them.
    my $loglevel=0;     # Starts at 0 so log_func() is safe before new()
    my @logstring;      # Buffered log lines, in the order they were logged

    # Constructor: logger->new([level])
    #   level   optional log level; when undefined the level is reset to 0
    # Returns an object blessed into the invoking class (or into the class
    # of the invoking object), so subclasses get objects of their own class.
    sub new {
        my ($class, $level)=@_;
        my $self=bless {}, (ref($class) || $class);
        if (defined $level) {
            $self->set_loglevel($level);
        } else {
            $loglevel=0;
        }
        return $self;
    }

    # Sets the shared log level: set_loglevel(level)
    sub set_loglevel {
        my ($self, $level)=@_;
        $loglevel=$level;
    }

    # Logging function: log_func(priority, "logstring\n", ...)
    # Appends the given strings to the buffer unless priority is higher
    # than the current log level; nothing is printed at this point.
    sub log_func {
        my ($self, $prio, @lines)=@_;
        return if ($prio > $loglevel);
        push @logstring, @lines;
    }

    # Dumps log: prints every buffered line, oldest first, to the currently
    # selected output handle. The buffer itself is left untouched.
    sub get_log {
        my $self=shift;
        foreach my $entry (@logstring) {
            print $entry;
        }
    }
}
97    
98     ########################################################################
99     # Nagios status printing package
100     # Can set and dump nagios status output
101     ########################################################################
package nagstat;
{
    # Class-wide state shared by all objects and class-method calls.
    my $code=3;         # Nagios status code, 3 (UNKNOWN) until a status is set
    my $summary;        # One-line summary; undefined until a status is set
    my $perfdata;       # Optional performance data string
    # Status labels, indexed by status code
    my @stat=("OK","WARNING","CRITICAL","UNKNOWN");

    # Constructor: nagstat->new()
    # Resets the shared state to "UNKNOWN, no summary, no performance data"
    # and returns an object blessed into the invoking class.
    sub new {
        my $class=shift;
        my $self=bless {}, (ref($class) || $class);
        $code=3;        # Default status unknown
        $summary=undef;
        $perfdata=undef;
        return $self;
    }

    # Set nagios code (0-3) plus summary: set_status(code, summary)
    # Only the first call takes effect: once a summary is defined, later
    # calls are ignored. A code that is not one of 0, 1, 2 or 3 is stored
    # as 3 (UNKNOWN) so that get_status() always has a label for it.
    sub set_status {
        my ($self, $newcode, $newsummary)=@_;
        return if (defined $summary);
        if (!defined $newcode || $newcode !~ /^[0-3]$/) {
            $newcode=3;
        }
        $code=$newcode;
        $summary=$newsummary;
    }

    # Set internal performance data: set_perfdata(string)
    sub set_perfdata {
        my ($self, $data)=@_;
        $perfdata=$data;
    }

    # Prints "<LABEL>: <summary>", followed by "|<perfdata>" when
    # performance data is set, and a newline.
    # Return value is the status code (0-3).
    # A missing summary is replaced by "unknown status"; note that this
    # defines the summary, so later set_status() calls are then ignored.
    sub get_status {
        if (!defined $summary) {
            $summary="unknown status";
        }
        my $line=$stat[$code].": ".$summary;
        if (defined $perfdata) {
            $line.="|".$perfdata;
        }
        print $line."\n";
        return $code;
    }
}
149    
150     ########################################################################
151     # Inter process communication package for nagios probes
152     # Starts alarm handler when receiving alarm which checks status of
153     # probe, and terminates or kills it.
154     ########################################################################
package probeipc;
{
    # Timeout as stored by set_timeout(); shared by all objects. Nothing in
    # this package reads it back.
    my $timeout;

    # Constructor: probeipc->new(alarmhandler, inthandler, [timeout])
    #   alarmhandler  sub (code reference or sub name) installed for SIGALRM
    #   inthandler    sub (code reference or sub name) installed for SIGINT
    #                 and SIGTERM
    #   timeout       optional, defaults to 10 when not given
    # Dies when either handler argument is missing or false.
    sub new {
        my $class=shift;
        my $classname=(ref($class) || $class);
        my $self=bless {}, $classname;
        my $alarmhandler=shift
            or die ($classname."::new() needs alarmhandler arg\n");
        my $inthandler=shift
            or die ($classname."::new() needs inthandler arg\n");
        # Use a separate name so the package-level $timeout is not shadowed,
        # and only fall back to the default when no value was passed at all
        # (an explicit 0 is kept).
        my $limit=shift;
        $limit=10 unless (defined $limit);  # probe default timeout is 10
        $self->set_alarmhandler($alarmhandler);
        $self->set_inthandler($inthandler);
        $self->set_timeout($limit);
        return $self;
    }

    # Stores the timeout value: set_timeout(timeout)
    sub set_timeout {
        my ($self, $value)=@_;
        $timeout=$value;
    }

    # Sets function to call when SIGALRM is caught
    sub set_alarmhandler {
        my ($self, $alarmhandler)=@_;
        # \&$x accepts a code reference as well as a sub name
        $SIG{'ALRM'}=\&$alarmhandler;
    }

    # Sets function to call when SIGINT or SIGTERM is caught
    sub set_inthandler {
        my ($self, $inthandler)=@_;
        $SIG{'INT'}=\&$inthandler;
        $SIG{'TERM'}=\&$inthandler;
    }

}
197    
198     ########################################################################
199     # Running main probe package
200     ########################################################################
201     package main;
202     use strict;
203     use warnings;
204    
205     use IO::Socket;
206     use Getopt::Long qw(:config no_ignore_case bundling);
207     use Time::HiRes qw(time alarm);
208 msalle 2720 use POSIX qw(strftime);
209     use Sys::Hostname;
210     use Net::hostent;
211 msalle 2457
212 msalle 2720
# Run-time settings; getopts() fills in the ones it handles.
my $timeout;    # Total maximum runtime for probe (sec), see call_ees()
my $critical;   # Not referenced by the code shown in this file
my $warning;    # Runtime (sec) from which on a response counts as slow
my $host;       # EES hostname
my $port;       # EES portnumber
my $verbose;    # Verbosity level

my $sock;       # socket to EES

# The stages the probe goes through, in order, numbered from 0, such that
# e.g. the signal handlers know whether the socket is currently open.
my @stage_names=qw(presock sockopen datasent headrcvd resprcvd sockclosed);
my %stages=map { $stage_names[$_] => $_ } (0..$#stage_names);
my $stage=$stages{'presock'};   # Which state the socket is in
232    
# Prints the full help text (all documented options with their defaults)
# and then terminates the program with exit status 0; it never returns.
sub usage() {
    my $name=$0;
    $name =~ s{.*/}{};  # Keep only what follows the last slash
    print "Usage: $name [options]\n",
          "\n",
          "Options:\n",
          "-t|--timeout <timeout> maximum runtime for probe, default: $deftimeout sec\n",
          "-c|--critical <timeout> idem\n",
          "-w|--warning <time> runtime after which to warn, default: $defwarning sec\n",
          "-H|--host <hostname> hostname, default: $defhost\n",
          "-p|--port <portnumber> port number, default: $defport\n",
          "-v|--verbose be more verbose, more -v means more verbosity\n",
          "-V|--version print version\n",
          "-h|--help show this helptext\n";
    exit 0;
}
251    
# Prints the one-line usage summary. Unlike usage() this returns to the
# caller; the return value is that of print, which callers use in an
# "and exit" chain.
sub shortusage() {
    my $name=$0;
    $name =~ s{.*/}{};  # Keep only what follows the last slash
    print "Usage: $name [options]\n";
}
259    
# Prints the program name (without directory part) and the probe version on
# one line. Returns the return value of print.
sub version() {
    my $name=$0;
    $name =~ s{.*/}{};  # Keep only what follows the last slash
    print "$name version: $probeversion\n";
}
267    
# Parses command line options and sets the global variables $timeout,
# $warning, $host, $port and $verbose. The first four start out with their
# built-in defaults; $verbose is left undefined unless -v is given.
#
# Exits the program instead of returning when:
#   - the command line is invalid: prints the short usage, exit status 3,
#     which Nagios reports as UNKNOWN. usage() cannot be used for this
#     because it always exits with status 0, which would make a broken
#     command line look like an OK check result;
#   - --help is given: full usage, exit status 0;
#   - -h is given: short usage, exit status 0;
#   - -V/--version is given: version line, exit status 0.
sub getopts() {
    my $version;
    my $help;
    my $shorthelp;

    $timeout=$deftimeout;
    $warning=$defwarning;
    $host=$defhost;
    $port=$defport;

    my $parsed=GetOptions(
        "t|timeout=f"  => \$timeout,
        "c|critical=f" => \$timeout,    # Same setting as --timeout
        "w|warning=f"  => \$warning,
        "H|host=s"     => \$host,
        "p|port=i"     => \$port,
        # Accepted without a destination: Getopt::Long then stores the value
        # in its default destination, which this probe never reads.
        "u|url=s",
        "v|verbose+"   => \$verbose,
        "help+"        => \$help,
        "h+"           => \$shorthelp,
        "V|version+"   => \$version);
    if (!$parsed) {
        # GetOptions has already reported the offending option on stderr
        shortusage();
        exit 3;
    }

    if ($help) {
        usage();        # Exits with status 0 itself
        exit 0;
    }
    if ($shorthelp) {
        shortusage();
        exit 0;
    }
    if ($version) {
        version();
        exit 0;
    }

    # Negative times make no sense, treat them as 0
    $timeout=0 if ($timeout<0);
    $warning=0 if ($warning<0);
}
296    
# Exit function: prints the nagios status line first, then dumps the
# buffered log, and finally exits with the nagios status code (0-3).
sub nagios_exit() {
    # Status line must come before the log output
    my $exitcode=nagstat->get_status();

    # Everything that was logged at the active log level
    logger->get_log();

    exit $exitcode;
}
306    
# Signal handler for SIGALRM: the probe ran into its timeout. Closes the
# socket when it is open, reports CRITICAL and exits via nagios_exit().
sub alarm_handler() {
    my ($sig)=@_;
    # The socket is open in every stage strictly between presock and
    # sockclosed
    my $sock_is_open=!($stage<=$stages{'presock'} ||
                       $stage>=$stages{'sockclosed'});

    logger->log_func(2,"Timeout exceeded\n");
    if ($sock_is_open) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }
    nagstat->set_status(2,"probe timeout exceeded");
    nagios_exit();
}
318    
# Signal handler for SIGINT and SIGTERM: logs the signal name, closes the
# socket when it is open, reports CRITICAL and exits via nagios_exit().
sub int_handler() {
    my ($sig)=@_;
    # The socket is open in every stage strictly between presock and
    # sockclosed
    my $sock_is_open=!($stage<=$stages{'presock'} ||
                       $stage>=$stages{'sockclosed'});

    logger->log_func(2,"Caught signal ".$sig."\n");
    if ($sock_is_open) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }
    nagstat->set_status(2,"caught signal ".$sig);
    nagios_exit();
}
330    
# Determines the hostname and forms the issuer name: $issuername followed
# by the name returned by the host lookup, or by the plain result of
# hostname() when that lookup yields nothing.
sub getissuer() {
    my $localname=hostname();
    my $entry=gethost($localname);

    if ($entry) {
        return $issuername.$entry->name;
    }
    return $issuername.$localname;
}
339    
# Actual probe: opens a socket to the EES at $host:$port, sends the message,
# reads the response and sets the nagios status accordingly.
#
# Argument:
#   msg     the complete message to send over the socket
#
# Returns 0 when the first response line is exactly "HTTP/1.1 200 OK"
# (status OK, or WARNING when the total time is not below $warning),
# and 1 in all other cases:
#   - connecting, writing or reading the first line fails: CRITICAL
#   - first line starts with "HTTP/1.1 " but is not 200 OK: WARNING
#   - first line is anything else: CRITICAL
#
# The alarm set here is not cancelled; the caller is expected to exit right
# after this sub returns. $stage is kept up to date for the signal handlers.
sub call_ees($) {
    my $msg=shift;

    # Make sure to have starttime
    my $t1=time();

    # Set alarm
    alarm($timeout);

    logger->log_func(2,"Opening connection to ".$host.":".$port."\n");
    # Open socket to $host:$port
    $sock=IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp'
    );
    if (!defined $sock) {
        nagstat->set_status(2,"Failed to connect ($!)");
        return 1;
    }
    $stage=$stages{'sockopen'};

    # Send soap message
    logger->log_func(3,"Socket opened, sending message\n");
    unless (print {$sock} $msg) {
        # Keep the error text: close() may overwrite $!
        my $summary="cannot write to socket ($!)";
        close($sock);
        $stage=$stages{'sockclosed'};
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'datasent'};

    # Get header and response
    logger->log_func(3,"Message sent, waiting for response\n");
    my $header=<$sock>;
    if (!defined $header) {
        my $summary="cannot read from socket ($!)";
        close($sock);
        # Tell the signal handlers there is nothing left to close
        $stage=$stages{'sockclosed'};
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'headrcvd'};

    # Chop off any carriage-return or line-feed from header
    $header =~ s/[\r\n]+$//;
    logger->log_func(3,"Header ".$header." received\n");

    # Dump remaining response in log
    logger->log_func(3,"Reading remaining response\n");
    while (my $line=<$sock>) {
        logger->log_func(3,"$line");
    }
    logger->log_func(3,"\n");
    $stage=$stages{'resprcvd'};

    # Close socket
    logger->log_func(3,"Response finished, closing socket\n");
    close($sock);
    $stage=$stages{'sockclosed'};
    logger->log_func(3,"Socket closed\n");
    # We are done with the socket, we have timing statistics
    my $t2=time();

    # Set performance data: elapsed time rounded to milliseconds, in the
    # form label=value[unit];warn;crit;min. The "time=" label is required
    # for the string to be valid nagios performance data.
    my $dt=int(($t2-$t1)*1000+0.5)/1000;
    nagstat->set_perfdata("time=${dt}s;$warning;$timeout;0");

    # Check header
    if ($header eq "HTTP/1.1 200 OK") {
        if ($dt<$warning) {
            nagstat->set_status(0,"Success");
        } else {
            nagstat->set_status(1,"EES is slow in responding");
        }
        return 0;
    }

    # There was a problem: an HTTP/1.1 status line other than 200 OK is a
    # warning, anything else is not a valid response at all. The pattern is
    # anchored and the dot escaped, so only a line that really starts with
    # "HTTP/1.1 " counts as an HTTP answer.
    if ($header =~ m{^HTTP/1\.1 }) {
        nagstat->set_status(1,"unexpected answer from host ($header)");
    } else {
        nagstat->set_status(2,"not a valid response ($header)");
    }
    return 1;
}
423    
# Values that go into the request: the current time in UTC, the issuer name
# for this host, and a random number of 9 digits (zero padded) for the ID.
my $utctime=strftime("%FT%TZ", gmtime());
my $issuer=getissuer();
my $uuid=sprintf("%09d", int(rand(1_000_000_000)));

# The message sent to the EES: a SOAP envelope holding a
# XACMLAuthzDecisionQuery with an empty Action.
my $msg= <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:dsig="http://www.w3.org/2000/09/xmldsig#"
xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
xmlns:XACMLcontext="urn:oasis:names:tc:xacml:2.0:context:schema:os"
xmlns:XACMLassertion="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:assertion"
xmlns:XACMLpolicy="urn:oasis:names:tc:xacml:2.0:policy:schema:os"
xmlns:xenc="http://www.w3.org/2001/04/xmlenc#"
xmlns:XACMLService="http://www.globus.org/security/XACMLAuthorization/bindings"
xmlns:XACMLsamlp="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:protocol"
xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol">
<SOAP-ENV:Body>
<XACMLsamlp:XACMLAuthzDecisionQuery CombinePolicies="true" ReturnContext="true"
InputContextOnly="false" IssueInstant="$utctime" Version="2.0"
ID="ID-$uuid">
<saml:Issuer xsi:type="saml:NameIDType"
Format="urn:oasis:names:tc:SAML:1.1:nameid-format:X509SubjectName">$issuer</saml:Issuer>
<XACMLcontext:Request xsi:type="XACMLcontext:RequestType">
<XACMLcontext:Action xsi:type="XACMLcontext:ActionType">
</XACMLcontext:Action>
</XACMLcontext:Request>
</XACMLsamlp:XACMLAuthzDecisionQuery>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
EOF

# Main sequence. The order matters: the options determine the log level and
# the timeout, and the status and log objects must be initialized before
# the signal handlers, which use them, are installed.

# Command line options first
getopts();

# Logger, at the requested verbosity
logger->new($verbose);

# Nagios status, starting out as unknown
nagstat->new();

# Signal handlers for timeout and interruption
probeipc->new(\&alarm_handler,\&int_handler,$timeout);

# The actual EES probe
call_ees($msg);

# Print nagios status, dump the log and exit
nagios_exit();
476    

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28