/[pdpsoft]/trunk/nagios/ees/check_ees
ViewVC logotype

Contents of /trunk/nagios/ees/check_ees

Parent Directory | Revision Log


Revision 2459 - (show annotations) (download)
Tue Dec 6 10:27:45 2011 UTC (10 years, 9 months ago) by msalle
File size: 12192 byte(s)
Add some clarifying comments.

1 #!/usr/bin/perl
2 #
3 # Copyright (C) Nikhef 2011
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 # Author:
18 # Mischa Sall\'e <msalle@nikhef.nl>
19 # NIKHEF Amsterdam, the Netherlands
20 #
21 ########################################################################
22 #
23 # Nagios probe to test functioning of EES
24 #
25 # Nagios state can be one of the following:
26 # OK:
27 # EES is up and running and responds correctly to SAML2-XACML2-request
28 # WARNING:
29 # EES is running and responds, but either slowly or not with 200 OK
30 # CRITICAL:
31 # EES does not respond or not in time
32 # UNKNOWN
33 #
34 ########################################################################
35
# DEFAULTS
# Probe version, printed by the -V|--version option
my $probeversion = 0.1;

# Built-in defaults; each of these can be overridden on the command line
my ($deftimeout, $defwarning, $defhost, $defport) = (
    10,             # overall timeout for the probe (sec)
    1,              # runtime after which to warn about slow running (sec)
    "localhost",    # hostname of the EES service
    6217,           # port number of the EES service
);
44
45 ########################################################################
46 # Logging package
47 # keeps internal log trace which can be dumped with dump_log
48 ########################################################################
package logger;
use strict;
use warnings;
{
    # The log state lives in these closure variables rather than in the
    # object, so it is shared by every user of the package and the methods
    # can also be called as class methods, e.g. logger->log_func(...).
    my $loglevel = 0;   # messages with a priority above this are dropped
    my @logstring;      # collected log entries, in order of arrival

    # Constructor: new([loglevel])
    # An omitted or undefined loglevel is treated as 0.
    sub new {
        my $class = shift;
        my $level = shift;
        # Two-argument bless so that subclasses get an object of their own
        # class instead of always getting a 'logger'.
        my $self = bless {}, (ref($class) || $class);
        $self->set_loglevel($level);
        return $self;
    }

    # Sets loglevel; an undefined level is treated as 0 so that the numeric
    # comparison in log_func never sees undef.
    sub set_loglevel {
        my ($self, $level) = @_;
        $loglevel = defined $level ? $level : 0;
        return $loglevel;
    }

    # Logging function: log_func(priority, "logstring\n", ...);
    # Stores the given strings when priority does not exceed the loglevel.
    sub log_func {
        my $self = shift;
        my $prio = shift;
        return if ($prio > $loglevel);
        push @logstring, @_;
        return;
    }

    # Dumps log: prints every stored entry to the currently selected
    # output handle, in the order in which they were logged.
    sub get_log {
        my $self = shift;
        foreach my $entry (@logstring) {
            print $entry;
        }
        return;
    }
}
94
95 ########################################################################
96 # Nagios status printing package
97 # Can set and dump nagios status output
98 ########################################################################
package nagstat;
{
    # The status lives in these closure variables rather than in the
    # object, so the methods can also be called as class methods, e.g.
    # nagstat->set_status(...). They are initialised here as well as in
    # new(), so a call made before new() still sees sane values.
    my @stat = ("OK", "WARNING", "CRITICAL", "UNKNOWN");
    my $code = 3;       # Default status unknown
    my $summary;        # undef until the first set_status() call
    my $perfdata;       # optional performance data

    # Constructor: resets status, summary and performance data
    sub new {
        my $class = shift;
        # Two-argument bless so that subclasses get an object of their own
        # class.
        my $self = bless {}, (ref($class) || $class);
        $code = 3;
        $summary = undef;
        $perfdata = undef;
        return $self;
    }

    # Set nagios code (0-3) plus summary: set_status(code, summary)
    # Only the first call with a defined summary has effect; later calls
    # are ignored. A code outside 0-3 is stored as 3 (UNKNOWN), so that
    # get_status() can always map it to a state name and exit code.
    sub set_status {
        my ($self, $newcode, $newsummary) = @_;
        return if defined $summary;
        if (defined $newcode && $newcode =~ /\A[0-3]\z/) {
            $code = $newcode;
        } else {
            $code = 3;
        }
        $summary = $newsummary;
        return;
    }

    # Set internal performance data (printed after a '|' by get_status)
    sub set_perfdata {
        my ($self, $data) = @_;
        $perfdata = $data;
        return;
    }

    # Printout nagios status, summary and optionally performance data
    # return value is code (0-3)
    sub get_status {
        if (!defined $summary) {
            $summary = "unknown status";
        }
        my $line = $stat[$code].": ".$summary;
        if (defined $perfdata) {
            $line .= "|".$perfdata;
        }
        print $line."\n";
        return $code;
    }
}
146
147 ########################################################################
148 # Inter process communication package for nagios probes
149 # Starts alarm handler when receiving alarm which checks status of
150 # probe, and terminates or kills it.
151 ########################################################################
package probeipc;
{
    # Timeout value handed to new()/set_timeout(). This package only
    # stores it; it does not arm an alarm itself.
    my $timeout;

    # Constructor: new(alarmhandler, inthandler, [timeout])
    # alarmhandler is installed for SIGALRM, inthandler for SIGINT and
    # SIGTERM. timeout defaults to 10 when omitted (or false).
    # Dies when one of the handlers is missing.
    sub new {
        my $classname = shift;
        # Two-argument bless so that subclasses get an object of their own
        # class.
        my $self = bless {}, (ref($classname) || $classname);
        my $alarmhandler = shift
            or die ($classname."::new() needs alarmhandler arg\n");
        my $inthandler = shift
            or die ($classname."::new() needs inthandler arg\n");
        # Named differently from the package-level $timeout to avoid
        # shadowing it.
        my $limit = (shift or 10);  # probe default timeout is 10
        $self->set_alarmhandler($alarmhandler);
        $self->set_inthandler($inthandler);
        $self->set_timeout($limit);
        return $self;
    }

    # Stores the probe timeout
    sub set_timeout {
        my ($self, $value) = @_;
        $timeout = $value;
        return;
    }

    # Returns the timeout stored by new() or set_timeout()
    sub get_timeout {
        return $timeout;
    }

    # Sets function to call when SIGALRM is caught
    sub set_alarmhandler {
        my ($self, $alarmhandler) = @_;
        # \& for function reference, $ for stringvar
        $SIG{'ALRM'} = \&$alarmhandler;
        return;
    }

    # Sets function to call when SIGINT or SIGTERM is caught
    sub set_inthandler {
        my ($self, $inthandler) = @_;
        $SIG{'INT'} = \&$inthandler;
        $SIG{'TERM'} = \&$inthandler;
        return;
    }

}
194
195 ########################################################################
196 # Running main probe package
197 ########################################################################
198 package main;
199 use strict;
200 use warnings;
201
202 use IO::Socket;
203 use Getopt::Long qw(:config no_ignore_case bundling);
204 use Time::HiRes qw(time alarm);
205
# Settings, filled in by getopts()
my (
    $timeout,   # Total maximum runtime for probe
    $critical,  # Declared but not used anywhere in this file
    $warning,   # Runtime after which to warn about a slow EES
    $host,      # EES hostname
    $port,      # EES portnumber
    $verbose,   # Verbosity level
);

my $sock;       # socket to EES

# The stages the probe goes through, in order; each name maps to its
# position, such that e.g. the sighandlers know where we are.
my @stage_order = qw(presock sockopen datasent headrcvd resprcvd sockclosed);
my %stages = map { $stage_order[$_] => $_ } 0 .. $#stage_order;

# Which state the socket is in
my $stage = $stages{'presock'};
225
# Prints the full help text (including the built-in defaults) and then
# exits with status 0; this function does not return.
sub usage() {
    # Strip any leading directory from the program name
    my $name = $0;
    $name =~ s/.*\///;
    print <<"END_USAGE";
Usage: $name [options]

Options:
-t|--timeout <timeout> maximum runtime for probe, default: $deftimeout sec
-c|--critical <timeout> idem
-w|--warning <time> runtime after which to warn, default: $defwarning sec
-H|--host <hostname> hostname
-p|--port <portnumber> port number
-v|--verbose be more verbose, more -v means more verbosity
-V|--version print version
-h|--help show this helptext
END_USAGE
    exit 0;
}
244
# Prints the one-line usage summary; returns the result of print
sub shortusage() {
    # Strip any leading directory from the program name
    my $name = $0;
    $name =~ s/.*\///;
    print "Usage: $name [options]\n";
}
252
# Prints the program name and the probe version on one line; returns the
# result of print
sub version() {
    # Strip any leading directory from the program name
    my $name = $0;
    $name =~ s/.*\///;
    print "$name version: $probeversion\n";
}
260
# Parses command line options and sets the global variables $timeout,
# $warning, $host, $port and $verbose, starting from the built-in defaults.
# Exits with 3 (nagios UNKNOWN) after printing the short usage when the
# options cannot be parsed, and with 0 after printing the requested help
# or version text.
sub getopts() {
    my $version;
    my $help;
    my $shorthelp;
    # -u|--url is accepted but its value is not used anywhere in this
    # probe; an explicit destination keeps it out of an implicit global.
    my $ignored_url;

    $timeout=$deftimeout;
    $warning=$defwarning;
    $host=$defhost;
    $port=$defport;
    my $parsed = GetOptions(
        "t|timeout=i"  => \$timeout,
        "c|critical=i" => \$timeout,    # same meaning as --timeout
        "w|warning=i"  => \$warning,
        "H|host=s"     => \$host,
        "p|port=i"     => \$port,
        "u|url=s"      => \$ignored_url,
        "v|verbose+"   => \$verbose,
        "help+"        => \$help,
        "h+"           => \$shorthelp,
        "V|version+"   => \$version);

    if (!$parsed) {
        # Do not go through usage() here: it exits with 0, which nagios
        # would interpret as OK.
        shortusage();
        exit(3);
    }

    if ($help) {
        usage();
        exit(0);
    }
    if ($shorthelp) {
        shortusage();
        exit(0);
    }
    if ($version) {
        version();
        exit(0);
    }
    return;
}
287
# Exit function: prints the nagios status line, then dumps the collected
# log and exits with the nagios status code. Does not return.
sub nagios_exit() {
    # Status line goes first; get_status returns the code (0-3)
    my $exitcode = nagstat->get_status();

    # Log trace follows the status line
    logger->get_log();

    exit $exitcode;
}
297
# Signal handler for SIGALRM: closes the socket when it is open, sets
# the nagios status to CRITICAL (2) and exits via nagios_exit.
sub alarm_handler() {
    my ($signame) = @_;
    logger->log_func(2,"Timeout exceeded\n");

    # The socket is open in every stage strictly between presock and
    # sockclosed
    my $sock_is_open =
        ($stage > $stages{'presock'} && $stage < $stages{'sockclosed'});
    if ($sock_is_open) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }

    nagstat->set_status(2,"probe timeout exceeded");
    nagios_exit();
}
309
# Signal handler for SIGINT and SIGTERM: logs the signal name, closes the
# socket when it is open, sets the nagios status to CRITICAL (2) and
# exits via nagios_exit.
sub int_handler() {
    my ($signame) = @_;
    logger->log_func(2,"Caught signal ".$signame."\n");

    # The socket is open in every stage strictly between presock and
    # sockclosed
    my $sock_is_open =
        ($stage > $stages{'presock'} && $stage < $stages{'sockclosed'});
    if ($sock_is_open) {
        logger->log_func(2,"Socket has been opened, closing it\n");
        close($sock);
    }

    nagstat->set_status(2,"caught signal ".$signame);
    nagios_exit();
}
321
# Actual probe: opens a socket to the EES at $host:$port, sends the given
# message, reads the first response line (the HTTP status line) plus the
# remaining response, and sets the nagios status and performance data.
#
# Argument: the message to send
# Returns:  0 when the EES answered with "HTTP/1.1 200 OK" (status OK, or
#           WARNING when the runtime is not below $warning),
#           1 on any failure (connect, write, read or unexpected answer)
#
# The alarm armed here is the overall probe timeout; when it fires, the
# SIGALRM handler takes over. It is cancelled again once the socket has
# been dealt with.
sub call_ees($) {
    my $msg=shift;

    # Make sure to have starttime
    my $t1=time();

    # Set alarm
    alarm($timeout);

    logger->log_func(2,"Opening connection to ".$host.":".$port."\n");
    # Open socket to $host:$port
    $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp'
    );
    if (!defined $sock) {
        # IO::Socket::INET reports constructor failures in $@; fall back
        # to $! in case it is empty
        my $err=($@ || $!);
        alarm(0);
        nagstat->set_status(2,"Failed to connect ($err)");
        return 1;
    }
    $stage=$stages{'sockopen'};

    # Send soap message
    logger->log_func(3,"Socket opened, sending message\n");
    my $sent=print {$sock} $msg;
    if (!$sent) {
        # Build the summary before close() can overwrite $!
        my $summary="cannot write to socket ($!)";
        close($sock);
        $stage=$stages{'sockclosed'};
        alarm(0);
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'datasent'};

    # Get header and response
    logger->log_func(3,"Message sent, waiting for response\n");
    my $header = <$sock>;
    if (!defined $header) {
        # Build the summary before close() can overwrite $!
        my $summary="cannot read from socket ($!)";
        close($sock);
        $stage=$stages{'sockclosed'};
        alarm(0);
        nagstat->set_status(2,$summary);
        return 1;
    }
    $stage=$stages{'headrcvd'};

    # Strip any trailing carriage-return or line-feed from header
    $header =~ s/[\r\n]+$//;
    logger->log_func(3,"Header ".$header." received\n");

    # Dump remaining response in log
    logger->log_func(3,"Reading remaining response\n");
    while(my $line=<$sock>) {
        logger->log_func(3,"$line");
    }
    logger->log_func(3,"\n");
    $stage=$stages{'resprcvd'};

    # Close socket
    logger->log_func(3,"Response finished, closing socket\n");
    close($sock);
    $stage=$stages{'sockclosed'};
    logger->log_func(3,"Socket closed\n");
    # We are done with the socket, we have timing statistics
    my $t2=time();
    # Nothing left that can block: cancel the pending alarm
    alarm(0);

    # Set performance data: runtime in seconds, rounded to milliseconds.
    # Nagios performance data has the form label=value[UOM];warn;crit;min
    # and is not valid without the label.
    my $dt=int(($t2-$t1)*1000+0.5)/1000;
    nagstat->set_perfdata("time=${dt}s;$warning;$timeout;0");

    # Check header
    if ($header eq "HTTP/1.1 200 OK") {
        if ($dt<$warning) {
            nagstat->set_status(0,"Success");
            return 0;
        }
        nagstat->set_status(1,"EES is slow in responding");
        return 0;
    }
    # There was a problem: an HTTP/1.1 answer other than 200 OK is a
    # warning, anything else is not a valid response
    if ($header =~ m{^HTTP/1\.1 }) {
        nagstat->set_status(1,"unexpected answer from host ($header)");
    } else {
        nagstat->set_status(2,"not a valid response ($header)");
    }
    return 1;
}
405
# SOAP envelope holding the XACML authorization decision query that is
# sent to the EES. The text contains nothing to interpolate, so a
# non-interpolating heredoc yields exactly the same string.
my $msg = <<'END_SOAP';
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:dsig="http://www.w3.org/2000/09/xmldsig#"
xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
xmlns:XACMLcontext="urn:oasis:names:tc:xacml:2.0:context:schema:os"
xmlns:XACMLassertion="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:assertion"
xmlns:XACMLpolicy="urn:oasis:names:tc:xacml:2.0:policy:schema:os"
xmlns:xenc="http://www.w3.org/2001/04/xmlenc#"
xmlns:XACMLService="http://www.globus.org/security/XACMLAuthorization/bindings"
xmlns:XACMLsamlp="urn:oasis:names:tc:xacml:2.0:profile:saml2.0:v2:schema:protocol"
xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol">
<SOAP-ENV:Body>
<XACMLsamlp:XACMLAuthzDecisionQuery CombinePolicies="true" ReturnContext="true"
InputContextOnly="false" IssueInstant="2010-03-25T14:55:01Z" Version="2.0"
ID="ID-1804289383">
<saml:Issuer xsi:type="saml:NameIDType"
Format="urn:oasis:names:tc:SAML:1.1:nameid-format:X509SubjectName">NetCat</saml:Issuer>
<XACMLcontext:Request xsi:type="XACMLcontext:RequestType">
<XACMLcontext:Action xsi:type="XACMLcontext:ActionType">
</XACMLcontext:Action>
</XACMLcontext:Request>
</XACMLsamlp:XACMLAuthzDecisionQuery>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
END_SOAP
435
# Main flow of the probe.

# Command line first: it sets the globals used by the steps below
getopts();

# Logger (with the requested verbosity) and nagios status keeper
logger->new($verbose);
nagstat->new();

# Install the handlers for SIGALRM and SIGINT/SIGTERM
probeipc->new(\&alarm_handler, \&int_handler, $timeout);

# Run the actual EES probe; the outcome is recorded via nagstat
call_ees($msg);

# Print nagios status, dump the log and exit
nagios_exit();
453

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28