#!/usr/bin/perl -w
#
# Nagios plugin: report how many batch jobs qstat lists as running and queued.
#
# The plugin runs $QSTAT, looks at every output line that starts with a
# numeric job ID, takes the fifth whitespace-separated column as the job
# state and counts the jobs per state.  The running (R) and queued (Q)
# counts are reported in the status line and as performance data.
#
# Exit codes follow %ERRORS:
#   OK       - qstat ran successfully and its output was counted
#   CRITICAL - qstat did not answer within $TIMEOUT seconds
#   UNKNOWN  - qstat is missing, could not be started or exited non-zero

use strict;
use warnings;
use Getopt::Long;

my %ERRORS = (
    OK        => 0,
    WARNING   => 1,
    CRITICAL  => 2,
    UNKNOWN   => 3,
    DEPENDENT => 4,
);

my $TIMEOUT = 60;                  # seconds to wait for qstat before giving up
my $QSTAT   = "/usr/bin/qstat";

my $verbose = 0;                   # accepted for compatibility; currently unused
my $help;
my $hostname;

# Unknown options are reported by Getopt::Long on STDERR but are deliberately
# not fatal, so existing check definitions keep working.
GetOptions(
    "help"         => \$help,
    "hostname|H:s" => \$hostname,
    "verbose|v+"   => \$verbose,
);

if ($help) {
    print "Usage: $0 [--help] [-H|--hostname <name>] [-v|--verbose]\n";
    print "Counts running and queued jobs reported by $QSTAT.\n";
    exit $ERRORS{UNKNOWN};
}

# The hostname is only used in the timeout message; fall back to a readable
# placeholder so the message never interpolates an undefined value.
my $target = ( defined $hostname && length $hostname ) ? $hostname : "localhost";

# Without a usable qstat binary there is nothing to check.  This is top-level
# code, so the script must exit() here; return() is only valid inside a sub.
unless ( -x $QSTAT ) {
    print "UNKNOWN: $QSTAT not found or not executable\n";
    exit $ERRORS{UNKNOWN};
}

# Count jobs per state from qstat output.
#
# Parameters:
#   $fh - readable filehandle delivering qstat output
# Returns:
#   a hash (as a list) mapping state letter => number of jobs; the keys
#   "Q" and "R" are always present, other states appear once they are seen.
sub count_job_states {
    my ($fh) = @_;

    my %count = ( Q => 0, R => 0 );
    while ( my $line = <$fh> ) {
        next unless $line =~ /^\d+/;    # only count lines with job ID

        # The job state is the fifth column; skip lines that are too short
        # to have one instead of counting an undefined state.
        my $state = ( split /\s+/, $line )[4];
        next unless defined $state;

        $count{$state}++;
    }

    return %count;
}

# Pessimistic defaults: they are only replaced once qstat has run cleanly.
my $result   = $ERRORS{UNKNOWN};
my $message  = "UNKNOWN: could not determine result of $QSTAT";
my $perfdata = "";

# Just in case of problems, let's not hang Nagios.  The child is terminated
# first so that the implicit close of the pipe at exit does not wait for it.
my $qstat_pid;
$SIG{ALRM} = sub {
    kill 'TERM', $qstat_pid if $qstat_pid;
    print "ERROR: No response from $target querying qstat (alarm timeout)\n";
    exit $ERRORS{CRITICAL};
};

alarm($TIMEOUT);

# List-form pipe open: no shell involved, and the return value (the child
# pid) tells us whether qstat could be started at all.
my $qstat_fh;
$qstat_pid = open $qstat_fh, '-|', $QSTAT;

if ($qstat_pid) {
    my %count = count_job_states($qstat_fh);

    # close() on a pipe waits for the child and returns false when the
    # command exited non-zero, so qstat's exit status is only known here.
    if ( close $qstat_fh ) {
        # OK: message contains no details
        $message  = "OK; $count{R} running, $count{Q} queued";
        $result   = $ERRORS{OK};
        $perfdata = "'queued'=$count{Q};;;; 'running'=$count{R};;;;";
    }
}

# Disarm the timeout only after the child has been reaped by close().
alarm(0);

# Write output and return exit code; perfdata is appended only when present.
print $message, ( length $perfdata ? "|$perfdata" : "" ), "\n";
exit($result);