#!/usr/bin/perl -w
#
# Copyright 2014 Stichting Fundamenteel Onderzoek der Materie (FOM-Nikhef)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This Nagios check finds out if IBM Tivoli Storage Manager backups
# are alive and active, by perusing the logfile for recent successful
# backups. If the latest successful run is more than a day old, a warning
# is issued. More than three days is critical.
#
# Options:
#   --log <file>   logfile to inspect (default /var/log/dsmsched.log)
#   --debug        also print the raw epoch values that are compared
#   --verbose      accepted for command-line compatibility; currently unused

use strict;
use Getopt::Long;
use Time::Local;
use POSIX qw{strftime};

use File::ReadBackwards;

my $verbose = 0;
my $debug;
# the logfile to parse
my $log = "/var/log/dsmsched.log";

# seconds before the check gives up and reports UNKNOWN
my $TIMEOUT = 10;

# one day in seconds; the thresholds below are expressed in days
my $DAY = 24 * 60 * 60;

GetOptions("log:s"   => \$log,
           "debug"   => \$debug,
           "verbose" => \$verbose );

# Nagios plugin exit codes
my %ERRORS = (OK        => 0,
              WARNING   => 1,
              CRITICAL  => 2,
              UNKNOWN   => 3,
              DEPENDENT => 4);

# Print the one-line plugin output and terminate with the exit code that
# belongs to the given status name (a key of %ERRORS).
sub nagios_exit {
    my ($status, $text) = @_;
    alarm(0);
    print "$text\n";
    exit $ERRORS{$status};
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print ("ERROR: No timely response (alarm timeout)\n");
    exit $ERRORS{"UNKNOWN"};
};

alarm($TIMEOUT);

# we're looking for "04/16/14 21:57:03 Scheduled event 'DAILY_NIKHEF' completed successfully."
# timestamp in the file is local time.

# find the last such entry in the logfile; reading backwards means the
# first match is the most recent one
my $loghandle = File::ReadBackwards->new($log);
if (!defined($loghandle)) {
    nagios_exit("CRITICAL", "ERROR: cannot open $log");
}

my $lastentry;
while (defined(my $logline = $loghandle->readline)) {
    next unless $logline =~ m{(\d\d)/(\d\d)/(\d\d) (\d\d):(\d\d):(\d\d) Scheduled event .* completed successfully\.};
    # just like localtime(), months range 0..11
    my ($month, $day, $year, $hours, $minutes, $seconds) = ($1 - 1, $2, $3, $4, $5, $6);
    # timelocal() croaks on an impossible date (e.g. month 13); treat such
    # a line as unusable and keep looking at older entries instead of
    # dying with an exit status Nagios does not understand.
    $lastentry = eval { timelocal($seconds, $minutes, $hours, $day, $month, $year) };
    last if defined $lastentry;
}

$loghandle->close;

if (!defined $lastentry) {
    nagios_exit("CRITICAL", "ERROR: no successful events in $log");
}

# compare timestamps; if last entry is too long in the past, raise a
# warning or an error

my $currenttime = time;
print "$lastentry\n" if $debug;
print "$currenttime\n" if $debug;

# Render the epoch value through localtime() so the reported time is in
# the same (local) time zone as the timestamps in the logfile.
my $lasttime = strftime("%F %T", localtime($lastentry));

my $age = $currenttime - $lastentry;

if ($age > 3 * $DAY) {
    # more than three days is really critical
    nagios_exit("CRITICAL", "ERROR: last successful event is more than three days old [$lasttime]");
}
elsif ($age > $DAY) {
    nagios_exit("WARNING", "WARNING: last successful event is more than a day old [$lasttime]");
}
else {
    nagios_exit("OK", "OK [$lasttime]");
}