#!/usr/bin/perl -w
#
# Copyright (c) Stichting FOM 2014
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This Nagios check finds out if IBM Tivoli Storage Manager backups
# are alive and active, by perusing the logfile for recent successful
# backups. If the latest successful run is more than a day old, a warning
# is issued. More than three days is critical.
#
# Options:
#   --log [FILE]   logfile to parse (default: /var/log/dsmsched.log)
#   --debug        print the raw epoch timestamps that are compared
#   --verbose      accepted; not used by the code in this file
#
# Exit status follows %ERRORS below: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

use strict;
use warnings;
use Getopt::Long;
use Time::Local;
use POSIX qw{strftime};

my $verbose = 0;
my $debug;

# the logfile to parse
my $log = "/var/log/dsmsched.log";

# seconds before the check gives up and reports UNKNOWN
my $TIMEOUT = 10;

# age limits (in seconds) for the most recent successful event
my $WARNING_AGE  = 24 * 60 * 60;
my $CRITICAL_AGE = 3 * 24 * 60 * 60;

GetOptions(
    "log:s"   => \$log,
    "debug"   => \$debug,
    "verbose" => \$verbose,
);

my %ERRORS = (OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3, DEPENDENT => 4);

# Print the one-line plugin output and terminate with the given status.
sub finish {
    my ($text, $status) = @_;
    print "$text\n";
    exit $status;
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print("ERROR: No timely response (alarm timeout)\n");
    exit $ERRORS{"UNKNOWN"};
};
alarm($TIMEOUT);

# we're looking for
#   "04/16/14 21:57:03 Scheduled event 'DAILY_NIKHEF' completed successfully."
# timestamp in the file is local time.
# find the last such entry in the logfile

# Three-argument open: the file name is never interpreted as part of the
# open mode, whatever characters it contains.
open(my $loghandle, '<', $log)
    or finish("ERROR: cannot open $log: $!", $ERRORS{"CRITICAL"});

my $lastentry;
while (my $line = <$loghandle>) {
    my ($mon, $mday, $year, $hours, $minutes, $seconds) =
        $line =~ m{(\d\d)/(\d\d)/(\d\d) (\d\d):(\d\d):(\d\d) Scheduled event .* completed successfully\.}
        or next;

    # Just like localtime(), timelocal() wants months in the range 0..11.
    # The two-digit year is passed through unchanged and resolved by
    # Time::Local itself. timelocal() croaks on impossible dates, so a
    # corrupt line is skipped instead of aborting the whole check.
    my $stamp = eval {
        timelocal($seconds, $minutes, $hours, $mday, $mon - 1, $year);
    };
    next unless defined $stamp;

    $lastentry = $stamp;
}
close $loghandle;

if (!defined $lastentry) {
    finish("ERROR: no successful events in $log", $ERRORS{"CRITICAL"});
}

# compare timestamps; if last entry is too long in the past, raise a
# warning or an error
my $currenttime = time;
print "$lastentry\n"   if $debug;
print "$currenttime\n" if $debug;

# Render the timestamp in local time, the same zone the logfile uses.
my $lasttime = strftime("%F %T", localtime($lastentry));
my $age      = $currenttime - $lastentry;

my $message = "unk";
my $result  = $ERRORS{"UNKNOWN"};

if ($age > $CRITICAL_AGE) {
    # more than three days is really critical
    $message = "ERROR: last successful event is more than three days old [$lasttime]";
    $result  = $ERRORS{"CRITICAL"};
}
elsif ($age > $WARNING_AGE) {
    $message = "WARNING: last successful event is more than a day old [$lasttime]";
    $result  = $ERRORS{"WARNING"};
}
else {
    $message = "OK [$lasttime]";
    $result  = $ERRORS{"OK"};
}

alarm(0);
finish($message, $result);