/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/nagios-ndpf-sensors/eventhandlers/cvmfs-errors
ViewVC logotype

Annotation of /trunk/nl.nikhef.ndpf.tools/nagios-ndpf-sensors/eventhandlers/cvmfs-errors

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2566 - (hide annotations) (download)
Tue Oct 2 10:07:27 2012 UTC (9 years, 7 months ago) by ronalds
File size: 2929 byte(s)
also try 3 times to reset error counters in a hard state
1 ronalds 2483 #!/usr/bin/perl
2 ronalds 2319
3 ronalds 2483 use strict;
4 ronalds 2319
5 ronalds 2483 # Nagios event handler to put a MOM offline when cvmfs gives an error
6 ronalds 2319
# The handler needs four command-line arguments; bail out early otherwise.
# They are used below as: service state, state type, a number compared
# against retry limits (presumably the Nagios attempt counter), and the
# cvmfs repository name.
if ( scalar(@ARGV) < 4 ) {
    print STDERR "Missing required arguments (4 required in total)\n";
    exit 1;
}

# Set DEBUG in the environment to get diagnostics on STDERR.
my $debug = $ENV{DEBUG} || 0;

# Fixed settings shared by the subroutines below.
my $pbsnodes             = "/usr/bin/pbsnodes";
my $node_offline_comment = "Nagios event handler: cvmfs error";
my $verb                 = 1;

# Name of the node to act on: taken from the environment when available,
# otherwise asked from the system.
my $hostname = $ENV{HOSTNAME} || `hostname -f`;
chomp $hostname;
20 ronalds 2483
21     #
22     # main function
23     #
# Decide what to do based on the reported service state.
my ( $state, $state_type, $attempt, $repo ) = @ARGV;
my $is_soft = $state_type eq "SOFT";
my $is_hard = $state_type eq "HARD";

if ( $state eq "OK" ) {
    # Service is fine again: once that is confirmed (HARD), the node may
    # be put back online.
    check_node_state() if $is_hard;
}
elsif ( $state eq "CRITICAL" ) {
    # Take the node offline on a HARD critical, or on a SOFT critical
    # once the attempt counter is past 1.
    change_node_state("offline")
        if $is_hard || ( $is_soft && $attempt > 1 );
}
elsif ( $state eq "WARNING" ) {
    # A warning may concern cache errors: try to clear the counters in
    # every SOFT state and for the first three attempts in a HARD state.
    reset_error_counters($repo)
        if $is_soft || ( $is_hard && $attempt < 4 );
}

exit 0;
51 ronalds 2319
52    
53 ronalds 2483
# Ask cvmfs-talk to reset the error counters of one repository.
#
# Argument: the repository name.
# Returns the status reported by system(), or nothing (after printing a
# message on STDERR) when the name is missing or not safe to use.
sub reset_error_counters {
    my ($repo) = @_;

    # The name ends up on a shell command line (the shell is needed for the
    # output redirection), so only accept plain letters, digits,
    # underscores, dots and dashes.
    if ( !defined $repo or $repo !~ /\A[A-Za-z0-9_.\-]+\z/ ) {
        print STDERR "Invalid or missing repository name\n";
        return;
    }

    my $cmd = "cvmfs-talk -i $repo reset error counters > /dev/null 2>&1";
    return system($cmd);
}
59 ronalds 2483
60 ronalds 2510
61    
# Change the batch-system state and note of this node through pbsnodes.
#
# Argument: the requested state.
#   "offline" - mark the node offline (-o) and make sure the cvmfs comment
#               is part of its note;
#   "clear"   - remove the cvmfs comment from the note; the node state is
#               only cleared (-c) when no other comment is left.
# Any other value prints an error on STDERR and changes nothing.
# Returns the status reported by system() for the pbsnodes call.
sub change_node_state {
    my ($reqstate) = @_;

    # Read the current note of the node.  The list form of the pipe open
    # runs pbsnodes directly, without a shell in between.
    my $curcmt = "";
    open( my $pbs, "-|", $pbsnodes, "-a", $hostname )
        or die "Cannot read from $pbsnodes\n";
    while ( my $line = <$pbs> ) {
        next unless $line =~ /^\s*note/;
        ( $curcmt = $line ) =~ s!^\s*note\ =\ !!;
        chomp $curcmt;
        $debug and print STDERR "current comment = $curcmt\n";
        last;
    }
    close $pbs;

    my $newcmt;
    my @option;    # stays empty when only the note has to be rewritten
    if ( $reqstate eq "offline" ) {
        # \Q..\E: the comment is a literal string, not a pattern.
        if ( $curcmt =~ /\Q$node_offline_comment\E/ ) {
            $debug and print STDERR "keep comment\n";
            $newcmt = $curcmt;
        }
        else {
            $debug and print STDERR "add cvmfs comment\n";
            $newcmt = "$curcmt,$node_offline_comment";
        }
        @option = ("-o");
    }
    elsif ( $reqstate eq "clear" ) {
        # Take the separating comma along, so that removing the comment
        # from the middle of the note does not leave ",," behind.
        ( $newcmt = $curcmt ) =~ s!,?\Q$node_offline_comment\E!!;
    }
    else {
        print STDERR "Invalid state $reqstate\n";
        return;
    }

    # remove leading or trailing commas
    $newcmt =~ s!^,+!!;
    $newcmt =~ s!,+$!!;
    chomp $newcmt;

    if ( $newcmt eq "" ) {
        @option = ("-c");    # only clear node state if no other comments
    }

    # List form: the note is passed as one argument whatever it contains
    # (quotes, spaces, shell metacharacters).
    return system( $pbsnodes, @option, $hostname, "-N", $newcmt );
}
109    
# Clear the cvmfs comment again, but only when the pbsnodes listing shows
# this node as offline with the cvmfs comment in its note.
sub check_node_state {
    my $listing = qx{$pbsnodes -l $hostname -n};

    # No output for this node: nothing to do.
    return if $listing eq "";

    unless ( $listing =~ /offline.*$node_offline_comment/ ) {
        $debug and print STDERR "Will not clear node, could not find comment $node_offline_comment\n";
        return;
    }

    change_node_state("clear");
}

Properties

Name Value
svn:executable *
svn:keywords id

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28