/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/nagios-ndpf-sensors/eventhandlers/cvmfs-errors
ViewVC logotype

Contents of /trunk/nl.nikhef.ndpf.tools/nagios-ndpf-sensors/eventhandlers/cvmfs-errors

Parent Directory | Revision Log


Revision 2566 - (show annotations) (download)
Tue Oct 2 10:07:27 2012 UTC (9 years, 3 months ago) by ronalds
File size: 2929 byte(s)
also try 3 times to reset error counters in a hard state
#!/usr/bin/perl

use strict;

# Nagios event handler to put a MOM offline when cvmfs gives an error.
#
# Positional arguments (all four are required):
#   1. service state      - "OK", "WARNING" or "CRITICAL"; any other value
#                           makes the handler exit without doing anything
#   2. service state type - "SOFT" or "HARD"
#   3. check attempt      - compared numerically
#   4. repository         - handed to reset_error_counters()
# NOTE(review): these are presumably filled in from Nagios macros in the
# event handler command definition - confirm against the Nagios config.
#
# Setting DEBUG in the environment enables diagnostics on STDERR.

if ( @ARGV < 4 ) {
    print STDERR "Missing required arguments (4 required in total)\n";
    exit 1;
}

my ( $svc_state, $svc_state_type, $svc_attempt, $cvmfs_repo ) = @ARGV;

my $debug = $ENV{DEBUG} || 0;

my $pbsnodes             = "/usr/bin/pbsnodes";
my $node_offline_comment = "Nagios event handler: cvmfs error";
my $hostname             = $ENV{HOSTNAME} || `hostname -f`;

# `hostname -f` may yield nothing at all; normalise that to an empty string.
$hostname = "" unless defined $hostname;
chomp $hostname;

#
# main function
#

# First decide what has to happen, then do it. Only the pbsnodes actions
# ("check" and "offline") need to know the name of this node.
my $handler_action = "";

if ( $svc_state eq "OK" ) {
    # service is fine, may put node back online
    $handler_action = "check" if $svc_state_type eq "HARD";
}
elsif ( $svc_state eq "CRITICAL" ) {
    # there can be many different warnings, some are worse than other:
    # go offline from the second soft attempt onwards, and always when hard
    if ( $svc_state_type eq "HARD"
        or ( $svc_state_type eq "SOFT" and $svc_attempt > 1 ) )
    {
        $handler_action = "offline";
    }
}
elsif ( $svc_state eq "WARNING" ) {
    # there may be a warning concerning cache errors
    # try to clear it while in a SOFT state, and for the first few
    # attempts of a HARD state as well
    if ( $svc_state_type eq "SOFT"
        or ( $svc_state_type eq "HARD" and $svc_attempt < 4 ) )
    {
        $handler_action = "reset";
    }
}

if ( $handler_action eq "reset" ) {
    reset_error_counters( $cvmfs_repo );
}
elsif ( $handler_action ne "" ) {
    # With an empty host name the pbsnodes commands would be run without a
    # node argument, so refuse to touch the batch system at all.
    if ( $hostname eq "" ) {
        print STDERR "Cannot determine the host name of this node\n";
        exit 1;
    }

    if ( $handler_action eq "check" ) {
        check_node_state();
    }
    else {
        change_node_state( "offline" );
    }
}

exit 0;
51
52
53
# Ask cvmfs-talk to reset the error counters of one repository.
#
# Arguments:
#   $_[0] - repository identifier, passed as the value of "cvmfs-talk -i"
#
# All output of cvmfs-talk is discarded. Returns the status value of
# system() (see perldoc -f system); 0 means the command succeeded.
sub reset_error_counters {
    my ($repo) = @_;
    $repo = "" unless defined $repo;

    # The repository name comes from the command line. Hand it to the shell
    # as positional parameter "$1" instead of pasting it into the command
    # text, so shell metacharacters inside it are never interpreted. The
    # shell is still needed for the redirection to /dev/null.
    my $script = 'exec cvmfs-talk -i "$1" reset error counters > /dev/null 2>&1';

    return system( '/bin/sh', '-c', $script, 'sh', $repo );
}
59
60
61
# Request that this node goes offline, or withdraw that request, by
# rewriting the note that pbsnodes keeps for the node.
#
# Arguments:
#   $_[0] - requested state:
#           "offline" - make sure $node_offline_comment is part of the note
#                       and run pbsnodes with -o
#           "clear"   - remove $node_offline_comment from the note; pbsnodes
#                       is run with -c only if no other text remains in the
#                       note, otherwise only the note is updated
#
# Uses the file-level $pbsnodes, $hostname, $node_offline_comment and $debug.
# Dies if pbsnodes cannot be started for reading the current note.
# Returns the status value of the final system() call, or nothing when the
# requested state is not recognised.
sub change_node_state {
    my ($reqstate) = @_;

    # Read the current note of this node. The list form of open runs
    # pbsnodes directly, so the host name is never parsed by a shell.
    my $curcmt = "";
    open my $pbs, '-|', $pbsnodes, '-a', $hostname
        or die "Cannot read from $pbsnodes\n";
    while ( my $line = <$pbs> ) {
        if ( $line =~ /^\s*note/ ) {
            ( $curcmt = $line ) =~ s!^\s*note\ =\ !!;
            chomp $curcmt;
            $debug and print STDERR "current comment = $curcmt\n";
            last;
        }
    }
    close $pbs;

    my $newcmt;
    my @options;    # state-changing option for pbsnodes, if any

    if ( $reqstate eq "offline" ) {
        if ( $curcmt =~ /\Q$node_offline_comment\E/ ) {
            $debug and print STDERR "keep comment\n";
            $newcmt = $curcmt;
        }
        else {
            $debug and print STDERR "add cvmfs comment\n";
            $newcmt = "$curcmt,$node_offline_comment";
        }
        @options = ("-o");
    }
    elsif ( $reqstate eq "clear" ) {
        # Remove our comment together with the comma that joined it to the
        # preceding text, so "a,<comment>,b" becomes "a,b" and not "a,,b".
        ( $newcmt = $curcmt ) =~ s!,?\Q$node_offline_comment\E!!;
    }
    else {
        print STDERR "Invalid state $reqstate\n";
        return;
    }

    # remove leading or trailing commas
    $newcmt =~ s!^,+!!;
    $newcmt =~ s!,+$!!;
    chomp $newcmt;

    if ( $newcmt eq "" ) {
        @options = ("-c");    # only clear node state if no other comments
    }

    # List form: the note is passed as a single argument whatever characters
    # (quotes, spaces) it contains.
    return system( $pbsnodes, @options, $hostname, '-N', $newcmt );
}
109
# Withdraw our own offline request, but only when the pbsnodes listing for
# this node shows it as offline with $node_offline_comment in its note.
# Uses the file-level $pbsnodes, $hostname, $node_offline_comment and $debug.
sub check_node_state {
    my $listing = `$pbsnodes -l $hostname -n`;

    # pbsnodes printed nothing for this node: leave it alone
    return if $listing eq "";

    unless ( $listing =~ /offline.*$node_offline_comment/ ) {
        $debug and print STDERR "Will not clear node, could not find comment $node_offline_comment\n";
        return;
    }

    change_node_state( "clear" );
}

Properties

Name Value
svn:executable *
svn:keywords id

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28