/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-verify-replicas-for-deletion.pl
ViewVC logotype

Annotation of /trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-verify-replicas-for-deletion.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3175 - (hide annotations) (download) (as text)
Wed Mar 29 13:22:04 2017 UTC (4 years, 9 months ago) by dennisvd
File MIME type: text/x-prolog
File size: 13348 byte(s)
Summary: dpm-verify-replicas-for-deletion - checksum other replicas for those flagged for deletion

1 dennisvd 3175 #!/usr/bin/perl -w
2    
3    
4     =head1 NAME
5    
6     dpm-verify-replicas-for-deletion - checksum other replicas for those flagged to be deleted
7    
8     =head1 SYNOPSIS
9    
10     dpm-drain | dpm-verify-replicas-for-deletion [ options ]
11    
12     =head1 OPTIONS
13    
14     =over 4
15    
16     =item B<--usage>
17     Show a summary help text
18    
19     =item B<--help>, B<--man>
20     Show the manual page
21    
22     =item B<--debug>
23     Enable debugging output
24    
25    
26     =back
27    
28     =head1 DESCRIPTION
29    
30     Draining a file system with dpm-drain sometimes leaves files behind
31     due to inconsistencies between the DPM databases and the actual
32     files on disk.
33    
34     Such inconsistencies may be caused by interrupted operations when writing,
35     deleting or replicating files.
36    
37     In order to give the DPM administrator a handle on the state of the
38     files, this tool will take one type of common inconsistency seen in the
39     output of dpm-drain: replicas which are flagged to be deleted.
40    
41     The presence of the flag may not actually be an inconsistency; DPM may
42     be in the process of deleting the replica. But if the file sticks
43     around after several draining attempts there is probably something
44     wrong.
45    
46     The program reads the output of dpm-drain (either directly or saved to
47     a file) and finds, for each case of a replica flagged to be deleted,
48     all other replicas for the same file. It then goes off to checksum all
49     replicas (by ssh) and reports inconsistencies.
50    
51     The script assumes that passwordless ssh login to the DPM disk nodes
52     is set up. The ssh callouts to disk nodes are done in parallel.
53    
54    
55     =cut
56    
57     use warnings;
58     use strict;
59    
60     use Getopt::Long;
61     use Pod::Usage;
62    
63     # We'll use temporary files to communicate with dpm-resolve
64     use File::Temp qw/ tempfile /;
65    
66     # The communication with the ssh subprocesses requires
67     # some low-level I/O handling.
68     use Socket;
69     use IO::Handle;
70     use IO::Poll qw(POLLIN POLLOUT POLLHUP);
71     use Fcntl qw( :DEFAULT O_NONBLOCK );
72     use POSIX ":sys_wait_h";
73    
74    
# Command-line flags; all of them default to off.
my ($debug, $manpage, $usage) = (0, 0, 0);

# --help, -? and --man all ask for the full manual page.
my $parsed_ok = GetOptions(
    "debug"  => \$debug,
    "help|?" => \$manpage,
    "man"    => \$manpage,
    "usage"  => \$usage,
);

# Unknown or malformed options: print the short help and exit with status 1.
pod2usage(-verbose => 1, -exitval => 1) unless $parsed_ok;

# Full manual page requested.
pod2usage(-verbose => 2, -exitval => 0) if $manpage;

# Short usage summary requested.
pod2usage(-verbose => 1, -exitval => 0) if $usage;
97    
98     # given the output of dpm-drain, find entries that are flagged to be
99     # deleted and verify which replicas there are and if they are healthy.
100     #
101     # Input lines look like this:
102     #
103     # 03/28 11:57:18 22985,0: The file strijker-20.nikhef.nl:/export/data/xenon/xenon.biggrid.nl/2016-10-12/XENON1T-3458-000013000-000013999-000001000.zip.429295550.0 is recorded as being in the process of being deleted, ignoring it during drain
104     #
105     # The program works through several phases:
106     #
107     # Phase 1 - collect the URLs
108     #
109     # Collect the file URLs and run dpm-resolve -r; then run dpm-resolve
110     # -s on the resulting path names. This yields a number of replicas.
111     #
112     # Phase 2 - ssh callout and data collection
113     #
114     # Each host that holds one or more of the replicas will run a checksum
115     # command over ssh. The output is collected.
116     #
117     # Phase 3 - parsing the checksum output
118     #
119     # The output of the checksum callouts is processed and stored
120     #
121     # Phase 4 - reporting
122     #
123     # The program reports which replicas are there, which match
124     # and which mismatch with the original file. This should give
125     # the admin a good idea which files can indeed be deleted.
126    
my @replicas;
my %cksum;

# Phase 1 - collect the replicas that dpm-drain reports as being in the
# process of being deleted. Input is read from the files named on the
# command line, or from standard input when there are none.
while (my $line = <>) {
    if ($line =~ /The file (.*) is recorded as being in the process of being deleted/) {
        push @replicas, $1;
    }
}

# Write the replicas to a temporary file, one per line; the file serves
# as standard input for dpm-resolve.
my ($fh, $repfile) = tempfile();
for my $i (@replicas) {
    print {$fh} $i . "\n" or die "cannot write to $repfile: $!";
}
# Buffered write errors only surface at close time, so check it.
close $fh or die "cannot close $repfile: $!";

# Call dpm-resolve to get a list of dpm name space entries.
my @resolve_r = `dpm-resolve -r < $repfile`;
# Only warn on failure: whatever output was produced is still processed.
warn "dpm-resolve -r failed (wait status $?)\n" if $? != 0;
unlink $repfile;
145    
146     # Bookkeeping; we keep associative arrays for lookup later on.
147     # The sourcereplica has the dpns entry as the key and contains
148     # the replica it all started with. The replicadpns has a replica
149     # as the key and the dpns entry as the value.
150     # The dpnsreplica works the other way around, but its value is an array
151     # reference as there may be more than one replica per entry.
152    
my %sourcereplica;
my %replicadpns;
my %dpnsreplica;

# Another temporary file, for the second call to dpm-resolve.
($fh, my $namesfile) = tempfile();
for (@resolve_r) {
    chomp;
    # format is replica <SPACE> name space path
    if (/(.*) (.*)/) {
        my ($replica, $dpns) = ($1, $2);
        # store in hashes for easy lookup
        $sourcereplica{$dpns}  = $replica;
        $replicadpns{$replica} = $dpns;
        # A name space entry can show up more than once when several of
        # its replicas are flagged. Create the list only the first time
        # (so earlier replicas are not thrown away) and ask dpm-resolve
        # about each entry only once.
        if (!exists $dpnsreplica{$dpns}) {
            $dpnsreplica{$dpns} = [];
            print {$fh} $dpns . "\n" or die "cannot write to $namesfile: $!";
        }
        push @{ $dpnsreplica{$dpns} }, $replica;
    }
}
# Buffered write errors only surface at close time, so check it.
close $fh or die "cannot close $namesfile: $!";
# now run another dpm-resolve -s to resolve name space to replicas
my @rreplicas = `dpm-resolve -s < $namesfile`;
unlink $namesfile;
175    
176     # More bookkeeping; we're building a list of replicas to check per
177     # host, so we need to do only one ssh call to each host. This has
178     # hash gives us the replicas (another array ref) given the host name.
# Replica paths to checksum, grouped by the host that stores them.
my %hostrepls;
for my $entry (@rreplicas) {
    chomp $entry;
    # expected format: name space path <SPACE> host:local path
    if (my ($dpns, $host, $path) = $entry =~ /(.*) (.*):(.*)/) {
        my $replica = "$host:$path";
        $replicadpns{$replica} = $dpns;
        push @{ $dpnsreplica{$dpns} }, $replica;
        # add the local file path to the server's list; the push creates
        # the array on first use
        push @{ $hostrepls{$host} }, $path;
    }
    else {
        # The database does not know about the replica any more, which is
        # almost certainly an inconsistency.
        chop $entry;
        print "no replica found for $entry, originally from $sourcereplica{$entry}\n";
    }
}
195    
196     # More bookkeeping; this stuff is just there to have a record
197     # of which processes we start and which hosts we are talking to.
198     # We'll fan out the ssh sessions in parallel so we keep as many
199     # sockets and pids. We need host/socket lookup both ways
200     # as well as host/pid.
# Process bookkeeping: host <-> pid and host <-> socket, both directions,
# for the ssh sessions that are fanned out in parallel.
my (%hostpid, %pidhost);
my (%hostsocket, %sockethost);

# Raw output of the checksum command, one entry per host, so we know
# which host the file names in that output belong to.
my %collectedoutput;

# The poll object has to exist before the first forkssh call, because
# forkssh registers each socket with it for POLLIN.
my $poll = IO::Poll->new();

# Start one ssh session per host that holds replicas.
for my $host (keys %hostrepls) {
    $collectedoutput{$host} = "";
    print "retrieving checksums from $host by ssh.\n";
    forkssh($host);
}
220    
221    
222     # The remote checksumming processes may take a while, especially on
223     # large files. To give the user a sense of progress, print a '.'
224     # every 100 bytes that we receive from the combined ssh sessions.
225     # These are global variables because we need to retain the values
226     # between calls to readsocketdata().
my $totalbytes = 0;
my $ticks = 0; # tick mark every 100 bytes

# Wait for processes and collect output. Each pass does two things:
# read whatever poll reports as ready, then reap children that have
# exited (without blocking).
#
# With no children at all (no flagged replicas in the input) there is
# nothing to wait for; without this check the loop would never end.
my $done = %pidhost ? 0 : 1;
while (! $done) {
    # IO::Poll takes its timeout in seconds (fractions allowed), so a
    # tenth of a second is written as 0.1.
    my $p = $poll->poll(0.1);
    if ($p == -1) { # oops, what to do?
        print "poll: $!\n";
        sleep(1);
        next;
    }
    if ($p) {
        print "poll: $p\n" if $debug;
        foreach my $handle ($poll->handles(POLLIN)) {
            print "handle: $handle\n" if $debug;
            # Read the data into collectedoutput; this is why we need
            # the association from socket to host.
            readsocketdata($sockethost{$handle});
        }
    }
    # Second part, see if any children finished...
    foreach my $pid (keys %pidhost) {
        my $kid = waitpid $pid, WNOHANG; # ...but don't actually wait.
        # -1 means there is no such child (any more); treat it as
        # finished too, otherwise its pid would keep the loop alive.
        next unless $kid == $pid || $kid == -1;
        print "reap child $pid\n" if $debug;
        my $host = delete $pidhost{$pid};
        # final chance to read the remainder of any socket data
        readsocketdata($host);
        # remove the handle from the polling and close it
        $poll->remove($hostsocket{$host});
        close $hostsocket{$host};
    }
    # Stop once there are no pids left.
    $done = 1 if ! %pidhost;
}

# clear the line after the row of ticks
print "\n";
280    
281     # Phase 3 - Parse checksum output
282     #
283     # parse the collectedoutput of each host
284     # output format is
285     # dce4d0699dde26ffd27344c5b51d6ee70d2c9766 /export/data/bla
286     # 1eabdb58336561a5f5c858779430fc6e1255b5aa /export/data/blablabla
287     #
288     # We have to link the replicas to their hashes. Replicas
289     # have host names in them so it's just one big hash.
# Map "host:path" to the checksum reported by that host.
my %replicahash;
foreach my $h (keys %collectedoutput) {
    foreach my $line (split /\n/, $collectedoutput{$h}) {
        # A sha1sum result line is 40 hex digits, a space, a mode
        # character (space or '*') and the file name. The pattern is
        # anchored so that error messages, which also contain spaces,
        # are not mistaken for a checksum.
        if ($line =~ /^([0-9a-f]{40})\s+\*?(.+)$/) {
            $replicahash{"$h:$2"} = $1;
        } else {
            # unexpected output goes to stderr (missing files)
            print STDERR "ERROR $h: $line\n";
        }
    }
}
303    
304     # Phase 4 - Reporting
305     #
306     # Go back to the original problematic replica.
307     #
308     # combine everything in a single (long) line of output per
309     # replica, annotated with remarks concerning its status.
310    
print "\n";
foreach my $orig (keys %sourcereplica) {
    # NOTE(review): $ok starts at 0, so every case is reported whether or
    # not --debug is given. The original comment suggested "assume fine"
    # (i.e. 1), which would hide matching replicas; confirm the intent
    # before changing it.
    my $ok = 0;
    my $report = ""; # string to collect the report for this case
    my $orep = $sourcereplica{$orig};
    $report .= "$orep: ";
    my $ohash = $replicahash{$orep};
    if (! defined $ohash) { $report .= "(missing hash)"; $ok = 0; }
    # through the dpns, find the other reps
    my $odpns = $replicadpns{$orep};
    if (! defined $odpns) { $report .= "(missing dpns)\n"; print $report; next; }
    $report .= " ($odpns) ";
    # guard the dereference: a missing entry is treated as an empty list
    my @reps = @{ $dpnsreplica{$odpns} || [] };
    if (! @reps) {
        $report .= " seem to be an orphan\n";
        print $report;
        next;
    }
    my $issingle = 1; # is this the only replica?
    foreach my $r (@reps) {
        next if $r eq $orep; # same instance
        $issingle = 0;
        my $rhash = $replicahash{$r};
        if (! defined $rhash) {
            # no checksum was obtained for this replica; say so instead
            # of comparing an undefined value
            $ok = 0;
            $report .= " hash missing $r";
        } elsif (! defined $ohash or $rhash ne $ohash) {
            # without a hash for the original nothing can match
            $ok = 0;
            $report .= " hash mismatch $r";
        } else {
            $report .= " hash match $r";
        }
    }
    if ($issingle) {
        $ok = 0;
        $report .= " (only replica)";
    }

    if ( (!$ok) or $debug) {
        print "$report\n";
    }
}

exit 0;
350    
351    
352     # The readsocketdata routine
353     # gets a host name as argument, and will try
354     # to read some data (non-blocking) from the corresponding
355     # socket. It also prints the progress tick marks.
sub readsocketdata {
    my ($host) = @_;
    my $sock = $hostsocket{$host};

    # Keep reading until sysread returns nothing (no data available,
    # end of file, or an error).
    while (1) {
        my $chunk;
        my $got = sysread($sock, $chunk, 1024);
        last unless $got;

        # Progress ticks: one '.' per 100 bytes received in total,
        # computed with integer arithmetic.
        use integer;
        $totalbytes += $got;
        my $newticks = $totalbytes / 100;
        print STDERR "." x ($newticks - $ticks) if $newticks > $ticks;
        $ticks = $newticks;

        print STDERR "read $got bytes from $host\n" if $debug;
        $collectedoutput{$host} .= $chunk;
    }
}
378    
379    
380     # Fork off a subprocess for the given host.
381     # Create a socketpair; set asynchronous communication
382     # and store the info in the global hashes.
383     #
384     # The child process will launch an ssh session, and this
385     # will not work without a passwordless ssh setup.
386     # The actual command that is run is sha1sum on all the
387     # local replicas for that host.
388     #
389     # There is a potential problem that the command line may become
390     # too long; the fix for that would be to start a scriptlet instead
391     # that reads file names from stdin, but then we would have to feed
392     # stdin asynchronously as well.
sub forkssh {
    # Argument: the name of the host to contact. Nothing useful is
    # returned; the socket and the pid are recorded in the global hashes
    # and the socket is registered with the global poll object.
    my $hostname = shift;

    socketpair(my $child, my $parent, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
        or die "socketpair: $!";
    $child->autoflush(1);
    $parent->autoflush(1);

    # register the socket in the hashes, searchable both ways
    $hostsocket{$hostname} = $child;
    $sockethost{$child}    = $hostname;

    # make sure we do non-blocking I/O on our end
    my $flags = fcntl($child, F_GETFL, 0)
        or die "fcntl F_GETFL: $!";
    fcntl($child, F_SETFL, $flags | O_NONBLOCK)
        or die "fcntl F_SETFL: $!";

    my $pid = fork();
    die "cannot fork: $!" if not defined $pid;

    if ($pid) {
        # parent: nothing to do now, the data is collected later
        close $parent;
        $poll->mask($child => POLLIN); # check for input later
        $hostpid{$hostname} = $pid;
        $pidhost{$pid}      = $hostname; # make it work both ways
        return;
    }

    # child
    close $child;
    my @replics = @{ $hostrepls{$hostname} || [] };

    # Print the debug message before STDERR is redirected, so that it
    # does not end up in the checksum output collected by the parent.
    print STDERR "about to run ssh root\@$hostname sha1sum "
        . join(" ", @replics) . "\n" if $debug;

    # send both stdout and stderr to the parent
    open(STDOUT, ">&", $parent) or die "can't dup \$parent: $!";
    open(STDERR, ">&", $parent) or die "can't dup \$parent: $!";
    close $parent;

    # ssh joins its arguments with spaces and hands the result to the
    # remote shell, so every file name is wrapped in single quotes.
    my @quoted = map { (my $q = $_) =~ s/'/'\\''/g; "'$q'" } @replics;

    {
        no warnings 'exec'; # the failure is reported below
        exec("ssh", "root\@$hostname", "sha1sum", @quoted);
    }
    # Only reached when exec failed. Leave right away; returning from
    # here would make the child carry on with the parent's main program.
    print STDERR "cannot exec ssh for $hostname: $!\n";
    POSIX::_exit(127);
}
431    
432     =head1 COPYRIGHT AND LICENSE
433    
434     Copyright 2017 NWO-I
435    
436     Licensed under the Apache License, Version 2.0 (the "License");
437     you may not use this file except in compliance with the License.
438     You may obtain a copy of the License at
439    
440     http://www.apache.org/licenses/LICENSE-2.0
441    
442     Unless required by applicable law or agreed to in writing, software
443     distributed under the License is distributed on an "AS IS" BASIS,
444     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
445     See the License for the specific language governing permissions and
446     limitations under the License.
447    
448     =cut
449    
450     =head1 SEE ALSO
451    
452     dpm-drain(1)

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28