/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-verify-replicas-for-deletion.pl
ViewVC logotype

Contents of /trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-verify-replicas-for-deletion.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3175 - (show annotations) (download) (as text)
Wed Mar 29 13:22:04 2017 UTC (4 years, 8 months ago) by dennisvd
File MIME type: text/x-prolog
File size: 13348 byte(s)
Summary: dpm-verify-replicas-for-deletion - checksum other replicas for those flagged for deletion

1 #!/usr/bin/perl -w
2
3
4 =head1 NAME
5
6 dpm-verify-replicas-for-deletion - checksum other replicas for those flagged to be deleted
7
8 =head1 SYNOPSIS
9
10 dpm-drain | dpm-verify-replicas-for-deletion [ options ]
11
12 =head1 OPTIONS
13
14 =over 4
15
16 =item B<--usage>
17 Show a summary help text
18
19 =item B<--help>, B<--man>
20 Show the manual page
21
22 =item B<--debug>
23 Enable debugging output
24
25
26 =back
27
28 =head1 DESCRIPTION
29
30 Draining a file system with dpm-drain sometimes leaves files behind
31 due to inconsistencies between the DPM databases and the actual
32 files on disk.
33
34 Such inconsistencies may be caused by interrupted operations when writing,
35 deleting or replicating files.
36
37 In order to give the DPM administrator a handle on the state of the
38 files, this tool will take one type of common inconsistency seen in the
39 output of dpm-drain: replicas which are flagged to be deleted.
40
41 The presence of the flag may not actually be an inconsistency; DPM may
42 be in the process of deleting the replica. But if the file sticks
43 around after several draining attempts there is probably something
44 wrong.
45
46 The program reads the output of dpm-drain (either directly or saved to
47 a file) and finds, for each case of a replica flagged to be deleted,
48 all other replicas for the same file. It then goes off to checksum all
49 replicas (by ssh) and reports inconsistencies.
50
51 The script assumes that passwordless ssh login to the DPM disk nodes
52 is set up. The ssh callouts to disk nodes are done in parallel.
53
54
55 =cut
56
57 use warnings;
58 use strict;
59
60 use Getopt::Long;
61 use Pod::Usage;
62
63 # We'll use temporary files to communicate with dpm-resolve
64 use File::Temp qw/ tempfile /;
65
66 # The communication with the ssh subprocesses requires
67 # some low-level I/O handling.
68 use Socket;
69 use IO::Handle;
70 use IO::Poll qw(POLLIN POLLOUT POLLHUP);
71 use Fcntl qw( :DEFAULT O_NONBLOCK );
72 use POSIX ":sys_wait_h";
73
74
# Command-line switches; each one stays 0 unless given on the command line.
my $debug   = 0;    # --debug: print diagnostic output
my $manpage = 0;    # --help, -? or --man: show the full manual page
my $usage   = 0;    # --usage: show the short help text

# An unparsable command line gets the short help text and exit status 1.
GetOptions(
    "debug"  => \$debug,
    "help|?" => \$manpage,
    "man"    => \$manpage,
    "usage"  => \$usage,
) or pod2usage(-verbose => 1, -exitval => 1);

# Both help variants print their text and exit successfully.
pod2usage(-verbose => 2, -exitval => 0) if $manpage;
pod2usage(-verbose => 1, -exitval => 0) if $usage;
96
97
98 # given the output of dpm-drain, find entries that are flagged to be
99 # deleted and verify which replicas there are and if they are healthy.
100 #
101 # Input lines look like this:
102 #
103 # 03/28 11:57:18 22985,0: The file strijker-20.nikhef.nl:/export/data/xenon/xenon.biggrid.nl/2016-10-12/XENON1T-3458-000013000-000013999-000001000.zip.429295550.0 is recorded as being in the process of being deleted, ignoring it during drain
104 #
105 # The program works through several phases:
106 #
107 # Phase 1 - collect the URLs
108 #
109 # Collect the file URLs and run dpm-resolve -r; then run dpm-resolve
110 # -s on the resulting path names. This yields a number of replicas.
111 #
112 # Phase 2 - ssh callout and data collection
113 #
114 # Each host that holds one or more of the replicas will run a checksum
115 # command over ssh. The output is collected.
116 #
117 # Phase 3 - parsing the checksum output
118 #
119 # The output of the checksum callouts is processed and stored
120 #
121 # Phase 4 - reporting
122 #
123 # The program reports which replicas are there, which match
124 # and which mismatch with the original file. This should give
125 # the admin a good idea which files can indeed be deleted.
126
# Phase 1 - collect the flagged replicas and resolve them, first to name
# space entries and then to the full list of replicas of each entry.

my @replicas;    # replicas that dpm-drain reported as flagged for deletion
my %cksum;       # NOTE(review): not referenced anywhere else in this script

# Pick the replica out of every matching line of dpm-drain output.
while (my $line = <>) {
    if ($line =~ /The file (.*) is recorded as being in the process of being deleted/) {
        push @replicas, $1;
    }
}

# Hand the replicas to dpm-resolve through a temporary file, one per line.
my ($fh, $repfile) = tempfile();
print {$fh} "$_\n" for @replicas;
close $fh or die "cannot write $repfile: $!\n";
# call dpm-resolve to get a list of dpm name space entries
my @resolve_r = `dpm-resolve -r < $repfile`;
# Do not abort: partial output is still useful, but tell the user about it.
warn "dpm-resolve -r failed (wait status $?); its output may be incomplete\n"
    if $? != 0;
unlink $repfile;

# Bookkeeping; we keep associative arrays for lookup later on.
my %sourcereplica;    # name space entry -> the flagged replica it all started with
my %replicadpns;      # replica -> name space entry
my %dpnsreplica;      # name space entry -> array ref, as there may be more
                      # than one replica per entry

# Another temporary file for the second call to dpm-resolve.
($fh, my $namesfile) = tempfile();
for my $line (@resolve_r) {
    chomp $line;
    # format is replica <SPACE> name space path
    next unless $line =~ /(.*) (.*)/;
    my ($replica, $dpns) = ($1, $2);

    # Ask dpm-resolve -s about each name space entry only once, even when
    # several flagged replicas belong to the same entry.
    print {$fh} "$dpns\n" unless exists $dpnsreplica{$dpns};

    # If several flagged replicas share an entry, the last one seen is the
    # one the report starts from; the others remain listed as replicas.
    $sourcereplica{$dpns}  = $replica;
    $replicadpns{$replica} = $dpns;
    # push autovivifies the array ref and keeps replicas recorded earlier
    push @{ $dpnsreplica{$dpns} }, $replica;
}
close $fh or die "cannot write $namesfile: $!\n";
# now run another dpm-resolve -s to resolve name space to replicas
my @rreplicas = `dpm-resolve -s < $namesfile`;
warn "dpm-resolve -s failed (wait status $?); its output may be incomplete\n"
    if $? != 0;
unlink $namesfile;
175
# More bookkeeping; we're building a list of replicas to check per
# host, so we need to do only one ssh call to each host. This hash
# gives us the local file paths (an array ref) given the host name.
my %hostrepls;
for my $line (@rreplicas) {
    chomp $line;
    # Expected format: name space path <SPACE> host:local path.
    # The host part may not contain a colon, so that a colon inside the
    # local path does not move the split between host and path.
    if ($line =~ /\A(.*) ([^\s:]+):(.*)\z/) {
        my ($dpns, $host, $path) = ($1, $2, $3);
        $replicadpns{"$host:$path"} = $dpns;
        push @{ $dpnsreplica{$dpns} }, "$host:$path";
        # add the local file path to the server's list
        push @{ $hostrepls{$host} }, $path;
    }
    else {
        # This is problematic, as the database doesn't know about the
        # replica anymore. Almost certainly an inconsistency.
        # NOTE(review): the chop drops one more character after the
        # newline; presumably dpm-resolve leaves a trailing separator on
        # such lines - confirm against real dpm-resolve -s output.
        chop $line;
        my $source = defined $sourcereplica{$line}
            ? $sourcereplica{$line}
            : "(unknown)";
        print "no replica found for $line, originally from $source\n";
    }
}
195
# Records of the ssh children we are about to start. The sessions run in
# parallel, so we keep one pid and one socket per host and need to get
# from a host to its pid or socket and back again.
my %hostpid;       # host name -> pid of its ssh child
my %pidhost;       # pid -> host name
my %hostsocket;    # host name -> our end of the socket pair
my %sockethost;    # socket -> host name

# Raw output of the checksum command, accumulated per host; the host
# name is what turns the local paths in that output back into replicas.
my %collectedoutput;

# A single poll object tells us which child has something to report.
# It has to exist before the first forkssh call, because forkssh
# registers every new socket with it for POLLIN.
my $poll = IO::Poll->new();
for my $host (keys %hostrepls) {
    $collectedoutput{$host} = "";
    print "retrieving checksums from $host by ssh.\n";
    forkssh($host);
}
220
221
# The remote checksumming processes may take a while, especially on
# large files. To give the user a sense of progress, print a '.'
# every 100 bytes that we receive from the combined ssh sessions.
# These are file-level variables because their values must be retained
# between calls to readsocketdata().
my $totalbytes = 0;
my $ticks      = 0;    # number of tick marks printed so far

# Wait for processes and collect output. Each round does two things:
# read whatever poll reports as ready, then reap children that have
# exited. Nothing in here blocks for longer than the poll timeout.
#
# If no ssh session was started there is nothing to wait for; without
# this check the loop would never terminate.
my $done = %pidhost ? 0 : 1;
while (!$done) {
    # IO::Poll takes its timeout in (fractional) seconds: 0.1 is 100 ms.
    my $p = $poll->poll(0.1);
    if ($p == -1) {    # oops, what to do?
        print "poll: $!\n";
        sleep(1);
        next;
    }
    if ($p) {
        print "poll: $p\n" if $debug;
        for my $handle ($poll->handles(POLLIN)) {
            print "handle: $handle\n" if $debug;
            # Read the data into collectedoutput; this is why we need
            # the association from socket to host.
            readsocketdata($sockethost{$handle});
        }
    }

    # Second part, see if any children finished...
    for my $pid (keys %pidhost) {
        my $kid = waitpid $pid, WNOHANG;    # ...but don't actually wait.
        # -1 means there is no such child (any more); treat it as
        # finished too, otherwise we would wait for it forever.
        next unless $kid == $pid or $kid == -1;

        print "reap child $pid\n" if $debug;
        my $host = delete $pidhost{$pid};
        # final chance to read the remainder of any socket data
        readsocketdata($host);
        # remove the handle from the polling and close it
        $poll->remove($hostsocket{$host});
        close $hostsocket{$host};
    }
    # Finished once the last child has been reaped.
    $done = 1 unless %pidhost;
}

# clear the line after the row of ticks
print "\n";
280
# Phase 3 - Parse checksum output
#
# Parse the collected output of each host. A checksum line consists of
# the hexadecimal digest, white space and the local file name:
#   dce4d0699dde26ffd27344c5b51d6ee70d2c9766  /export/data/bla
#   1eabdb58336561a5f5c858779430fc6e1255b5aa  /export/data/blablabla
#
# We have to link the replicas to their hashes. Replicas have host
# names in them, so one big hash keyed on "host:path" is enough.
my %replicahash;
for my $host (keys %collectedoutput) {
    for my $line (split /\n/, $collectedoutput{$host}) {
        # Anchor the match and insist on a real digest (32 or more hex
        # digits). Error messages such as "sha1sum: FILE: No such file
        # or directory" must not be mistaken for a checksum line, and
        # the separator must not end up in the file name.
        if ($line =~ /\A([0-9a-f]{32,})\s+(.+)\z/) {
            $replicahash{"$host:$2"} = $1;
        }
        else {
            # unexpected output goes to stderr (missing files)
            print STDERR "ERROR $host: $line\n";
        }
    }
}
303
# Phase 4 - Reporting
#
# Go back to the original problematic replica and combine everything
# in a single (long) line of output per replica, annotated with remarks
# concerning its status.

print "\n";
for my $orig (keys %sourcereplica) {
    # NOTE(review): because $ok starts out false, every case is printed
    # below even without --debug. Start it at 1 if only the problem
    # cases should be shown.
    my $ok     = 0;
    my $report = "";    # collects the report line for this case
    my $orep   = $sourcereplica{$orig};
    $report .= "$orep: ";

    my $ohash = $replicahash{$orep};
    if (!defined $ohash) {
        $report .= "(missing hash)";
        $ohash = 0;     # can never equal a real checksum
        $ok    = 0;
    }

    # through the dpns, find the other reps
    my $odpns = $replicadpns{$orep};
    if (!defined $odpns) {
        $report .= "(missing dpns)\n";
        print $report;
        next;
    }
    $report .= " ($odpns) ";

    # A missing entry is treated like an empty list instead of dying on
    # the dereference of an undefined value.
    my @reps = @{ $dpnsreplica{$odpns} || [] };
    if (!@reps) {
        $report .= " seem to be an orphan\n";
        print $report;
        next;
    }

    my $issingle = 1;    # is this the only replica?
    for my $r (@reps) {
        next if $r eq $orep;    # same instance
        $issingle = 0;
        my $rhash = $replicahash{$r};
        if (!defined $rhash) {
            # No checksum was obtained for this replica (for instance
            # because the file is gone); say so rather than comparing
            # an undefined value.
            $ok = 0;
            $report .= " hash mismatch $r (no checksum available)";
        }
        elsif ($rhash ne $ohash) {
            $ok = 0;
            $report .= " hash mismatch $r";
        }
        else {
            $report .= " hash match $r";
        }
    }
    if ($issingle) {
        $ok = 0;
        $report .= " (only replica)";
    }

    if ((!$ok) or $debug) {
        print "$report\n";
    }
}

exit 0;
350
351
# readsocketdata HOST
#
# Drains whatever is currently available on the socket that belongs to
# HOST and appends it to that host's entry in %collectedoutput. Reading
# stops as soon as sysread returns a false value, i.e. at end of file
# or when sysread reports an error.
#
# As a side effect it prints the progress tick marks on STDERR: one '.'
# for every 100 bytes received in total, over all hosts.
sub readsocketdata {
    my ($host) = @_;
    my $sock = $hostsocket{$host};

    while (my $bytes = sysread($sock, my $chunk, 1024)) {
        use integer;    # the tick count is a whole number
        $totalbytes += $bytes;
        my $newticks = $totalbytes / 100;
        print STDERR "." x ($newticks - $ticks) if $newticks > $ticks;
        $ticks = $newticks;
        print STDERR "read $bytes bytes from $host\n" if $debug;
        $collectedoutput{$host} .= $chunk;
    }
    return;
}
378
379
# forkssh HOSTNAME
#
# Fork off a subprocess for the given host. Creates a socket pair, makes
# the parent's end non-blocking and records the socket and the child's
# pid in the file-level lookup hashes (%hostsocket, %sockethost,
# %hostpid, %pidhost). The parent's end is registered with $poll for
# POLLIN. Dies if the socket pair, the fcntl calls or the fork fail.
#
# The child process launches an ssh session as root, and this will not
# work without a passwordless ssh setup. The command that is run is
# sha1sum on all the local replicas for that host, with both stdout and
# stderr sent back over the socket. The child never returns from this
# routine: it either becomes ssh or exits with status 127.
#
# There is a potential problem that the command line may become
# too long; the fix for that would be to start a scriptlet instead
# that reads file names from stdin, but then we would have to feed
# stdin asynchronously as well.
sub forkssh {
    my $hostname = shift;
    socketpair(my $child, my $parent, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
        or die "socketpair: $!";
    $child->autoflush(1);
    $parent->autoflush(1);
    # register the socket in the hashes, searchable both ways
    $hostsocket{$hostname} = $child;
    $sockethost{$child}    = $hostname;
    # make sure we do non-blocking I/O on our end
    my $flags = fcntl($child, F_GETFL, 0) or die "fcntl F_GETFL: $!";
    fcntl($child, F_SETFL, $flags | O_NONBLOCK) or die "fcntl F_SETFL: $!";

    my $pid = fork();
    die "cannot fork: $!" if not defined $pid;

    if ($pid) {
        # parent: nothing to do right now, the data is collected later
        close $parent;
        $poll->mask($child => POLLIN);    # check for input later
        $hostpid{$hostname} = $pid;
        $pidhost{$pid}      = $hostname;  # make it work both ways
        return;
    }

    # child
    close $child;
    my @replics = @{ $hostrepls{$hostname} };
    # Print the debug message while STDERR is still the user's stderr.
    # Once redirected it would end up in the collected checksum output,
    # and without a final newline it would merge with the first line of
    # real output.
    print STDERR "about to run ssh root\@$hostname sha1sum "
        . join("\n", @replics) . "\n"
        if $debug;

    # send both stdout and stderr of ssh to the parent
    open(STDOUT, ">&", $parent) or die "can't dup \$parent: $!";
    open(STDERR, ">&", $parent) or die "can't dup \$parent: $!";
    close $parent;

    # ssh joins its arguments with blanks and hands the result to the
    # remote shell. Single-quote every file name, so that blanks and
    # shell metacharacters in a name reach sha1sum unchanged.
    my @quoted;
    for my $path (@replics) {
        (my $q = $path) =~ s/'/'\\''/g;
        push @quoted, "'$q'";
    }

    exec("ssh", "root\@$hostname", "sha1sum", @quoted) or do {
        # exec only returns on failure. Leave immediately, so that the
        # child does not carry on running the parent's main program.
        print STDERR "cannot exec ssh for $hostname: $!\n";
        POSIX::_exit(127);
    };
}
431
432 =head1 COPYRIGHT AND LICENSE
433
434 Copyright 2017 NWO-I
435
436 Licensed under the Apache License, Version 2.0 (the "License");
437 you may not use this file except in compliance with the License.
438 You may obtain a copy of the License at
439
440 http://www.apache.org/licenses/LICENSE-2.0
441
442 Unless required by applicable law or agreed to in writing, software
443 distributed under the License is distributed on an "AS IS" BASIS,
444 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
445 See the License for the specific language governing permissions and
446 limitations under the License.
447
448 =cut
449
450 =head1 SEE ALSO
451
452 dpm-drain(1)

Properties

Name Value
svn:executable *

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28