/[pdpsoft]/trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-disk-consistency
ViewVC logotype

Annotation of /trunk/nl.nikhef.ndpf.tools/ndpf-dpm-tools/bin/dpm-disk-consistency

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3182 - (hide annotations) (download)
Fri May 12 14:18:58 2017 UTC (5 years ago) by dennisvd
File size: 8205 byte(s)
Summary: include checksums in consistency check

1 ronalds 1810 #!/usr/bin/perl
2    
3     #
4     # Tool to check the consistency between the DPM database
5     # and the files actually on disk.
6     #
7    
8     use strict;
9    
10     use Getopt::Long;
11     use Sys::Hostname;
12     use DBI;
13 dennisvd 3182 use Digest::Adler32;
14     use Digest::MD5;
15     use DPM::Utils;
16 ronalds 1810
# ---------------------------------------------------------------------
# Command-line options and global state shared by the subs below.
# ---------------------------------------------------------------------
my $help = 0;
my $verbose = 0;
my $resolve = 0;
my $checksum_check = 0;

# if $dbfile is not empty, the DB data will be read from it
# instead of obtained by executing gridpp_dpm_disk
# my $dbfile = '/home/ronalds/tmp/DPM/dpm_db_dump.txt';
my $dbfile;

my $cfgfile = '/root/DPMINFO';

my $logfile;
##$logfile = "-"; # STDOUT for debugging

# Declared once here; it is filled from the first positional argument
# after option parsing (a second 'my' would mask this declaration).
my $filesystem;

GetOptions(
    'help|h'      => \$help,
    'verbose|v+'  => \$verbose,
    'logfile|l:s' => \$logfile,
    'dbfile:s'    => \$dbfile,
    'cfgfile:s'   => \$cfgfile,
    'resolve|r'   => \$resolve,
    'sum|s'       => \$checksum_check
);

$help and usage(0);

# Default log file name carries a YYYYMMDD-hhmmss timestamp.
my @now = localtime(time);
my $date = sprintf "%04d%02d%02d-%02d%02d%02d",
    $now[5]+1900, $now[4]+1, $now[3], $now[2], $now[1], $now[0];
$logfile ||= "dpm-consistency-check-$date";

# Three-argument open so that the file name can never be interpreted as
# an open mode.  The name "-" keeps its old meaning of "log to STDOUT".
# LOG stays a bareword handle because every sub in this file prints to it.
if ( $logfile eq '-' ) {
    open LOG, '>&', \*STDOUT or die "$logfile: $!\n";
}
else {
    open LOG, '>', $logfile or die "$logfile: $!\n";
}
($verbose > 1) and print LOG "Log file: $logfile\n";

# Bit flags OR-ed together per replica in %replicas; the checksum hashes
# are keyed by the same relative replica path.
my %replicas;
my %csumtype;
my %csumvalue;
my %count = ( db => 0, disk => 0, only_db => 0, only_disk => 0, db_and_disk => 0, unknown => 0 );
my $REPLICA_IN_DB = 1;
my $REPLICA_ON_DISK = 2;
my @only_db;
my @only_disk;

# NOTE(review): these are not referenced anywhere in the visible code.
my ($db_user, $db_pw, $db_host, $dpns_db_name, $db_port, $dbh);

# First positional argument: the file system to check.  Trailing slashes
# are stripped so that path prefixes can be removed uniformly later on.
$filesystem = shift;
if ( defined $filesystem ) {
    $filesystem =~ s!/*$!!;
}
if ( ! $filesystem ) {
    warn "Missing mandatory argument 'filesystem'\n";
    usage(1);
}
if ( ! -d $filesystem ) {
    # Only a warning: the script carries on even when the directory is missing.
    warn "$!: $filesystem\n";
}

my $server = hostname();

($verbose > 0) and print LOG "server: $server filesystem: $filesystem\n";
77 ronalds 1810
78     # Initial design, unoptimized:
79     # 1. Determine all replicas registered in the DB that are stored
80     # on the current server for the given file system.
81     # Store the replicas in hash %replicas with key replica
82 dennisvd 3182 # and value $REPLICA_IN_DB
83 ronalds 1810 # 2. Determine all files that are present
84     # on the current server for the given file system.
85     # Store the files in hash %replicas with key file
86     # and value $REPLICA_ON_DISK
87     # 3. Compare the hashes, determining inconsistencies and
88     # counting statistics
89     #
90     # Later:
91     # - Add an option to automatically remove entries from the
92     # database for which the corresponding file is missing,
93     # or remove the file from disk if there is no corresponding
94     # entry in the database
95     # - Memory consumption might be an issue for this script;
96     # consider splitting the processing by date
97    
# Handle used for all database access and SURL resolution in the subs below.
my $dpm = DPM::Utils->new(conffile => $cfgfile, host => $server);


# Determine replicas registered in the database
get_db_replicas(\%replicas, \%csumtype, \%csumvalue);

# Determine replicas on disk
get_fs_replicas(\%replicas);

# Comparison
compare_database_filesystem(\%replicas, \%csumtype, \%csumvalue);

# Results
show_result();

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failing close must not be ignored for the report file.
close LOG or die "$logfile: $!\n";
exit 0;
115    
116    
117    
# Print the command-line synopsis to STDERR and terminate the script.
#
#   $ret - optional exit status; 0 is used when it is omitted
#
# Never returns.
sub usage {
    my $ret = shift;
    $ret = 0 unless defined $ret;

    print STDERR <<EOH;
usage: $0 [options] <filesystem>
  options:
    help, h          Show this help text
    verbose, v       Increase output verbosity
    logfile, l FILE  Write the report to FILE
                     (default: dpm-consistency-check-<date>)
    dbfile FILE      Debugging: skip the database query
    cfgfile FILE     DPM configuration file (default: /root/DPMINFO)
    resolve, r       Resolve the path to a SURL
    sum, s           Calculate and compare the checksum
EOH

    exit($ret);
}
131 ronalds 1810
132    
# Collect the replicas that the database registers for this server and
# file system.
#
#   $ref          - hash ref; $REPLICA_IN_DB is OR-ed into the entry of
#                   every replica found (key: path relative to $filesystem)
#   $csumtyperef  - hash ref; receives the csumtype column per replica
#   $csumvalueref - hash ref; receives the csumvalue column per replica
#
# Increments $count{db} per row.  Replicas with status 'D' are excluded by
# the query.
#
# NOTE(review): when $dbfile is set the query is skipped, but nothing in
# this sub reads that file, so no database replicas are recorded at all.
sub get_db_replicas {
    my ($ref, $csumtyperef, $csumvalueref) = @_;

    # read the replicas registered in the database,
    # unless a file for debugging is defined
    if ( ! $dbfile ) {
        # $dbfile = '/tmp/dpm-db-${date}.dump';
        # execute the gridpp_* command
        # my $cmd = "gridpp_dpm_disk --server $server --fs $filesystem > $dbfile";
        # ($verbose > 0) and print LOG "Executing $cmd\n";

        my $sql = "SELECT r.sfn, m.csumtype, m.csumvalue FROM Cns_file_replica r, Cns_file_metadata m WHERE "
            . "r.fileid = m.fileid AND "
            . "r.status <> 'D' AND "
            . "r.host=" . $dpm->sql_quote($server) . " AND r.fs=" . $dpm->sql_quote($filesystem);
        my @results = $dpm->sql_query($sql);

        # "<server>:<filesystem>/" prefix of every SFN.  \Q..\E makes the
        # host name and path match literally (dots and other regex
        # metacharacters), and keeps "$filesystem[" from being parsed as an
        # array subscript.  Compiled once, outside the loop.
        my $sfn_prefix = qr!^\Q${server}:${filesystem}\E/*!;

        foreach my $row (@results) {
            my $replica = $$row{sfn};
            my $csumtype = $$row{csumtype};
            my $csumvalue = $$row{csumvalue};
            $replica =~ s/$sfn_prefix//;
            ($verbose > 2) and print LOG "$replica\n";
            $$ref{$replica} |= $REPLICA_IN_DB;
            $$csumtyperef{$replica} = $csumtype;
            $$csumvalueref{$replica} = $csumvalue;
            $count{db}++;
        }
    }

    ($verbose > 0) and print LOG "Found $count{db} replicas in the database under $filesystem\n";
}
167    
# Collect all regular files found on disk below $filesystem.
#
#   $ref - hash ref; $REPLICA_ON_DISK is OR-ed into the entry of every file
#          found (key: path relative to $filesystem)
#
# Increments $count{disk} per file.  Dies when find(1) cannot be started and
# warns when it finishes with an error.
sub get_fs_replicas {
    my ($ref) = @_;

    # find all files under the specified filesystem
    my @cmd = ('find', $filesystem, '-type', 'f', '-print');
    my $cmd = join(' ', @cmd);

    # List-form pipe open: find is executed directly, without a shell, so
    # spaces or shell metacharacters in $filesystem cannot alter the command.
    open(my $find, '-|', @cmd) or die "$cmd: $!\n";

    # Match the file system prefix literally; compiled once for the loop.
    my $fs_prefix = qr!^\Q${filesystem}\E/*!;

    while ( my $file = <$find> ) {
        chomp $file;
        $file =~ s/$fs_prefix//;
        ($verbose > 2) and print LOG "$file\n";
        $$ref{$file} |= $REPLICA_ON_DISK;
        $count{disk}++;
    }

    # Closing a pipe reports the exit status of the child; an incomplete
    # listing would make database entries look like missing files.
    if ( ! close($find) ) {
        warn "$cmd: " . ( $! ? "$!" : "exit status " . ($? >> 8) ) . "\n";
    }

    ($verbose > 0) and print LOG "Found $count{disk} files under $filesystem\n";
}
186    
187    
# Classify every replica by where it was seen and update the statistics.
#
#   $replica_ref  - hash ref: replica path => OR-ed $REPLICA_IN_DB /
#                   $REPLICA_ON_DISK flags
#   $csumtyperef  - hash ref: replica path => checksum type from the DB
#   $csumvalueref - hash ref: replica path => checksum value from the DB
#
# Fills @only_db and @only_disk and the matching %count entries.  For
# replicas present in both places the optional checksum comparison
# ($checksum_check) and SURL resolution ($resolve) are run.
sub compare_database_filesystem {
    my ($replica_ref, $csumtyperef, $csumvalueref) = @_;

    my $db_and_disk = ($REPLICA_IN_DB | $REPLICA_ON_DISK);
    while ( my ($repl, $state) = each(%$replica_ref) ) {
        # The line prefix goes to LOG, like the status tag that completes it
        # (it used to go to STDOUT, splitting each line over two streams).
        ($verbose > 1) and print LOG "$repl $state\t";
        if ( $state == $REPLICA_IN_DB ) {
            $count{only_db}++;
            push(@only_db, $repl);
            ($verbose > 1) and print LOG "[only in DB]\n";
        }
        elsif ( $state == $REPLICA_ON_DISK ) {
            $count{only_disk}++;
            push(@only_disk, $repl);
            ($verbose > 1) and print LOG "[only on disk]\n";
        }
        elsif ( $state == $db_and_disk ) {
            $count{db_and_disk}++;
            # Finish the status line first: the checks below write complete
            # lines of their own to LOG.
            ($verbose > 1) and print LOG "[OK]\n";
            if ($checksum_check) {
                check_checksum($repl, $csumtyperef, $csumvalueref);
            }
            if ($resolve) {
                check_resolve($repl);
            }
        }
        else {
            $count{unknown}++;
            ($verbose > 1) and print LOG "[Unexpected state]\n";
        }
    }
}
222    
223    
# Recompute the checksum of a replica on disk and compare it with the value
# recorded in the database; the outcome is written to LOG.
#
#   $replica      - replica path relative to $filesystem
#   $csumtyperef  - hash ref: replica path => checksum type ("AD" selects
#                   Digest::Adler32, "MD" selects Digest::MD5)
#   $csumvalueref - hash ref: replica path => recorded checksum (hex string)
#
# Returns nothing.  Replicas without a type, with an unknown type, or that
# cannot be opened are reported and skipped.
sub check_checksum {
    my ($replica, $csumtyperef, $csumvalueref) = @_;

    my $type = $$csumtyperef{$replica};
    if ( ! $type ) {
        print LOG "cksum: $replica has no recorded checksum type.\n";
        return;
    }

    # String comparison is required here: with '==' both "AD" and "MD" are
    # converted to the number 0, so the first branch would always be taken.
    my $digest;
    if ($type eq "AD") {
        $digest = Digest::Adler32->new;
    } elsif ($type eq "MD") {
        $digest = Digest::MD5->new;
    } else {
        print LOG "cksum: unknown checksum type '" . $type . "'\n";
        return;
    }

    my $fh;
    if ( ! open($fh, "<", $filesystem . "/" . $replica) ) {
        print LOG "cksum: could not open $replica: $!\n";
        return;
    }
    binmode $fh;    # checksum the raw bytes
    print STDERR "Calculating sum of $filesystem/$replica...\n";
    $digest->addfile($fh);
    my $cksum = $digest->hexdigest;
    close $fh;

    # Checksums are hex strings: compare them as strings (a numeric
    # comparison only looks at leading decimal digits) and ignore the case
    # of the hex digits.
    my $recorded = $$csumvalueref{$replica};
    $recorded = '' unless defined $recorded;
    if (lc($cksum) ne lc($recorded)) {
        print LOG "cksum: mismatch for $replica (current $cksum, database has " .
            $recorded . ", type " . $type . ")\n";
    } else {
        print LOG "cksum: ok for $replica ($cksum, type " . $type . ")\n";
    }
    return;
}
260    
# Ask the DPM helper for the SURL of a replica and log the outcome.
#
#   $replica - replica path relative to $filesystem
#
# Writes one "resolve:" line to LOG; returns nothing.
#
# NOTE(review): the SFN passed on is "$server:/$replica", without
# $filesystem, whereas show_result prints replicas as
# "$server:$filesystem/$replica" -- confirm which form replica2surl expects.
sub check_resolve {
    my ($replica) = @_;

    # try a full path resolution
    my $surl = $dpm->replica2surl("$server:/$replica");

    unless (defined $surl) {
        print LOG "resolve: failed for $replica\n";
        return;
    }

    print LOG "resolve: $replica -> $surl\n";
    return;
}
273    
# Write the final report to LOG: the counters, the sorted lists of
# replicas found in only one of the two places, and the process times.
sub show_result {
    # Counters gathered while reading the database and the disk.
    print LOG
        "# replicas in DB: $count{db}\n",
        "# replicas on disk: $count{disk}\n",
        "# replicas in DB and on disk: $count{db_and_disk}\n",
        "# replicas only in the DB: $count{only_db}\n",
        "# replicas only on disk: $count{only_disk}\n";

    # One section per kind of inconsistency: heading, then the full
    # "<server>:<filesystem>/<replica>" names in sorted order.
    my @sections = (
        [ "\n# Replicas only in the database\n", \@only_db ],
        [ "\n# Replicas only on the fileystem\n", \@only_disk ],
    );
    foreach my $section (@sections) {
        my ($heading, $names) = @$section;
        print LOG $heading;
        foreach my $repl (sort @$names) {
            print LOG "$server:$filesystem/$repl\n";
        }
    }

    # Values returned by times(), tab-separated (no trailing newline).
    my @cpu_times = times;
    print LOG "\n# times: " . join("\t", @cpu_times);
}
293    

Properties

Name Value
svn:executable *
svn:keywords id

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28