#!/usr/bin/perl
#
# Tool to check the consistency between the DPM database
# and the files actually on disk.
#
# For one file system on the local disk server it collects
#   - the replicas registered in the DPM name server database, and
#   - the regular files found on disk,
# and reports replicas that exist on only one side. Optionally the
# checksum recorded in the database is verified against the file content
# (--sum) and each consistent replica is resolved to a SURL (--resolve).
#

use strict;
use warnings;
use Getopt::Long;
use Sys::Hostname;
use DBI;
use Digest::Adler32;
use Digest::MD5;
use DPM::Utils;

my $help           = 0;
my $verbose        = 0;
my $resolve        = 0;
my $checksum_check = 0;

# If $dbfile is non-empty the database is NOT queried. Reading replicas
# from such a dump file is not implemented in this script, so the option
# is only useful for debugging the file system scan (see get_db_replicas).
my $dbfile;
my $cfgfile = '/root/DPMINFO';
my $logfile;    # "-" sends the log to STDOUT (useful for debugging)
my $filesystem;

GetOptions(
    'help|h'      => \$help,
    'verbose|v+'  => \$verbose,
    'logfile|l:s' => \$logfile,
    'dbfile:s'    => \$dbfile,
    'cfgfile:s'   => \$cfgfile,
    'resolve|r'   => \$resolve,
    'sum|s'       => \$checksum_check,
) or usage(1);

$help and usage(0);

# Validate the mandatory argument before touching it (and before a log
# file is created), so a missing argument yields a clean usage message.
$filesystem = shift @ARGV;
if ( defined $filesystem ) {
    $filesystem =~ s!/+$!!;    # strip trailing slashes
}
if ( !defined $filesystem || $filesystem eq '' ) {
    warn "Missing mandatory argument 'filesystem'\n";
    usage(1);
}
if ( !-d $filesystem ) {
    # Deliberately non-fatal: the comparison still runs and will report
    # every database replica as "only in DB".
    warn "$!: $filesystem\n";
}

my @now  = localtime(time);
my $date = sprintf "%04d%02d%02d-%02d%02d%02d",
    $now[5] + 1900, $now[4] + 1, $now[3], $now[2], $now[1], $now[0];

$logfile ||= "dpm-consistency-check-$date";

# Lexical log handle shared by all subs below. Three-argument open keeps
# odd characters in the file name from being parsed as an open mode; "-"
# is kept as an explicit alias for STDOUT.
my $log;
if ( $logfile eq '-' ) {
    open $log, '>&', \*STDOUT or die "$logfile: $!\n";
}
else {
    open $log, '>', $logfile or die "$logfile: $!\n";
}
print {$log} "Log file: $logfile\n" if $verbose > 1;

my %replicas;     # replica path (relative to $filesystem) => state bit mask
my %csumtype;     # replica path => checksum type recorded in the database
my %csumvalue;    # replica path => checksum value recorded in the database
my %count = (
    db          => 0,
    disk        => 0,
    only_db     => 0,
    only_disk   => 0,
    db_and_disk => 0,
    unknown     => 0,
);

# Bit flags OR-ed together in %replicas
my $REPLICA_IN_DB   = 1;
my $REPLICA_ON_DISK = 2;

my @only_db;
my @only_disk;

# Not referenced anywhere in the code visible here; kept in case other
# code relies on these declarations.
my ( $db_user, $db_pw, $db_host, $dpns_db_name, $db_port, $dbh );

my $server = hostname;
print {$log} "server: $server filesystem: $filesystem\n" if $verbose > 0;

# Initial design, unoptimized:
# 1. Determine all replicas registered in the DB that are stored
#    on the current server for the given file system.
#    Store the replicas in hash %replicas with key replica
#    and value $REPLICA_IN_DB
# 2. Determine all files that are present
#    on the current server for the given file system.
#    Store the files in hash %replicas with key file
#    and value $REPLICA_ON_DISK (OR-ed into any existing value)
# 3. Compare the hashes, determining inconsistencies and
#    counting statistics
#
# Later:
# - Add an option to automatically remove entries from the
#   database for which the corresponding file is missing,
#   or remove the file from disk if there is no corresponding
#   entry in the database
# - Memory consumption might be an issue for this script;
#   consider splitting the processing by date

my $dpm = DPM::Utils->new( conffile => $cfgfile, host => $server );

# Determine replicas registered in the database
get_db_replicas( \%replicas, \%csumtype, \%csumvalue );

# Determine replicas on disk
get_fs_replicas( \%replicas );

# Comparison
compare_database_filesystem( \%replicas, \%csumtype, \%csumvalue );

# Results
show_result();

close $log or warn "$logfile: $!\n";

exit 0;

# usage($ret)
#   Print the help text to STDERR and exit with status $ret
#   (0 when $ret is omitted).
sub usage {
    my $ret = shift;
    $ret = 0 unless defined $ret;

    print STDERR <<"EOH";
Usage: $0 [options] <filesystem>
 options:
  help, h       Show this help text
  verbose, v    Increase output verbosity
  logfile, l    Write the report to this file ("-" for STDOUT)
  dbfile        Skip the database query (debugging only)
  cfgfile       DPM configuration file (default: /root/DPMINFO)
  resolve, r    Resolve the path to a SURL
  sum, s        Calculate and compare the checksum
EOH

    exit($ret);
}

# get_db_replicas(\%replicas, \%csumtype, \%csumvalue)
#   Query the name server database for all non-deleted replicas stored on
#   $server under $filesystem. For every replica the "$server:$filesystem/"
#   prefix is removed from its sfn, the $REPLICA_IN_DB bit is set in
#   %replicas and the recorded checksum type/value are stored.
#   Updates $count{db}.
sub get_db_replicas {
    my ( $ref, $csumtyperef, $csumvalueref ) = @_;

    if ($dbfile) {
        # Reading a dump file was never implemented; make that visible
        # instead of silently reporting every file as "only on disk".
        my $msg = "dbfile '$dbfile' given: database not queried and reading "
                . "replicas from a file is not implemented\n";
        warn $msg;
        print {$log} $msg;
    }
    else {
        my $sql =
              "SELECT r.sfn, m.csumtype, m.csumvalue FROM Cns_file_replica r, Cns_file_metadata m WHERE "
            . "r.fileid = m.fileid AND "
            . "r.status <> 'D' AND "
            . "r.host=" . $dpm->sql_quote($server)
            . " AND r.fs=" . $dpm->sql_quote($filesystem);

        # \Q..\E: host and path are literal text, not regex syntax
        # (a host name always contains '.', a path may contain '+').
        my $prefix_re = qr{^\Q$server\E:\Q$filesystem\E/*};

        foreach my $row ( $dpm->sql_query($sql) ) {
            my $replica = $row->{sfn};
            $replica =~ s/$prefix_re//;
            print {$log} "$replica\n" if $verbose > 2;

            $ref->{$replica} |= $REPLICA_IN_DB;
            $csumtyperef->{$replica}  = $row->{csumtype};
            $csumvalueref->{$replica} = $row->{csumvalue};
            $count{db}++;
        }
    }

    print {$log} "Found $count{db} replicas in the database under $filesystem\n"
        if $verbose > 0;
    return;
}

# get_fs_replicas(\%replicas)
#   Run find(1) to list all regular files below $filesystem and set the
#   $REPLICA_ON_DISK bit for each of them (path relative to $filesystem).
#   Updates $count{disk}. Dies if find cannot be started.
sub get_fs_replicas {
    my $ref = shift;

    # List form of the pipe open: no shell is involved, so blanks or
    # shell metacharacters in $filesystem cannot alter the command.
    my @cmd = ( 'find', $filesystem, '-type', 'f', '-print' );
    open my $find_fh, '-|', @cmd or die "@cmd: $!\n";

    my $prefix_re = qr{^\Q$filesystem\E/*};

    while ( my $file = <$find_fh> ) {
        chomp $file;
        $file =~ s/$prefix_re//;
        print {$log} "$file\n" if $verbose > 2;

        $ref->{$file} |= $REPLICA_ON_DISK;
        $count{disk}++;
    }

    # A non-zero exit status of find (e.g. unreadable directories) means
    # the listing may be incomplete; report it but carry on.
    close $find_fh
        or warn "@cmd: " . ( $! ? $! : "exit status " . ( $? >> 8 ) ) . "\n";

    print {$log} "Found $count{disk} files under $filesystem\n"
        if $verbose > 0;
    return;
}

# compare_database_filesystem(\%replicas, \%csumtype, \%csumvalue)
#   Classify every entry of %replicas by its state bits, update %count and
#   collect the inconsistent paths in @only_db / @only_disk. Replicas that
#   are present on both sides are optionally checksummed and/or resolved.
sub compare_database_filesystem {
    my ( $replica_ref, $csumtyperef, $csumvalueref ) = @_;

    my $db_and_disk = ( $REPLICA_IN_DB | $REPLICA_ON_DISK );

    while ( my ( $repl, $state ) = each %$replica_ref ) {
        # The verdict below is appended to this partial line, so both
        # parts have to go to the log.
        print {$log} "$repl $state\t" if $verbose > 1;

        if ( $state == $REPLICA_IN_DB ) {
            $count{only_db}++;
            push @only_db, $repl;
            print {$log} "[only in DB]\n" if $verbose > 1;
        }
        elsif ( $state == $REPLICA_ON_DISK ) {
            $count{only_disk}++;
            push @only_disk, $repl;
            print {$log} "[only on disk]\n" if $verbose > 1;
        }
        elsif ( $state == $db_and_disk ) {
            $count{db_and_disk}++;
            check_checksum( $repl, $csumtyperef, $csumvalueref )
                if $checksum_check;
            check_resolve($repl) if $resolve;
            print {$log} "[OK]\n" if $verbose > 1;
        }
        else {
            $count{unknown}++;
            print {$log} "[Unexpected state]\n" if $verbose > 1;
        }
    }
    return;
}

# check_checksum($replica, \%csumtype, \%csumvalue)
#   Recompute the checksum of $filesystem/$replica with the algorithm
#   recorded in the database ("AD" = Adler32, "MD" = MD5) and log whether
#   it matches the recorded value. Replicas without a recorded type, with
#   an unknown type, or that cannot be opened are logged and skipped.
sub check_checksum {
    my ( $replica, $csumtyperef, $csumvalueref ) = @_;

    my $type = $csumtyperef->{$replica};
    if ( !$type ) {
        print {$log} "cksum: $replica has no recorded checksum type.\n";
        return;
    }

    # String comparison is required here: with '==' both "AD" and "MD"
    # evaluate to 0, so every type would be treated as Adler32.
    my $digest;
    if ( $type eq "AD" ) {
        $digest = Digest::Adler32->new;
    }
    elsif ( $type eq "MD" ) {
        $digest = Digest::MD5->new;
    }
    else {
        print {$log} "cksum: unknown checksum type '" . $type . "'\n";
        return;
    }

    my $cksum;
    if ( open( my $fh, "<", $filesystem . "/" . $replica ) ) {
        binmode $fh;    # checksum the raw bytes
        print STDERR "Calculating sum of $filesystem/$replica...\n";
        $digest->addfile($fh);
        $cksum = $digest->hexdigest;
        close $fh;
    }
    else {
        print {$log} "cksum: could not open $replica\n";
        return;
    }

    my $expected = $csumvalueref->{$replica};
    $expected = '' unless defined $expected;

    # Hex digests are strings; compare them as such and ignore letter case.
    if ( lc($cksum) ne lc($expected) ) {
        print {$log} "cksum: mismatch for $replica (current $cksum, database has "
            . $expected . ", type " . $type . ")\n";
    }
    else {
        print {$log} "cksum: ok for $replica ($cksum, type " . $type . ")\n";
    }
    return;
}

# check_resolve($replica)
#   Try a full path resolution of the replica to a SURL via DPM::Utils
#   and log the outcome.
sub check_resolve {
    my $replica = shift;

    # $replica is relative to $filesystem (the "$server:$filesystem/"
    # prefix was stripped when it was collected), so the sfn has to be
    # rebuilt the same way show_result does.
    # NOTE(review): assumes replica2surl expects the sfn as stored in
    # Cns_file_replica.sfn -- confirm against DPM::Utils.
    my $surl = $dpm->replica2surl("$server:$filesystem/$replica");
    if ( defined $surl ) {
        print {$log} "resolve: $replica -> $surl\n";
    }
    else {
        print {$log} "resolve: failed for $replica\n";
    }
    return;
}

# show_result()
#   Write the statistics and the sorted lists of inconsistent replicas
#   to the log, followed by the process times.
sub show_result {
    print {$log} "# replicas in DB: $count{db}\n";
    print {$log} "# replicas on disk: $count{disk}\n";
    print {$log} "# replicas in DB and on disk: $count{db_and_disk}\n";
    print {$log} "# replicas only in the DB: $count{only_db}\n";
    print {$log} "# replicas only on disk: $count{only_disk}\n";

    print {$log} "\n# Replicas only in the database\n";
    print {$log} "$server:$filesystem/$_\n" for sort @only_db;

    print {$log} "\n# Replicas only on the fileystem\n";
    print {$log} "$server:$filesystem/$_\n" for sort @only_disk;

    print {$log} "\n# times: " . join( "\t", times );
    return;
}