#!/usr/bin/perl
#
# Tool to check the consistency between the DPM database
# and the files actually on disk.
#
# For one file system on the local host it collects
#   - the replicas registered in the DPNS database (table Cns_file_replica)
#   - the regular files found below the file system (via find(1))
# and writes a report to a log file listing the replicas that exist only
# in the database, only on disk, or in both places.
#

use strict;
use warnings;

use Getopt::Long;
use Sys::Hostname;
use DBI;

my $help    = 0;
my $verbose = 0;

# if $dbfile is not empty, the DB data will be read from it
# instead of obtained by executing gridpp_dpm_disk
# NOTE: the code that parsed such a dump is currently disabled (see
# get_db_replicas), so setting $dbfile only skips the database query.
# my $dbfile = '/home/ronalds/tmp/DPM/dpm_db_dump.txt';
my $dbfile;
my $cfgfile = '/root/DPMINFO';
my $logfile;
##$logfile = "-"; # STDOUT for debugging
my $filesystem;

GetOptions(
    'help|h'      => \$help,
    'verbose|v+'  => \$verbose,
    'logfile|l:s' => \$logfile,
    'dbfile:s'    => \$dbfile,
    'cfgfile:s'   => \$cfgfile,
) or usage(1);

usage(0) if $help;

my @now  = localtime(time);
my $date = sprintf "%04d%02d%02d-%02d%02d%02d",
    $now[5] + 1900, $now[4] + 1, $now[3], $now[2], $now[1], $now[0];

$logfile ||= "dpm-consistency-check-$date";

# Lexical log handle shared by all subs below.
# A log file name of "-" means STDOUT (kept from the two-argument open
# that was used here before).
my $log;
if ( $logfile eq '-' ) {
    open $log, '>&', \*STDOUT or die "$logfile: $!\n";
}
else {
    open $log, '>', $logfile or die "$logfile: $!\n";
}
print {$log} "Log file: $logfile\n" if $verbose > 1;

# replica path (relative to the file system) => bit mask of the flags below
my %replicas;
my %count = (
    db          => 0,
    disk        => 0,
    only_db     => 0,
    only_disk   => 0,
    db_and_disk => 0,
    unknown     => 0,
);

my $REPLICA_IN_DB   = 1;
my $REPLICA_ON_DISK = 2;

my @only_db;
my @only_disk;

my ($db_user, $db_pw, $db_host, $dpns_db_name, $db_port, $dbh);

# Mandatory positional argument: the file system to check.
# Trailing slashes are removed so that prefixes can be stripped uniformly.
$filesystem = shift @ARGV;
$filesystem = '' unless defined $filesystem;
$filesystem =~ s!/*$!!;
if ( $filesystem eq '' ) {
    warn "Missing mandatory argument 'filesystem'\n";
    usage(1);
}
if ( !-d $filesystem ) {
    # Only a warning: the database side is still inspected.
    warn "$!: $filesystem\n";
}

my $server = hostname;
print {$log} "server: $server filesystem: $filesystem\n" if $verbose > 0;

# Initial design, unoptimized:
# 1. Determine all replicas registered in the DB that are stored
#    on the current server for the given file system.
#    Store the replicas in hash %replicas with key replica
#    and value $REPLICA_IN_DB
# 2. Determine all files that are present
#    on the current server for the given file system.
#    Store the files in hash %replicas with key file
#    and value $REPLICA_ON_DISK
# 3. Compare the hashes, determining inconsistencies and
#    counting statistics
#
# Later:
# - Add an option to automatically remove entries from the
#   database for which the corresponding file is missing,
#   or remove the file from disk if there is no corresponding
#   entry in the database
# - Memory consumption might be an issue for this script;
#   consider splitting the processing by date

# initialize database connection
parse_db_config($cfgfile);
sql_init();

# Determine replicas registered in the database
get_db_replicas(\%replicas);

# Determine replicas on disk
get_fs_replicas(\%replicas);

# Comparison
compare_database_filesystem(\%replicas);

# Results
show_result();

sql_terminate();
close $log or warn "$logfile: $!\n";

exit 0;


# usage([$ret])
# Print the help text to STDERR and exit with status $ret (default 0).
sub usage {
    my $ret = shift;

    print STDERR <<"EOH";
Usage: $0 [options] <filesystem>
  options:
    help, h        Show this help text
    verbose, v     Increase output verbosity
    logfile, l     Write the report to this file ('-' for STDOUT)
    cfgfile        Database configuration file (default: /root/DPMINFO)
    dbfile         Debugging: skip the replica query against the database
EOH
    exit( $ret || 0 );
}


# get_db_replicas($ref)
# Mark every replica that the database lists for $server/$filesystem in the
# hash referenced by $ref (flag $REPLICA_IN_DB) and count it in $count{db}.
# Keys are the replica paths relative to $filesystem.
sub get_db_replicas {
    my $ref = shift;

    # read the replicas registered in the database,
    # unless a file for debugging is defined
    if ( !$dbfile ) {
        # $dbfile = '/tmp/dpm-db-${date}.dump';
        # execute the gridpp_* command
        # my $cmd = "gridpp_dpm_disk --server $server --fs $filesystem > $dbfile";
        # ($verbose > 0) and print LOG "Executing $cmd\n";
        my $sql = "SELECT sfn FROM Cns_file_replica WHERE "
            . "host=" . sql_quote($server)
            . " AND fs=" . sql_quote($filesystem);
        my @results = sql_query($sql);
        foreach my $row (@results) {
            my $replica = $row->{sfn};

            # \Q..\E: host and path are literal text, not patterns
            $replica =~ s!^\Q$server\E:\Q$filesystem\E/*!!;
            print {$log} "$replica\n" if $verbose > 2;
            $ref->{$replica} = ( $ref->{$replica} || 0 ) | $REPLICA_IN_DB;
            $count{db}++;
        }
    }

    ## # parse the file holding the replicas registered in the DB
    ## open DB, $dbfile or die "Failed to open $dbfile: $!\n";
    ## while ( my $line = <DB> ) {
    ##     if ( $line =~ /^Replica: ([\w_\.-]+):(.*) \d+$/ ) {
    ##         my $replica = $2;
    ##         $replica =~ s!^$filesystem[/]*!!;
    ##         ($verbose > 2) and print LOG "$replica\n";
    ##         $$ref{$replica} |= $REPLICA_IN_DB;
    ##         $count{db}++;
    ##     }
    ##     elsif ( $line =~ /Found (\d+) replica/ ) {
    ##         ($verbose > 1) and print LOG "There are $1 replicas in the DB\n";
    ##     }
    ## }
    ## close DB;

    print {$log} "Found $count{db} replicas in the database under $filesystem\n"
        if $verbose > 0;
    return;
}


# get_fs_replicas($ref)
# Mark every regular file found below $filesystem in the hash referenced by
# $ref (flag $REPLICA_ON_DISK) and count it in $count{disk}.
# Keys are the file paths relative to $filesystem.
sub get_fs_replicas {
    my $ref = shift;

    # find all files under the specified filesystem;
    # list-form pipe open keeps the path away from the shell
    my @cmd = ( 'find', $filesystem, '-type', 'f', '-print' );
    open my $fs, '-|', @cmd or die "@cmd: $!\n";
    while ( my $file = <$fs> ) {
        chomp $file;
        $file =~ s!^\Q$filesystem\E/*!!;
        print {$log} "$file\n" if $verbose > 2;
        $ref->{$file} = ( $ref->{$file} || 0 ) | $REPLICA_ON_DISK;
        $count{disk}++;
    }

    # close() on a pipe fails when the command exited non-zero
    close $fs or warn "@cmd: " . ( $! ? $! : "exit status " . ( $? >> 8 ) ) . "\n";

    print {$log} "Found $count{disk} files under $filesystem\n" if $verbose > 0;
    return;
}


# compare_database_filesystem($replica_ref)
# Classify every entry of the hash by its flag mask, update the
# only_db / only_disk / db_and_disk / unknown counters and collect the
# inconsistent replicas in @only_db and @only_disk.
sub compare_database_filesystem {
    my ($replica_ref) = shift;

    my $db_and_disk = ( $REPLICA_IN_DB | $REPLICA_ON_DISK );

    while ( my ( $repl, $state ) = each %$replica_ref ) {
        # the prefix goes to the log as well, so that it stays on the
        # same line as the verdict printed below
        print {$log} "$repl $state\t" if $verbose > 1;

        if ( $state == $REPLICA_IN_DB ) {
            $count{only_db}++;
            push @only_db, $repl;
            print {$log} "[only in DB]\n" if $verbose > 1;
        }
        elsif ( $state == $REPLICA_ON_DISK ) {
            $count{only_disk}++;
            push @only_disk, $repl;
            print {$log} "[only on disk]\n" if $verbose > 1;
        }
        elsif ( $state == $db_and_disk ) {
            $count{db_and_disk}++;
            print {$log} "[OK]\n" if $verbose > 1;
        }
        else {
            $count{unknown}++;
            print {$log} "[Unexpected state]\n" if $verbose > 1;
        }
    }
    return;
}


# show_result()
# Write the counters and the sorted lists of inconsistent replicas
# to the log.
sub show_result {
    print {$log} "# replicas in DB: $count{db}\n";
    print {$log} "# replicas on disk: $count{disk}\n";
    print {$log} "# replicas in DB and on disk: $count{db_and_disk}\n";
    print {$log} "# replicas only in the DB: $count{only_db}\n";
    print {$log} "# replicas only on disk: $count{only_disk}\n";

    print {$log} "\n# Replicas only in the database\n";
    foreach my $repl ( sort @only_db ) {
        print {$log} "$server:$filesystem/$repl\n";
    }

    print {$log} "\n# Replicas only on the fileystem\n";
    foreach my $repl ( sort @only_disk ) {
        print {$log} "$server:$filesystem/$repl\n";
    }

    print {$log} "\n# times: " . join( "\t", times );
    return;
}


#
# DB related
#

# parse_db_config($conf_file)
# Read the first line of $conf_file, expected in the form
#   USER/PASSWORD@HOST[:PORT][/DPNS_DB_NAME]
# and set $db_user, $db_pw, $db_host, $db_port and $dpns_db_name.
# Dies when the file cannot be read or the line cannot be interpreted.
sub parse_db_config {
    my $conf_file = $_[0];

    open my $db_conf, '<', $conf_file
        or die "Database configuration file $conf_file cannot be read: $!\n";
    my $conf = <$db_conf>;
    close $db_conf;

    # an empty file yields undef; let it fall through to the format error
    $conf = '' unless defined $conf;
    chomp $conf;

    # Optional match for db name
    if ( $conf =~ s/\/(\w+)$// ) {
        $dpns_db_name = $1;
    }
    else {
        $dpns_db_name = "cns_db";
    }

    # Optional port match; anchored to the end (the db name has already
    # been removed) so that a ":<digits>" inside the password is left alone
    if ( $conf =~ s/:(\d+)$// ) {
        $db_port = $1;
    }
    else {
        $db_port = getservbyname( "mysql", "tcp" );

        # no "mysql" entry in the services database: use the usual port
        $db_port = 3306 unless defined $db_port;
    }

    if ( $conf =~ /^(\w+)\/([^@]+)@([A-Za-z0-9\.-]+)$/ ) {
        $db_user = $1;
        $db_pw   = $2;
        $db_host = $3;
    }
    else {
        die "Failed to interpret database configuration file. Format should\n",
            "be a single line:\n",
            "USER/PASSWORD\@HOST[:PORT][/DPNS_DB_NAME]\n",
            "Port is optional (defaults to mysql port in services).\n",
            "Database name is optional (defaults to cns_db).\n";
    }

    print STDERR "DB User: $db_user\nDB Host: $db_host\n",
        "DPNS DB: $dpns_db_name\n\n"
        if ( $verbose > 0 );
    return;
}


# sql_init()
# Connect to the MySQL server described by the $db_* variables and store
# the handle in $dbh. Dies when the connection fails; returns 0 otherwise.
sub sql_init {
    my $dsn = "DBI:mysql:database=$dpns_db_name;host=$db_host;port=$db_port";

    $dbh = DBI->connect( $dsn, $db_user, $db_pw );
    if ( !$dbh ) {
        die "Failed to connect to MySQL server. Check username/password.\n";
    }

    return (0);
}


# sql_terminate()
# Close the database connection, if one was opened.
sub sql_terminate {
    $dbh->disconnect if $dbh;
    return;
}


# sql_quote($value)
# Return $value quoted as an SQL literal by the database driver.
sub sql_quote {
    return ( $dbh->quote( $_[0] ) );
}


# sql_query($query)
# Run $query and return the result as a list of hash references,
# one per row, keyed by column name. Dies when the statement cannot be
# prepared or executed.
sub sql_query {
    my $query = shift;

    print STDERR "QUERY: $query\n" if ( $verbose > 0 );

    my $sth = $dbh->prepare($query);
    if ( !$sth ) {
        # no statement handle exists here, the error lives on $dbh
        die "Failed to prepare MySQL query: " . $dbh->errstr . "\n";
    }
    if ( !$sth->execute ) {
        die "Failed to execute MySQL query: " . $sth->errstr . "\n";
    }

    my $names      = $sth->{'NAME'};
    my $num_fields = $sth->{'NUM_OF_FIELDS'};

    print STDERR "SQL Debug: query returned ", $num_fields, " fields.\n"
        if ( $verbose > 0 );
    print STDERR "No matches!\n" if ( ( $verbose > 0 ) and $sth->rows == 0 );

    my @rows;
    while ( my $row = $sth->fetchrow_arrayref ) {
        # fetchrow_arrayref reuses its array, so copy the values out
        my %result;
        for my $i ( 0 .. $num_fields - 1 ) {
            $result{ $names->[$i] } = $row->[$i];
        }
        push @rows, \%result;
    }

    return (@rows);
}