1 |
#!/usr/bin/perl |
2 |
|
3 |
# |
4 |
# Tool to check the consistency between the DPM database |
5 |
# and the files actually on disk. |
6 |
# |
7 |
|
8 |
use strict; |
9 |
|
10 |
use Getopt::Long; |
11 |
use Sys::Hostname; |
12 |
use DBI; |
13 |
|
14 |
my $help = 0; |
15 |
my $verbose = 0; |
16 |
|
17 |
# if $dbfile is not empty, the DB data will be read from it |
18 |
# instead of obtained by executing gridpp_dpm_disk |
19 |
# my $dbfile = '/home/ronalds/tmp/DPM/dpm_db_dump.txt'; |
20 |
my $dbfile; |
21 |
|
22 |
my $cfgfile = '/root/DPMINFO'; |
23 |
|
24 |
my $logfile; |
25 |
##$logfile = "-"; # STDOUT for debugging |
26 |
my $filesystem; |
27 |
|
28 |
GetOptions( |
29 |
'help|h' => \$help, |
30 |
'verbose|v+' => \$verbose, |
31 |
'logfile|l:s' => \$logfile, |
32 |
'dbfile:s' => \$dbfile, |
33 |
'cfgfile:s' => \$cfgfile, |
34 |
); |
35 |
|
36 |
$help and &usage; |
37 |
|
38 |
my @now = localtime(time); |
39 |
my $date = sprintf "%04d%02d%02d-%02d%02d%02d", |
40 |
$now[5]+1900, $now[4]+1, $now[3], $now[2], $now[1], $now[0]; |
41 |
$logfile ||= "dpm-consistency-check-$date"; |
42 |
|
43 |
open LOG, "> $logfile" or die "$logfile: $!\n"; |
44 |
($verbose > 1) and print LOG "Log file: $logfile\n"; |
45 |
|
46 |
my %replicas; |
47 |
my %count = ( db => 0, disk => 0, only_db => 0, only_disk => 0, db_and_disk => 0, unknown => 0 ); |
48 |
my $REPLICA_IN_DB = 1; |
49 |
my $REPLICA_ON_DISK = 2; |
50 |
my @only_db; |
51 |
my @only_disk; |
52 |
|
53 |
my ($db_user, $db_pw, $db_host, $dpns_db_name, $db_port, $dbh); |
54 |
|
55 |
my $filesystem = shift; |
56 |
$filesystem =~ s!/*$!!; |
57 |
if ( ! $filesystem ) { |
58 |
warn "Missing mandatory argument 'filesystem'\n"; |
59 |
&usage(1); |
60 |
} |
61 |
if ( ! -d $filesystem ) { |
62 |
warn "$!: $filesystem\n"; |
63 |
} |
64 |
|
65 |
my $server = hostname; |
66 |
|
67 |
($verbose > 0) and print LOG "server: $server filesystem: $filesystem\n"; |
68 |
|
69 |
|
70 |
# Initial design, unoptimized: |
71 |
# 1. Determine all replicas registered in the DB that are stored |
72 |
# on the current server for the given file system. |
73 |
# Store the replicas in hash %replicas with key replica |
74 |
# and value $REPLICA_IN_DB |
75 |
# 2. Determine all files that are present |
76 |
# on the current server for the given file system. |
77 |
# Store the files in hash %replicas with key file |
78 |
# and value $REPLICA_IN_DB |
79 |
# 3. Compare the hashes, determining inconsistencies and |
80 |
# counting statistics |
81 |
# |
82 |
# Later: |
83 |
# - Add an option to automatically remove entries from the |
84 |
# database for which the corresponding file is missing, |
85 |
# or remove the file from disk if there is no corresponding |
86 |
# entry in the database |
87 |
# - Memory consumption might be an issue for this script; |
88 |
# consider splitting the processing by date |
89 |
|
90 |
|
91 |
# initialize database connection |
92 |
&parse_db_config($cfgfile); |
93 |
&sql_init; |
94 |
|
95 |
# Determine replicas registered in the database |
96 |
&get_db_replicas(\%replicas); |
97 |
|
98 |
# Determine replicas on disk |
99 |
&get_fs_replicas(\%replicas); |
100 |
|
101 |
# Comparison |
102 |
&compare_database_filesystem(\%replicas); |
103 |
|
104 |
# Results |
105 |
&show_result; |
106 |
|
107 |
&sql_terminate; |
108 |
|
109 |
close LOG; |
110 |
exit 0; |
111 |
|
112 |
|
113 |
|
114 |
sub usage { |
115 |
my $ret = shift; |
116 |
print STDERR <<EOH; |
117 |
usage: $0 [options] <filesystem> |
118 |
options: |
119 |
help, h Show this help text |
120 |
verbose, v Increase output verbosity |
121 |
EOH |
122 |
|
123 |
exit($ret); |
124 |
} |
125 |
|
126 |
|
127 |
sub get_db_replicas { |
128 |
my $ref = shift; |
129 |
|
130 |
# read the replicas registered in the database, |
131 |
# unless a file for debugging is defined |
132 |
if ( ! $dbfile ) { |
133 |
# $dbfile = '/tmp/dpm-db-${date}.dump'; |
134 |
# execute the gridpp_* command |
135 |
# my $cmd = "gridpp_dpm_disk --server $server --fs $filesystem > $dbfile"; |
136 |
# ($verbose > 0) and print LOG "Executing $cmd\n"; |
137 |
|
138 |
my $sql = "SELECT sfn FROM Cns_file_replica WHERE " |
139 |
. "host=" . &sql_quote($server) . " AND fs=" . &sql_quote($filesystem); |
140 |
my @results = &sql_query($sql); |
141 |
foreach my $row (@results) { |
142 |
my $replica = $$row{sfn}; |
143 |
$replica =~ s!^$server:$filesystem[/]*!!; |
144 |
($verbose > 2) and print LOG "$replica\n"; |
145 |
$$ref{$replica} |= $REPLICA_IN_DB; |
146 |
$count{db}++; |
147 |
} |
148 |
} |
149 |
|
150 |
## # parse the file holding the replicas registered in the DB |
151 |
## open DB, $dbfile or die "Failed to open $dbfile: $!\n"; |
152 |
## while ( my $line = <DB> ) { |
153 |
## if ( $line =~ /^Replica: ([\w_\.-]+):(.*) \d+$/ ) { |
154 |
## my $replica = $2; |
155 |
## $replica =~ s!^$filesystem[/]*!!; |
156 |
## ($verbose > 2) and print LOG "$replica\n"; |
157 |
## $$ref{$replica} |= $REPLICA_IN_DB; |
158 |
## $count{db}++; |
159 |
## } |
160 |
## elsif ( $line =~ /Found (\d+) replica/ ) { |
161 |
## ($verbose > 1) and print LOG "There are $1 replicas in the DB\n"; |
162 |
## } |
163 |
## } |
164 |
## close DB; |
165 |
|
166 |
($verbose > 0) and print LOG "Found $count{db} replicas in the database under $filesystem\n"; |
167 |
} |
168 |
|
169 |
sub get_fs_replicas { |
170 |
my $ref = shift; |
171 |
|
172 |
# find all files under the specified filesystem |
173 |
my $cmd = "find $filesystem -type f -print"; |
174 |
|
175 |
open FS, "$cmd | " or die "$cmd: $!\n"; |
176 |
while ( my $file = <FS> ) { |
177 |
$file =~ s!^$filesystem[/]*!!; |
178 |
chomp $file; |
179 |
($verbose > 2) and print LOG "$file\n"; |
180 |
$$ref{$file} |= $REPLICA_ON_DISK; |
181 |
$count{disk}++; |
182 |
} |
183 |
close FS; |
184 |
|
185 |
($verbose > 0) and print LOG "Found $count{disk} files under $filesystem\n"; |
186 |
} |
187 |
|
188 |
|
189 |
sub compare_database_filesystem { |
190 |
my ($replica_ref) = shift; |
191 |
|
192 |
my $db_and_disk = ($REPLICA_IN_DB | $REPLICA_ON_DISK); |
193 |
while ( my ($repl, $state) = each(%$replica_ref) ) { |
194 |
($verbose > 1) and print "$repl $state\t"; |
195 |
if ( $state == $REPLICA_IN_DB ) { |
196 |
$count{only_db}++; |
197 |
push(@only_db, $repl); |
198 |
($verbose > 1) and print LOG "[only in DB]\n"; |
199 |
} |
200 |
elsif ( $state == $REPLICA_ON_DISK ) { |
201 |
$count{only_disk}++; |
202 |
push(@only_disk, $repl); |
203 |
($verbose > 1) and print LOG "[only on disk]\n"; |
204 |
} |
205 |
elsif ( $state == $db_and_disk ) { |
206 |
$count{db_and_disk}++; |
207 |
($verbose > 1) and print LOG "[OK]\n"; |
208 |
} |
209 |
else { |
210 |
$count{unknown}++; |
211 |
($verbose > 1) and print LOG "[Unexpected state]\n"; |
212 |
} |
213 |
} |
214 |
} |
215 |
|
216 |
|
217 |
|
218 |
sub show_result { |
219 |
print LOG "# replicas in DB: $count{db}\n"; |
220 |
print LOG "# replicas on disk: $count{disk}\n"; |
221 |
print LOG "# replicas in DB and on disk: $count{db_and_disk}\n"; |
222 |
print LOG "# replicas only in the DB: $count{only_db}\n"; |
223 |
print LOG "# replicas only on disk: $count{only_disk}\n"; |
224 |
|
225 |
print LOG "\n# Replicas only in the database\n"; |
226 |
foreach my $repl (sort @only_db) { |
227 |
print LOG "$server:$filesystem/$repl\n"; |
228 |
} |
229 |
|
230 |
print LOG "\n# Replicas only on the fileystem\n"; |
231 |
foreach my $repl (sort @only_disk) { |
232 |
print LOG "$server:$filesystem/$repl\n"; |
233 |
} |
234 |
|
235 |
print LOG "\n# times: " . join("\t", times); |
236 |
} |
237 |
|
238 |
|
239 |
|
240 |
|
241 |
# |
242 |
# DB related |
243 |
# |
244 |
sub parse_db_config |
245 |
{ |
246 |
my $conf_file = $_[0]; |
247 |
open(DB_CONF, $conf_file) || |
248 |
die "Database configuration file $conf_file cannot be read: $!\n"; |
249 |
|
250 |
my $conf = <DB_CONF>; |
251 |
chomp $conf; |
252 |
# Optional match for db name |
253 |
if ($conf =~ s/\/(\w+)$//) { |
254 |
$dpns_db_name = $1; |
255 |
} else { |
256 |
$dpns_db_name = "cns_db"; |
257 |
} |
258 |
# Optional port match |
259 |
if ($conf =~ s/:(\d+)//) { |
260 |
$db_port = $1; |
261 |
} else { |
262 |
$db_port = getservbyname("mysql", "tcp"); |
263 |
} |
264 |
if ($conf =~ /^(\w+)\/([^@]+)@([A-Za-z0-9\.-]+)$/) { |
265 |
$db_user = $1; |
266 |
$db_pw = $2; |
267 |
$db_host = $3; |
268 |
} else { |
269 |
die "Failed to interpret database configuration file. Format should\n", |
270 |
"be a single line:\n", |
271 |
"USER/PASSWORD\@HOST[:PORT][/DPNS_DB_NAME]\n", |
272 |
"Port is optional (defaults to mysql port in services).\n", |
273 |
"Database name is optional (defaults to cns_db).\n"; |
274 |
} |
275 |
print STDERR "DB User: $db_user\nDB Host: $db_host\n", |
276 |
"DPNS DB: $dpns_db_name\n\n" if ($verbose > 0); |
277 |
close DB_CONF; |
278 |
} |
279 |
|
280 |
|
281 |
sub sql_init() { |
282 |
my $dsn = "DBI:mysql:database=$dpns_db_name;host=$db_host;port=$db_port"; |
283 |
$dbh = DBI->connect($dsn, $db_user, $db_pw); |
284 |
if (!$dbh) { |
285 |
die "Failed to connect to MySQL server. Check username/password.\n"; |
286 |
} |
287 |
return(0); |
288 |
} |
289 |
|
290 |
sub sql_terminate() { |
291 |
$dbh->disconnect; |
292 |
} |
293 |
|
294 |
sub sql_quote() { |
295 |
return ($dbh->quote($_[0])); |
296 |
} |
297 |
|
298 |
sub sql_query() { |
299 |
my $query = shift; |
300 |
|
301 |
print STDERR "QUERY: $query\n" if ($verbose > 0); |
302 |
|
303 |
my $sth = $dbh->prepare("$query"); |
304 |
if (!$sth) { |
305 |
die "Failed to execute MySQL query: " . $sth->errstr . "\n"; |
306 |
} |
307 |
if (!$sth->execute) { |
308 |
die "Failed to execute MySQL query: " . $sth->errstr . "\n"; |
309 |
}; |
310 |
print STDERR "SQL Debug: query returned ", $sth->{'NUM_OF_FIELDS'}, " fields.\n" if ($verbose > 0); |
311 |
|
312 |
my $names = $sth->{'NAME'}; |
313 |
my $numFields = $sth->{'NUM_OF_FIELDS'}; |
314 |
|
315 |
print STDERR "No matches!\n" if (($verbose > 0) and $sth->rows == 0); |
316 |
|
317 |
my @aref = (); |
318 |
while (my $ref = $sth->fetchrow_arrayref) { |
319 |
my %result; |
320 |
for (my $i = 0; $i < $numFields; $i++) { |
321 |
$result{$$names[$i]} = $$ref[$i]; |
322 |
} |
323 |
push @aref, \%result; |
324 |
} |
325 |
|
326 |
return(@aref); |
327 |
} |
328 |
|