/[pdpsoft]/trunk/nl.biggrid.dans/check-file
ViewVC logotype

Contents of /trunk/nl.biggrid.dans/check-file

Parent Directory | Revision Log


Revision 2582 - (show annotations) (download)
Tue Nov 20 12:07:59 2012 UTC (9 years, 7 months ago) by svn
File size: 6155 byte(s)
First released version
#!/bin/bash
#
# check-file - verify the contents of one or multiple tar.gz files.
# The script must be executed, not sourced. Its helper functions are
# loaded from libdans.sh, looked up in the directory of this script.

# Refuse to be sourced. Comparing BASH_SOURCE with $0 detects sourcing from
# any shell, not only from a login shell whose $0 starts with a dash.
if [ "${BASH_SOURCE[0]}" != "$0" ]
then
    echo "Don't source me, just run!" 1>&2
    return 1
fi

# Directory holding this script. When $0 has no path component (e.g. the
# script was started as "bash check-file") "${0%/*}" would yield the script
# name itself, so fall back to the current directory in that case.
case "$0" in
    (*/*) _check_file_dir="${0%/*}" ;;
    (*)   _check_file_dir="." ;;
esac

# Nothing below can work without the helper library, so stop right away
# when it cannot be loaded.
# shellcheck source=/dev/null
. "${_check_file_dir}/libdans.sh" || {
    echo "$0: cannot load ${_check_file_dir}/libdans.sh" 1>&2
    exit 1
}

# Load global defaults (vars: vo, archive, LFC_*, SRM_*)
loadDefaults "$0"
12
13
# Option defaults. DEBUG and KEEPGOING are changed by the command-line
# parser below.
# NOTE(review): none of these variables is read anywhere else in this
# script; presumably the libdans.sh helpers consume them - TODO confirm.
DEBUG=0
QUIET=0
KEEPGOING=0
START_INDEX=1
END_INDEX=0

# Help text, printed for -h|--help and when no file argument is given.
USAGE="\
$0 - verify the contents of one or multiple tar.gz files
Usage: ./check-file [-h|--help] [-d|--debug] [-k|--keepgoing] [file|lfn|surl] [[file2|lfn2|surl2] ...]
Where:
  --help       prints this text and exits
  --debug      increases the debug level (may be repeated)
  --keepgoing  tells $0 to keep going after an error
  file         specifies the full path to a local file OR
  lfn          specifies a Logical File Name on the LFC OR
  surl         specifies a Storage URL (srm:// or gsiftp://)
"
28
# Parse commandline parameters. Parsing stops at the first argument that
# does not start with a dash, so afterwards the positional parameters hold
# only the files, lfns and surls to check.
while [ $# -gt 0 ]
do
    case "$1" in
        (-h|--help)
            echo "${USAGE}"
            exit 0
            ;;
        (-d|--debug)
            # every occurrence raises the debug level by one
            DEBUG=$((DEBUG + 1))
            ;;
        (-k|--keepgoing)
            KEEPGOING=1
            ;;
        (-*)
            # diagnostics go to stderr so they never mix with regular output
            echo "Invalid option: $1" 1>&2
            exit 1
            ;;
        (*)
            break
            ;;
    esac
    shift
done

# At least one file, lfn or surl is required
if [ $# -lt 1 ]
then
    abort 1 "${USAGE}"
fi
53
# Per-run scratch names inside the current directory; the process id keeps
# concurrent runs apart.
WORKSPACE=workspace.$$
TIMESTAMP=check-file.timestamp.$$

# Remove the scratch directory and the timestamp file of this run.
check_file_cleanup() {
    rm -rf -- "${WORKSPACE}"
    rm -f -- "${TIMESTAMP}"
}

# Clean up whenever the shell exits, so an early exit no longer leaves the
# workspace and the timestamp behind.
# NOTE(review): this replaces any EXIT trap installed earlier, e.g. by
# libdans.sh - confirm that the library does not rely on one.
trap check_file_cleanup EXIT

# set a timestamp to only use newly downloaded files
touch "${TIMESTAMP}"
58
#######################################
# Fix the numbering issue in RACM tar balls: when the index between
# "RACM-" and ".tar" is shorter than four characters, a single "0" is put
# in front of it. Names of other archives are passed through unchanged.
# Arguments: $1 - archive name (e.g. RACM)
#            $2 - name stored inside the tar ball (e.g. RACM-123.tar)
# Outputs:   the (possibly corrected) name on stdout
#######################################
fix_racm_base() {
    local archive_name="$1"
    local base_name="$2"
    local index

    if [ "x${archive_name}" = "xRACM" ]
    then
        index="${base_name#"${archive_name}"-}"
        index="${index%.tar}"
        if [ ${#index} -lt 4 ]
        then
            base_name="${archive_name}-0${index}.tar"
        fi
    fi
    printf '%s\n' "${base_name}"
}

#######################################
# Copy a remote file with lcg-cp, trying up to three times: immediately,
# after 30 seconds and, with verbose output, after another 90 seconds.
# Globals:   SRM_TIMEOUTS (read)
# Arguments: $1 - source (lfn:, srm: or gsiftp: URL)
#            $2 - destination URL
# Returns:   0 as soon as one attempt succeeds, otherwise the exit status
#            of the third attempt
#######################################
fetch_with_retries() {
    local src="$1"
    local dest="$2"

    # SRM_TIMEOUTS is left unquoted on purpose, as in the original calls,
    # so that it can expand to several lcg-cp options (or to none).
    # shellcheck disable=SC2086
    lcg-cp ${SRM_TIMEOUTS} "${src}" "${dest}" && return 0

    warn "First attempt failed, sleeping 30 seconds and retrying..."
    sleep 30
    # shellcheck disable=SC2086
    lcg-cp ${SRM_TIMEOUTS} "${src}" "${dest}" && return 0

    warn "Second attempt failed, sleeping 90 seconds and retrying..."
    sleep 90
    date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v ${SRM_TIMEOUTS}" 1>&2
    # shellcheck disable=SC2086
    lcg-cp -v -v ${SRM_TIMEOUTS} "${src}" "${dest}"
}

# Loop over all files|lfns|surls
for file
do
    do_get=0    # non-zero: the file has to be downloaded first
    skip_md5=0  # non-zero: reuse the checksums calculated earlier in this run
    symlink=""  # original lfn when the LFC entry is a symbolic link

    if [[ "${file}" == lfn:/* ]]
    then
        do_get=1

        # Verify that we have a valid proxy before continuing
        checkProxy "${vo}"

        if ! lfcEntry=$(lfc-ls -l "${file#lfn:}" 2> /dev/null)
        then
            abort 2 "Error listing file ${file}"
        fi
        if [ -z "${lfcEntry}" ]
        then
            warn "LFC file ${file} not found"
            continue
        fi

        # check if the LFC entry is a symlink (long listing starts with "l")
        if [[ "${lfcEntry}" == l* ]]
        then
            symlink="${file}"
            # the link target is whatever follows the last "-> "
            file="${lfcEntry##*-> }"
            md5file="${file##*/}"
            md5file="${md5file%.gz}.md5sum"

            # add the protocol again
            file="lfn:${file}"
            debug "Symbolic link detected: ${symlink} -> ${file}"

            debug "md5file should be ${md5file}"
            # we now know that the current entry is a symlink, usually to a
            # tarball. Check if we already calculated the checksums for this
            # tarball since the start of the script. If so, use them, if not,
            # download the tarball
            if [[ -r "${md5file}" && "${md5file}" -nt "${TIMESTAMP}" ]]
            then
                debug "${file} was already checked since the start of the script."

                do_get=0
                skip_md5=1
            else
                debug "Downloading ${file} instead"
            fi
        fi
    fi

    # storage URLs always have to be downloaded
    case "${file}" in
        (srm:/*|gsiftp:/*) do_get=1 ;;
    esac

    if [ "${do_get}" -ne 0 ]
    then
        # Verify that we have a valid proxy before continuing
        checkProxy "${vo}"

        info "Retrieving file ${file}"
        if ! fetch_with_retries "${file}" "file://${PWD}/${file##*/}"
        then
            abort 3 "Error retrieving ${file}"
        fi

        # from here on work with the local copy
        file="${file##*/}"
    fi

    if [ "${skip_md5}" -eq 0 ]
    then
        # the file should be a .tar.gz file, extract the original archive
        # name from it
        base=$(file "${file}" | sed -n 's/.*compressed data, was "\(.*\)",.*/\1/p')
        if [ -z "${base}" ]
        then
            # Without a name there is nothing to checksum: md5deep would be
            # started without a path operand.
            warn "Cannot determine the original archive name of ${file}, skipping"
            if [ "${do_get}" -ne 0 ]
            then
                rm -f -- "${file}"
            fi
            continue
        fi
        archive="${base%%-[0-9]*}"

        debug "file=${file} base=${base} archive=${archive}"

        CHECKSUM_DIR="${HOME}/dans/${archive}/checksums"

        # fix numbering issue in RACM tar balls
        base=$(fix_racm_base "${archive}" "${base}")

        # extract the tarball in a separate directory, for easy cleanup later
        mkdir -p "${WORKSPACE}"
        info "Extracting file ${file} (real name: ${base}.gz) ..."
        if ! tar -C "${WORKSPACE}" -xzf "${file}"
        then
            warn "Extraction of ${file} failed, checksums will be incomplete"
        fi

        md5file="${base}.md5sum"
        reference="${CHECKSUM_DIR}/${md5file}"
        if [ -r "${md5file}" ]
        then
            warn "\"${md5file}\" exists, appending \".$$\""
            md5file="${md5file}.$$"
        fi

        info "Calculating checksums for all files in archive (output=${md5file})"
        echo "# ${base} START" > "${md5file}"
        # "&&" keeps md5deep from hashing an unrelated directory of the same
        # name should the cd into the workspace fail
        (cd "${WORKSPACE}" && md5deep -l -r "${archive}") | sort -k 2 >> "${md5file}"
        echo "# ${base} END" >> "${md5file}"

        if [ -r "${reference}" ]
        then
            debug "Comparing ${md5file} to ${reference}"

            if diff "${reference}" "${md5file}"
            then
                info "OK"
            else
                warn "Mismatch"
            fi
        else
            debug "${reference} not found, skipping"
        fi
    else
        debug "Skipping md5deep step, reusing previous results"
        # No tar ball was inspected in this iteration; derive the base name
        # from the checksum file instead of reusing the value left over from
        # an earlier iteration.
        base="${md5file%.md5sum}"
    fi

    if [ -n "${symlink}" ]
    then
        entry="${symlink##*/}"
    else
        entry="${file##*/}"
    fi

    # When the entry is not the tar ball itself, print its checksum line(s).
    # -F: the name is a fixed string, not a regular expression.
    if [ "${entry}" != "${base}.gz" ]
    then
        grep -F -- "${entry}" "${md5file}"
    fi

    # delete the tar ball (if downloaded!)
    if [ "${do_get}" -ne 0 ]
    then
        rm -f -- "${file}"
    fi
    # clear out the ${WORKSPACE}
    rm -rf -- "${WORKSPACE}"
done

# remove the timestamp
rm -f -- "${TIMESTAMP}"

Properties

Name Value
svn:executable

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28