51 |
abort 1 "${USAGE}" |
abort 1 "${USAGE}" |
52 |
fi |
fi |
53 |
|
|
54 |
|
WORKSPACE=workspace.$$ |
55 |
|
TIMESTAMP=check-file.timestamp.$$ |
56 |
|
# set a timestamp to only use newly downloaded files |
57 |
|
touch ${TIMESTAMP} |
58 |
|
|
59 |
# Loop over all files|lfns|surls |
# Loop over all files|lfns|surls |
60 |
for file |
for file |
61 |
do |
do |
62 |
do_get=0 |
do_get=0 |
63 |
if [ "${file}" != "${file#lfn://}" ] |
skip_md5=0 |
64 |
|
symlink="" |
65 |
|
|
66 |
|
if [ "${file}" != "${file#lfn:/}" ] |
67 |
then |
then |
68 |
do_get=1 |
do_get=1 |
69 |
|
|
70 |
|
# Verify that we have a valid proxy before continuing |
71 |
|
checkProxy "${vo}" |
72 |
|
|
73 |
|
lfcEntry=`lfc-ls -l "${file#lfn:}" 2> /dev/null` |
74 |
|
if [ $? -ne 0 ] |
75 |
|
then |
76 |
|
abort 2 "Error listing file ${file}" |
77 |
|
fi |
78 |
|
if [ -z "${lfcEntry}" ] |
79 |
|
then |
80 |
|
warn "LFC file ${file} not found" |
81 |
|
continue |
82 |
|
fi |
83 |
|
|
84 |
|
# check if the LFC entry is a symlink |
85 |
|
if [ `echo "${lfcEntry}" | cut -c1,1` = "l" ] |
86 |
|
then |
87 |
|
symlink="${file}" |
88 |
|
file=`echo "${lfcEntry}" | sed 's/.*-> //'` |
89 |
|
md5file="${file##*/}" |
90 |
|
md5file="${md5file%.gz}.md5sum" |
91 |
|
|
92 |
|
# add the protocol again |
93 |
|
file="lfn:${file}" |
94 |
|
debug "Symbolic link detected: ${symlink} -> ${file}" |
95 |
|
|
96 |
|
debug "md5file should be ${md5file}" |
97 |
|
# we now know that the current entry is a symlink, usually to a tarball |
98 |
|
# check if we already calculated the checksums for this tarball since |
99 |
|
# the start of the script. If so, use it, if not, download the tarball |
100 |
|
if [ -r "${md5file}" -a "${md5file}" -nt "${TIMESTAMP}" ] |
101 |
|
then |
102 |
|
debug "${file} was already checked since the start of the script." |
103 |
|
|
104 |
|
do_get=0 |
105 |
|
skip_md5=1 |
106 |
|
else |
107 |
|
debug "Downloading ${file} instead" |
108 |
|
fi |
109 |
|
fi |
110 |
|
|
111 |
fi |
fi |
112 |
if [ "${file}" != "${file#srm://}" ] |
if [ "${file}" != "${file#srm:/}" ] |
113 |
then |
then |
114 |
do_get=1 |
do_get=1 |
115 |
fi |
fi |
116 |
if [ "${file}" != "${file#gsiftp://}" ] |
if [ "${file}" != "${file#gsiftp:/}" ] |
117 |
then |
then |
118 |
do_get=1 |
do_get=1 |
119 |
fi |
fi |
120 |
|
|
121 |
if [ ${do_get} -ne 0 ] |
if [ ${do_get} -ne 0 ] |
122 |
then |
then |
123 |
|
# Verify that we have a valid proxy before continuing |
124 |
|
checkProxy "${vo}" |
125 |
|
|
126 |
info "Retrieving file ${file}" |
info "Retrieving file ${file}" |
127 |
lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" |
lcg-cp ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}" |
128 |
if [ $? -ne 0 ] |
if [ $? -ne 0 ] |
129 |
then |
then |
130 |
warn "First attempt failed, sleeping 30 seconds and retrying..." |
warn "First attempt failed, sleeping 30 seconds and retrying..." |
131 |
sleep 30 |
sleep 30 |
132 |
lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" |
lcg-cp ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}" |
133 |
fi |
fi |
134 |
if [ $? -ne 0 ] |
if [ $? -ne 0 ] |
135 |
then |
then |
136 |
warn "Second attempt failed, sleeping 90 seconds and retrying..." |
warn "Second attempt failed, sleeping 90 seconds and retrying..." |
137 |
sleep 90 |
sleep 90 |
138 |
date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v $SRM_TIMEOUTS" 1>&2 |
date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v ${SRM_TIMEOUTS}" 1>&2 |
139 |
lcg-cp -v -v $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" |
lcg-cp -v -v ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}" |
140 |
fi |
fi |
141 |
if [ $? -ne 0 ] |
if [ $? -ne 0 ] |
142 |
then |
then |
146 |
file="${file##*/}" |
file="${file##*/}" |
147 |
fi |
fi |
148 |
|
|
149 |
# the file should be a .tar.gz file, extract the original archive name from it |
if [ ${skip_md5} -eq 0 ] |
|
base=`file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p'` |
|
|
archive="${base%%-[0-9]*}" |
|
|
|
|
|
debug "file=${file} base=${base} archive=${archive}" |
|
|
|
|
|
CHECKSUM_DIR="${HOME}/dans/${archive}/checksums" |
|
|
|
|
|
# fix numbering issue in RACM tar balls |
|
|
if [ "x${archive}" = "xRACM" ] |
|
150 |
then |
then |
151 |
index=${base#${archive}-} |
# the file should be a .tar.gz file, extract the original archive name from it |
152 |
index=${index%.tar} |
base=`file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p'` |
153 |
if [ ${#index} -lt 4 ] |
archive="${base%%-[0-9]*}" |
154 |
|
|
155 |
|
debug "file=${file} base=${base} archive=${archive}" |
156 |
|
|
157 |
|
CHECKSUM_DIR="${HOME}/dans/${archive}/checksums" |
158 |
|
|
159 |
|
# fix numbering issue in RACM tar balls |
160 |
|
if [ "x${archive}" = "xRACM" ] |
161 |
then |
then |
162 |
base="${archive}-0${index}.tar" |
index=${base#${archive}-} |
163 |
|
index=${index%.tar} |
164 |
|
if [ ${#index} -lt 4 ] |
165 |
|
then |
166 |
|
base="${archive}-0${index}.tar" |
167 |
|
fi |
168 |
fi |
fi |
|
fi |
|
|
|
|
|
# extract the tarball in a separate directory, for easy cleanup later |
|
|
mkdir -p workspace |
|
|
info "Extracting file ${file} (real name: ${base}.gz) ..." |
|
|
tar -C workspace -xzf "${file}" |
|
169 |
|
|
170 |
md5file="${base}.md5sum" |
# extract the tarball in a separate directory, for easy cleanup later |
171 |
if [ -r "${md5file}" ] |
mkdir -p ${WORKSPACE} |
172 |
then |
info "Extracting file ${file} (real name: ${base}.gz) ..." |
173 |
warn "\"${md5file}\" exists, append \".$$\"" |
tar -C ${WORKSPACE} -xzf "${file}" |
174 |
md5file="${md5file}.$$" |
|
175 |
|
md5file="${base}.md5sum" |
176 |
|
if [ -r "${md5file}" ] |
177 |
|
then |
178 |
|
warn "\"${md5file}\" exists, appending \".$$\"" |
179 |
|
md5file="${md5file}.$$" |
180 |
|
fi |
181 |
|
|
182 |
|
info "Calculating checksums for all files in archive (output=${md5file})" |
183 |
|
echo "# ${base} START" > "${md5file}" |
184 |
|
(cd ${WORKSPACE}; md5deep -l -r ${archive}) | sort -k 2 >> "${md5file}" |
185 |
|
echo "# ${base} END" >> "${md5file}" |
186 |
|
|
187 |
|
if [ -r "${CHECKSUM_DIR}/${md5file%.$$}" ] |
188 |
|
then |
189 |
|
debug "Comparing ${md5file} to ${CHECKSUM_DIR}/${md5file%.$$}" |
190 |
|
|
191 |
|
diff "${CHECKSUM_DIR}/${md5file%.$$}" "${md5file}" && info "OK" || warn "Mismatch" |
192 |
|
else |
193 |
|
debug "${CHECKSUM_DIR}/${md5file%.$$} not found, skipping" |
194 |
|
fi |
195 |
|
else |
196 |
|
debug "Skipping md5deep step, reusing previous results" |
197 |
fi |
fi |
198 |
|
|
199 |
info "Calculating checksums for all files in archive (output=${md5file})" |
if [ -n "${symlink}" ] |
|
echo "# ${base} START" > "${md5file}" |
|
|
(cd workspace; md5deep -l -r ${archive}) | sort -k 2 >> "${md5file}" |
|
|
echo "# ${base} END" >> "${md5file}" |
|
|
|
|
|
if [ -r "${CHECKSUM_DIR}/${md5file%.$$}" ] |
|
200 |
then |
then |
201 |
debug "Comparing ${md5file} to ${CHECKSUM_DIR}/${md5file%.$$}" |
entry="${symlink##*/}" |
|
|
|
|
diff "${CHECKSUM_DIR}/${md5file%.$$}" "${md5file}" && info "OK" || warn "Mismatch" |
|
202 |
else |
else |
203 |
debug "${CHECKSUM_DIR}/${md5file%.$$} not found, skipping" |
entry="${file##*/}" |
204 |
fi |
fi |
205 |
|
|
206 |
if [ "${file##*/}" != "${base}.gz" ] |
if [ "${entry}" != "${base}.gz" ] |
207 |
then |
then |
208 |
grep "${file##*/}" "${md5file}" |
grep "${entry}" "${md5file}" |
209 |
fi |
fi |
210 |
|
|
211 |
# delete the tar ball (if downloaded!) |
# delete the tar ball (if downloaded!) |
213 |
then |
then |
214 |
rm -f "${file}" |
rm -f "${file}" |
215 |
fi |
fi |
216 |
# clear out the workspace |
# clear out the ${WORKSPACE} |
217 |
rm -rf workspace |
rm -rf ${WORKSPACE} |
218 |
done |
done |
219 |
|
|
220 |
|
# remove the timestamp |
221 |
|
rm -f ${TIMESTAMP} |