#!/bin/bash
#
# check-file - verify the contents of one or multiple tar.gz files.
#
# For every argument (local path, lfn:/..., srm:/... or gsiftp:/...) the
# script fetches the tarball when it is remote, extracts it into a scratch
# directory, computes md5 checksums of all contained files with md5deep and
# compares the result with a reference list in
# ${HOME}/dans/<archive>/checksums/ when such a list is readable.
#
# Helper functions used below (loadDefaults, abort, warn, info, debug,
# checkProxy) are not defined in this file; they are expected to come from
# libdans.sh, which is sourced from the directory containing this script.

# A leading '-' in $0 (e.g. "-bash") means we were sourced from a login
# shell rather than executed.
if [ "x$0" != "x${0#-}" ]; then
    echo "Don't source me, just run!" 1>&2
    return
fi

# dirname also copes with a $0 that contains no slash ("bash check-file"),
# where ${0%/*} would wrongly yield the script name itself.
script_dir=$(dirname -- "$0")
if ! . "${script_dir}/libdans.sh"; then
    echo "Cannot load ${script_dir}/libdans.sh" 1>&2
    exit 1
fi

# Load global defaults (vars: vo, archive, LFC_*, SRM_*)
loadDefaults "$0"

DEBUG=0
QUIET=0
# NOTE(review): KEEPGOING is set by -k/--keepgoing but never read in this
# file; whether the libdans.sh helpers consult it cannot be seen from here.
KEEPGOING=0
START_INDEX=1
END_INDEX=0

USAGE="\
$0 - verify the contents of one or multiple tar.gz files

Usage: ./check-file [-d|--debug] [-k|--keepgoing] [file|lfn|surl] [[file2|lfn2|surl2] ...]
Where:
  --keepgoing tells $0 to keep going after an error
  file specifies the full path to a local file
OR
  lfn specifies a Logical File Name on the LFC
OR
  surl specifies a Storage URL (srm:// or gsiftp://)
"

# Parse commandline parameters
while [ $# -gt 0 ]; do
    case "$1" in
        -h|--help)
            echo "${USAGE}"
            exit 0
            ;;
        -d|--debug)
            DEBUG=$((DEBUG + 1))
            ;;
        -k|--keepgoing)
            KEEPGOING=1
            ;;
        -*)
            echo "Invalid option: $1" 1>&2
            exit 1
            ;;
        *)
            break
            ;;
    esac
    shift
done

if [ $# -lt 1 ]; then
    abort 1 "${USAGE}"
fi

WORKSPACE="workspace.$$"
TIMESTAMP="check-file.timestamp.$$"

# Remove the scratch directory and the timestamp marker.
checkFileCleanup() {
    rm -rf -- "${WORKSPACE}"
    rm -f -- "${TIMESTAMP}"
}

# Clean up on early exits as well. The trap is only installed when no EXIT
# trap exists yet, so a handler possibly set by libdans.sh is not replaced.
if [ -z "$(trap -p EXIT)" ]; then
    trap checkFileCleanup EXIT
fi

# set a timestamp to only use newly downloaded files
touch -- "${TIMESTAMP}"

# Loop over all files|lfns|surls
for file; do
    do_get=0
    skip_md5=0
    symlink=""
    # Reset per iteration so values from a previous argument never leak
    # into the comparisons further down.
    base=""
    md5file=""

    if [ "${file}" != "${file#lfn:/}" ]; then
        do_get=1

        # Verify that we have a valid proxy before continuing
        checkProxy "${vo}"

        if ! lfcEntry=$(lfc-ls -l "${file#lfn:}" 2> /dev/null); then
            abort 2 "Error listing file ${file}"
        fi
        if [ -z "${lfcEntry}" ]; then
            warn "LFC file ${file} not found"
            continue
        fi

        # check if the LFC entry is a symlink (listing starts with 'l')
        case "${lfcEntry}" in
            l*)
                symlink="${file}"
                file=$(printf '%s\n' "${lfcEntry}" | sed 's/.*-> //')
                md5file="${file##*/}"
                md5file="${md5file%.gz}.md5sum"
                # add the protocol again
                file="lfn:${file}"
                debug "Symbolic link detected: ${symlink} -> ${file}"
                debug "md5file should be ${md5file}"

                # The entry is a symlink, usually to a tarball. If the
                # checksums for that tarball were already calculated since
                # the start of the script, reuse them; otherwise download
                # the tarball.
                if [[ -r "${md5file}" && "${md5file}" -nt "${TIMESTAMP}" ]]; then
                    debug "${file} was already checked since the start of the script."
                    do_get=0
                    skip_md5=1
                    # The checksum step is skipped, so derive the archive
                    # file name from the checksum list instead of relying
                    # on a value left over from an earlier argument.
                    base="${md5file%.md5sum}"
                else
                    debug "Downloading ${file} instead"
                fi
                ;;
        esac
    fi

    case "${file}" in
        srm:/*|gsiftp:/*)
            do_get=1
            ;;
    esac

    if [ "${do_get}" -ne 0 ]; then
        # Verify that we have a valid proxy before continuing
        checkProxy "${vo}"

        info "Retrieving file ${file}"
        destination="file://${PWD}/${file##*/}"

        # ${SRM_TIMEOUTS} is deliberately left unquoted: it presumably holds
        # several lcg-cp options that must be word-split.
        # Up to three attempts with increasing pauses; the last one is verbose.
        if ! lcg-cp ${SRM_TIMEOUTS} "${file}" "${destination}"; then
            warn "First attempt failed, sleeping 30 seconds and retrying..."
            sleep 30
            if ! lcg-cp ${SRM_TIMEOUTS} "${file}" "${destination}"; then
                warn "Second attempt failed, sleeping 90 seconds and retrying..."
                sleep 90
                date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v ${SRM_TIMEOUTS}" 1>&2
                if ! lcg-cp -v -v ${SRM_TIMEOUTS} "${file}" "${destination}"; then
                    abort 3 "Error retrieving ${file}"
                fi
            fi
        fi
        file="${file##*/}"
    fi

    if [ "${skip_md5}" -eq 0 ]; then
        # the file should be a .tar.gz file, extract the original archive
        # name from the gzip header as reported by file(1)
        base=$(file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p')
        if [ -z "${base}" ]; then
            # Without an original name there is no archive directory to
            # checksum; md5deep would be started without any operand.
            warn "Cannot determine the original archive name of ${file}, skipping"
            if [ "${do_get}" -ne 0 ]; then
                rm -f -- "${file}"
            fi
            continue
        fi
        archive="${base%%-[0-9]*}"
        debug "file=${file} base=${base} archive=${archive}"

        CHECKSUM_DIR="${HOME}/dans/${archive}/checksums"

        # fix numbering issue in RACM tar balls: pad the index with one
        # leading zero when it has fewer than four characters
        if [ "x${archive}" = "xRACM" ]; then
            index=${base#"${archive}"-}
            index=${index%.tar}
            if [ ${#index} -lt 4 ]; then
                base="${archive}-0${index}.tar"
            fi
        fi

        # extract the tarball in a separate directory, for easy cleanup later
        mkdir -p -- "${WORKSPACE}"
        info "Extracting file ${file} (real name: ${base}.gz) ..."
        if ! tar -C "${WORKSPACE}" -xzf "${file}"; then
            # Keep going: the checksum comparison below will show what is
            # missing from a partial extraction.
            warn "Error extracting ${file}"
        fi

        md5file="${base}.md5sum"
        if [ -r "${md5file}" ]; then
            warn "\"${md5file}\" exists, appending \".$$\""
            md5file="${md5file}.$$"
        fi

        info "Calculating checksums for all files in archive (output=${md5file})"
        {
            printf '%s\n' "# ${base} START"
            # '&&' ensures md5deep never runs in the current directory when
            # the scratch directory cannot be entered.
            (cd "${WORKSPACE}" && md5deep -l -r "${archive}") | sort -k 2
            printf '%s\n' "# ${base} END"
        } > "${md5file}"

        # The reference list carries the name without the ".<pid>" suffix.
        reference="${CHECKSUM_DIR}/${md5file%.$$}"
        if [ -r "${reference}" ]; then
            debug "Comparing ${md5file} to ${reference}"
            if diff "${reference}" "${md5file}"; then
                info "OK"
            else
                warn "Mismatch"
            fi
        else
            debug "${reference} not found, skipping"
        fi
    else
        debug "Skipping md5deep step, reusing previous results"
    fi

    # Name the user asked for: the symlink when one was followed, else the file.
    entry="${symlink:-${file}}"
    entry="${entry##*/}"

    # When the requested name is not the tarball itself, show its lines from
    # the checksum list. -F/-- make grep treat the name as a literal string.
    if [ "${entry}" != "${base}.gz" ]; then
        grep -F -- "${entry}" "${md5file}"
    fi

    # delete the tar ball (if downloaded!)
    if [ "${do_get}" -ne 0 ]; then
        rm -f -- "${file}"
    fi

    # clear out the ${WORKSPACE}
    rm -rf -- "${WORKSPACE}"
done

# remove the timestamp
rm -f -- "${TIMESTAMP}"