#!/bin/bash if [ ! "x$0" = "x${0#-}" ] then echo "Don't source me, just run!" 1>&2 return fi . ${0%/*}/libdans.sh # Load global defaults (vars: vo, archive, LFC_*, SRM_*) loadDefaults $0 DEBUG=0 QUIET=0 KEEPGOING=0 START_INDEX=1 END_INDEX=0 USAGE="\ $0 - verify the contents of one or multiple tar.gz files Usage: ./check-file [-d|--debug] [-k|--keepgoing] [file|lfn|surl] [[file2|lfn2|surl2] ...] Where: --keepgoing tells $0 to keep going after an error file specifies the full path to a local file OR lfn specifies a Logical File Name on the LFC OR surl specifies a Storage URL (srm:// or gsiftp://) " # Parse commandline parameters while [ $# -gt 0 ] do case "$1" in (-h|--help) echo "${USAGE}" exit 0 ;; (-d|--debug) let DEBUG+=1 ;; (-k|--keepgoing) KEEPGOING=1 ;; (-*) echo "Invalid option: $1" exit 1 ;; (*) break ;; esac shift done if [ $# -lt 1 ] then abort 1 "${USAGE}" fi # Loop over all files|lfns|surls for file do do_get=0 if [ "${file}" != "${file#lfn://}" ] then do_get=1 fi if [ "${file}" != "${file#srm://}" ] then do_get=1 fi if [ "${file}" != "${file#gsiftp://}" ] then do_get=1 fi if [ ${do_get} -ne 0 ] then info "Retrieving file ${file}" lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" if [ $? -ne 0 ] then warn "First attempt failed, sleeping 30 seconds and retrying..." sleep 30 lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" fi if [ $? -ne 0 ] then warn "Second attempt failed, sleeping 90 seconds and retrying..." sleep 90 date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v $SRM_TIMEOUTS" 1>&2 lcg-cp -v -v $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}" fi if [ $? -ne 0 ] then abort 3 "Error retrieving ${file}" fi file="${file##*/}" fi # the file should be a .tar.gz file, extract the original archive name from it base=`file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p'` archive="${base%%-[0-9]*}" debug "file=${file} base=${base} archive=${archive}" CHECKSUM_DIR="${HOME}/dans/${archive}/checksums" # fix numbering issue in RACM tar balls if [ "x${archive}" = "xRACM" ] then index=${base#${archive}-} index=${index%.tar} if [ ${#index} -lt 4 ] then base="${archive}-0${index}.tar" fi fi # extract the tarball in a separate directory, for easy cleanup later mkdir -p workspace info "Extracting file ${file} (real name: ${base}.gz) ..." tar -C workspace -xzf "${file}" md5file="${base}.md5sum" if [ -r "${md5file}" ] then warn "\"${md5file}\" exists, append \".$$\"" md5file="${md5file}.$$" fi info "Calculating checksums for all files in archive (output=${md5file})" echo "# ${base} START" > "${md5file}" (cd workspace; md5deep -l -r ${archive}) | sort -k 2 >> "${md5file}" echo "# ${base} END" >> "${md5file}" if [ -r "${CHECKSUM_DIR}/${md5file%.$$}" ] then debug "Comparing ${md5file} to ${CHECKSUM_DIR}/${md5file%.$$}" diff "${CHECKSUM_DIR}/${md5file%.$$}" "${md5file}" && info "OK" || warn "Mismatch" else debug "${CHECKSUM_DIR}/${md5file%.$$} not found, skipping" fi if [ "${file##*/}" != "${base}.gz" ] then grep "${file##*/}" "${md5file}" fi # delete the tar ball (if downloaded!) if [ ${do_get} -ne 0 ] then rm -f "${file}" fi # clear out the workspace rm -rf workspace done