/[pdpsoft]/trunk/nl.biggrid.dans/check-file
ViewVC logotype

Diff of /trunk/nl.biggrid.dans/check-file

Parent Directory | Revision Log | View Patch

revision 2581 by svn, Tue Nov 20 12:05:55 2012 UTC | revision 2582 by svn, Tue Nov 20 12:07:59 2012 UTC
# Line 51  then Line 51  then
51      abort 1 "${USAGE}"      abort 1 "${USAGE}"
52  fi  fi
53    
54    WORKSPACE=workspace.$$
55    TIMESTAMP=check-file.timestamp.$$
56    # set a timestamp to only use newly downloaded files
57    touch  ${TIMESTAMP}
58    
59  # Loop over all files|lfns|surls  # Loop over all files|lfns|surls
60  for file  for file
61  do  do
62      do_get=0      do_get=0
63      if [ "${file}" != "${file#lfn://}" ]      skip_md5=0
64        symlink=""
65    
66        if [ "${file}" != "${file#lfn:/}" ]
67      then      then
68          do_get=1          do_get=1
69    
70            # Verify that we have a valid proxy before continuing
71            checkProxy "${vo}"
72    
73            lfcEntry=`lfc-ls -l "${file#lfn:}" 2> /dev/null`
74            if [ $? -ne 0 ]
75            then
76                abort 2 "Error listing file ${file}"
77            fi
78            if [ -z "${lfcEntry}" ]
79            then
80                warn "LFC file ${file} not found"
81                continue
82            fi
83    
84            # check if the LFC entry is a symlink
85            if [ `echo "${lfcEntry}" | cut -c1,1` = "l" ]
86            then
87                symlink="${file}"
88                file=`echo "${lfcEntry}" | sed 's/.*-> //'`
89                md5file="${file##*/}"
90                md5file="${md5file%.gz}.md5sum"
91    
92                # add the protocol again
93                file="lfn:${file}"
94                debug "Symbolic link detected: ${symlink} -> ${file}"
95    
96                debug "md5file should be ${md5file}"
97                # we now know that the current entry is a symlink, usually to a tarball
98                # check if we already calculated the checksums for this tarball since
99                # the start of the script. If so, use it, if not, download the tarball
100                if [ -r "${md5file}" -a "${md5file}" -nt "${TIMESTAMP}" ]
101                then
102                    debug "${file} was already checked since the start of the script."
103                    
104                    do_get=0
105                    skip_md5=1
106                else
107                    debug "Downloading ${file} instead"
108                fi
109            fi
110            
111      fi      fi
112      if [ "${file}" != "${file#srm://}" ]      if [ "${file}" != "${file#srm:/}" ]
113      then      then
114          do_get=1          do_get=1
115      fi      fi
116      if [ "${file}" != "${file#gsiftp://}" ]      if [ "${file}" != "${file#gsiftp:/}" ]
117      then      then
118          do_get=1          do_get=1
119      fi      fi
120    
121      if [ ${do_get} -ne 0 ]      if [ ${do_get} -ne 0 ]
122      then      then
123            # Verify that we have a valid proxy before continuing
124            checkProxy "${vo}"
125    
126          info "Retrieving file ${file}"          info "Retrieving file ${file}"
127          lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}"          lcg-cp ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}"
128          if [ $? -ne 0 ]          if [ $? -ne 0 ]
129          then          then
130              warn "First attempt failed, sleeping 30 seconds and retrying..."              warn "First attempt failed, sleeping 30 seconds and retrying..."
131              sleep 30              sleep 30
132              lcg-cp $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}"              lcg-cp ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}"
133          fi          fi
134          if [ $? -ne 0 ]          if [ $? -ne 0 ]
135          then          then
136              warn "Second attempt failed, sleeping 90 seconds and retrying..."              warn "Second attempt failed, sleeping 90 seconds and retrying..."
137              sleep 90              sleep 90
138              date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v $SRM_TIMEOUTS" 1>&2              date +"%Y/%m/%d-%H:%M:%S start lcg-cp -v -v ${SRM_TIMEOUTS}" 1>&2
139              lcg-cp -v -v $SRM_TIMEOUTS "${file}" file://$PWD/"${file##*/}"              lcg-cp -v -v ${SRM_TIMEOUTS} "${file}" file://$PWD/"${file##*/}"
140          fi          fi
141          if [ $? -ne 0 ]          if [ $? -ne 0 ]
142          then          then
# Line 93  do Line 146  do
146          file="${file##*/}"          file="${file##*/}"
147      fi      fi
148    
149      # the file should be a .tar.gz file, extract the original archive name from it      if [ ${skip_md5} -eq 0 ]
     base=`file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p'`  
     archive="${base%%-[0-9]*}"  
       
     debug "file=${file} base=${base} archive=${archive}"  
   
     CHECKSUM_DIR="${HOME}/dans/${archive}/checksums"  
   
     # fix numbering issue in RACM tar balls  
     if [ "x${archive}" = "xRACM" ]  
150      then      then
151          index=${base#${archive}-}          # the file should be a .tar.gz file, extract the original archive name from it
152          index=${index%.tar}          base=`file "${file}" | sed -n 's/.*compressed data, was \"\(.*\)\",.*/\1/p'`
153          if [ ${#index} -lt 4 ]          archive="${base%%-[0-9]*}"
154            
155            debug "file=${file} base=${base} archive=${archive}"
156        
157            CHECKSUM_DIR="${HOME}/dans/${archive}/checksums"
158        
159            # fix numbering issue in RACM tar balls
160            if [ "x${archive}" = "xRACM" ]
161          then          then
162              base="${archive}-0${index}.tar"              index=${base#${archive}-}
163                index=${index%.tar}
164                if [ ${#index} -lt 4 ]
165                then
166                    base="${archive}-0${index}.tar"
167                fi
168          fi          fi
     fi  
   
     # extract the tarball in a separate directory, for easy cleanup later  
     mkdir -p workspace  
     info "Extracting file ${file} (real name: ${base}.gz) ..."  
     tar -C workspace -xzf "${file}"  
169    
170      md5file="${base}.md5sum"          # extract the tarball in a separate directory, for easy cleanup later
171      if [ -r "${md5file}" ]          mkdir -p ${WORKSPACE}
172      then          info "Extracting file ${file} (real name: ${base}.gz) ..."
173          warn "\"${md5file}\" exists, append \".$$\""          tar -C ${WORKSPACE} -xzf "${file}"
174          md5file="${md5file}.$$"      
175            md5file="${base}.md5sum"
176            if [ -r "${md5file}" ]
177            then
178                warn "\"${md5file}\" exists, appending \".$$\""
179                md5file="${md5file}.$$"
180            fi
181        
182            info "Calculating checksums for all files in archive (output=${md5file})"
183            echo "# ${base} START" > "${md5file}"
184            (cd ${WORKSPACE}; md5deep -l -r ${archive}) | sort -k 2 >> "${md5file}"
185            echo "# ${base} END" >> "${md5file}"
186        
187            if [ -r "${CHECKSUM_DIR}/${md5file%.$$}" ]
188            then
189                debug "Comparing ${md5file} to ${CHECKSUM_DIR}/${md5file%.$$}"
190        
191                diff "${CHECKSUM_DIR}/${md5file%.$$}" "${md5file}" && info "OK" || warn "Mismatch"
192            else
193                debug "${CHECKSUM_DIR}/${md5file%.$$} not found, skipping"
194            fi
195        else
196            debug "Skipping md5deep step, reusing previous results"
197      fi      fi
198    
199      info "Calculating checksums for all files in archive (output=${md5file})"      if [ -n "${symlink}" ]
     echo "# ${base} START" > "${md5file}"  
     (cd workspace; md5deep -l -r ${archive}) | sort -k 2 >> "${md5file}"  
     echo "# ${base} END" >> "${md5file}"  
   
     if [ -r "${CHECKSUM_DIR}/${md5file%.$$}" ]  
200      then      then
201          debug "Comparing ${md5file} to ${CHECKSUM_DIR}/${md5file%.$$}"          entry="${symlink##*/}"
   
         diff "${CHECKSUM_DIR}/${md5file%.$$}" "${md5file}" && info "OK" || warn "Mismatch"  
202      else      else
203          debug "${CHECKSUM_DIR}/${md5file%.$$} not found, skipping"          entry="${file##*/}"
204      fi      fi
205      
206      if [ "${file##*/}" != "${base}.gz" ]      if [ "${entry}" != "${base}.gz" ]
207      then      then
208          grep "${file##*/}" "${md5file}"          grep "${entry}" "${md5file}"
209      fi      fi
210    
211      # delete the tar ball (if downloaded!)      # delete the tar ball (if downloaded!)
# Line 148  do Line 213  do
213      then      then
214          rm -f "${file}"          rm -f "${file}"
215      fi      fi
216      # clear out the workspace      # clear out the ${WORKSPACE}
217      rm -rf workspace      rm -rf ${WORKSPACE}
218  done  done
219    
220    # remove the timestamp
221    rm -f ${TIMESTAMP}

Legend:
Removed from v.2581  
Changed lines
  Added in v.2582

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28