1 |
dennisvd |
1904 |
#!/bin/bash |
2 |
|
|
|
3 |
|
|
# FIXMELATER |
4 |
|
|
# Delegation identifier passed to glite-wms-job-submit through its -d option.
delegation=dennis

# Set to 1 when the -m flag is given on the command line.
mopup=0

# Parse the command-line options; -m is the only one recognised.
while getopts m opt; do
    case $opt in
        m)
            mopup=1
            ;;
        \?)
            # getopts stores a literal '?' in opt for an unrecognised option.
            # The backslash keeps this pattern from acting as the
            # match-any-single-character glob.
            echo "ERROR: unknown option $opt." >&2
            exit 1
            ;;
    esac
done
# Discard the options that were parsed so $1 is the first remaining argument.
shift $((OPTIND - 1))
20 |
|
|
|
21 |
|
|
# look up the resources in the data file |
22 |
|
|
|
23 |
|
|
# Everything below is driven by resources.dat in the current directory, so
# give up immediately when it cannot be read.
[ -r resources.dat ] || {
    echo "ERROR: Missing resource datafile resources.dat" >&2
    exit 1
}

# Collect the first field of every line whose first field does not begin
# with '#'.
sites=$(awk ' $1 !~ /^#/ { print $1 }' resources.dat)
29 |
|
|
|
30 |
|
|
# Submit one job per site. Each submission runs in its own background
# subshell; the subshell PIDs are collected so they can be reaped afterwards.
pids=
for i in $sites ; do
    # create a work directory
    workdir=$i
    if [ ! -d "$workdir" ]; then
        mkdir "$workdir"
    fi
    (
        # Stop here when the work directory is unusable; otherwise the job
        # file and the jids file would be written into the parent directory.
        cd "$workdir" || exit 1

        # Second field of the resources.dat line whose first field is this
        # site. Appending "" forces a string comparison, as the original
        # quoted literal did.
        endpoint=$(awk -v site="$i" '$1 == (site "") { print $2 }' ../resources.dat)

        # Write the job description once; an existing readable file is reused.
        jobfile=pkgsrc-status.jdl
        if [ ! -r "$jobfile" ]; then
            cat > "$jobfile" <<EOF
Executable = "pkgsrc-cmd.sh";
Arguments = "-d check";
Stdoutput = "stdout";
StdError = "stderror";
InputSandbox = "../pkgsrc-cmd.sh";
OutputSandbox = {"stdout","status.txt"};
EOF
        fi

        # -s silences the "No such file" complaint on the first run, when no
        # jids file exists yet. Matching lines are still printed.
        if grep -s '^https' jids ; then
            echo "DEBUG: jids file already has jobs, skipping $i"
        else
            echo "DEBUG: glite-wms-job-submit -d $delegation -o jids -r $endpoint $jobfile"
            if ! glite-wms-job-submit -d "$delegation" -o jids -r "$endpoint" "$jobfile"; then
                echo "Error: Failed to submit job to $i" >&2
                # Make the failure visible to the wait loop below.
                exit 1
            fi
        fi
    ) &
    pids="$pids $!"
done

# Reap every submission subshell and report the ones that exited non-zero.
for i in $pids ; do
    if ! wait "$i"; then
        echo "ERROR: job $i failed." >&2
    fi
done
69 |
|
|
|
70 |
|
|
# wait and poll; systematically go through all the jid files and check their |
71 |
|
|
# status. When a job has finished, fetch the results and clean up. |
72 |
|
|
|
73 |
|
|
# Retrieve the output of a job and fold it into the per-site log files in
# the current directory.
#
#   $1  job id; the text after its last '/' names the scratch directory and
#       the retrieval log file
#
# On success, appends <hash>/status.txt to ./status and <hash>/stdout to
# ./job.log, then removes the scratch directory. Returns 1 when
# glite-wms-job-output fails. Sets the global variable jobhash.
get_job_output() {
    jobhash=${1##*/}

    if ! glite-wms-job-output --noint --nosubdir --logfile "$jobhash.log" --dir "$jobhash" "$1"; then
        echo "failed to get job output for $1" >&2
        # File descriptor 3 is not opened by this function. Write to it only
        # when the caller has it open, so the failure path does not add a
        # "Bad file descriptor" error of its own.
        if { true >&3; } 2>/dev/null; then
            echo "job output retrieval failure" >&3
        fi
        return 1
    fi

    # just append
    cat "$jobhash/status.txt" >> status
    cat "$jobhash/stdout" >> job.log
    rm -rf "$jobhash"
}
89 |
|
|
|
90 |
|
|
# Save the logging info of a job and drop the job from the jids file in the
# current directory.
#
#   $1  job id
#   $2  job state, used only in the progress message
#
# The logging info is written to joblog-<hash>.log, where <hash> is the text
# after the last '/' of the job id. Returns 1 when jids could not be
# rewritten.
get_logging_and_clear() {
    local jobid=$1
    local state=$2
    # Derive the hash from the argument instead of relying on a global that
    # the caller happened to set.
    local hash=${jobid##*/}

    # get the logging info for the job ...
    echo "job $jobid is $state, getting logging and removing it"
    glite-wms-job-logging-info -o "joblog-$hash.log" \
        --noint -v 3 "$jobid"

    # ... then remove the jobid from the list.
    # grep exits 1 when it selects no lines. That is the normal outcome when
    # this job was the only entry, and jids must still be replaced then.
    # Only an exit status above 1 is a real error.
    grep -v -F -- "$jobid" jids > jids.new
    if [ $? -le 1 ]; then
        mv jids.new jids
    else
        rm -f jids.new
        return 1
    fi
}
99 |
|
|
|
100 |
|
|
# Poll every site's jids file until no job is left in a non-final state.
# waitlonger is cleared at the start of each pass and set again by any job
# that is neither done nor cleared/aborted/cancelled.
waitlonger=1

while [ "$waitlonger" -eq 1 ]; do
    waitlonger=0
    for i in $sites ; do
        # Skip a site whose directory cannot be entered. Without this guard
        # the 'cd ..' below would walk out of the base directory.
        if ! cd "$i"; then
            echo "cannot enter work directory $i; skipping." >&2
            continue
        fi
        if test -f jids && grep -q '^https:' jids; then
            jobs=$(grep -v '^#' jids)
            for j in $jobs ; do
                # Kept global: get_logging_and_clear may read it.
                jobhash=${j##*/}
                rm -f jobstate
                if ! glite-wms-job-status --noint --logfile joblog -o jobstate "$j" > /dev/null 2>&1; then
                    echo "failed to retrieve job status for $j; skipping."
                    # NOTE(review): this abandons the remaining jobs of this
                    # site for the current pass and does not set waitlonger.
                    # Confirm that is intended rather than 'continue'.
                    break
                fi
                # grep the status file for "Current Status"
                state=$(sed -n -e '/Current Status/ s/.*:\s*// p' jobstate)
                case $state in
                    "Done"* )
                        # job is done, get the output
                        echo "job $j is done, getting output"
                        get_job_output "$j"
                        # Quoted so a multi-word state is passed as one
                        # argument.
                        get_logging_and_clear "$j" "$state"
                        rm -f jobstate
                        ;;
                    Cleared* | Aborted* | Cancelled* )
                        get_logging_and_clear "$j" "$state"
                        rm -f jobstate
                        ;;
                    * )
                        # job still in queue.
                        waitlonger=1
                        ;;
                esac
            done
        fi
        cd ..
    done
    # Pause only when another pass is actually going to happen.
    if [ "$waitlonger" -eq 1 ]; then
        sleep 10
    fi
done
141 |
|
|
|
142 |
|
|
# fetch results, interpret and publish |
143 |
|
|
|