#!/usr/bin/env python
# $Id$
# Source: $URL$
# J. A. Templon, NIKHEF/PDP 2011

# script to do yearly WLCG accounting and format it like I want.
# note : sorting logic is based on a by-month grouping; if you change that,
# you'll need to change some of the python code.

# constants to be changed each year

# pledged capacity per VO, in HS06 units; keys must match the VO names
# used in the rest of the script
pledges = {
    'alice': 1844,
    'atlas': 17580,
    'lhcb': 8827,
}

# value written on the "farmcap" row of the report
total_cap = 39671

import optparse

# command line:
#   -p PASSWORD  mandatory; password for the NDPF accounting DB
#   START        mandatory; start date
#   END          optional; providing nothing means end date is 3 months
#                after start.  It can be provided in format yyyy-mm-dd or
#                as +3m (for 3 months later than start)
p = optparse.OptionParser(
    # spell out the positional arguments, which optparse's default usage
    # line ("%prog [options]") does not mention
    usage="%prog -p PASSWORD START [END]",
    description=("Program to generate a CSV dump of computing usage "
                 "by WLCG over a specified period"))

p.add_option("-p", action="store", dest="dbpassw", default=None,
             help="password for NDPF accounting DB")
debug = 0

# opts.dbpassw holds the -p value; args holds the positional date arguments
opts, args = p.parse_args()

import sys
import datetime


def _bail_out(message):
    """Write *message* and a "Bailing out." line to stderr, then exit 1."""
    sys.stderr.write(message)
    sys.stderr.write("Bailing out.\n")
    sys.exit(1)


# the DB password and a start date are both mandatory
if not opts.dbpassw:
    _bail_out("Error: password to NDPF accounting DB must be provided with -p\n")

if len(args) < 1:
    _bail_out("Error: no date argument detected. A start date must be provided.\n")

# first positional argument: start date, still in its textual form
start_ascii = args[0]

def parsedate(s):
    """Convert a ``yyyy-mm-dd`` string into a ``datetime.date``.

    The string is split on "-" and must yield exactly three integer
    fields (year, month, day); leading zeros are optional.

    On failure a one-line diagnostic naming the offending value is written
    to stderr and the exception is re-raised.

    Raises:
        ValueError: wrong number of fields, a non-integer field, or a
            combination that is not a valid calendar date.
        AttributeError / TypeError: *s* is not a string.
    """
    try:
        fields = s.split("-")
        # surplus fields used to be ignored silently and a short date
        # surfaced as IndexError; treat both as a malformed date
        if len(fields) != 3:
            raise ValueError("expected yyyy-mm-dd, got %r" % (s,))
        year, month, day = [int(f) for f in fields]
        return datetime.date(year, month, day)
    except (AttributeError, TypeError, ValueError):
        # %-formatting keeps the diagnostic working even if s is not a string
        sys.stderr.write("Error parsing date string %s\n" % (s,))
        raise

# optional second positional argument: end date (yyyy-mm-dd) or "+<N>m"
if len(args) > 1:
    end_ascii = args[1]
else:
    end_ascii = None

SDATE = parsedate(start_ascii)  # starting date; script logic assumes start of month

# nmon is the length of the period in months, or None when an absolute
# end date was given
if not end_ascii:
    nmon = 3
elif end_ascii.startswith('+'):
    try:
        if not end_ascii.endswith('m'):
            raise ValueError(end_ascii)
        # everything between the '+' and the trailing 'm' must be the count
        nmon = int(end_ascii[1:-1])
        if nmon < 1:
            raise ValueError(end_ascii)
    except ValueError:
        sys.stderr.write("error in end date string. accepted formats are" +
                         " 2011-05-27 or +3m\n")
        sys.exit(1)
else:
    nmon = None

if nmon is not None:
    # work with zero-based months so that divmod gives the number of whole
    # years to carry; this also covers periods longer than twelve months
    carry, month0 = divmod(SDATE.month - 1 + nmon, 12)
    eyear = SDATE.year + carry
    emonth = month0 + 1
    EDATE = datetime.date(eyear, emonth, SDATE.day)
else:
    EDATE = parsedate(end_ascii)  # ending date

# one pre-formatted string prints the same text under Python 2 and 3
print("generating data for jobs between %s and %s" % (SDATE, EDATE))

# VOs covered by the report
vos = ['alice', 'atlas', 'lhcb']

# group names belonging to each VO; each one is handed to accuse with -g
groups = {
    'alice': ['palice', 'alicesgm'],
    'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'],
    'lhcb': ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm'],
}

ACCBASECMD = ['/usr/local/bin/accuse']
# explicit argument list: the password stays a single argument even if it
# contains whitespace (building one string and splitting it would break it up)
ACCBASEARG = ['--byendtime', '-dbpasswd', opts.dbpassw, '-f', 'lcg', '-m']

import subprocess

# raw accuse stdout, keyed by VO name
perlout = dict()

# gather raw accuse output

for vo in vos:
    groupargs = []
    for g in groups[vo]:
        groupargs += ["-g", g]
    # separate name so the parsed command-line 'args' is not overwritten
    cmd = (ACCBASECMD + ACCBASEARG
           + ["-s", SDATE.isoformat(), "-e", EDATE.isoformat()]
           + groupargs)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    perlout[vo] = proc.communicate()[0]
    if proc.returncode != 0:
        # keep going, but do not let a failed run pass for "no usage";
        # assumes accuse follows the usual nonzero-on-failure convention
        sys.stderr.write("Warning: accuse exited with status %d for VO %s\n"
                         % (proc.returncode, vo))

def hms2dec(str):
    """Convert an ``h:m:s`` duration string into decimal hours.

    The argument must consist of exactly three ":"-separated numeric
    fields; each is read as a float, so the hours field is not limited
    to 0-23.

    Raises:
        ValueError: the string does not have exactly three fields, or a
            field is not numeric.
    """
    # NOTE: the parameter name shadows the builtin str; it is kept so the
    # function's signature stays unchanged
    hours, minutes, seconds = str.split(':')
    return float(hours) + (float(minutes) + float(seconds)/60.)/60.

# parse output
# results in data structure like this
#
# data = parsed['alice']['2010-02']['hs06days']

import re
# lines of interest start with a yyyy-mm month key or with 'Summed'
mpatt = re.compile(r'20[012][0-9]-[01][0-9]')
parsed = dict()
for vo in vos:
    parsed[vo] = dict()
    for line in perlout[vo].split('\n'):
        if not (mpatt.match(line) or line.startswith('Summed')):
            continue
        fields = line.split()
        # the first field is the row key; field 3 is not used
        parsed[vo][fields[0]] = {
            'cpu': hms2dec(fields[1]),
            'wall': hms2dec(fields[2]),
            # NOTE(review): the factor 4 is not explained anywhere in this
            # file (presumably a unit conversion) -- TODO confirm
            'hs06days': float(fields[4]) * 4,
            'njobs': int(fields[5]),
        }

# output generation ... big damn csv file

import csv

ONEDAY = datetime.timedelta(days=1)


def _month_periods(start, end):
    """Yield (monthkey, ndays) for each month-long period from start to end.

    A period runs from its first day up to the day before the same day of
    the following month, cut short at *end*.  monthkey is the yyyy-mm of
    the period's first day; ndays counts both the first and last day.
    """
    period_start = start
    while period_start < end:
        if period_start.month < 12:
            next_start = period_start.replace(month=period_start.month + 1)
        else:
            # December rolls over into January of the following year
            next_start = period_start.replace(year=period_start.year + 1,
                                              month=1)
        period_end = min(next_start - ONEDAY, end)
        yield (period_start.isoformat()[:7],
               (period_end - period_start + ONEDAY).days)
        period_start = period_end + ONEDAY


# every section of the report walks the same periods, so work them out once
periods = list(_month_periods(SDATE, EDATE))

# months without any parsed data get placeholders; cpu and wall are both
# 0.1 so that the cpu/wall ratio is defined
for vo in vos:
    for monthkey, ndays in periods:
        if monthkey not in parsed[vo]:
            parsed[vo][monthkey] = {
                'hs06days': 0, 'cpu': 0.1, 'wall': 0.1, 'njobs': 0
            }

# column order of the three summary tables (matches their header rows)
summary_vos = ['lhcb', 'atlas', 'alice']

# 'wb' is what the Python 2 csv module expects for its output file
csvfile = open('tmp.csv', 'wb')
try:
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)

    # per-VO segment

    for vo in vos:
        writer.writerow(["Data for", vo])
        writer.writerow(["Month", "hs06days.used", "cpu/wall", "pledged",
                         "jobs", "days"])
        for monthkey, ndays in periods:
            td = parsed[vo][monthkey]
            writer.writerow([monthkey,
                             td['hs06days'],
                             td['cpu']/td['wall'],
                             ndays * pledges[vo],
                             td['njobs'],
                             ndays
                             ])
        # NOTE(review): the stored copy of this script had lost its
        # indentation; the separator row is assumed to follow each VO's
        # table rather than only the last one -- confirm against a report
        writer.writerow([' ', ' '])

    # usage plots

    writer.writerow(["hs06 days used per VO"])
    writer.writerow(["Month", "lhcb.used", "atlas.used", "alice.used"])
    for monthkey, ndays in periods:
        writer.writerow([monthkey] +
                        [parsed[v][monthkey]['hs06days']
                         for v in summary_vos])

    writer.writerow([' ', ' '])

    # pledge fraction plots

    writer.writerow(["pledge fraction used per VO"])
    writer.writerow(["Month", "lhcb.frac", "atlas.frac", "alice.frac"])
    for monthkey, ndays in periods:
        writer.writerow([monthkey] +
                        [parsed[v][monthkey]['hs06days']/(pledges[v]*ndays)
                         for v in summary_vos])

    writer.writerow([' ', ' '])

    # cpu eff plots

    writer.writerow(["ratio cpu to wall time used (eff) per VO"])
    writer.writerow(["Month", "lhcb.eff", "atlas.eff", "alice.eff"])
    for monthkey, ndays in periods:
        writer.writerow([monthkey] +
                        [parsed[v][monthkey]['cpu']/parsed[v][monthkey]['wall']
                         for v in summary_vos])

    writer.writerow([' ', ' '])

    writer.writerow(["Pledges and capacties (HS06)"])
    writer.writerow(["Group", "power"])
    writer.writerow(["ATLAS", pledges["atlas"]])
    writer.writerow(["LHCb", pledges["lhcb"]])
    writer.writerow(["ALICE", pledges["alice"]])
    writer.writerow(["farmcap", total_cap])
finally:
    # make sure the report is flushed and closed even if a row fails
    csvfile.close()