1 |
#!/usr/bin/env python |
2 |
# $Id$ |
3 |
# Source: $URL$ |
4 |
# J. A. Templon, NIKHEF/PDP 2011 |
5 |
|
6 |
# script to do yearly WLCG accounting and format it like I want. |
7 |
# note : sorting logic is based on a by-month grouping; if you change that, |
8 |
# you'll need to change some of the python code. |
9 |
|
10 |
# Constants that have to be revised every year.

# Pledged capacity per VO, in SI06 units.
pledges = dict(
    alice=3209,
    atlas=30586,
    lhcb=15388,
)

# Value reported as "farmcap" in the pledges/capacities table of the CSV.
total_cap = 81718
18 |
|
19 |
import optparse |
20 |
|
21 |
usage = ("usage: %prog -p DBpwd start-date [end-date]\n"
         "date format yyyy-mm-dd; default end date is 3 months after start date")

p = optparse.OptionParser(
    description=("Program to generate a CSV dump of computing usage "
                 " by WLCG over a specified period"),
    usage=usage)

# Command-line contract:
#   -p DBpwd    mandatory
#   start date  mandatory
#   end date    optional; providing nothing means the end date is 3 months
#               after the start.  It can be given as yyyy-mm-dd or as +3m
#               (3 months later than the start).

p.add_option("-p", action="store", dest="dbpassw", default=None,
             help="password for NDPF accounting DB")
debug = 0

opts, args = p.parse_args()

import sys

# Without a DB password nothing can be queried, so stop right away.
if not opts.dbpassw:
    sys.stderr.writelines([
        "Error: password to NDPF accounting DB must be provided with -p\n",
        "Bailing out.\n",
    ])
    sys.exit(1)

# At least one positional argument (the start date) is required.
if not args:
    sys.stderr.writelines([
        "Error: no date argument detected. A start date must be provided.\n",
        "Format: yyyy-mm-dd\n",
        "Bailing out.\n",
    ])
    sys.exit(1)

start_ascii = args[0]
51 |
import datetime |
52 |
def parsedate(s):
    """Convert a 'yyyy-mm-dd' string into a datetime.date.

    The string must consist of exactly three dash-separated integer fields
    (year, month, day).  On any parse failure a one-line message is written
    to stderr and the exception is re-raised.

    Raises:
        ValueError: wrong number of fields, a non-integer field, or a
            combination that is not a valid calendar date.
        AttributeError, TypeError: s is not a string.
    """
    try:
        # Unpacking enforces exactly three fields; the old index-based code
        # silently ignored anything after the third field.
        year, month, day = [int(field) for field in s.split("-")]
        return datetime.date(year, month, day)
    except (AttributeError, TypeError, ValueError):
        # Only parse errors are reported here; KeyboardInterrupt and
        # SystemExit pass through untouched.
        sys.stderr.write("Error parsing date string %s\n" % (s,))
        raise
62 |
|
63 |
# The optional second positional argument describes the end of the period.
if len(args) > 1:
    end_ascii = args[1]
else:
    end_ascii = None

SDATE = parsedate(start_ascii)  # starting date; script logic assumes start of month

# nmon is the length of the period in months, or None when an explicit
# end date was given on the command line.
if not end_ascii:
    nmon = 3
elif end_ascii[:1] == '+':
    nmon = None
    if end_ascii[-1:] == 'm':
        try:
            nmon = int(end_ascii[1:-1])
        except ValueError:
            nmon = None
    # Reject "+m", "+xm", "+0m" and negative counts with the usage hint
    # instead of a traceback further down.
    if nmon is None or nmon < 1:
        sys.stderr.write("error in end date string. accepted formats are" +
                         " 2011-05-27 or +3m\n")
        sys.exit(1)
else:
    nmon = None

if nmon is not None:
    # Count months from zero so that offsets of a year or more roll the
    # year over as many times as needed (the old code handled one rollover).
    months = SDATE.month - 1 + nmon
    eyear = SDATE.year + months // 12
    emonth = months % 12 + 1
    # Keeps the day of month of SDATE; this raises ValueError if that day
    # does not exist in the target month (SDATE is assumed to be the 1st).
    EDATE = datetime.date(eyear, emonth, SDATE.day)
else:
    EDATE = parsedate(end_ascii)  # end date as given on the command line

print("generating data for jobs between %s and %s" % (SDATE, EDATE))
91 |
# VOs that get a per-month report, and the unix groups belonging to each.
vos = ['alice', 'atlas', 'lhcb']
groups = {
    'alice': ['palice', 'alicesgm', 'alicepil'],
    'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'],
    'lhcb' : ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm']
    }

ACCBASECMD = ['/usr/local/bin/accuse']
# Built as a list rather than by splitting a string, so that a password
# containing whitespace is still passed as one single argument.
ACCBASEARG = ['--byendtime', '-dbpasswd', opts.dbpassw, '-f', 'lcg', '-m']

import subprocess

perlout = dict()

# gather raw accuse output, one run per VO

for vo in vos:
    groupargs = []
    for g in groups[vo]:
        groupargs += ["-g", g]
    # Deliberately not called 'args': that name holds the command-line
    # arguments returned by the option parser.
    acccmd = (ACCBASECMD + ACCBASEARG +
              ["-s", SDATE.isoformat(), "-e", EDATE.isoformat()] + groupargs)
    accproc = subprocess.Popen(acccmd, stdout=subprocess.PIPE)
    perlout[vo] = accproc.communicate()[0]
    if accproc.returncode != 0:
        # Only a warning: whatever output was produced is still parsed, but
        # the operator should know the numbers for this VO may be incomplete.
        sys.stderr.write("Warning: accuse exited with status %d for VO %s\n"
                         % (accproc.returncode, vo))
113 |
|
114 |
def hms2dec(str):
    """Turn an 'h:m:s' string into a floating-point number of hours.

    The argument must contain exactly three colon-separated numeric fields;
    anything else raises ValueError.
    """
    hours, minutes, seconds = [float(part) for part in str.split(':')]
    # Fold seconds into minutes first, then minutes into hours.
    minutes_total = minutes + seconds/60.
    return hours + minutes_total/60.
117 |
|
118 |
# parse output |
119 |
# results in data structure like this |
120 |
|
121 |
# data = parsed['alice']['2010-02']['si06days'] |
122 |
|
123 |
import re |
124 |
# Lines of interest in the accuse output start either with a month
# (yyyy-mm) or with the word 'Summed'.
mpatt = re.compile(r'20[012][0-9]-[01][0-9]')
parsed = {}
for vo in vos:
    vo_months = parsed[vo] = {}
    for line in perlout[vo].split('\n'):
        if not (mpatt.match(line) or line.startswith('Summed')):
            continue
        fields = line.split()
        # Columns used: 0 = month (or 'Summed'), 1 = cpu time h:m:s,
        # 2 = wall time h:m:s, 4 = value scaled to si06days, 5 = job count.
        # The 0.185 divisor is taken over unchanged from the original
        # script; its derivation is not documented here.
        vo_months[fields[0]] = {
            'cpu': hms2dec(fields[1]),
            'wall': hms2dec(fields[2]),
            'si06days': float(fields[4]) / 0.185,
            'njobs': int(fields[5]),
            }
138 |
|
139 |
# Reporting categories; each entry is a key of the 'groups' map below.
hepvos = [
    'alice', 'atlas', 'lhcb', 'auger', 'cms', 'geant', 'dzero', 'xenon',
    'gear',
]
othervos = [
    'biomed', 'dans', 'enmr.eu', 'esr', 'lofar', 'lsgrid', 'ncf',
    'bbmri.nl', 'vlemed', 'tutor', 'phicos', 'chem.biggrid.nl',
]
local = ['nikatlas', 'niklhcb', 'astrop', 'virgo', 'antares']
admin = ['gridstaff']
probes = ['ops', 'ops.nl', 'p4', 'dteam']

# VO (or local group) name -> unix groups whose usage is attributed to it.
# NOTE(review): 'drihm' is not listed in any of the category lists above --
# confirm whether it should be part of othervos.
groups = {
    'alice': ['palice', 'alicesgm', 'alicepil'],
    'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'],
    'lhcb': ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm'],
    'gear': ['poola'],
    'auger': ['auger', 'augsgm'],
    'cms': ['cms'],
    'geant': ['geant'],
    'dzero': ['pdzero'],
    'xenon': ['pxenon', 'pxenonsm'],
    'biomed': ['biome', 'biomesgm'],
    'dans': ['dans'],
    'enmr.eu': ['enmr', 'enmrsgm'],
    'esr': ['esr'],
    'lofar': ['lofar', 'plofarsm'],
    'lsgrid': ['lsgrid'],
    'ncf': ['ncf'],
    'tutor': ['tutor'],
    'ops': ['ops', 'opspil'],
    'phicos': ['phicos', 'phicosgm'],
    'bbmri.nl': ['pbbmri', 'poolb'],
    'chem.biggrid.nl': ['pbchem'],
    'drihm': ['pdrihm'],
    'ops.nl': ['popsnl'],
    'p4': ['pvier', 'pviersm'],
    'vlemed': ['vlemed', 'vlemedsm'],
    'dteam': ['dteam'],
    'nikatlas': ['atlas'],
    'niklhcb': ['bfys'],
    'astrop': ['astrop'],
    'antares': ['antares'],
    'virgo': ['virgo'],
    'gridstaff': ['datagrid', 'emin', 'tbadmin'],
}
180 |
|
181 |
# gather all unix groups, for check later that we haven't missed anything

allunixgroups = []
for g in groups.keys():
    allunixgroups.extend(groups[g])

# The command is an argument list run without a shell, so the password is
# handed to mysql verbatim whatever characters it contains.
basecmd = ['mysql', '-u', 'anon', '-p' + opts.dbpassw,
           '-h', 'bedstee.nikhef.nl', 'accounting']

# SDATE and EDATE are datetime.date objects, so the interpolated values are
# always plain yyyy-mm-dd strings.
ACCSELECT = "select sum(job.WallGHzEquivalent),groupid.id,count(job.JobID) from job,groupid where job.groupid=groupid.UniqueID and job.EndTime >= " \
    + "'" + SDATE.isoformat() + "' and job.EndTime <= '" + EDATE.isoformat() + "' group by groupid.id;"

import subprocess

p = subprocess.Popen(basecmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE)
out, err = p.communicate(ACCSELECT)

# A failed query would otherwise yield empty output and silently produce
# all-zero usage tables.
if p.returncode != 0:
    sys.stderr.write("Error: mysql query failed with exit status %d\n" % p.returncode)
    sys.stderr.write(err)
    sys.stderr.write("Bailing out.\n")
    sys.exit(1)

# The first line of the output is the column header; every following line
# is "<summed WallGHzEquivalent> <unix group> <job count>".
lines = out.splitlines()
resdict = dict()

for l in lines[1:]:
    f = l.split()
    if len(f) < 2:
        continue  # blank or truncated line, nothing to record
    resdict[f[1]] = float(f[0])

for k in resdict.keys():
    if k not in allunixgroups:
        print("")
        print("unknown unix group " + k + " found in mysql output, please check")
        print("")
209 |
|
210 |
# Usage per VO: add up the values of all its unix groups that showed up
# in the mysql output; groups without any entry contribute nothing.
sums = {}
for g in groups:
    sums[g] = sum(resdict[ug] for ug in groups[g] if ug in resdict)

# Totals per reporting category.
sumothervos = sum(sums[g] for g in othervos)
sumlocal = sum(sums[g] for g in local)
sumprobes = sum(sums[g] for g in probes)
sumadmin = sum(sums[g] for g in admin)
233 |
|
234 |
class Table(object):
    """A titled two-column table, filled one named value at a time."""

    def __init__(self, header, columnheaders):
        """Remember the title and the column headings; start without rows."""
        self.data = {}
        self.columnheaders = columnheaders
        self.header = header

    def add(self, name, val):
        """Store val under name, overwriting any earlier value for name."""
        self.data[name] = val
241 |
|
242 |
# Both table titles end with the same description of the reporting period.
period = SDATE.isoformat() + " to " + EDATE.isoformat()

# HEP table: one row per HEP VO, plus one aggregate row per other category.
heptab = Table("HEP Usage @ Nikhef from " + period,
               ["group", "WallGHzEquiv"])
for g in hepvos:
    heptab.add(g, sums[g])
for label, total in [("non-HEP", sumothervos), ("local", sumlocal),
                     ("probes", sumprobes), ("admin", sumadmin)]:
    heptab.add(label, total)

# non-HEP table: one row per non-HEP VO, plus the probes and admin totals.
othertab = Table("non-HEP Usage @ Nikhef from " + period,
                 ["group", "WallGHzEquiv"])
for g in othervos:
    othertab.add(g, sums[g])
for label, total in [("probes", sumprobes), ("admin", sumadmin)]:
    othertab.add(label, total)
258 |
|
259 |
# output generation ... everything goes into one big csv file

import csv

ONEDAY = datetime.timedelta(days=1)


def _month_spans(start, end):
    """Yield (monthkey, ndays) for each reporting month from start to end.

    monthkey is the 'yyyy-mm' prefix of the first day of the span and ndays
    the number of days in it, counting both its first and its last day.  A
    span runs up to the day before the same day-of-month in the following
    month, cut off at end.  The walk assumes start is the first day of a
    month.
    """
    monthstart = start
    while monthstart < end:
        if monthstart.month < 12:
            startnextmonth = monthstart.replace(month=monthstart.month + 1)
        else:
            startnextmonth = monthstart.replace(year=monthstart.year + 1, month=1)
        monthend = startnextmonth - ONEDAY
        if monthend > end:
            monthend = end
        yield monthstart.isoformat()[:7], (monthend - monthstart + ONEDAY).days
        monthstart = monthend + ONEDAY


# Every section below walks the same months, so compute them only once.
month_spans = list(_month_spans(SDATE, EDATE))

csvfile = open('tmp.csv', 'wb')
try:
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL)

    # per-VO segment

    for vo in vos:
        writer.writerow(["Data for", vo])
        writer.writerow(["Month", "si06days.used", "cpu/wall", "pledged", "jobs", "days"])

        for monthkey, ndays in month_spans:
            # Months missing from the accuse output get a placeholder row;
            # cpu == wall == 0.1 keeps the cpu/wall ratio computable.  The
            # summary sections further down rely on these entries existing.
            if monthkey not in parsed[vo]:
                parsed[vo][monthkey] = {
                    'si06days' : 0, 'cpu' : 0.1, 'wall' : 0.1, 'njobs' : 0
                    }
            td = parsed[vo][monthkey]
            writer.writerow([monthkey,
                             td['si06days'],
                             td['cpu']/td['wall'],
                             ndays * pledges[vo],
                             td['njobs'],
                             ndays
                             ])

        writer.writerow([' ', ' '])

    # usage plots

    writer.writerow(["si06 days used per VO"])
    writer.writerow(["Month", "lhcb.used", "atlas.used", "alice.used"])
    for monthkey, ndays in month_spans:
        writer.writerow([monthkey,
                         parsed['lhcb'][monthkey]['si06days'],
                         parsed['atlas'][monthkey]['si06days'],
                         parsed['alice'][monthkey]['si06days']
                         ])

    writer.writerow([' ', ' '])

    # pledge fraction plots

    writer.writerow(["pledge fraction used per VO"])
    writer.writerow(["Month", "lhcb.frac", "atlas.frac", "alice.frac"])
    for monthkey, ndays in month_spans:
        writer.writerow([monthkey,
                         parsed['lhcb'][monthkey]['si06days']/(pledges['lhcb']*ndays),
                         parsed['atlas'][monthkey]['si06days']/(pledges['atlas']*ndays),
                         parsed['alice'][monthkey]['si06days']/(pledges['alice']*ndays)
                         ])

    writer.writerow([' ', ' '])

    # cpu eff plots

    writer.writerow(["ratio cpu to wall time used (eff) per VO"])
    writer.writerow(["Month", "lhcb.eff", "atlas.eff", "alice.eff"])
    for monthkey, ndays in month_spans:
        writer.writerow([monthkey,
                         parsed['lhcb'][monthkey]['cpu']/parsed['lhcb'][monthkey]['wall'],
                         parsed['atlas'][monthkey]['cpu']/parsed['atlas'][monthkey]['wall'],
                         parsed['alice'][monthkey]['cpu']/parsed['alice'][monthkey]['wall'],
                         ])

    writer.writerow([' ', ' '])

    # pledges and total capacity

    writer.writerow(["Pledges and capacties (SI06)"])
    writer.writerow(["Group", "power"])
    writer.writerow(["ATLAS", pledges["atlas"]])
    writer.writerow(["LHCb", pledges["lhcb"]])
    writer.writerow(["ALICE", pledges["alice"]])
    writer.writerow(["farmcap", total_cap])
    writer.writerow([' ', ' '])

    # usage tables, largest consumer first

    for tabl in [heptab, othertab]:
        writer.writerow([tabl.header])
        writer.writerow(tabl.columnheaders)

        groups_sorted = sorted(tabl.data, key=tabl.data.get, reverse=True)
        for g in groups_sorted:
            writer.writerow([g, tabl.data[g]])

        writer.writerow([' ', ' '])
finally:
    # Close explicitly so the data is flushed to disk even if one of the
    # sections above fails half-way.
    csvfile.close()
398 |
|