#!/usr/bin/env python # $Id$ # Source: $URL$ # J. A. Templon, NIKHEF/PDP 2011 # script to do yearly WLCG accounting and format it like I want. # note : sorting logic is based on a by-month grouping; if you change that, # you'll need to change some of the python code. # constants to be changed each year pledges = { # pledges in SI06 units 'alice' : 3183, 'atlas' : 30342, 'lhcb' : 15264 } total_cap = 68062 import optparse usage = "usage: %prog -p DBpwd start-date [end-date]\n" + \ "date format yyyy-mm-dd; default end date is 3 months after start date" p = optparse.OptionParser(description="Program to generate a CSV dump of computing usage " + \ " by WLCG over a specified period",usage=usage) # dbpassword is mandatory # start date is mandatory # end date is optional; proving nothing means end date is 3 months after start # end date can be provided in format yyyy-mm-dd or as +3m (for 3 months later than start) p.add_option("-p",action="store",dest="dbpassw",default=None, help="password for NDPF accounting DB") debug = 0 opts, args = p.parse_args() import sys if not opts.dbpassw : sys.stderr.write("Error: password to NDPF accounting DB must be provided with -p\n") sys.stderr.write("Bailing out.\n") sys.exit(1) if len(args) < 1: sys.stderr.write("Error: no date argument detected. A start date must be provided.\n") sys.stderr.write("Format: yyyy-mm-dd\n") sys.stderr.write("Bailing out.\n") sys.exit(1) start_ascii = args[0] import datetime def parsedate(s): try: farr=s.split("-") iarr = list() for f in farr: iarr.append(int(f)) return datetime.date(iarr[0],iarr[1],iarr[2]) except: sys.stderr.write("Error parsing date string " + s + "\n") raise if len(args) > 1: end_ascii = args[1] else: end_ascii = None SDATE = parsedate(start_ascii) # starting date; script logic assumes start of month if not end_ascii: nmon = 3 elif end_ascii[:1] == '+': if end_ascii[-1:] != 'm': sys.stderr.write("error in end date string. accepted formats are"+ " 2011-05-27 or +3m\n") sys.exit(1) nmon = int(end_ascii[1:end_ascii.index('m')]) else: nmon = None if nmon: eyear = SDATE.year emonth = SDATE.month + nmon if emonth > 12: emonth = emonth - 12 eyear = eyear + 1 EDATE = datetime.date(eyear,emonth,SDATE.day) else: EDATE = parsedate(end_ascii) # starting date; script logic assumes start of month print "generating data for jobs between", SDATE, "and", EDATE vos = ['alice', 'atlas', 'lhcb'] groups = { 'alice': ['palice', 'alicesgm'], 'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'], 'lhcb' : ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm'] } ACCBASECMD = ['/usr/local/bin/accuse'] ACCBASEARG = ('--byendtime -dbpasswd ' + opts.dbpassw + ' -f lcg -m').split() import subprocess perlout=dict() # gather raw accuse output for vo in vos: groupargs = [] for g in groups[vo]: groupargs += ["-g",g] args = ACCBASECMD + ACCBASEARG + ["-s", SDATE.isoformat(), "-e", EDATE.isoformat()] + groupargs perlout[vo] = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0] def hms2dec(str): h,m,s = str.split(':') return float(h) + (float(m) + float(s)/60.)/60. # parse output # results in data structure like this # data = parsed['alice']['2010-02']['si06days'] import re mpatt = re.compile(r'20[012][0-9]-[01][0-9]') parsed = dict() for vo in vos: parsed[vo] = dict() lines = perlout[vo].split('\n') for line in lines: if mpatt.match(line) or line.find('Summed') == 0: fields = line.split() cpu = hms2dec(fields[1]) wall = hms2dec(fields[2]) si06days = float(fields[4]) / 0.185 njobs = int(fields[5]) parsed[vo][fields[0]] = { 'cpu' : cpu, 'wall': wall, 'si06days': si06days, 'njobs' : njobs } hepvos = [ 'alice', 'atlas', 'lhcb', 'auger', 'cms', 'geant', 'dzero', 'xenon', 'gear' ] othervos = [ 'biomed','dans','enmr.eu','esr', 'lofar', 'lsgrid', 'ncf', 'bbmri.nl', 'vlemed', 'tutor', 'phicos', 'chem.biggrid.nl' ] local = [ 'nikatlas', 'niklhcb', 'astrop', 'virgo', 'antares' ] admin = [ 'gridstaff' ] probes = [ 'ops', 'ops.nl', 'p4', 'dteam'] groups = { 'alice': ['palice', 'alicesgm'], 'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'], 'lhcb' : ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm'], 'gear' : ['poola' ], 'auger' : ['auger', 'augsgm'], 'cms' : ['cms'], 'geant' : ['geant'], 'dzero' : ['pdzero'], 'xenon' : ['pxenon', 'pxenonsm'], 'biomed' : ['biome','biomesgm'], 'dans' : ['dans'], 'enmr.eu' : ['enmr', 'enmrsgm'], 'esr' : ['esr'], 'lofar' : ['lofar','plofarsm'], 'lsgrid' : ['lsgrid'], 'ncf' : [ 'ncf'], 'tutor' : [ 'tutor' ], 'ops' : [ 'ops', 'opspil'], 'phicos' : [ 'phicos','phicosgm' ], 'bbmri.nl' : [ 'pbbmri','poolb' ], 'chem.biggrid.nl' : [ 'pbchem' ], 'ops.nl' : ['popsnl'], 'p4' : [ 'pvier', 'pviersm' ], 'vlemed' : [ 'vlemed', 'vlemedsm' ], 'dteam' : ['dteam' ], 'nikatlas' : ['atlas'], 'niklhcb' : [ 'bfys'], 'astrop' : [ 'astrop' ], 'antares' : [ 'antares' ], 'virgo' : [ 'virgo' ], 'gridstaff' : ['datagrid','emin', 'tbadmin'] } # gather all unix groups, for check later that we haven't missed anything allunixgroups = [ ] for g in groups.keys(): allunixgroups.extend(groups[g]) basecmd = 'mysql -u anon -p' + opts.dbpassw + ' -h bedstee.nikhef.nl accounting' ACCSELECT = "select sum(job.WallGHzEquivalent),groupid.id,count(job.JobID) from job,groupid where job.groupid=groupid.UniqueID and job.EndTime >= " \ + "'" + SDATE.isoformat() + "' and job.EndTime <= '" + EDATE.isoformat() + "' group by groupid.id;" import subprocess p = subprocess.Popen(basecmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate(ACCSELECT) lines = out.split('\n') resdict = dict() for l in lines[1:-1]: f = l.split() resdict[f[1]] = float(f[0]) for k in resdict.keys(): if k not in allunixgroups: print print "unknown unix group " + k + " found in mysql output, please check" print sums = dict() for g in groups: ts = 0 for ug in groups[g]: if ug in resdict.keys(): ts = ts + resdict[ug] sums[g] = ts sumothervos = 0 for g in othervos: sumothervos = sumothervos + sums[g] sumlocal = 0 for g in local: sumlocal = sumlocal + sums[g] sumprobes = 0 for g in probes: sumprobes = sumprobes + sums[g] sumadmin = 0 for g in admin: sumadmin = sumadmin + sums[g] class Table(object): def __init__(self, header, columnheaders): self.header = header self.columnheaders = columnheaders self.data = dict() def add(self, name, val): self.data[name] = val heptab = Table("HEP Usage @ Nikhef from " + SDATE.isoformat() + " to " + EDATE.isoformat(), ["group", "WallGHzEquiv"]) for g in hepvos: heptab.add(g, sums[g]) heptab.add("non-HEP",sumothervos) heptab.add("local", sumlocal) heptab.add("probes", sumprobes) heptab.add("admin", sumadmin) othertab = Table("non-HEP Usage @ Nikhef from " + SDATE.isoformat() + " to " + EDATE.isoformat(), ["group", "WallGHzEquiv"]) for g in othervos: othertab.add(g, sums[g]) othertab.add("probes", sumprobes) othertab.add("admin", sumadmin) # output generation ... big damn csv file import csv writer = csv.writer(open('tmp.csv', 'wb'), delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) ONEDAY=datetime.timedelta(days=1) # per-VO segment for vo in vos: writer.writerow(["Data for",vo]) writer.writerow(["Month", "si06days.used", "cpu/wall", "pledged", "jobs", "days"]) monthstart = SDATE while monthstart < EDATE: if monthstart.month < 12 : startnextmonth = monthstart.replace(month=monthstart.month+1) else: d1 = monthstart.replace(month=1) startnextmonth = d1.replace(year=monthstart.year+1) monthend = startnextmonth - ONEDAY if monthend > EDATE: monthend = EDATE ndays = (monthend - monthstart + ONEDAY).days monthkey = monthstart.isoformat()[:7] if monthkey not in parsed[vo].keys(): parsed[vo][monthkey] = { 'si06days' : 0, 'cpu' : 0.1, 'wall' : 0.1, 'njobs' : 0 } td = parsed[vo][monthkey] writer.writerow([monthkey, td['si06days'], td['cpu']/td['wall'], ndays * pledges[vo], td['njobs'], ndays ]) monthstart = monthend + ONEDAY writer.writerow([' ',' ']) # usage plots writer.writerow(["si06 days used per VO"]) writer.writerow(["Month", "lhcb.used", "atlas.used", "alice.used"]) monthstart = SDATE while monthstart < EDATE: if monthstart.month < 12 : startnextmonth = monthstart.replace(month=monthstart.month+1) else: d1 = monthstart.replace(month=1) startnextmonth = d1.replace(year=monthstart.year+1) monthend = startnextmonth - ONEDAY if monthend > EDATE: monthend = EDATE ndays = (monthend - monthstart + ONEDAY).days monthkey = monthstart.isoformat()[:7] td = parsed[vo][monthkey] writer.writerow([monthkey, parsed['lhcb'][monthkey]['si06days'], parsed['atlas'][monthkey]['si06days'], parsed['alice'][monthkey]['si06days'] ]) monthstart = monthend + ONEDAY writer.writerow([' ',' ']) # pledge fraction plots writer.writerow(["pledge fraction used per VO"]) writer.writerow(["Month", "lhcb.frac", "atlas.frac", "alice.frac"]) monthstart = SDATE while monthstart < EDATE: if monthstart.month < 12 : startnextmonth = monthstart.replace(month=monthstart.month+1) else: d1 = monthstart.replace(month=1) startnextmonth = d1.replace(year=monthstart.year+1) monthend = startnextmonth - ONEDAY if monthend > EDATE: monthend = EDATE ndays = (monthend - monthstart + ONEDAY).days monthkey = monthstart.isoformat()[:7] td = parsed[vo][monthkey] writer.writerow([monthkey, parsed['lhcb'][monthkey]['si06days']/(pledges['lhcb']*ndays), parsed['atlas'][monthkey]['si06days']/(pledges['atlas']*ndays), parsed['alice'][monthkey]['si06days']/(pledges['alice']*ndays) ]) monthstart = monthend + ONEDAY writer.writerow([' ',' ']) # cpu eff plots writer.writerow(["ratio cpu to wall time used (eff) per VO"]) writer.writerow(["Month", "lhcb.eff", "atlas.eff", "alice.eff"]) monthstart = SDATE while monthstart < EDATE: if monthstart.month < 12 : startnextmonth = monthstart.replace(month=monthstart.month+1) else: d1 = monthstart.replace(month=1) startnextmonth = d1.replace(year=monthstart.year+1) monthend = startnextmonth - ONEDAY if monthend > EDATE: monthend = EDATE ndays = (monthend - monthstart + ONEDAY).days monthkey = monthstart.isoformat()[:7] td = parsed[vo][monthkey] writer.writerow([monthkey, parsed['lhcb'][monthkey]['cpu']/parsed['lhcb'][monthkey]['wall'], parsed['atlas'][monthkey]['cpu']/parsed['atlas'][monthkey]['wall'], parsed['alice'][monthkey]['cpu']/parsed['alice'][monthkey]['wall'], ]) monthstart = monthend + ONEDAY writer.writerow([' ',' ']) writer.writerow(["Pledges and capacties (SI06)"]) writer.writerow(["Group", "power"]) writer.writerow(["ATLAS", pledges["atlas"]]) writer.writerow(["LHCb", pledges["lhcb"] ]) writer.writerow(["ALICE", pledges["alice"]]) writer.writerow(["farmcap", total_cap]) writer.writerow([' ',' ']) for tabl in [heptab, othertab]: writer.writerow([tabl.header]) writer.writerow(tabl.columnheaders) groups_sorted = sorted(tabl.data, key=tabl.data.get, reverse=True) for g in groups_sorted: writer.writerow([g, tabl.data[g]]) writer.writerow([' ',' '])