#!/usr/bin/env python
# $Id$
# Source: $URL$
# J. A. Templon, NIKHEF/PDP 2011

# script to do yearly WLCG accounting and format it like I want.
# note : sorting logic is based on a by-month grouping; if you change that,
#        you'll need to change some of the python code.

# constants to be changed each year

pledges = {          # pledges in SI06 units
    'alice' : 3209,
    'atlas' : 30586,
    'lhcb'  : 15388
    }
total_cap = 81718

import optparse

usage = "usage: %prog -p DBpwd start-date [end-date]\n" + \
        "date format yyyy-mm-dd; default end date is 3 months after start date"

p = optparse.OptionParser(description="Program to generate a CSV dump of computing usage " + \
                          " by WLCG over a specified period",usage=usage)

# dbpassword is mandatory
# start date is mandatory
# end date is optional; proving nothing means end date is 3 months after start
# end date can be provided in format yyyy-mm-dd or as +3m (for 3 months later than start)

p.add_option("-p",action="store",dest="dbpassw",default=None,
             help="password for NDPF accounting DB")
debug = 0

opts, args = p.parse_args()

import sys
if not opts.dbpassw :
    sys.stderr.write("Error: password to NDPF accounting DB must be provided with -p\n")
    sys.stderr.write("Bailing out.\n")
    sys.exit(1)
    
if len(args) < 1:
    sys.stderr.write("Error: no date argument detected.  A start date must be provided.\n")
    sys.stderr.write("Format: yyyy-mm-dd\n")
    sys.stderr.write("Bailing out.\n")
    sys.exit(1)
    
start_ascii = args[0]
import datetime
def parsedate(s):
    try:
        farr=s.split("-")
        iarr = list()
        for f in farr:
            iarr.append(int(f))
        return datetime.date(iarr[0],iarr[1],iarr[2])
    except:
        sys.stderr.write("Error parsing date string " + s + "\n")
        raise

if len(args) > 1:
    end_ascii = args[1]
else:
    end_ascii = None

SDATE = parsedate(start_ascii)  # starting date; script logic assumes start of month
if not end_ascii:
    nmon = 3
elif end_ascii[:1] == '+':
    if end_ascii[-1:] != 'm':
        sys.stderr.write("error in end date string.  accepted formats are"+
                         " 2011-05-27 or +3m\n")
        sys.exit(1)
    nmon = int(end_ascii[1:end_ascii.index('m')])
else:
    nmon = None

if nmon:
    eyear = SDATE.year
    emonth = SDATE.month + nmon
    if emonth > 12:
        emonth = emonth - 12
        eyear = eyear + 1
    EDATE = datetime.date(eyear,emonth,SDATE.day)
else:
    EDATE = parsedate(end_ascii)  # starting date; script logic assumes start of month

print "generating data for jobs between", SDATE, "and", EDATE
vos = ['alice', 'atlas', 'lhcb']
groups = {
    'alice': ['palice', 'alicesgm'],
    'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'],
    'lhcb' : ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm']
    }

ACCBASECMD = ['/usr/local/bin/accuse']
ACCBASEARG = ('--byendtime -dbpasswd ' + opts.dbpassw + ' -f lcg -m').split()

import subprocess

perlout=dict()

# gather raw accuse output

for vo in vos:
    groupargs = []
    for g in groups[vo]:
        groupargs += ["-g",g]
    args = ACCBASECMD + ACCBASEARG + ["-s", SDATE.isoformat(), "-e", EDATE.isoformat()] + groupargs
    perlout[vo] = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]

def hms2dec(str):
    h,m,s = str.split(':')
    return float(h) + (float(m) + float(s)/60.)/60.

# parse output
# results in data structure like this

# data = parsed['alice']['2010-02']['si06days']

import re
mpatt = re.compile(r'20[012][0-9]-[01][0-9]')
parsed = dict()
for vo in vos:
    parsed[vo] = dict()
    lines = perlout[vo].split('\n')
    for line in lines:
        if mpatt.match(line) or line.find('Summed') == 0:
            fields = line.split()
            cpu = hms2dec(fields[1])
            wall = hms2dec(fields[2])
            si06days = float(fields[4]) / 0.185
            njobs = int(fields[5])
            parsed[vo][fields[0]] = { 'cpu' : cpu, 'wall': wall,
                                      'si06days': si06days, 'njobs' : njobs }

hepvos   = [ 'alice', 'atlas', 'lhcb', 'auger', 'cms', 'geant', 'dzero', 'xenon', 'gear' ]
othervos = [ 'biomed','dans','enmr.eu','esr', 'lofar', 'lsgrid', 'ncf', 'bbmri.nl', 'vlemed',
            'tutor', 'phicos', 'chem.biggrid.nl' ]
local    = [ 'nikatlas', 'niklhcb', 'astrop', 'virgo', 'antares' ]
admin    = [ 'gridstaff' ]
probes   = [ 'ops', 'ops.nl', 'p4', 'dteam']

groups = {
    'alice': ['palice', 'alicesgm'],
    'atlas': ['atlb', 'atlaspil', 'patlas', 'atlsgm'],
    'lhcb' : ['lhcb', 'lhcbpil', 'lhcbprd', 'lhcbsgm'],
    'gear' : ['poola' ],
    'auger' : ['auger', 'augsgm'],
    'cms' : ['cms'],
    'geant' : ['geant'],
    'dzero' : ['pdzero'],
    'xenon' : ['pxenon', 'pxenonsm'],
    'biomed' : ['biome','biomesgm'],
    'dans'  : ['dans'],
    'enmr.eu' : ['enmr', 'enmrsgm'],
    'esr' : ['esr'],
    'lofar' : ['lofar','plofarsm'],
    'lsgrid' : ['lsgrid'],
    'ncf' : [ 'ncf'],
    'tutor' : [ 'tutor' ],
    'ops' : [ 'ops', 'opspil'],
    'phicos' : [ 'phicos','phicosgm' ],
    'bbmri.nl' : [ 'pbbmri','poolb' ],
    'chem.biggrid.nl' : [ 'pbchem' ],
    'ops.nl' : ['popsnl'],
    'p4' : [ 'pvier', 'pviersm' ],
    'vlemed' : [ 'vlemed', 'vlemedsm' ],
    'dteam' : ['dteam' ],
    'nikatlas' : ['atlas'],
    'niklhcb' : [ 'bfys'],
    'astrop' : [ 'astrop' ],
    'antares' : [ 'antares' ],
    'virgo' : [ 'virgo' ],
    'gridstaff' : ['datagrid','emin', 'tbadmin']
    }

# gather all unix groups, for check later that we haven't missed anything

allunixgroups = [ ]
for g in groups.keys():
    allunixgroups.extend(groups[g])

basecmd = 'mysql -u anon -p' + opts.dbpassw + ' -h bedstee.nikhef.nl accounting'

ACCSELECT  = "select sum(job.WallGHzEquivalent),groupid.id,count(job.JobID) from job,groupid where job.groupid=groupid.UniqueID and job.EndTime >= " \
              + "'" + SDATE.isoformat() + "' and job.EndTime <= '" + EDATE.isoformat() + "' group by groupid.id;"

import subprocess

p = subprocess.Popen(basecmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate(ACCSELECT)

lines = out.split('\n')
resdict = dict()

for l in lines[1:-1]:
    f = l.split()
    resdict[f[1]] = float(f[0])

for k in resdict.keys():
    if k not in allunixgroups:
        print
        print "unknown unix group " + k + " found in mysql output, please check"
        print
        
sums = dict()
for g in groups:
    ts = 0
    for ug in groups[g]:
        if ug in resdict.keys():
            ts = ts + resdict[ug]
    sums[g] = ts

sumothervos = 0
for g in othervos:
    sumothervos = sumothervos + sums[g]

sumlocal = 0
for g in local:
    sumlocal = sumlocal + sums[g]

sumprobes = 0
for g in probes:
    sumprobes = sumprobes + sums[g]

sumadmin = 0
for g in admin:
    sumadmin = sumadmin + sums[g]

class Table(object):
    def __init__(self, header, columnheaders):
        self.header = header
        self.columnheaders = columnheaders
        self.data = dict()
    def add(self, name, val):
        self.data[name] = val

heptab = Table("HEP Usage @ Nikhef from " + SDATE.isoformat() + " to " + EDATE.isoformat(),
               ["group", "WallGHzEquiv"])
for g in hepvos:
    heptab.add(g, sums[g])
heptab.add("non-HEP",sumothervos)
heptab.add("local", sumlocal)
heptab.add("probes", sumprobes)
heptab.add("admin", sumadmin)

othertab = Table("non-HEP Usage @ Nikhef from " + SDATE.isoformat() + " to " + EDATE.isoformat(),
               ["group", "WallGHzEquiv"])

for g in othervos:
    othertab.add(g, sums[g])
othertab.add("probes", sumprobes)
othertab.add("admin", sumadmin)

# output generation ... big damn csv file

import csv
writer = csv.writer(open('tmp.csv', 'wb'), delimiter=',',
                    quotechar='|', quoting=csv.QUOTE_MINIMAL)

ONEDAY=datetime.timedelta(days=1)

# per-VO segment

for vo in vos:
    writer.writerow(["Data for",vo])
    writer.writerow(["Month", "si06days.used", "cpu/wall", "pledged", "jobs", "days"])
    
    monthstart = SDATE

    while monthstart < EDATE:

        if monthstart.month < 12 :
            startnextmonth = monthstart.replace(month=monthstart.month+1)
        else:
            d1 = monthstart.replace(month=1)
            startnextmonth = d1.replace(year=monthstart.year+1)
        monthend =  startnextmonth - ONEDAY
        if monthend > EDATE:
            monthend = EDATE
        ndays = (monthend - monthstart + ONEDAY).days
        monthkey = monthstart.isoformat()[:7]
        if monthkey not in parsed[vo].keys():
            parsed[vo][monthkey] = {
                'si06days' : 0, 'cpu' : 0.1, 'wall' : 0.1, 'njobs' : 0
                }
        td = parsed[vo][monthkey]
        writer.writerow([monthkey,
                         td['si06days'],
                         td['cpu']/td['wall'],
                         ndays * pledges[vo],
                         td['njobs'],
                         ndays
                         ])
        monthstart = monthend + ONEDAY

    writer.writerow([' ',' '])

# usage plots

writer.writerow(["si06 days used per VO"])
writer.writerow(["Month", "lhcb.used", "atlas.used", "alice.used"])
monthstart = SDATE
while monthstart < EDATE:
    if monthstart.month < 12 :
        startnextmonth = monthstart.replace(month=monthstart.month+1)
    else:
        d1 = monthstart.replace(month=1)
        startnextmonth = d1.replace(year=monthstart.year+1)
    monthend =  startnextmonth - ONEDAY
    if monthend > EDATE:
        monthend = EDATE
    ndays = (monthend - monthstart + ONEDAY).days
    monthkey = monthstart.isoformat()[:7]
    td = parsed[vo][monthkey]
    writer.writerow([monthkey,
                     parsed['lhcb'][monthkey]['si06days'],
                     parsed['atlas'][monthkey]['si06days'],
                     parsed['alice'][monthkey]['si06days']
                     ])
    monthstart = monthend + ONEDAY

writer.writerow([' ',' '])

# pledge fraction plots

writer.writerow(["pledge fraction used per VO"])
writer.writerow(["Month", "lhcb.frac", "atlas.frac", "alice.frac"])
monthstart = SDATE
while monthstart < EDATE:
    if monthstart.month < 12 :
        startnextmonth = monthstart.replace(month=monthstart.month+1)
    else:
        d1 = monthstart.replace(month=1)
        startnextmonth = d1.replace(year=monthstart.year+1)
    monthend =  startnextmonth - ONEDAY
    if monthend > EDATE:
        monthend = EDATE
    ndays = (monthend - monthstart + ONEDAY).days
    monthkey = monthstart.isoformat()[:7]
    td = parsed[vo][monthkey]
    writer.writerow([monthkey,
                     parsed['lhcb'][monthkey]['si06days']/(pledges['lhcb']*ndays),
                     parsed['atlas'][monthkey]['si06days']/(pledges['atlas']*ndays),
                     parsed['alice'][monthkey]['si06days']/(pledges['alice']*ndays)
                     ])
    monthstart = monthend + ONEDAY

writer.writerow([' ',' '])

# cpu eff plots

writer.writerow(["ratio cpu to wall time used (eff) per VO"])
writer.writerow(["Month", "lhcb.eff", "atlas.eff", "alice.eff"])
monthstart = SDATE
while monthstart < EDATE:
    if monthstart.month < 12 :
        startnextmonth = monthstart.replace(month=monthstart.month+1)
    else:
        d1 = monthstart.replace(month=1)
        startnextmonth = d1.replace(year=monthstart.year+1)
    monthend =  startnextmonth - ONEDAY
    if monthend > EDATE:
        monthend = EDATE
    ndays = (monthend - monthstart + ONEDAY).days
    monthkey = monthstart.isoformat()[:7]
    td = parsed[vo][monthkey]
    writer.writerow([monthkey,
                     parsed['lhcb'][monthkey]['cpu']/parsed['lhcb'][monthkey]['wall'],
                     parsed['atlas'][monthkey]['cpu']/parsed['atlas'][monthkey]['wall'],
                     parsed['alice'][monthkey]['cpu']/parsed['alice'][monthkey]['wall'],
                     ])
    monthstart = monthend + ONEDAY

writer.writerow([' ',' '])

writer.writerow(["Pledges and capacties (SI06)"])
writer.writerow(["Group",   "power"])
writer.writerow(["ATLAS",   pledges["atlas"]])
writer.writerow(["LHCb",    pledges["lhcb"] ])
writer.writerow(["ALICE",   pledges["alice"]])
writer.writerow(["farmcap", total_cap])
writer.writerow([' ',' '])

for tabl in [heptab, othertab]:
    writer.writerow([tabl.header])
    writer.writerow(tabl.columnheaders)
    
    groups_sorted = sorted(tabl.data, key=tabl.data.get, reverse=True)
    for g in groups_sorted:
        writer.writerow([g, tabl.data[g]])

    writer.writerow([' ',' '])

