#!/usr/bin/env python
"""Record capacity and per-group usage of the 'mc' worker-node pool in RRD files."""

import os
import subprocess
import sys

from torque_utils import pbsnodes

# Torque 2 client utilities are also on PATH.  This script talks to korf
# (Torque 4), so the Torque 4 bin directory is put up front.
os.environ['PATH'] = '/opt/torque4/bin:' + os.environ['PATH']

# Groups (job 'egroup' values) for which queued and running counts are kept.
GROUPS = [
    'atlb', 'atlaspil', 'datagrid', 'pbbmri',
    'virgo', 'pvier', 'lsgrid', 'atlsgm',
]
# Queue whose jobs are listed with qstat.
MCQUEUE = 'atlasmc7'
# Batch server that hosts the queue (used as queue@server).
TORQUE = "korf.nikhef.nl"

import pickle

# Disabled alternative: read the node list from a pickle file instead of
# calling pbsnodes().
# pkl_file = open('/data/tunnel/user/templon/wnodes-korf.pkl', 'rb')
# wnodes_k = pickle.load(pkl_file)
# pkl_file.close()
# wnodes = wnodes_k

wnodes = pbsnodes()

# Keep the nodes that carry the 'mc' property, do not carry the 'el7'
# property, and whose state does not contain 'offline'.
mcnodes = [
    node for node in wnodes
    if 'mc' in node.properties
    and 'el7' not in node.properties
    and node.state.count('offline') == 0
]

# Pool-wide counters; mcslots and mcfree are accumulated over the selected
# nodes further down.  mcrun is initialised here but not used in this file.
mcslots = 0
mcfree = 0
mcrun = 0

# Job attributes that are kept from each parsed qstat record.
usedkeys = ['egroup', 'jobid', 'queue', 'job_state', 'euser', 'exec_host']

import torqueJobs
import torqueAttMappers as tam

import tempfile
import time

# Directory that holds the qstat dump and the kept copy of the last dump.
qsdir = os.path.join(os.environ['HOME'], 'tmp')

# NOTE(review): tempfile.mktemp() only picks a name and is documented as
# race-prone.  It is kept because mkstemp() creates the file with mode 0600,
# which would also change the permissions of qstat.last.mcpool.txt below.
qsfname = tempfile.mktemp(".txt", "qsmf", qsdir)

# Dump the full job listing of the queue into the temporary file.  The
# Torque 4 client is called by absolute path (not /usr/bin/qstat) and with an
# argument list, so no shell parses the command line or the file name.
with open(qsfname, 'w') as qsfile:
    subprocess.call(
        ['/opt/torque4/bin/qstat-torque', '-f', '%s@%s' % (MCQUEUE, TORQUE)],
        stdout=qsfile)
now = time.mktime(time.localtime())

jlist = torqueJobs.qs_parsefile(qsfname)

# Keep the dump, replacing the copy left by the previous run.
os.rename(qsfname, os.path.join(qsdir, 'qstat.last.mcpool.txt'))

def mapatts(indict, inkeys):
    """Extract and normalise selected attributes of one job record.

    Parameters:
        indict: attribute dictionary of a single job.
        inkeys: names of the attributes to keep; handed to tam.sub_dict().

    Returns:
        A new dict with every selected attribute whose (converted) value is
        truthy, plus a 'wn' entry when 'exec_host' is set.

    Conversions applied per key:
      * key in tam.tfl  -> tam.hms(value) / 3600.  (presumably h:m:s to
        hours -- confirm against torqueAttMappers)
      * key in tam.mfl  -> tam.memconvert(value)
      * key in tam.tfl2 -> tam.tconv(value)
      * 'job_state'     -> 'queued' for Q/W, 'running' for R/E; any other
        state letter is kept unchanged
      * 'exec_host'     -> kept as is; the text before the first '/' is
        additionally stored under 'wn'
    """
    sdict = tam.sub_dict(indict, inkeys)
    odict = dict()
    for key in sdict:
        value = sdict[key]
        # The tfl and mfl checks are independent; only the tfl2 check heads
        # the if/elif chain below.
        if key in tam.tfl and value:
            value = tam.hms(value) / 3600.
        if key in tam.mfl and value:
            value = tam.memconvert(value)
        if key in tam.tfl2 and value:
            value = tam.tconv(value)
        elif key == 'job_state':
            if value in ('Q', 'W'):
                value = 'queued'
            elif value in ('R', 'E'):
                value = 'running'
        elif key == 'exec_host' and value:
            # split() keeps the whole string when there is no '/', whereas
            # slicing at find('/') == -1 would drop the last character.
            odict['wn'] = value.split('/', 1)[0]
        if value:
            odict[key] = value
    return odict

# Job id -> normalised attribute dict, for every job in the qstat listing.
# Example entry:
#   {'queue': 'atlasmc', 'euser': 'templon', 'job_state': 'queued',
#    'egroup': 'datagrid', 'jobid': '43609804.stro.nikhef.nl'}
mcjobdict = dict((j['jobid'], mapatts(j, usedkeys)) for j in jlist)

# runjobdict[wn][group] = number of running processes of that group on node wn
runjobdict = dict()

# Per-group totals: nq counts queued jobs, nr counts running processes.
nq = dict((g, 0) for g in GROUPS)
nr = dict((g, 0) for g in GROUPS)

# Groups found in the job list that are not in GROUPS.  Indexing nq/nr with
# such a group would raise KeyError and abort the run, so these jobs are
# left out of nq and nr and the groups are reported once on stderr.
untracked = set()

for jid in mcjobdict:
    thisj = mcjobdict[jid]
    if thisj['job_state'] == 'queued':
        if thisj['egroup'] in nq:
            nq[thisj['egroup']] += 1
        else:
            untracked.add(thisj['egroup'])
    elif thisj['job_state'] == 'running':
        pergroup = runjobdict.setdefault(thisj['wn'], dict())
        # One process is counted per '+'-separated entry of exec_host; all
        # of them are booked on the node named in 'wn'.
        nprocs = thisj['exec_host'].count('+') + 1
        pergroup[thisj['egroup']] = pergroup.get(thisj['egroup'], 0) + nprocs

for n in mcnodes:
    mcslots += n.numCpu
    mcfree += n.freeCpu
    # Only processes on the selected nodes are added to the per-group totals.
    for gd, nprocs in runjobdict.get(n.name, {}).items():
        if gd in nr:
            nr[gd] += nprocs
        else:
            untracked.add(gd)

if untracked:
    sys.stderr.write('mcpool: ignoring jobs of untracked group(s): %s\n'
                     % ', '.join(sorted(untracked)))

import rrdtool

# Running processes summed over all groups in nr.
rtot = sum(nr.values())

DATADIR = os.environ['HOME'] + '/ndpfdata/'

# Pool-wide samples.  The last one is the number of occupied slots that are
# not accounted for by the running processes counted in nr.
for rrdname, sample in (
        ('capacity.mcpool.rrd', mcslots),
        ('unused.mcpool.rrd', mcfree),
        ('nonmc.running.mcpool.rrd', mcslots - mcfree - rtot),
):
    rrdtool.update(DATADIR + rrdname, 'N:%d' % sample)

# One running and one queued sample per group.
for g in nr:
    rrdtool.update(DATADIR + '%s.running.mcpool.rrd' % g, 'N:%d' % nr[g])
    rrdtool.update(DATADIR + '%s.queued.mcpool.rrd' % g, 'N:%d' % nq[g])