1 |
#-- |
2 |
# torqueJobs.py -- lower level functions for parsing various forms of information about |
3 |
# jobs running under a Torque LRMS |
4 |
# $URL$ |
5 |
# $Id$ |
6 |
|
7 |
#-- |
8 |
|
9 |
# logFileRecordParser : parse a line from a Torque accounting file

# function logFileRecordParser
# input  : a line from a PBS accounting file
# output : a tuple (timestamp, recordtype, jobid, attributes), or None if the
#          line does not consist of four ';'-separated fields
#   timestamp  : unix timestamp of the record in the log file
#   recordtype : what kind of record it was, e.g. Q = job queued, S = job started, E = end, etc.
#   jobid      : local PBS job ID the record refers to
#   attributes : dictionary containing all other information contained in the record.
17 |
|
18 |
# search pattern for parsing a record string using the "re" module.
# for a successful search, the fields are:
# 1) timestamp
# 2) event type (Q,S,E,D, etc)
# 3) local PBS jobID
# 4) rest of line (key=value pairs), which is parsed separately
# this structure is matched by evpatt.
# NOTE: logFileRecordParser itself splits the record on ';' rather than
# applying evpatt.
25 |
|
26 |
import re

# Whole-record pattern: timestamp;event-type;jobid;rest-of-line.
evpatt = r"^(.+);([A-Z]);(.+);(.*)"

# tpatt matches strings of the form key=val.
# The lookahead assertion is necessary to work around the presence of ' ' and
# '=' in some 'val' strings (like account, or neednodes with multiple
# processors): a value ends only at end of string or right before the next
# " key=".
# NOTE: inside the value character class, ' -@' is a *range* (space through
# '@'); that range is what allows values to contain characters such as
# '.', ',', '+' and '-'.  Do not "tidy" it away.

tpatt = r'[a-z._A-Z]+=[a-z0-9A-Z=/: -@_]+?(?=$| [a-z._A-Z]+=)'
tprog = re.compile(tpatt)
35 |
|
36 |
def logFileRecordParser(record, debug=None):
    """Parse one line of a Torque/PBS accounting log.

    The line is expected to have the form
    ``timestamp;recordtype;jobid;key=val key=val ...`` where the timestamp
    is formatted as ``%m/%d/%Y %H:%M:%S``.

    Arguments:
      record -- the accounting-log line (string)
      debug  -- when true, print the split fields and the key=val matches

    Returns a tuple ``(timestamp, recordtype, jobid, attributes)`` where
    ``timestamp`` is an integer unix timestamp and ``attributes`` is a dict
    built from the key=val pairs.  If the line does not split into four
    ';'-separated fields, a diagnostic is printed and None is returned.

    Raises ValueError (from time.strptime) if the timestamp field does not
    match the expected format.
    """
    import time

    # Only the first three ';' are separators; the attribute part may
    # itself contain ';'.
    m = record.split(';', 3)
    if len(m) != 4:
        print("parse patt failed, offending line is")
        print(record)
        return
    if debug:
        print("timestamp %s" % (m[0],))
        print("code %s" % (m[1],))
        print("jobid %s" % (m[2],))
        print("attrs %s" % (m[3],))

    tmatch = tprog.findall(m[3])
    if debug:
        print("result of key=val match pattern: %s" % (tmatch,))

    # parse timestamp
    ttup = time.strptime(m[0], "%m/%d/%Y %H:%M:%S")

    # last element of time tuple is DST, but PBS log files
    # don't specify time zone.  Setting the last element of
    # the tuple to -1 asks libC to figure it out based on
    # local time zone of machine
    atup = ttup[:8] + (-1,)

    return (int(time.mktime(atup)), m[1], m[2], keyvallist2dict(tmatch))
74 |
|
75 |
## the following takes as argument a list of 'key=val' strings and returns a
## dict mapping each key to its value.  example input list:
## ['user=tdykstra', 'group=niktheorie', 'jobname=Q11_241828.gjob']
78 |
|
79 |
class CantHappenException(Exception):
    """Raised when an entry that should look like 'key=val' has no '='."""


def keyvallist2dict(kvlist):
    """Convert a list of 'key=val' strings into a dict.

    Each entry is split on its first '=' only, so values may themselves
    contain '=' (e.g. 'neednodes=2:ppn=4' -> {'neednodes': '2:ppn=4'}).
    If a key occurs more than once, the last occurrence wins.

    Arguments:
      kvlist -- iterable of strings of the form 'key=val'

    Returns the resulting dict (empty for an empty input).

    Raises CantHappenException if an entry contains no '='; the offending
    entry is printed first.
    """
    d = {}
    for entry in kvlist:
        kv = entry.split("=", 1)
        if len(kv) != 2:
            print("tried to split: %s , result was: %s" % (entry, kv))
            raise CantHappenException(
                "entry %r is not of the form key=val" % (entry,))
        d[kv[0]] = kv[1]
    return d
89 |
|
90 |
# functions for parsing output of 'qstat -f'. |
91 |
|
92 |
def qs_parsejob(instring):
    """Parse the block of 'qstat -f' text describing a single job.

    The first line of the block is stored (stripped) under the key 'jobid'.
    Every following attribute line, of the form '<indent>key = value', adds
    one 'key' -> 'value' entry.  Continuation lines (newline followed by a
    tab) are joined onto the attribute they continue.

    Arguments:
      instring -- text of one job block

    Returns a dict of the job's attributes.  A block that consists of the
    first line only yields a dict with just the 'jobid' entry.

    Raises IndexError if an attribute line contains no ' = ' separator.
    """
    # figure out indentation (avoid hard wiring ;-)) by comparing lengths of
    # stripped and unstripped versions of the first attribute line
    l1 = instring.split('\n', 2)
    if len(l1) < 2:
        # no attribute lines at all: nothing to measure or parse
        return {'jobid': instring.strip()}
    indent = len(l1[1]) - len(l1[1].lstrip())

    # deal with continuation lines
    decont_string = instring.replace('\n\t', '')

    # resplit based on indentation
    l2 = decont_string.split('\n' + indent * ' ')

    indict = {'jobid': l2[0].strip()}
    for line in l2[1:]:
        # split on the first ' = ' only: the value itself may contain ' = '
        flds = line.split(' = ', 1)
        indict[flds[0]] = flds[1]

    return indict
115 |
|
116 |
|
117 |
def qs_parsefile(ifilename):
    """Parse a file holding the complete output of 'qstat -f'.

    The file content (with surrounding whitespace stripped) must begin with
    'Job Id: '.  It is cut into per-job blocks at every blank line followed
    by 'Job Id: ', and each block is handed to qs_parsejob.

    Arguments:
      ifilename -- path of the file containing the 'qstat -f' output

    Returns a list with one dict per job, in file order.

    If the content does not begin with 'Job Id: ' (this includes an empty
    file), an error message is printed and the process exits with status 3.
    """
    # local import, in the same style as the 'import time' used elsewhere
    # in this file; sys is needed for the fatal-error exit below
    import sys

    jobIDstr = 'Job Id: '

    # try/finally rather than 'with' so the handle is always closed while
    # staying usable on old Python 2 interpreters
    infile = open(ifilename, 'r')
    try:
        filestring = infile.read().strip()
    finally:
        infile.close()

    if filestring[:len(jobIDstr)] != jobIDstr:
        print('Fatal error parsing qstat -f output: does not begin with ' +
              jobIDstr)
        sys.exit(3)

    reststring = filestring[len(jobIDstr):]
    return [qs_parsejob(chunk)
            for chunk in reststring.split('\n\n' + jobIDstr)]