1 |
#! /usr/bin/env python |
2 |
# $Id$ |
3 |
# Source: $URL$ |
4 |
# J. A. Templon, NIKHEF/PDP 2011 |
5 |
|
6 |
import optparse |
7 |
|
8 |
# Command-line interface.  The single switch, --rank-only, is stored as
# opts.rankonly and defaults to False.
p = optparse.OptionParser(
    description="Program to make rrdtool plots of job running jobs by unix group"
)
p.add_option(
    "--rank-only",
    dest="rankonly",
    action="store_true",
    default=False,
    help="don't plot, just print ranking of groups",
)

debug = 0

# Parsed once at start-up from the process command line.
opts, args = p.parse_args()
20 |
|
21 |
import os |
22 |
|
23 |
# Number of groups taken for each of the "top" and "bottom" plots.
NUMGROUPS = 8

# Input (RRD files) and output (PNG files) directories, both below $HOME.
DATADIR = '%s/ndpfdata/' % os.environ['HOME']
PLOTDIR = '%s/public_html/' % os.environ['HOME']

# 8 class qualitative paired color scheme, stored in reverse of the
# order listed here.
colors = list(reversed([
    "#A6CEE3", "#1F77B4", "#B2DF8A", "#33A02C",
    "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00",
]))
32 |
|
33 |
# for reference : ranges of the RRAs |
34 |
# base step size 60 sec |
35 |
# step 1 : 60 sec x 1600 points : 1600 min : 26,67 hr : 1.11 days |
36 |
# step 2 : 120 sec x 1200 points : 2400 min : 40 hr : 1,67 days |
37 |
# step 10 : 600 sec x 1800 points : 18 000 min : 300 hr : 12,5 days |
38 |
# step 30 : 1800 sec x 2500 points : 75 000 min : 1250 hr : 52.08 days |
39 |
# step 120 : 7200 sec x 1000 points : 120 000 min : 2000 hr : 83.33 days |
40 |
# step 480 : 28800 sec x 1000 points : 480 000 min : 8000 hr : 333.33 days |
41 |
# step 1440 : 86400 sec x 3650 points : 3650 days : 10 years |
42 |
|
43 |
# resolutions of RRAs |
44 |
|
45 |
# 1 : 60 sec : 1 min |
46 |
# 2 : 120 sec : 2 min |
47 |
# 3 : 600 sec : 10 min |
48 |
# 4 : 1 800 sec : 30 min |
49 |
# 5 : 7 200 sec : 120 min : 2 hr |
50 |
# 6 : 28 800 sec : 480 min : 8 hr |
51 |
# 7 : 86 400 sec : 1440 min : 24 hr : 1 day |
52 |
|
53 |
# Per-time-range plot settings, keyed by range name.  Each entry holds:
#   timeargs : rrdtool start/end options for the plotted window
#   timetag  : label used in output file names and in ranking output
#   avrange  : length in seconds of the window used for ranking averages
#   avres    : resolution in seconds requested when fetching those averages
#   sizeargs : rrdtool size/grid/font options for the 'small' and 'large' plot
#
# All arithmetic below uses floor division and %d formatting so that the
# values stay integers (and the time specs stay free of a ".0" suffix)
# whether or not "/" performs true division.
plotrangedef = {
    'hr': {
        'timeargs': ['-s', 'n-200min', '-e', 'n'],
        'timetag': 'hr',
        'avrange': 24 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'MINUTE:20:HOUR:1:HOUR:1:0:%H:00'],
            'large': ['--width', '800', '--height', '500'],
        },
    },
    'day': {
        'timeargs': ['-s', 'n-2000min', '-e', 'n'],
        'timetag': 'day',
        'avrange': 24 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'HOUR:6:DAY:1:HOUR:12:0:%a %H:00'],
            'large': ['--width', '1000', '--height', '625'],
        },
    },
    'week': {
        'timeargs': ['-s', 'n-288hr', '-e', 'n'],
        'timetag': 'week',
        'avrange': 7 * 24 * 3600,
        'avres': 600,
        'sizeargs': {
            'small': ['--width', '576', '--height', '125'],
            'large': ['--width', '1728', '--height', '375',
                      '-n', 'DEFAULT:16:'],
        },
    },

    # note the construction "576*n" here --- this is because the plot is
    # (a multiple of) 576 pixels, and 120 min is one of the RRAs, so choosing
    # a lower limit of 576*120 gives us a plot with one pixel per RRA bin.

    'month': {
        'timeargs': ['-s', 'n-%dmin' % (576 * 120), '-e', 'n'],
        'timetag': 'month',
        'avrange': 31 * 24 * 3600,
        'avres': 1800,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420',
                      '-n', 'DEFAULT:18:',
                      '--x-grid',
                      'HOUR:12:DAY:1:DAY:3:86400:%d-%b'],
        },
    },

    'year': {
        'timeargs': ['-s', 'n-%dmin' % (576 * 1440), '-e', 'n'],
        'timetag': 'year',
        'avrange': 365 * 24 * 3600,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420'],
        },
    },

    # adjusting for nice plot ... now 15/4 of a year

    'alltime': {
        'timeargs': ['-s', 'n-%dmin' % (15 * 365 * 1440 // 4), '-e', 'n'],
        'timetag': 'alltime',
        'avrange': 15 * 365 * 24 * 3600 // 4,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105',
                      '--x-grid',
                      'MONTH:3:YEAR:1:YEAR:1:31536000:%Y'],
            'large': ['--width', '2304', '--height', '420',
                      '--x-grid',
                      'MONTH:1:YEAR:1:MONTH:3:2592000:%b-%Y'],
        },
    },
}

# rrdtool graph options shared by every plot.
commonargs = ['--imgformat', 'PNG',
              '--legend-position=east', '--legend-direction=bottomup']
134 |
|
135 |
import rrdtool |
136 |
import time |
137 |
import glob |
138 |
|
139 |
### function definitions |
140 |
|
141 |
def doplot(glist, dbtype, psize, timetag, sizeargs, timeargs, pcents, ranktype):
    """Draw one stacked-area plot of per-group values with rrdtool.

    glist    -- group names to plot; the list is copied, never modified
    dbtype   -- used both as the data-source name and as the middle part
                of each RRD file name (DATADIR/<group>.<dbtype>.rrd)
    psize    -- key into sizeargs ('small' or 'large' in this file)
    timetag  -- label that becomes part of the output file name
    sizeargs -- dict mapping psize to a list of rrdtool options
    timeargs -- list of rrdtool options selecting the time window
    pcents   -- dict mapping group name to a percentage shown in the
                legend, or a false value to leave percentages out
    ranktype -- 'top' (adds an "other" area and a "total" line) or
                'bottom'

    The image is written to
    PLOTDIR/<dbtype>-<timetag>-<ranktype>-<psize>.png.
    For any other ranktype a message is printed and the process exits
    with status 2.
    """
    # Imported here because the file-level "import sys" only runs after
    # all plots have been made, so the name is not bound yet when this
    # function is called.
    import sys

    if ranktype not in ('top', 'bottom'):
        print('Unknown ranktype detected: %s' % (ranktype,))
        sys.exit(2)

    grouplist = glist[:]

    # Colors are fixed by a group's position in the caller's list, before
    # any of the reordering or filtering below.
    gcolors = dict((group, colors[idx]) for idx, group in enumerate(grouplist))

    if ranktype == 'bottom':
        # Move the pseudo-groups to the front; 'offline' ends up first.
        for pseudo in ('unused', 'offline'):
            if pseudo in grouplist:
                grouplist.remove(pseudo)
                grouplist.insert(0, pseudo)

    if dbtype == 'queued':
        # The pseudo-groups are not drawn on queued-job plots.
        for pseudo in ('unused', 'offline'):
            if pseudo in grouplist:
                grouplist.remove(pseudo)

    def rrd_def(name):
        return 'DEF:%s=%s%s.%s.rrd:%s:AVERAGE' % (name, DATADIR, name,
                                                  dbtype, dbtype)

    data_defs = [rrd_def(group) for group in grouplist]
    if ranktype == 'top':
        data_defs.append(rrd_def('total'))
        # RPN for "total minus the sum of the shown groups".  With no
        # groups at all, subtract a literal 0 so the expression stays
        # well-formed.
        if grouplist:
            rpn = (['total'] + grouplist
                   + ['+'] * (len(grouplist) - 1) + ['-'])
        else:
            rpn = ['total', '0', '-']
        data_defs.append('CDEF:other=' + ','.join(rpn))

    plot_defs = []
    sumshown = 0
    for group in grouplist:
        if group == 'unused':
            acolor = '#d8d8d8'
        elif group == 'offline':
            acolor = '#790ead'
        else:
            acolor = gcolors[group]
        pdefstr = 'AREA:' + group + acolor + ':' + '%8s' % (group,)
        if pcents:
            pdefstr += ' (' + '%4.1f' % (pcents[group],) + ')'
            sumshown += float(pcents[group])
        pdefstr += '\\n' + ':STACK'
        plot_defs.append(pdefstr)

    if ranktype == 'top':
        # "other" goes first in the list, so the groups stack on top of it.
        pdefstr = 'AREA:other#794044: other'
        if pcents:
            pdefstr += ' (' + '%4.1f' % (100 - sumshown) + ')'
        pdefstr += '\\n'
        plot_defs.insert(0, pdefstr)
        plot_defs.append('LINE:total#000000')

    outfile = PLOTDIR + dbtype + '-' + timetag + '-' + ranktype + '-' + \
        psize + '.png'
    pargs = [outfile] + commonargs + ['-l', '0'] + sizeargs[psize] + \
        timeargs + data_defs + plot_defs
    rrdtool.graph(*pargs)
213 |
|
214 |
def doplot_wait(glist, dbtype, psize, timetag, sizeargs, timeargs, ranktype):
    """Draw one line plot per group, plus 'rollover' and 'lastroll' lines.

    glist    -- group names to plot; the list is copied, never modified
    dbtype   -- used both as the data-source name and as the middle part
                of each RRD file name (DATADIR/<group>.<dbtype>.rrd)
    psize    -- key into sizeargs
    timetag  -- label that becomes part of the output file name
    sizeargs -- dict mapping psize to a list of rrdtool options
    timeargs -- list of rrdtool options selecting the time window
    ranktype -- only used as part of the output file name

    The image is written to
    PLOTDIR/<dbtype>-<timetag>-<ranktype>-<psize>.png.
    """
    shown = list(glist)

    if dbtype == 'waittime':
        # The pseudo-groups are left out of waittime plots.
        for pseudo in ('unused', 'offline'):
            if pseudo in shown:
                shown.remove(pseudo)

    data_defs = [
        'DEF:%s=%s%s.%s.rrd:%s:AVERAGE' % (name, DATADIR, name, dbtype, dbtype)
        for name in shown + ['rollover', 'lastroll']
    ]

    plot_defs = []
    for position, name in enumerate(shown):
        # Pseudo-groups get fixed colors; every other group takes the
        # palette entry matching its position in the (filtered) list.
        if name == 'unused':
            line_color = '#d8d8d8'
        elif name == 'offline':
            line_color = '#790ead'
        else:
            line_color = colors[position]
        plot_defs.append('LINE3:%s%s:%s\\n' % (name, line_color, name))

    plot_defs.extend(['LINE2:rollover#660198', 'LINE2:lastroll#000000'])

    outfile = '%s%s-%s-%s-%s.png' % (PLOTDIR, dbtype, timetag, ranktype, psize)
    pargs = ([outfile] + ['--slope-mode', '-o'] + commonargs
             + sizeargs[psize] + timeargs + data_defs + plot_defs)
    rrdtool.graph(*pargs)
249 |
|
250 |
def makeplots(prangedef):
    """Rank groups by average running jobs, then print or plot one range.

    prangedef is one entry of plotrangedef; the keys read here are
    'avres', 'avrange', 'timetag', 'sizeargs' and 'timeargs'.

    Every DATADIR/*.running.rrd file is fetched over the averaging
    window.  The file whose group name is 'total' supplies the overall
    average; all others are ranked against it.  The NUMGROUPS highest
    groups form the "top" list, the next NUMGROUPS the "bottom" list.

    With --rank-only nothing is plotted: the ranking is printed for
    every range except 'hr', and the function returns.  Otherwise the
    queued, running and waittime plots are drawn for both lists in both
    sizes.
    """
    resolu = prangedef['avres']
    timetag = prangedef['timetag']

    # first need to find "top eight" list
    # base it on running jobs

    # Align the end of the averaging window on a multiple of the
    # resolution.
    now = int(time.mktime(time.localtime()))
    end = (now // resolu) * resolu
    start = end - prangedef['avrange'] + resolu
    range_minutes = (end - start) / 60.

    ### block finding 'top N' group list ###

    tgroup = dict()   # structure tgroup[groupname] = average over the range
    totval = 0.0      # stays zero when no 'total' database is found

    suffix = '.running.rrd'
    for db in glob.glob(DATADIR + '*' + suffix):
        group = db[len(DATADIR):db.find(suffix)]
        fetched = rrdtool.fetch(db, 'AVERAGE', '-r', '%d' % resolu,
                                '-s', '%d' % start, '-e', '%d' % end)

        # Only the first column of each returned row is used; empty
        # (None) and zero entries add nothing, so an empty result sums
        # to zero.
        summed = sum(row[0] for row in fetched[2] if row[0])

        # put numbers in meaningful units now.  result returned is an
        # integration over the time range, of averages over "resolu"
        # ... native resolution is in minutes, so multiplying by
        # (resolu / 60) puts the answer in core-minutes; dividing by
        # the number of minutes in the range gives the average number
        # of cores occupied, over the range

        average = summed * (resolu // 60) / range_minutes
        if group == "total":
            totval = average
        else:
            tgroup[group] = average

    # Percentage of the total per group; all zero when the total itself
    # is zero, instead of dividing by zero.
    pgroup = dict()
    for g in tgroup:
        pgroup[g] = 100 * tgroup[g] / totval if totval else 0.0

    ### start block 'top N' and 'next N' group list ###

    # increasing usage towards end of list
    groups_sorted = sorted(tgroup, key=tgroup.get)

    topgroups = groups_sorted[-NUMGROUPS:]
    bottgroups = groups_sorted[-2 * NUMGROUPS:-NUMGROUPS]

    for glist in (topgroups, bottgroups):
        # Moving these to the end makes them always appear at the top of
        # the plot, 'offline' above 'unused'.
        for pseudo in ('unused', 'offline'):
            if pseudo in glist:
                glist.remove(pseudo)
                glist.append(pseudo)

    if opts.rankonly:
        # --rank-only never plots.  No ranking is printed for 'hr'.
        if timetag != 'hr':
            print("Ranked average running jobs over period %s" % (timetag,))
            print("%10s %12d" % ('total', totval))
            rank = 0
            for g in reversed(groups_sorted):
                if tgroup[g] > 0:
                    rank += 1
                    print("%2d. %10s %9.4f %9.4f%%"
                          % (rank, g, tgroup[g], pgroup[g]))
        return

    ### end block 'top N' group list ###

    ### block generating plots ###

    for dbtype in ('queued', 'running', 'waittime'):

        ### this is a bit of a hack : we only want pgroups for when it's
        ### a 'running' database and we don't want it for timetag hour.

        if dbtype == 'running' and timetag != 'hr':
            percents = pgroup
        else:
            percents = None

        for ranktype, groups in (('top', topgroups), ('bottom', bottgroups)):
            for psize in ('small', 'large'):
                if dbtype == 'waittime':
                    doplot_wait(groups, dbtype, psize, timetag,
                                prangedef['sizeargs'],
                                prangedef['timeargs'], ranktype)
                else:
                    doplot(groups, dbtype, psize, timetag,
                           prangedef['sizeargs'],
                           prangedef['timeargs'],
                           percents, ranktype)
345 |
|
346 |
# sys has to be bound before the loop runs: doplot(), reached through
# makeplots(), calls sys.exit() when it meets an unknown ranktype.
import sys

# Process every configured range in a fixed order, shortest first.
for k in ['hr', 'day', 'week', 'month', 'year', 'alltime']:
    makeplots(plotrangedef[k])

sys.exit(0)