1 |
#! /usr/bin/env python |
2 |
# $Id$ |
3 |
# Source: $URL$ |
4 |
# J. A. Templon, NIKHEF/PDP 2011 |
5 |
|
6 |
import optparse |
7 |
|
8 |
# Command-line interface definition.
p = optparse.OptionParser(
    description=("Program to make rrdtool plots "
                 "of job running jobs by unix group"))

# p.add_option("-r",action="store",dest="minsize",default='0',help="minimum size of dirs considered; can use suffixes k,M,G for multiples of 1000**{1,2,3} bytes")
# p.add_option("--qdel",action="store_true",dest="deljobs",help="delete jobs for which TMPDIR is larger than MINSIZE",default=False)

# --rank-only : print the ranking of groups instead of producing plots.
p.add_option(
    "--rank-only",
    action="store_true",
    dest="rankonly",
    default=False,
    help="don't plot, just print ranking of groups",
)

# -c / --calmonth : restrict the run to a single calendar month.
p.add_option(
    "-c", "--calmonth",
    action="store",
    dest="month",
    default=None,
    help=("plot for a calendar month. Format for specified month: 201707 for\n"
          " July 2017, or prev for the most recent fully-completed month."),
)

debug = 0

# Parsed options are consulted as the module-level name ``opts``.
opts, args = p.parse_args()
24 |
|
25 |
import os |
26 |
|
27 |
# Directory holding the per-group RRD files.  The trailing separator is
# significant: file names are appended to DATADIR by string concatenation.
# expanduser() uses $HOME when it is set and otherwise falls back to the
# password database, so a missing HOME variable does not raise KeyError.
DATADIR = os.path.join(os.path.expanduser('~'), 'ndpfdata', '')

# Prefix under which the generated PNG files are written.
PLOTDIR = '/www/grid/stats/groupviews/ndpf/'

# 8 class qualitative paired color scheme

colors = [
    "#A6CEE3", "#1F77B4", "#B2DF8A", "#33A02C",
    "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00",
]

# The palette is consumed in reversed order (orange first, light blue last).
colors.reverse()
36 |
|
37 |
# for reference : ranges of the RRAs
# base step size 60 sec
# step    1 :    60 sec x 1600 points :   1 600 min :   26.67 hr :   1.11 days
# step    2 :   120 sec x 1200 points :   2 400 min :   40 hr    :   1.67 days
# step   10 :   600 sec x 1800 points :  18 000 min :  300 hr    :  12.5 days
# step   30 :  1800 sec x 2500 points :  75 000 min : 1250 hr    :  52.08 days
# step  120 :  7200 sec x 1000 points : 120 000 min : 2000 hr    :  83.33 days
# step  480 : 28800 sec x 1000 points : 480 000 min : 8000 hr    : 333.33 days
# step 1440 : 86400 sec x 3650 points : 3650 days : 10 years

# resolutions of RRAs

# 1 :     60 sec :    1 min
# 2 :    120 sec :    2 min
# 3 :    600 sec :   10 min
# 4 :  1 800 sec :   30 min
# 5 :  7 200 sec :  120 min :  2 hr
# 6 : 28 800 sec :  480 min :  8 hr
# 7 : 86 400 sec : 1440 min : 24 hr : 1 day
56 |
|
57 |
NUMQS = 40       # number of quarters for "alltime" plot

# Per-range plot configuration, keyed by range name.  Each entry holds:
#   timeargs  : rrdtool start/end arguments for the graph
#   timetag   : tag used in the output file name
#   avrange   : length (seconds) of the window used to rank the groups
#   avres     : resolution (seconds) requested when fetching data for ranking
#   sizeargs  : extra rrdtool arguments for the 'small' and 'large' variants
#   numgroups : number of groups shown per plot
#
# Time offsets are formatted with %d and computed with integer division so
# that they are always rendered as whole numbers (true division would produce
# e.g. "5256000.0" on Python 3).
plotrangedef = {
    'hr': {
        'timeargs': ['-s', 'n-200min', '-e', 'n'],
        'timetag': 'hr',
        'avrange': 3 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'MINUTE:20:HOUR:1:HOUR:1:0:%H:00'],
            'large': ['--width', '800', '--height', '500'],
        },
        'numgroups': 4,
    },
    'day': {
        'timeargs': ['-s', 'n-2000min', '-e', 'n'],
        'timetag': 'day',
        'avrange': 24 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'HOUR:6:DAY:1:HOUR:12:0:%a %H:00'],
            'large': ['--width', '1000', '--height', '625'],
        },
        'numgroups': 5,
    },
    'week': {
        'timeargs': ['-s', 'n-288hr', '-e', 'n'],
        'timetag': 'week',
        'avrange': 7 * 24 * 3600,
        'avres': 600,
        'sizeargs': {
            'small': ['--width', '576', '--height', '125'],
            'large': ['--width', '1728', '--height', '375',
                      '-n', 'DEFAULT:16:'],
        },
        'numgroups': 6,
    },

    # note the span of 576*120 minutes here --- this is because the plot is
    # (a multiple of) 576 pixels, and 120 min is one of the RRAs, so choosing
    # a lower limit of 576*120 gives us a plot with one pixel per RRA bin.

    'month': {
        'timeargs': ['-s', 'n-%dmin' % (576 * 120), '-e', 'n'],
        'timetag': 'month',
        'avrange': 31 * 24 * 3600,
        'avres': 1800,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420',
                      '-n', 'DEFAULT:18:',
                      '--x-grid',
                      'HOUR:12:DAY:1:DAY:3:86400:%d-%b'],
        },
        'numgroups': 7,
    },

    # note the span of 372*120 minutes here --- this is because the plot is
    # based on 120 min bins, and 372 bins of 120 minutes is 31 days.
    # 120 min is the granularity of one of the RRAs.  The string START
    # needs to be replaced at run time based on the program arguments.

    'cmonth': {
        'timeargs': ['-s', 'START', '-e', 's+%dmin' % (372 * 120)],
        'timetag': 'cmonth',
        'avrange': 31 * 24 * 3600,
        'avres': 1800,
        'sizeargs': {
            'small': ['--width', '372', '--height', '105',
                      '--x-grid', 'DAY:1:DAY:10:DAY:5:0:%d'],
            'large': ['--width', '1488', '--height', '420'],
        },
        'numgroups': 7,
    },

    'year': {
        'timeargs': ['-s', 'n-%dmin' % (576 * 1440), '-e', 'n'],
        'timetag': 'year',
        'avrange': 365 * 24 * 3600,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420'],
        },
        'numgroups': 8,
    },

    # adjusting for nice plot ... set NUMQS above; range is NUMQS/4 years

    'alltime': {
        'timeargs': ['-s', 'n-%dmin' % (NUMQS * 365 * 1440 // 4), '-e', 'n'],
        'timetag': 'alltime',
        'avrange': NUMQS * 365 * 24 * 3600 // 4,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105',
                      '--x-grid',
                      'MONTH:3:YEAR:1:YEAR:1:31536000:%Y'],
            'large': ['--width', '2304', '--height', '420',
                      '--x-grid',
                      'MONTH:1:YEAR:1:MONTH:3:2592000:%b-%Y'],
        },
        'numgroups': 8,
    },
}

# rrdtool arguments shared by every graph.
commonargs = ['--imgformat', 'PNG',
              '--legend-position=east', '--legend-direction=bottomup']
167 |
|
168 |
import rrdtool |
169 |
import time |
170 |
import glob |
171 |
|
172 |
### function definitions |
173 |
|
174 |
def doplot(glist, dbtype, psize, timetag, sizeargs, timeargs, pcents, ranktype):
    """Draw one stacked-area graph of per-group values with rrdtool.

    glist    -- group names to plot; the caller's list is not modified
    dbtype   -- used both as the infix of the RRD file name
                (<group>.<dbtype>.rrd) and as the data-source name
    psize    -- key selecting the entry of ``sizeargs`` to use
    timetag  -- tag embedded in the output file name
    sizeargs -- dict mapping ``psize`` to a list of rrdtool arguments
    timeargs -- list of rrdtool start/end arguments
    pcents   -- dict mapping group name to a percentage appended to the
                legend entry, or a false value for no percentages
    ranktype -- 'top' or 'bottom'; 'top' additionally plots an "other"
                area (total minus the listed groups) and the total line

    The graph is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.
    Exits the program with status 2 on an unknown ``ranktype``.
    """
    if ranktype not in ('top', 'bottom'):
        print('Unknown ranktype detected: %s' % (ranktype,))
        # Same effect as sys.exit(2), without depending on a module-level
        # ``import sys`` having been executed.
        raise SystemExit(2)

    grouplist = list(glist)
    pseudo_groups = ('unused', 'offline')

    # 'bottom' plots move the pseudo-groups to the front of the list;
    # 'top' plots leave them in place and shift the colour index instead.
    skipgroup = 0
    if ranktype == 'bottom':
        for name in pseudo_groups:
            if name in grouplist:
                grouplist.remove(name)
                grouplist.insert(0, name)
    else:
        skipgroup = sum(1 for name in pseudo_groups if name in grouplist)

    # Pseudo-groups get fixed colours; real groups are assigned palette
    # entries counted back from the end of ``colors``.
    offset = len(colors) - len(grouplist) + skipgroup
    gcolors = {}
    for idx, name in enumerate(grouplist):
        if name == 'unused':
            gcolors[name] = '#d8d8d8'
        elif name == 'offline':
            gcolors[name] = '#790ead'
        else:
            gcolors[name] = colors[offset + idx]

    # Pseudo-groups are not plotted for 'queued' data.  This happens after
    # colour assignment so the remaining groups keep the same colours.
    if dbtype == 'queued':
        for name in pseudo_groups:
            if name in grouplist:
                grouplist.remove(name)

    def rrd_def(name):
        return ('DEF:' + name + '=' + DATADIR + name + '.' + dbtype +
                '.rrd:' + dbtype + ':AVERAGE')

    if ranktype == 'top':
        data_defs = [rrd_def(name) for name in grouplist + ['total']]
        # RPN for "total - (g1 + g2 + ...)".  With no groups at all the
        # expression is just "total" rather than the malformed "total,-".
        rpn = ['total'] + grouplist + ['+'] * (len(grouplist) - 1)
        if grouplist:
            rpn.append('-')
        data_defs.append('CDEF:other=' + ','.join(rpn))
    else:
        data_defs = [rrd_def(name) for name in grouplist]

    plot_defs = []
    sumshown = 0
    for name in grouplist:
        label = "%8s" % (name,)
        if pcents:
            label += ' (' + "%4.1f" % (pcents[name]) + ')'
            sumshown += float(pcents[name])
        # '\\n' is a literal backslash-n, passed through to rrdtool.
        plot_defs.append('AREA:' + name + gcolors[name] + ':' + label +
                         '\\n' + ':STACK')

    if ranktype == 'top':
        # Padded like the group labels so the legend entries line up.
        label = "%8s" % ('other',)
        if pcents:
            label += ' (' + "%4.1f" % (100 - sumshown) + ')'
        # "other" goes first (no STACK) so the groups stack on top of it.
        plot_defs.insert(0, 'AREA:other#794044:' + label + '\\n')
        plot_defs.append("LINE:total#000000")

    outfile = (PLOTDIR + dbtype + '-' + timetag + '-' + ranktype + '-' +
               psize + '.png')
    pargs = ([outfile] + commonargs + ['-l', '0'] + sizeargs[psize] +
             timeargs + data_defs + plot_defs)
    rrdtool.graph(*pargs)
255 |
|
256 |
def doplot_wait(glist, dbtype, psize, timetag, sizeargs, timeargs, ranktype):
    """Draw one line graph of per-group values with rrdtool.

    glist    -- group names to plot; the caller's list is not modified
    dbtype   -- used both as the infix of the RRD file name
                (<group>.<dbtype>.rrd) and as the data-source name
    psize    -- key selecting the entry of ``sizeargs`` to use
    timetag  -- tag embedded in the output file name
    sizeargs -- dict mapping ``psize`` to a list of rrdtool arguments
    timeargs -- list of rrdtool start/end arguments
    ranktype -- tag embedded in the output file name

    In addition to the groups, the 'rollover' and 'lastroll' series are
    drawn.  The graph is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.
    """
    grouplist = list(glist)

    # The pseudo-groups are dropped for 'waittime' data.
    if dbtype == 'waittime':
        for name in ('unused', 'offline'):
            if name in grouplist:
                grouplist.remove(name)

    data_defs = [
        'DEF:' + name + '=' + DATADIR + name + '.' + dbtype + '.rrd:' +
        dbtype + ':AVERAGE'
        for name in grouplist + ['rollover', 'lastroll']
    ]

    # Real groups are assigned palette entries counted back from the end of
    # ``colors``; the pseudo-groups (if still present) get fixed colours.
    offset = len(colors) - len(grouplist)
    plot_defs = []
    for idx, name in enumerate(grouplist):
        if name == 'unused':
            acolor = '#d8d8d8'
        elif name == 'offline':
            acolor = '#790ead'
        else:
            acolor = colors[offset + idx]
        # '\\n' is a literal backslash-n, passed through to rrdtool.
        plot_defs.append('LINE3:' + name + acolor + ':' + name + '\\n')

    plot_defs.append('LINE2:rollover#660198')
    plot_defs.append('LINE2:lastroll#000000')

    outfile = (PLOTDIR + dbtype + '-' + timetag + '-' + ranktype + '-' +
               psize + '.png')
    pargs = ([outfile] + ['--slope-mode', '-o'] + commonargs +
             sizeargs[psize] + timeargs + data_defs + plot_defs)
    rrdtool.graph(*pargs)
300 |
|
301 |
def _averaging_window(prangedef):
    """Return (start, end) in epoch seconds, aligned to prangedef['avres']."""
    resolu = prangedef['avres']
    if prangedef['timetag'] == "cmonth":
        # The window starts at the date stored in timeargs[1] (YYYYMMDD).
        # NOTE: it always spans 'avrange' seconds, so if that exceeds the
        # length of the month the window runs past the month's end.
        month_start = time.strptime(prangedef['timeargs'][1], "%Y%m%d")
        start = (int(time.mktime(month_start)) // resolu) * resolu
        end = start + prangedef['avrange']
    else:
        now = int(time.mktime(time.localtime()))
        end = (now // resolu) * resolu
        start = end - prangedef['avrange'] + resolu
    return start, end


def makeplots(prangedef):
    """Rank the groups by average running jobs, then print or plot them.

    prangedef -- one plot-range definition: a dict with the keys
                 'timeargs', 'timetag', 'avrange', 'avres', 'sizeargs'
                 and 'numgroups'.

    Every DATADIR/*.running.rrd file is fetched over the averaging window
    and reduced to one average per group; the file for group "total"
    supplies the denominator for the percentages.  With --rank-only the
    ranking is printed and nothing is plotted.  Otherwise 'top' and
    'bottom' plots are produced in both sizes for the 'queued', 'running'
    and 'waittime' databases.
    """
    resolu = prangedef['avres']
    numgroups = prangedef['numgroups']
    start, end = _averaging_window(prangedef)

    ### block finding 'top N' group list ###

    tgroup = {}      # structure tgroup[groupname] = average over the window
    totval = 0.0     # stays 0 when there is no total.running.rrd file

    suffix = '.running.rrd'
    for db in glob.glob(DATADIR + '*' + suffix):
        group = db[len(DATADIR):-len(suffix)]
        fetched = rrdtool.fetch(db, 'AVERAGE', '-r', str(resolu),
                                '-s', str(start), '-e', str(end))
        # Rows whose first value is None or 0 contribute nothing.
        summed = sum(row[0] for row in fetched[2] if row[0])

        # put numbers in meaningful units now.  result returned is an
        # integration over the time range, of averages over "resolu"
        # ... native resolution is in minutes, so multiplying by
        # (resolu / 60) puts the answer in core-minutes; dividing by
        # the number of minutes in the range gives the average number
        # of cores occupied, over the range
        average = summed * (resolu // 60) / ((end - start) / 60.)

        if group == "total":
            totval = average
        else:
            tgroup[group] = average

    # Percentage of the total per group; all zero when the total is zero
    # or missing, instead of failing on the division.
    if totval:
        pgroup = dict((g, 100 * v / totval) for g, v in tgroup.items())
    else:
        pgroup = dict.fromkeys(tgroup, 0.0)

    ### start block 'top N' and 'next N' group list ###

    groups_sorted = sorted(tgroup, key=tgroup.get)  # increasing usage towards end of list

    topgroups = groups_sorted[-numgroups:]
    bottgroups = groups_sorted[-2 * numgroups:-numgroups]

    # Move the pseudo-groups to the end of each list so that they appear
    # at the top of the plot ('offline' above 'unused').
    for glist in (topgroups, bottgroups):
        for name in ('unused', 'offline'):
            if name in glist:
                glist.remove(name)
                glist.append(name)

    if opts.rankonly:
        print("Ranked average running jobs over period %s"
              % (prangedef['timetag'],))
        print("%10s %12d" % ('total', totval))
        rank = 0
        for g in reversed(groups_sorted):
            if tgroup[g] > 0:
                rank += 1
                print("%2d. %10s %9.4f %9.4f%%"
                      % (rank, g, tgroup[g], pgroup[g]))
        return

    ### end block 'top N' group list ###

    ### block generating plots ###

    for dbtype in ('queued', 'running', 'waittime'):
        # Percentages are only shown in the legend of 'running' plots.
        percents = pgroup if dbtype == 'running' else None
        for ranktype, groups in (('top', topgroups), ('bottom', bottgroups)):
            for psize in ('small', 'large'):
                if dbtype == 'waittime':
                    doplot_wait(groups, dbtype, psize, prangedef['timetag'],
                                prangedef['sizeargs'],
                                prangedef['timeargs'], ranktype)
                else:
                    doplot(groups, dbtype, psize, prangedef['timetag'],
                           prangedef['sizeargs'],
                           prangedef['timeargs'],
                           percents, ranktype)
403 |
|
404 |
# Imported unconditionally: sys is needed on every path below, not only
# when --rank-only is given.
import sys
import time

if opts.rankonly:
    # Restore the default SIGPIPE disposition; presumably so that piping the
    # ranking into a pager or head(1) ends quietly -- TODO confirm.
    from signal import signal, SIGPIPE, SIG_DFL
    signal(SIGPIPE, SIG_DFL)

if opts.month:
    if opts.month == 'prev':
        import datetime
        # Day 1 of the current month minus one day lies in the previous month.
        first_of_month = datetime.date.today().replace(day=1)
        last_month = first_of_month - datetime.timedelta(days=1)
        curmonthstring = last_month.strftime("%Y%m")
    else:
        curmonthstring = opts.month

    monthstr = curmonthstring + "01"

    # Reject a malformed month here with a usage message, using the same
    # format that makeplots() parses, instead of failing with a traceback.
    try:
        time.strptime(monthstr, "%Y%m%d")
    except ValueError:
        p.error("invalid --calmonth value %r: expected YYYYMM "
                "(e.g. 201707) or 'prev'" % (opts.month,))

    # Replace the START placeholder of the calendar-month definition.
    plotrangedef['cmonth']['timeargs'][1] = monthstr
    makeplots(plotrangedef['cmonth'])
    sys.exit(0)

# Default run: all rolling ranges ('cmonth' is only produced on request).
for k in ('hr', 'day', 'week', 'month', 'year', 'alltime'):
    makeplots(plotrangedef[k])

sys.stdout.flush()
sys.stdout.close()

sys.stderr.flush()
sys.stderr.close()

sys.exit(0)