1 |
#! /usr/bin/env python |
2 |
# $Id$ |
3 |
# Source: $URL$ |
4 |
# J. A. Templon, NIKHEF/PDP 2011 |
5 |
|
6 |
import optparse |
7 |
|
8 |
p = optparse.OptionParser(description="Program to make rrdtool plots " + \ |
9 |
"of job running jobs by unix group") |
10 |
|
11 |
# p.add_option("-r",action="store",dest="minsize",default='0',help="minimum size of dirs considered; can use suffixes k,M,G for multiples of 1000**{1,2,3} bytes") |
12 |
# p.add_option("--qdel",action="store_true",dest="deljobs",help="delete jobs for which TMPDIR is larger than MINSIZE",default=False) |
13 |
|
14 |
p.add_option("--rank-only",action="store_true",dest="rankonly", |
15 |
help="don't plot, just print ranking of groups",default=False) |
16 |
|
17 |
debug = 0 |
18 |
|
19 |
opts, args = p.parse_args() |
20 |
|
21 |
import os |
22 |
|
23 |
# Number of groups per plot; makeplots() slices the ranked group list into
# chunks of this size.
NUMGROUPS = 8

# Input (rrd databases) and output (png files) directories under the user's
# home directory.  expanduser() is used instead of os.environ['HOME'] so that
# an unset HOME does not abort the script with a KeyError.  Both values keep a
# trailing separator because file names are appended by plain concatenation.
DATADIR = os.path.join(os.path.expanduser('~'), 'ndpfdata', '')
PLOTDIR = os.path.join(os.path.expanduser('~'), 'public_html', '')

# 8 class qualitative paired color scheme, indexed by a group's position in
# the plotted group list.
colors = ["#A6CEE3", "#1F77B4", "#B2DF8A", "#33A02C",
          "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00"]

# Stored reversed, so index 0 is the last colour of the scheme.
colors.reverse()
32 |
|
33 |
# for reference : ranges of the RRAs
# base step size 60 sec
# step    1 :    60 sec x 1600 points :   1 600 min :   26.67 hr :   1.11 days
# step    2 :   120 sec x 1200 points :   2 400 min :   40 hr    :   1.67 days
# step   10 :   600 sec x 1800 points :  18 000 min :  300 hr    :  12.5 days
# step   30 :  1800 sec x 2500 points :  75 000 min : 1250 hr    :  52.08 days
# step  120 :  7200 sec x 1000 points : 120 000 min : 2000 hr    :  83.33 days
# step  480 : 28800 sec x 1000 points : 480 000 min : 8000 hr    : 333.33 days
# step 1440 : 86400 sec x 3650 points : 3650 days : 10 years
42 |
|
43 |
# resolutions of RRAs |
44 |
|
45 |
# 1 : 60 sec : 1 min |
46 |
# 2 : 120 sec : 2 min |
47 |
# 3 : 600 sec : 10 min |
48 |
# 4 : 1 800 sec : 30 min |
49 |
# 5 : 7 200 sec : 120 min : 2 hr |
50 |
# 6 : 28 800 sec : 480 min : 8 hr |
51 |
# 7 : 86 400 sec : 1440 min : 24 hr : 1 day |
52 |
|
53 |
# Per-time-range plot configuration, keyed by range name.  Each entry holds:
#   'timeargs' : rrdtool graph start/end options
#   'timetag'  : range label; used in output file names and to special-case
#                the 'hr' range in makeplots()
#   'avrange'  : length in seconds of the window makeplots() averages over
#                when ranking groups
#   'avres'    : resolution in seconds requested from rrdtool.fetch for that
#                averaging
#   'sizeargs' : rrdtool graph size/grid options for the 'small' and 'large'
#                variants of the plot
plotrangedef = {
    'hr': {
        'timeargs': ['-s', 'n-200min', '-e', 'n'],
        'timetag': 'hr',
        'avrange': 24 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'MINUTE:20:HOUR:1:HOUR:1:0:%H:00'],
            'large': ['--width', '800', '--height', '500'],
        },
    },
    'day': {
        'timeargs': ['-s', 'n-2000min', '-e', 'n'],
        'timetag': 'day',
        'avrange': 24 * 3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'HOUR:6:DAY:1:HOUR:12:0:%a %H:00'],
            'large': ['--width', '1000', '--height', '625'],
        },
    },
    'week': {
        'timeargs': ['-s', 'n-288hr', '-e', 'n'],
        'timetag': 'week',
        'avrange': 7 * 24 * 3600,
        'avres': 600,
        'sizeargs': {
            'small': ['--width', '576', '--height', '125'],
            'large': ['--width', '1728', '--height', '375',
                      '-n', 'DEFAULT:16:'],
        },
    },

    # note the construction "str(576*n)" here --- this is because the plot is
    # (a multiple of) 576 pixels, and 120 min is one of the RRAs, so choosing
    # a lower limit of 576*120 gives us a plot with one pixel per RRA bin.

    'month': {
        'timeargs': ['-s', 'n-' + str(576 * 120) + 'min', '-e', 'n'],
        'timetag': 'month',
        'avrange': 31 * 24 * 3600,
        'avres': 1800,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420',
                      '-n', 'DEFAULT:18:',
                      '--x-grid',
                      'HOUR:12:DAY:1:DAY:3:86400:%d-%b'],
        },
    },

    'year': {
        'timeargs': ['-s', 'n-' + str(576 * 1440) + 'min', '-e', 'n'],
        'timetag': 'year',
        'avrange': 365 * 24 * 3600,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420'],
        },
    },

    # adjusting for nice plot ... now 9/4 of a year
    # Floor division keeps these values integers on Python 3 as well; with
    # "/" the start option would become 'n-1182600.0min'.

    'alltime': {
        'timeargs': ['-s', 'n-' + str(9 * 365 * 1440 // 4) + 'min', '-e', 'n'],
        'timetag': 'alltime',
        'avrange': 9 * 365 * 24 * 3600 // 4,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105',
                      '--x-grid',
                      'MONTH:3:YEAR:1:YEAR:1:31536000:%Y'],
            'large': ['--width', '2304', '--height', '420',
                      '--x-grid',
                      'MONTH:1:YEAR:1:MONTH:3:2592000:%b-%Y'],
        },
    },
}

# rrdtool graph options shared by every plot.
commonargs = ['--imgformat', 'PNG',
              '--legend-position=east', '--legend-direction=bottomup']
134 |
|
135 |
import rrdtool |
136 |
import time |
137 |
import glob |
138 |
|
139 |
### function definitions |
140 |
|
141 |
def doplot(glist, dbtype, psize, timetag, sizeargs, timeargs, pcents, ranktype):
    """Draw one stacked-area graph of per-group values with rrdtool.

    The image is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.

    Arguments:
      glist    -- list of group names to plot, in the order their areas are
                  emitted.  The list is not modified.
      dbtype   -- name of the data source; each group is read from
                  DATADIR + '<group>.<dbtype>.rrd'.  For 'queued' the
                  pseudo-groups 'unused' and 'offline' are left out.
      psize    -- key into sizeargs ('small' or 'large' in this file).
      timetag  -- label of the time range; only used in the file name.
      sizeargs -- dict mapping psize to a list of rrdtool graph options.
      timeargs -- list of rrdtool graph start/end options.
      pcents   -- dict mapping group name to a percentage that is appended
                  to the group's legend entry, or a false value for plain
                  legends.
      ranktype -- 'top' or 'bottom'.  'top' additionally plots an 'other'
                  area (total minus the listed groups) and a line for the
                  total; 'bottom' moves 'unused' and 'offline' to the front
                  of the group order.

    Prints a message and exits with status 2 for any other ranktype.
    """
    # Local import: the module-level "import sys" sits at the very end of the
    # file, i.e. after the calls that reach this function.
    import sys

    if ranktype not in ('top', 'bottom'):
        print('Unknown ranktype detected: %s' % ranktype)
        sys.exit(2)

    pseudo_groups = ('unused', 'offline')

    # Work on a private copy so the caller's list is left untouched.
    if dbtype == 'queued':
        grouplist = [g for g in glist if g not in pseudo_groups]
    else:
        grouplist = list(glist)

    if ranktype == 'bottom':
        # 'unused' first, then 'offline', so 'offline' ends up at index 0.
        for name in pseudo_groups:
            if name in grouplist:
                grouplist.remove(name)
                grouplist.insert(0, name)

    def rrd_def(name):
        # DEF statement reading the AVERAGE series of one group's database.
        return ('DEF:' + name + '=' + DATADIR + name + '.' + dbtype +
                '.rrd:' + dbtype + ':AVERAGE')

    data_defs = [rrd_def(g) for g in grouplist]
    if ranktype == 'top':
        data_defs.append(rrd_def('total'))
        # RPN for other = total - (g1 + g2 + ...).  With no groups at all the
        # expression degenerates to "total - 0" rather than the invalid
        # "total,-".
        if grouplist:
            rpn = ','.join(['total'] + grouplist +
                           ['+'] * (len(grouplist) - 1) + ['-'])
        else:
            rpn = 'total,0,-'
        data_defs.append('CDEF:other=' + rpn)

    pseudo_colors = {'unused': '#858885', 'offline': '#790ead'}
    plot_defs = []
    sumshown = 0.0
    for idx, group in enumerate(grouplist):
        # Wrap around the palette instead of failing if there are more
        # groups than colours.
        acolor = pseudo_colors.get(group, colors[idx % len(colors)])
        legend = group
        if pcents:
            legend += ' (%4.1f)' % pcents[group]
            sumshown += float(pcents[group])
        # '\\n' is a literal backslash-n, passed through to rrdtool as part
        # of the legend text.
        plot_defs.append('AREA:' + group + acolor + ':' + legend +
                         '\\n' + ':STACK')

    if ranktype == 'top':
        legend = 'other'
        if pcents:
            # Whatever share is not attributed to a listed group.
            legend += ' (%4.1f)' % (100 - sumshown)
        plot_defs.insert(0, 'AREA:other#794044:' + legend + '\\n')
        plot_defs.append('LINE:total#000000')

    outfile = (PLOTDIR + dbtype + '-' + timetag + '-' + ranktype + '-' +
               psize + '.png')
    pargs = ([outfile] + commonargs + ['-l', '0'] + sizeargs[psize] +
             timeargs + data_defs + plot_defs)
    rrdtool.graph(*pargs)
208 |
|
209 |
def doplot_wait(glist, dbtype, psize, timetag, sizeargs, timeargs, ranktype):
    """Draw one line graph of per-group values with rrdtool.

    One LINE3 is emitted per group, followed by LINE2 entries for the
    'rollover' and 'lastroll' series.  The image is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.

    Arguments:
      glist    -- list of group names to plot; the list is copied, not
                  modified.
      dbtype   -- name of the data source; every series is read from
                  DATADIR + '<name>.<dbtype>.rrd'.  For 'waittime' the
                  pseudo-groups 'unused' and 'offline' are left out.
      psize    -- key into sizeargs.
      timetag  -- label of the time range; only used in the file name.
      sizeargs -- dict mapping psize to a list of rrdtool graph options.
      timeargs -- list of rrdtool graph start/end options.
      ranktype -- label used only in the file name.
    """
    shown = list(glist)
    if dbtype == 'waittime':
        for pseudo in ('unused', 'offline'):
            if pseudo in shown:
                shown.remove(pseudo)

    # One DEF per plotted group plus the two reference series.
    series = [
        ''.join(['DEF:', name, '=', DATADIR, name, '.', dbtype,
                 '.rrd:', dbtype, ':AVERAGE'])
        for name in shown + ['rollover', 'lastroll']
    ]

    fixed_tints = {'unused': '#858885', 'offline': '#790ead'}
    lines = []
    for pos, name in enumerate(shown):
        # Pseudo-groups have fixed colours; everything else takes the
        # palette entry matching its position.
        tint = fixed_tints[name] if name in fixed_tints else colors[pos]
        lines.append('LINE3:' + name + tint + ':' + name + '\\n')
    lines.extend(['LINE2:rollover#660198', 'LINE2:lastroll#000000'])

    target = ''.join([PLOTDIR, dbtype, '-', timetag, '-', ranktype, '-',
                      psize, '.png'])
    graph_args = ([target, '--slope-mode', '-o'] + commonargs +
                  sizeargs[psize] + timeargs + series + lines)
    rrdtool.graph(*graph_args)
244 |
|
245 |
def makeplots(prangedef):
    """Rank the groups over one time range, then plot or print the result.

    prangedef is one entry of plotrangedef.  Every DATADIR/*.running.rrd
    database is fetched over the last prangedef['avrange'] seconds at
    resolution prangedef['avres'] to obtain each group's average number of
    running jobs.  The NUMGROUPS highest-ranked groups form the 'top' list
    and the next NUMGROUPS the 'bottom' list.

    With --rank-only the ranking is printed (nothing is printed for the 'hr'
    range) and no graphs are drawn.  Otherwise the 'queued', 'running' and
    'waittime' graphs are drawn for both lists in 'small' and 'large' size.
    """
    timetag = prangedef['timetag']

    # --rank-only means "don't plot"; the 'hr' range has no ranking output,
    # so there is nothing to do for it.  (Previously this combination fell
    # through and drew the hour plots.)
    if opts.rankonly and timetag == 'hr':
        return

    resolu = prangedef['avres']

    # Align the averaging window to a multiple of the resolution.
    now = int(time.time())
    end = (now // resolu) * resolu
    start = end - prangedef['avrange'] + resolu
    span_minutes = (end - start) / 60.0

    ### block finding 'top N' group list ###

    suffix = '.running.rrd'
    tgroup = dict()   # tgroup[groupname] = average running jobs over range
    totval = 0.0      # stays 0 when there is no 'total' database

    for db in glob.glob(DATADIR + '*' + suffix):
        group = db[len(DATADIR):-len(suffix)]
        fetched = rrdtool.fetch(db, 'AVERAGE', '-r', str(resolu),
                                '-s', str(start), '-e', str(end))
        # Rows with no data (or zero) contribute nothing to the sum.
        summed = sum(row[0] for row in fetched[2] if row[0])

        # put numbers in meaningful units now.  result returned is an
        # integration over the time range, of averages over "resolu"
        # ... native resolution is in minutes, so multiplying by
        # (resolu / 60) puts the answer in core-minutes; dividing by
        # the number of minutes in the range gives the average number
        # of cores occupied, over the range
        average = summed * (resolu // 60) / span_minutes

        if group == 'total':
            totval = average
        else:
            tgroup[group] = average

    # Percentage of the total per group; all zero when the total is missing
    # or zero, instead of failing with NameError / ZeroDivisionError.
    pgroup = dict()
    for g in tgroup:
        if totval > 0:
            pgroup[g] = 100 * tgroup[g] / totval
        else:
            pgroup[g] = 0.0

    ### start block 'top N' and 'next N' group list ###

    # increasing usage towards end of list
    groups_sorted = sorted(tgroup, key=tgroup.get)

    topgroups = groups_sorted[-NUMGROUPS:]
    bottgroups = groups_sorted[-2 * NUMGROUPS:-NUMGROUPS]

    for glist in (topgroups, bottgroups):
        # Move the pseudo-groups to the end so they always appear at the top
        # of the plot; 'offline' goes last and so ends up above 'unused'.
        for pseudo in ('unused', 'offline'):
            if pseudo in glist:
                glist.remove(pseudo)
                glist.append(pseudo)

    if opts.rankonly:
        print("Ranked average running jobs over period %s" % timetag)
        print("%10s %12d" % ('total', totval))
        rank = 0
        for g in reversed(groups_sorted):
            if tgroup[g] > 0:
                rank += 1
                print("%2d. %10s %9.4f %9.4f%%" %
                      (rank, g, tgroup[g], pgroup[g]))
        return

    ### end block 'top N' group list ###

    ### block generating plots ###

    for dbtype in ['queued', 'running', 'waittime']:

        # Percentages are only shown in the legend of 'running' plots, and
        # not for the 'hr' range.
        if dbtype == 'running' and timetag != 'hr':
            percents = pgroup
        else:
            percents = None

        for ranktype, groups in [('top', topgroups), ('bottom', bottgroups)]:
            for psize in ['small', 'large']:
                if dbtype == 'waittime':
                    doplot_wait(groups, dbtype, psize, timetag,
                                prangedef['sizeargs'],
                                prangedef['timeargs'], ranktype)
                else:
                    doplot(groups, dbtype, psize, timetag,
                           prangedef['sizeargs'],
                           prangedef['timeargs'],
                           percents, ranktype)
340 |
|
341 |
# Imported before the loop: doplot() calls sys.exit() on an unknown ranktype,
# which needs the name "sys" to exist while the plots are being made.
import sys

# Process every configured time range in a fixed order, shortest first.
for k in ['hr', 'day', 'week', 'month', 'year', 'alltime']:  # plotrangedef.keys():
    makeplots(plotrangedef[k])

sys.exit(0)