1 |
templon |
2355 |
#! /usr/bin/env python |
2 |
templon |
2377 |
# $Id$ |
3 |
|
|
# Source: $URL$ |
4 |
|
|
# J. A. Templon, NIKHEF/PDP 2011 |
5 |
templon |
2355 |
|
6 |
templon |
2556 |
import optparse |
7 |
|
|
|
8 |
|
|
# Command-line interface.  Parsing happens at import time; the result is
# kept in the module-level names 'opts' and 'args'.
p = optparse.OptionParser(
    description="Program to make rrdtool plots "
                "of job running jobs by unix group")

# p.add_option("-r",action="store",dest="minsize",default='0',help="minimum size of dirs considered; can use suffixes k,M,G for multiples of 1000**{1,2,3} bytes")
# p.add_option("--qdel",action="store_true",dest="deljobs",help="delete jobs for which TMPDIR is larger than MINSIZE",default=False)

# --rank-only : boolean flag, stored as opts.rankonly, off by default
p.add_option("--rank-only",
             dest="rankonly",
             action="store_true",
             default=False,
             help="don't plot, just print ranking of groups")

debug = 0

opts, args = p.parse_args()
20 |
|
|
|
21 |
templon |
2357 |
import os |
22 |
|
|
|
23 |
|
|
# Directory holding the '<group>.<dbtype>.rrd' databases (below $HOME) and
# directory the PNG files are written to.  Both end in a slash because the
# file names are appended by plain string concatenation.
DATADIR = ''.join([os.environ['HOME'], '/ndpfdata/'])
PLOTDIR = '/www/grid/stats/groupviews/ndpf/'

# 8 class qualitative paired color scheme, stored in reverse of the order
# in which it is written down here
colors = [
    "#A6CEE3", "#1F77B4",
    "#B2DF8A", "#33A02C",
    "#FB9A99", "#E31A1C",
    "#FDBF6F", "#FF7F00",
][::-1]
32 |
|
|
|
33 |
templon |
2556 |
# for reference : ranges of the RRAs |
34 |
|
|
# base step size 60 sec |
35 |
|
|
# step 1 : 60 sec x 1600 points : 1600 min : 26,67 hr : 1.11 days |
36 |
|
|
# step 2 : 120 sec x 1200 points : 2400 min : 40 hr : 1,67 days |
37 |
|
|
# step 10 : 600 sec x 1800 points : 18 000 min : 300 hr : 12,5 days |
38 |
|
|
# step 30 : 1800 sec x 2500 points : 75 000 min : 1250 hr : 52.08 days |
39 |
|
|
# step 120 : 7200 sec x 1000 points : 120 000 min : 2000 hr : 83.33 days |
40 |
|
|
# step 480 : 28800 sec x 1000 points : 480 000 min : 8000 hr : 333.33 days |
41 |
|
|
# step 1440 : 86400 sec x 3650 points : 3650 days : 10 years |
42 |
templon |
2355 |
|
43 |
templon |
2556 |
# resolutions of RRAs |
44 |
|
|
|
45 |
|
|
# 1 : 60 sec : 1 min |
46 |
|
|
# 2 : 120 sec : 2 min |
47 |
|
|
# 3 : 600 sec : 10 min |
48 |
|
|
# 4 : 1 800 sec : 30 min |
49 |
|
|
# 5 : 7 200 sec : 120 min : 2 hr |
50 |
|
|
# 6 : 28 800 sec : 480 min : 8 hr |
51 |
|
|
# 7 : 86 400 sec : 1440 min : 24 hr : 1 day |
52 |
|
|
|
53 |
templon |
3268 |
NUMQS = 33  # number of quarters for "alltime" plot

# One entry per plotted time range.  Keys of each entry:
#   timeargs  : rrdtool graph options selecting start ('-s') and end ('-e')
#   timetag   : label of the range; used in output file names
#   avrange   : length in seconds of the window used for ranking the groups
#   avres     : resolution in seconds requested when fetching for the ranking
#   sizeargs  : rrdtool graph options per plot size ('small' / 'large')
#   numgroups : how many groups go into the 'top' (and the 'bottom') plot
plotrangedef = {
    'hr': {
        'timeargs': ['-s', 'n-200min', '-e', 'n'],
        'timetag': 'hr',
        'avrange': 3*3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'MINUTE:20:HOUR:1:HOUR:1:0:%H:00'],
            'large': ['--width', '800', '--height', '500'],
        },
        'numgroups': 4,
    },
    'day': {
        'timeargs': ['-s', 'n-2000min', '-e', 'n'],
        'timetag': 'day',
        'avrange': 24*3600,
        'avres': 60,
        'sizeargs': {
            'small': ['--width', '200', '--height', '125',
                      '--x-grid',
                      'HOUR:6:DAY:1:HOUR:12:0:%a %H:00'],
            'large': ['--width', '1000', '--height', '625'],
        },
        'numgroups': 5,
    },
    'week': {
        'timeargs': ['-s', 'n-288hr', '-e', 'n'],
        'timetag': 'week',
        'avrange': 7*24*3600,
        'avres': 600,
        'sizeargs': {
            'small': ['--width', '576', '--height', '125'],
            'large': ['--width', '1728', '--height', '375',
                      '-n', 'DEFAULT:16:'],
        },
        'numgroups': 6,
    },

    # note the construction "str(576*n)" here --- this is because the plot is
    # (a multiple of) 576 pixels, and 120 min is one of the RRAs, so choosing
    # a lower limit of 576*120 gives us a plot with one pixel per RRA bin.

    'month': {
        'timeargs': ['-s', 'n-' + str(576*120) + 'min', '-e', 'n'],
        'timetag': 'month',
        'avrange': 31*24*3600,
        'avres': 1800,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420',
                      '-n', 'DEFAULT:18:',
                      '--x-grid',
                      'HOUR:12:DAY:1:DAY:3:86400:%d-%b'],
        },
        'numgroups': 7,
    },

    'year': {
        'timeargs': ['-s', 'n-' + str(576*1440) + 'min', '-e', 'n'],
        'timetag': 'year',
        'avrange': 365*24*3600,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105'],
            'large': ['--width', '2304', '--height', '420'],
        },
        'numgroups': 8,
    },

    # adjusting for nice plot ... set NUMQS above; range is NUMQS/4 years
    #
    # '//' (floor division) is used on purpose: with '/' the values are only
    # integers under Python 2, and a float would end up in the start
    # specification as e.g. 'n-4336200.0min'.

    'alltime': {
        'timeargs': ['-s', 'n-' + str(NUMQS*365*1440//4) + 'min', '-e', 'n'],
        'timetag': 'alltime',
        'avrange': NUMQS*365*24*3600//4,
        'avres': 86400,
        'sizeargs': {
            'small': ['--width', '576', '--height', '105',
                      '--x-grid',
                      'MONTH:3:YEAR:1:YEAR:1:31536000:%Y'],
            'large': ['--width', '2304', '--height', '420',
                      '--x-grid',
                      'MONTH:1:YEAR:1:MONTH:3:2592000:%b-%Y'],
        },
        'numgroups': 8,
    },
}

# rrdtool graph options shared by every plot
commonargs = ['--imgformat', 'PNG',
              '--legend-position=east', '--legend-direction=bottomup']
142 |
|
|
|
143 |
templon |
2398 |
import rrdtool |
144 |
templon |
2357 |
import time |
145 |
templon |
2398 |
import glob |
146 |
templon |
2357 |
|
147 |
templon |
2398 |
### function definitions |
148 |
templon |
2362 |
|
149 |
templon |
2663 |
def doplot(glist, dbtype, psize, timetag, sizeargs, timeargs, pcents, ranktype):
    """Draw one stacked-area rrdtool graph for a list of groups.

    Arguments:
      glist    -- group names to plot; the list is copied, the caller's
                  list is left untouched
      dbtype   -- database kind; used both in the file name
                  DATADIR + '<group>.<dbtype>.rrd' and as the name of the
                  data source read from that file
      psize    -- key into sizeargs selecting the size-specific options
      timetag  -- label of the time range; only used in the output file name
      sizeargs -- dict mapping psize to a list of rrdtool graph options
      timeargs -- list of rrdtool graph options selecting the time range
      pcents   -- dict mapping group name to a percentage that is added to
                  the legend, or None/empty to leave percentages out
      ranktype -- 'top' or 'bottom'.  A 'top' graph additionally gets an
                  'other' area (total minus the listed groups) and a line
                  for 'total'.

    The image is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.
    Any other ranktype prints a message and exits the program with status 2.
    """
    # The module-level 'import sys' is executed only after the plotting loop
    # has finished, so the name has to be brought into scope here.
    import sys

    if ranktype not in ('top', 'bottom'):
        print('Unknown ranktype detected: %s' % (ranktype,))
        sys.exit(2)

    pseudo_groups = ('unused', 'offline')
    grouplist = list(glist)

    # Colour assignment.  The pseudo-groups have fixed colours; every other
    # group takes a palette entry by its position.  For 'bottom' graphs the
    # pseudo-groups are moved to the front ('offline' ends up first); for
    # 'top' graphs they stay where they are and the palette index is shifted
    # by the number of pseudo-groups present.
    skipgroup = 0
    if ranktype == 'bottom':
        for name in pseudo_groups:
            if name in grouplist:
                grouplist.remove(name)
                grouplist.insert(0, name)
    else:
        for name in pseudo_groups:
            if name in grouplist:
                skipgroup += 1

    gcolors = dict()
    for idx, name in enumerate(grouplist):
        if name == 'unused':
            gcolors[name] = '#d8d8d8'
        elif name == 'offline':
            gcolors[name] = '#790ead'
        else:
            gcolors[name] = colors[idx + skipgroup]

    # The pseudo-groups are not plotted for 'queued' databases.  This is done
    # after the colour assignment so the remaining groups keep their colours.
    if dbtype == 'queued':
        for name in pseudo_groups:
            if name in grouplist:
                grouplist.remove(name)

    def rrd_def(name):
        # DEF:<vname>=<rrd file>:<data source>:<consolidation function>
        return ('DEF:' + name + '=' + DATADIR + name + '.' + dbtype +
                '.rrd:' + dbtype + ':AVERAGE')

    if ranktype == 'top':
        data_defs = [rrd_def(name) for name in grouplist + ['total']]
        # RPN for  other = total - (g1 + g2 + ... + gN).  With no groups at
        # all the expression is just 'total'; a dangling '-' would leave the
        # RPN malformed.
        rpn = ['total'] + grouplist
        if grouplist:
            rpn += ['+'] * (len(grouplist) - 1) + ['-']
        data_defs.append('CDEF:other=' + ','.join(rpn))
    else:
        data_defs = [rrd_def(name) for name in grouplist]

    plot_defs = list()
    sumshown = 0
    for name in grouplist:
        legend = "%8s" % (name,)
        if pcents:
            legend += ' (' + "%4.1f" % (pcents[name],) + ')'
            sumshown += float(pcents[name])
        # '\\n' is passed on as backslash-n for rrdtool to interpret
        plot_defs.append('AREA:' + name + gcolors[name] + ':' + legend +
                         '\\n' + ':STACK')

    if ranktype == 'top':
        legend = ' other'
        if pcents:
            # whatever share is not covered by the listed groups
            legend += ' (' + "%4.1f" % (100 - sumshown) + ')'
        # 'other' goes first in the list, the 'total' line last
        plot_defs.insert(0, 'AREA:other#794044:' + legend + '\\n')
        plot_defs.append("LINE:total#000000")

    outfile = (PLOTDIR + dbtype + '-' + timetag + '-' + ranktype + '-' +
               psize + '.png')
    pargs = ([outfile] + commonargs + ['-l', '0'] + sizeargs[psize] +
             timeargs + data_defs + plot_defs)
    rrdtool.graph(*pargs)
228 |
|
|
|
229 |
templon |
2663 |
def doplot_wait(glist, dbtype, psize, timetag, sizeargs, timeargs, ranktype):
    """Draw one rrdtool line graph for a list of groups.

    Each group is drawn as a LINE3; the series 'rollover' and 'lastroll'
    are always added as LINE2.  For dbtype 'waittime' the pseudo-groups
    'unused' and 'offline' are dropped from the (copied) group list before
    anything is drawn; the palette index of the remaining groups is shifted
    by one for every pseudo-group dropped.

    Arguments:
      glist    -- group names to plot (not modified)
      dbtype   -- database kind; part of the file name
                  DATADIR + '<group>.<dbtype>.rrd' and name of the data
                  source read from it
      psize    -- key into sizeargs
      timetag  -- label of the time range, used in the output file name
      sizeargs -- dict mapping psize to a list of rrdtool graph options
      timeargs -- list of rrdtool graph options selecting the time range
      ranktype -- only used in the output file name

    The image is written to
    PLOTDIR + '<dbtype>-<timetag>-<ranktype>-<psize>.png'.
    """
    members = list(glist)

    shift = 0
    if dbtype == 'waittime':
        for pseudo in ('unused', 'offline'):
            if pseudo in members:
                members.remove(pseudo)
                shift += 1

    sources = [
        ''.join(['DEF:', name, '=', DATADIR, name, '.', dbtype,
                 '.rrd:', dbtype, ':AVERAGE'])
        for name in members + ['rollover', 'lastroll']
    ]

    fixed_colors = {'unused': '#d8d8d8', 'offline': '#790ead'}
    traces = []
    for pos, name in enumerate(members):
        if name in fixed_colors:
            shade = fixed_colors[name]
            shift += 1
        else:
            shade = colors[pos + shift]
        traces.append('LINE3:' + name + shade + ':' + name + '\\n')

    traces += ['LINE2:rollover#660198', 'LINE2:lastroll#000000']

    target = ''.join([PLOTDIR, dbtype, '-', timetag, '-', ranktype, '-',
                      psize, '.png'])
    pargs = [target, '--slope-mode', '-o']
    pargs += commonargs
    pargs += sizeargs[psize]
    pargs += timeargs
    pargs += sources
    pargs += traces
    rrdtool.graph(*pargs)
271 |
|
|
|
272 |
templon |
2398 |
def makeplots(prangedef):
    """Rank the groups over one time range, then print the ranking or plot.

    prangedef is one entry of plotrangedef; the keys 'avres', 'numgroups',
    'avrange', 'timetag', 'sizeargs' and 'timeargs' are read.

    Every DATADIR + '*.running.rrd' database is fetched over the averaging
    window and reduced to one average per group; the 'total' database
    supplies the denominator for the percentages.  The 'numgroups' highest
    groups form the 'top' list, the next 'numgroups' the 'bottom' list.

    With --rank-only (opts.rankonly) nothing is plotted: the ranking is
    printed for every range except 'hr'.  Otherwise 'small' and 'large'
    graphs are produced for the 'queued', 'running' and 'waittime'
    databases, for both lists.
    """
    resolu = prangedef['avres']
    numgroups = prangedef['numgroups']
    timetag = prangedef['timetag']

    # Averaging window, aligned to a multiple of the fetch resolution.
    # '//' keeps the arithmetic integral on every Python version.
    now = int(time.mktime(time.localtime()))
    end = (now // resolu) * resolu
    start = end - int(prangedef['avrange']) + resolu
    range_minutes = (end - start) / 60.

    ### block finding 'top N' group list ###

    suffix = '.running.rrd'
    tgroup = dict()   # structure tgroup[groupname] = average over the range
    totval = 0.0      # stays 0 when there is no (or an empty) 'total' database

    for db in glob.glob(DATADIR + '*' + suffix):
        # group name = file name without directory and without the suffix
        group = db[len(DATADIR):-len(suffix)]
        tup = rrdtool.fetch(db, 'AVERAGE', '-r', str(resolu),
                            '-s', str(start), '-e', str(end))
        # third element of the result holds the rows; take the first column
        # and leave out missing (None) and zero entries
        summed = sum(row[0] for row in tup[2] if row[0])

        # put numbers in meaningful units now.  result returned is an
        # integration over the time range, of averages over "resolu"
        # ... native resolution is in minutes, so multiplying by
        # (resolu / 60) puts the answer in core-minutes; dividing by
        # the number of minutes in the range gives the average number
        # of cores occupied, over the range
        average = summed * (resolu // 60) / range_minutes

        if group == "total":
            totval = average
        else:
            tgroup[group] = average

    # percentage of the total per group; 0 when the total itself is 0, so
    # that an idle or missing 'total' does not end in a division by zero
    pgroup = dict()
    for g in tgroup:
        if totval:
            pgroup[g] = 100 * tgroup[g] / totval
        else:
            pgroup[g] = 0.0

    ### start block 'top N' and 'next N' group list ###

    # increasing usage towards end of list
    groups_sorted = sorted(tgroup, key=tgroup.get)

    topgroups = groups_sorted[-numgroups:]
    bottgroups = groups_sorted[-2*numgroups:-numgroups]

    for glist in [topgroups, bottgroups]:
        if 'unused' in glist:
            glist.remove('unused')
            glist.append('unused')   # makes it always appear at top of plot (except see offline)
        if 'offline' in glist:
            glist.remove('offline')
            glist.append('offline')  # makes it always appear at top of plot

    if opts.rankonly:
        # --rank-only means "don't plot": return for every range, including
        # 'hr', for which no ranking is printed
        if timetag != 'hr':
            print("Ranked average running jobs over period %s" % (timetag,))
            print("%10s %12d" % ('total', totval))
            rank = 0
            for g in reversed(groups_sorted):
                if tgroup[g] > 0:
                    rank += 1
                    print("%2d. %10s %9.4f %9.4f%%" %
                          (rank, g, tgroup[g], pgroup[g]))
        return

    ### end block 'top N' group list ###

    ### block generating plots ###

    for dbtype in ['queued', 'running', 'waittime']:

        ### this is a bit of a hack : we only want pgroups for when it's
        ### a 'running' database and we don't want it for timetag hour.
        ### fix this up here

        if dbtype == 'running' and timetag != 'hr':
            percents = pgroup
        else:
            percents = None

        for ranktype, groups in [('top', topgroups), ('bottom', bottgroups)]:
            for psize in ['small', 'large']:
                if dbtype == 'waittime':
                    doplot_wait(groups, dbtype, psize, timetag,
                                prangedef['sizeargs'],
                                prangedef['timeargs'], ranktype)
                else:
                    doplot(groups, dbtype, psize, timetag,
                           prangedef['sizeargs'],
                           prangedef['timeargs'],
                           percents, ranktype)
368 |
templon |
2362 |
|
369 |
templon |
3239 |
### main program

# 'sys' has to be in scope before the plotting loop runs: doplot() calls
# sys.exit() when it is handed an unknown ranktype.
import sys

if opts.rankonly:
    # restore the default action for SIGPIPE
    # NOTE(review): presumably so that piping the ranking into e.g. 'head'
    # ends the program quietly -- confirm
    from signal import signal, SIGPIPE, SIG_DFL
    signal(SIGPIPE, SIG_DFL)

for k in ['hr', 'day', 'week', 'month', 'year', 'alltime']:  # plotrangedef.keys():
    makeplots(plotrangedef[k])

# flush and close both standard streams explicitly before exiting
sys.stdout.flush()
sys.stdout.close()

sys.stderr.flush()
sys.stderr.close()

sys.exit(0)