/[pdpsoft]/nl.nikhef.ndpf.mcfloat/trunk/mcfloat
ViewVC logotype

Diff of /nl.nikhef.ndpf.mcfloat/trunk/mcfloat

Parent Directory | Revision Log | View Patch

revision 2784 by templon, Thu Mar 12 12:54:43 2015 UTC | revision 2785 by templon, Wed May 13 13:32:01 2015 UTC
# Line 1  Line 1 
1  #!/usr/bin/env python  #!/usr/bin/env python
2  from torque_utils import pbsnodes  from torque_utils import pbsnodes
3    
4  # this config allows mcore to run up to full ATLAS allocation (27.02.2015)  # this config allows mcore to run up to full ATLAS allocation (07.04.2015)
5    # full allocation right now is 47% of "non-retired" capacity = knal + mars + car
6    # just configure 47% of each class.
7    
8  MAXDRAIN = 32            # max num of nodes allowed to drain  MAXDRAIN = 32            # max num of nodes allowed to drain
9  MAXFREE  = 49            # max num of free slots to tolerate  MAXFREE  = 49            # max num of free slots to tolerate
10  MAXFREEPERNODE = 3  MAXFREEPERNODE = 3
11  NODESPERGRAB = 3         # number of nodes to grab each time around  NODESPERGRAB = 3         # number of nodes to grab each time around
12  CANDIDATE_NODES = [ 'wn-car-0%02d.farm.nikhef.nl' % (n) for n in range(1,97) ] + \  
13                    [ 'wn-knal-0%02d.farm.nikhef.nl' % (n) for n in range(1,19) ]  # parameterize percent capacity -- 47% is too conservative, make this variable.
14    
15    CAPACFRAC = 0.60
16    CANDIDATE_NODES = [ 'wn-car-0%02d.farm.nikhef.nl' % (n) for n in range(1, int(CAPACFRAC*96)+1) ] + \
17                      [ 'wn-knal-0%02d.farm.nikhef.nl' % (n) for n in range(1,int(CAPACFRAC*18)+1) ] + \
18                      [ 'wn-mars-0%02d.farm.nikhef.nl' % (n) for n in range(20,20+int(CAPACFRAC*57)+1) ]
19    
20  MCQUEUE  = 'atlasmc'  MCQUEUE  = 'atlasmc'
21    
22  # settings below appropriate for knal nodes (32 cores)  # settings below appropriate for knal nodes (32 cores)
# Line 183  usage = "usage: %prog [-d debug_level] [ Line 192  usage = "usage: %prog [-d debug_level] [
192  p = optparse.OptionParser(description="Monitor state of multicore pool and adjust size as needed",  p = optparse.OptionParser(description="Monitor state of multicore pool and adjust size as needed",
193                            usage=usage)                            usage=usage)
194    
195  p.add_option("-n",action="store_true",dest="noopt",default=False,  p.add_option("-A",action="store_true",dest="addall",default=False,
196               help="don't do anything, just print what would have been done")               help="add all eligible nodes to multicore pool")
197  p.add_option("-l",action="store",dest="logfile",default=None,  p.add_option("-l",action="store",dest="logfile",default=None,
198               help="log actions and information to LOGFILE (default stdout)")               help="log actions and information to LOGFILE (default stdout)")
199  p.add_option("-L",action="store",dest="loglevel",default="INFO",  p.add_option("-L",action="store",dest="loglevel",default="INFO",
200               help="print messages of LOGLEVEL (DEBUG, INFO, WARNING, ..., default INFO")               help="print messages of LOGLEVEL (DEBUG, INFO, WARNING, ..., default INFO")
201    p.add_option("-n",action="store_true",dest="noopt",default=False,
202                 help="don't do anything, just print what would have been done")
203    p.add_option("-i",action="store_true",dest="info",default=False,
204                 help="print info on all nodes in multicore pool")
205    
206  import logging  import logging
207    
# Line 237  for node in mcnodes: Line 250  for node in mcnodes:
250          draining_slots += node.freeCpu          draining_slots += node.freeCpu
251          if node.freeCpu > 0 : draining_nodes += 1          if node.freeCpu > 0 : draining_nodes += 1
252    
253    if opts.info:
254        waiting, node_tuples = getmcjobinfo(mcdedicated)
255        for t in node_tuples:
256            if t[2] == 0:
257                print "%28s has %2d running mc jobs, %2d empty slots" % (t[0], t[2], t[3])
258            else:
259                print "%28s has %2d running mc jobs, %2d empty slots, ratio %4.1f" % (t[0], t[2], t[3], float(t[3])/t[2])
260        sys.exit(0)
261    candidate_nodes = list()
262    for n in mcnodes:
263        if n not in mcdedicated:
264            candidate_nodes.append(n)
265    
266    if opts.addall:
267        for grabbed_node in candidate_nodes:
268            logging.info("%s added to mc node pool" % (grabbed_node.name))
269    #        candidate_nodes.remove(grabbed_node)
270            if not opts.noopt: add_to_mc_pool(grabbed_node)
271        sys.exit(0)
272    
273  # check for dedicated nodes with too few jobs  # check for dedicated nodes with too few jobs
274    
275  nodes_with_too_few_jobs = list()  nodes_with_too_few_jobs = list()
# Line 319  if (MAXFREE - draining_slots) >= MAXFREE Line 352  if (MAXFREE - draining_slots) >= MAXFREE
352                        (MAXDRAIN, draining_nodes))                        (MAXDRAIN, draining_nodes))
353          logging.debug("there are also waiting jobs: try to grab another node")          logging.debug("there are also waiting jobs: try to grab another node")
354          # build a list of candidates to grab          # build a list of candidates to grab
         candidate_nodes = list()  
         for n in mcnodes:  
             if n not in mcdedicated:  
                 candidate_nodes.append(n)  
355          logging.debug("found %d candidate nodes to dedicate to mc" % len(candidate_nodes))          logging.debug("found %d candidate nodes to dedicate to mc" % len(candidate_nodes))
356          if len(candidate_nodes) < 1:          if len(candidate_nodes) < 1:
357              logging.debug("no more nodes, bailing out, nothing more I can do")              logging.debug("no more nodes, bailing out, nothing more I can do")

Legend:
Removed from v.2784  
changed lines
  Added in v.2785

grid.support@nikhef.nl
ViewVC Help
Powered by ViewVC 1.1.28