1
2 """Encapsulates all necessary things for a cluster-job, like setting up, running, restarting"""
3
4 import os,sys
5 from os import path,unlink
6 from threading import Thread,Lock,Timer
7
8 from PyFoam.Applications.Decomposer import Decomposer
9 from PyFoam.Applications.Runner import Runner
10 from PyFoam.Applications.SteadyRunner import SteadyRunner
11 from PyFoam.Applications.CloneCase import CloneCase
12 from PyFoam.FoamInformation import changeFoamVersion
13 from PyFoam.Error import error,warning
14 from PyFoam import configuration as config
15 from PyFoam.FoamInformation import oldAppConvention as oldApp
16 from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
17
31
32
34 """ All Cluster-jobs are to be derived from this base-class
35
36 The actual jobs are implemented by overriding methods
37
38 There is a number of variables in this class that are used to
39 'communicate' information between the various stages"""
40
def __init__(self,
             basename,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             multiRegion=False,
             isDecomposed=False):
    """Initializes the Job

    Reads the job description from the environment (JOB_ID, JOB_NAME and,
    if present, RESTARTED, NSLOTS, TMP and SGE_TASK_ID), selects the
    OpenFOAM-version and stores the settings that the other stages of the
    job use.

    @param basename: Basis name of the job
    @param arrayJob: this job is a parameter variation. The tasks
    are identified by their task-id
    @param hardRestart: treat the job as restarted
    @param autoParallel: Parallelization is handled by the base-class
    @param doAutoReconstruct: Automatically reconstruct the case if
    autoParallel is set. If the value is None then it is looked up from
    the configuration
    @param foamVersion: The foam-Version that is to be used. If None it
    is looked up from the configuration
    @param compileOption: Forces compile-option (usually 'Opt' or 'Debug')
    @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
    @param multiRegion: This job consists of multiple regions
    @param isDecomposed: Assume that the job is already decomposed"""

    if "JOB_ID" not in os.environ:
        error("Not an SGE-job. Environment variable JOB_ID is missing")
    self.jobID=int(os.environ["JOB_ID"])
    self.jobName=os.environ["JOB_NAME"]

    self.basename=path.join(path.abspath(path.curdir),basename)

    # RESTARTED is only evaluated if it is set; any non-zero value
    # counts as a restart
    sgeRestarted=False
    if "RESTARTED" in os.environ:
        sgeRestarted=(int(os.environ["RESTARTED"])!=0)

    self.restarted=bool(sgeRestarted or hardRestart)

    if foamVersion is None:
        foamVersion=config().get("OpenFOAM","Version")

    changeFoamVersion(foamVersion,compileOption=compileOption)

    if "WM_PROJECT_VERSION" not in os.environ:
        error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

    self.autoParallel=autoParallel

    self.doAutoReconstruct=doAutoReconstruct
    if self.doAutoReconstruct is None:
        self.doAutoReconstruct=config().getboolean("ClusterJob","doAutoReconstruct")

    self.multiRegion=multiRegion

    self.hostfile=None
    self.nproc=1

    if "NSLOTS" in os.environ:
        self.nproc=int(os.environ["NSLOTS"])
        self.message("Running on",self.nproc,"CPUs")
        if self.nproc>1:
            self.hostfile=path.join(os.environ["TMP"],"machines")
            self.message("Using the machinefile",self.hostfile)
            # Close the machinefile explicitly instead of leaving the
            # handle to the garbage collector
            machines=open(self.hostfile)
            try:
                machineLines=machines.readlines()
            finally:
                machines.close()
            self.message("Contents of the machinefile:",machineLines)

    self.ordinaryEnd=True
    self.listenToTimer=False

    self.taskID=None
    self.arrayJob=arrayJob

    if self.arrayJob:
        self.taskID=int(os.environ["SGE_TASK_ID"])

    # NOTE(review): the configuration value is passed through eval().
    # This is only acceptable as long as the configuration files are
    # trusted - consider a literal-only parser
    foamMPIVersions=eval(config().get("ClusterJob","useFoamMPI",default='[]'))
    if not useFoamMPI and foamVersion not in foamMPIVersions:
        # Prepend the cluster-specific paths from the configuration
        self.message("Adding Cluster-specific paths")
        os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
        os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

    self.isDecomposed=isDecomposed
129
131 """Return a string with the full job-ID"""
132 result=str(self.jobID)
133 if self.arrayJob:
134 result+=":"+str(self.taskID)
135 return result
136
138 print "=== CLUSTERJOB: ",
139 for t in txt:
140 print t,
141 print " ==="
142 sys.stdout.flush()
143
150
152 """The file with the job information"""
153 jobfile="%s.%d" % (self.jobName,self.jobID)
154 if self.arrayJob:
155 jobfile+=".%d" % self.taskID
156 jobfile+=".pyFoam.clusterjob"
157 jobfile=path.join(path.dirname(self.basename),jobfile)
158
159 return jobfile
160
162 """The file that makes the job write a checkpoint"""
163 return self.jobFile()+".checkpoint"
164
166 """The file that makes the job write a checkpoint and end"""
167 return self.jobFile()+".stop"
168
233
235 """Returns the actual directory of the case
236 To be overridden if appropriate"""
237 if self.arrayJob:
238 return "%s.%05d" % (self.basename,self.taskID)
239 else:
240 return self.basename
241
243 """Returns just the name of the case"""
244 return path.basename(self.casedir())
245
def foamRun(self,application,
            args=[],
            foamArgs=[],
            steady=False,
            multiRegion=None,
            progress=False,
            noLog=False):
    """Runs a foam utility on the case.

    The argument list for the Runner-object is assembled from the state
    of the job. If the job is marked as decomposed and has more than one
    CPU the processor count and the machinefile are added to it.

    @param application: the Foam-Application that is to be run
    @param args: A list with additional arguments for the Runner-object
    @param foamArgs: A list with the additional arguments for the
    Foam-Application
    @param steady: Use the steady-runner
    @param multiRegion: Run this on multiple regions (None means no
    preference: the setting of the job decides)
    @param progress: Only output the time and nothing else
    @param noLog: Do not generate a logfile"""

    # Copy so that the list of the caller is never modified
    runnerArgs=list(args)
    runnerArgs.append("--job-id=%s" % self.fullJobId())

    if self.isDecomposed and self.nproc>1:
        runnerArgs.append("--procnr=%d" % self.nproc)
        runnerArgs.append("--machinefile=%s" % self.hostfile)

    if progress:
        runnerArgs.append("--progress")
    if noLog:
        runnerArgs.append("--no-log")

    if self.multiRegion:
        if multiRegion in (None,True):
            runnerArgs.append("--all-regions")
    elif multiRegion and not self.multiRegion:
        warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

    if self.restarted:
        runnerArgs.append("--restart")

    runnerArgs.append(application)
    # The way the case is specified depends on the calling convention
    # reported by oldApp()
    if oldApp():
        runnerArgs.extend([".",self.casename()])
    else:
        runnerArgs.extend(["-case",self.casename()])

    runnerArgs.extend(foamArgs)

    self.message("Executing",runnerArgs)

    if steady:
        self.message("Running Steady")
        launcher=SteadyRunner
    else:
        launcher=Runner

    launcher(args=runnerArgs)
302
304 """Automatically decomposes the grid with a metis-algorithm"""
305
306 if path.isdir(path.join(self.casedir(),"processor0")):
307 warning("A processor directory already exists. There might be a problem")
308 args=["--method=metis",
309 "--clear",
310 self.casename(),
311 self.nproc,
312 "--job-id=%s" % self.fullJobId()]
313
314 if self.multiRegion:
315 args.append("--all-regions")
316
317 deco=Decomposer(args=args)
318
320 """Default reconstruction of a parallel run"""
321
322 if self.doAutoReconstruct:
323 self.isDecomposed=False
324
325 self.foamRun("reconstructPar",
326 args=["--logname=ReconstructPar"])
327 else:
328 self.message("No reconstruction (because asked to)")
329
330 - def setup(self,parameters):
331 """Set up the job. Called in the beginning if the
332 job has not been restarted
333
334 Usual tasks include grid conversion/setup, mesh decomposition etc
335
336 @param parameters: a dictionary with parameters"""
337
338 pass
339
def postDecomposeSetup(self,parameters):
    """Hook for additional setup on the decomposed grid. Does nothing
    by default.

    Intended for tasks that can be done when the grid is already
    decomposed

    @param parameters: a dictionary with parameters"""
348
def run(self,parameters):
    """Hook for the actual job (usually the solver). Does nothing by
    default.

    @param parameters: a dictionary with parameters"""
354
356 """Additional cleanup, to be executed when the grid is still decomposed
357
358 Usually for tasks that can be done on a decomposed grid
359
360 @param parameters: a dictionary with parameters"""
361
362 pass
363
365 """Clean up after a job
366 @param parameters: a dictionary with parameters"""
367
368 pass
369
371 """Additional reconstruction of parallel runs (Stuff that the
372 OpenFOAM-reconstructPar doesn't do
373 @param parameters: a dictionary with parameters"""
374
375 pass
376
378 """Parameters for a specific task
379 @param id: the id of the task
380 @return: a dictionary with parameters for this task"""
381
382 error("taskParameter not implemented. Not a parameterized job")
383
384 return {}
385
396
398 if self.listenToTimer:
399 self.ordinaryEnd=False
400 f=open(path.join(self.basename,"stop"),"w")
401 f.write("Geh z'haus")
402 f.close()
403 unlink(self.stopFile())
404 else:
405 warning("I'm not listening to your callbacks")
406
408 """A Cluster-Job that executes a solver. It implements the run-function.
409 If a template-case is specified, the case is copied"""
410
def __init__(self,basename,solver,
             template=None,
             cloneParameters=[],
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             steady=False,
             multiRegion=False,
             progress=False,
             solverProgress=False,
             solverNoLog=False,
             isDecomposed=False):
    """Initializes the job and, if a template is given and the job is
    not restarted, clones the template to the case directory

    basename, arrayJob, hardRestart, autoParallel, doAutoReconstruct,
    foamVersion, compileOption, useFoamMPI, multiRegion and isDecomposed
    are passed on unchanged to ClusterJob.__init__

    @param solver: the solver; stored as self.solver
    @param template: Name of the template-case. It is assumed that
    it resides in the same directory as the actual case
    @param cloneParameters: a list with additional parameters for the
    CloneCase-object that copies the template. The list is not modified
    @param steady: stored as self.steady
    @param progress: not used in this method
    @param solverProgress: Only writes the current time of the solver
    @param solverNoLog: stored as self.solverNoLog"""

    ClusterJob.__init__(self,basename,
                        arrayJob=arrayJob,
                        hardRestart=hardRestart,
                        autoParallel=autoParallel,
                        doAutoReconstruct=doAutoReconstruct,
                        foamVersion=foamVersion,
                        compileOption=compileOption,
                        useFoamMPI=useFoamMPI,
                        multiRegion=multiRegion,
                        isDecomposed=isDecomposed)
    self.solver=solver
    self.steady=steady
    if template is not None and not self.restarted:
        template=path.join(path.dirname(self.casedir()),template)
        if path.abspath(basename)==path.abspath(template):
            error("The basename",basename,"and the template",template,"are the same directory")
        # Work on a copy: appending to cloneParameters itself would
        # modify the list of the caller and the shared default list, so
        # that "--parallel" piles up from one job-object to the next
        cloneArgs=list(cloneParameters)
        if isDecomposed:
            cloneArgs.append("--parallel")
        CloneCase(
            args=cloneArgs+[template,self.casedir(),"--follow-symlinks"])
    self.solverProgress=solverProgress
    self.solverNoLog=solverNoLog
455
def run(self,parameters):
    """Runs the solver of this job via foamRun

    The solver is never forced to run multi-region here
    (multiRegion=False is passed)

    @param parameters: a dictionary with parameters (not used here)"""

    solverOptions={"steady":self.steady,
                   "multiRegion":False,
                   "progress":self.solverProgress,
                   "noLog":self.solverNoLog}
    self.foamRun(self.solver,**solverOptions)
462