1
2 """Encapsulates all necessary things for a cluster-job, like setting
3 up, running, restarting"""
4
5 import os,sys,subprocess
6 from os import path,unlink
7 from threading import Thread,Lock,Timer
8
9 from PyFoam.Applications.Decomposer import Decomposer
10 from PyFoam.Applications.Runner import Runner
11 from PyFoam.Applications.SteadyRunner import SteadyRunner
12 from PyFoam.Applications.CloneCase import CloneCase
13 from PyFoam.Applications.FromTemplate import FromTemplate
14
15 from PyFoam.FoamInformation import changeFoamVersion
16 from PyFoam.FoamInformation import foamVersion as getFoamVersion
17 from PyFoam.Error import error,warning
18 from PyFoam import configuration as config
19 from PyFoam.FoamInformation import oldAppConvention as oldApp
20 from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
21
22 from PyFoam.ThirdParty.six import print_,iteritems
23
37
38
40 """ All Cluster-jobs are to be derived from this base-class
41
42 The actual jobs are implemented by overriding methods
43
44 There is a number of variables in this class that are used to
45 'communicate' information between the various stages"""
46
def __init__(self,
             basename,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             multiRegion=False,
             parameters=None,
             isDecomposed=False):
    """Initializes the Job

    Reads the scheduler environment (JOB_ID, JOB_NAME, RESTARTED,
    NSLOTS, TMP and - for array jobs - SGE_TASK_ID), selects the
    OpenFOAM-version and stores the settings that the later stages
    of the job use

    @param basename: Basis name of the job
    @param arrayJob: this job is a parameter variation. The tasks
    are identified by their task-id
    @param hardRestart: treat the job as restarted
    @param autoParallel: Parallelization is handled by the base-class
    @param doAutoReconstruct: Automatically reconstruct the case if
    autoParallel is set. If the value is None then it is looked up from
    the configuration
    @param foamVersion: The foam-Version that is to be used. If None
    the value is looked up from the configuration
    @param compileOption: Forces compile-option (usually 'Opt' or 'Debug')
    @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
    @param multiRegion: This job consists of multiple regions
    @param parameters: Dictionary with parameters that are being passed
    to the Runner. If None a fresh empty dictionary is used
    @param isDecomposed: Assume that the job is already decomposed"""

    # error() is presumably fatal (the next line needs JOB_ID anyway)
    if "JOB_ID" not in os.environ:
        error("Not an SGE-job. Environment variable JOB_ID is missing")
    self.jobID=int(os.environ["JOB_ID"])
    self.jobName=os.environ["JOB_NAME"]

    self.basename=path.join(path.abspath(path.curdir),basename)

    sgeRestarted=False
    if "RESTARTED" in os.environ:
        sgeRestarted=(int(os.environ["RESTARTED"])!=0)

    # Restarted if either the scheduler or the caller says so
    self.restarted=bool(sgeRestarted or hardRestart)

    if foamVersion is None:
        foamVersion=config().get("OpenFOAM","Version")

    changeFoamVersion(foamVersion,compileOption=compileOption)

    if "WM_PROJECT_VERSION" not in os.environ:
        error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

    self.autoParallel=autoParallel

    self.doAutoReconstruct=doAutoReconstruct
    if self.doAutoReconstruct is None:
        self.doAutoReconstruct=config().getboolean("ClusterJob","doAutoReconstruct")

    self.multiRegion=multiRegion

    # A dictionary as the default value would be shared between all
    # instances that do not pass their own. A dictionary passed by
    # the caller is still stored by reference
    if parameters is None:
        parameters={}
    self.parameters=parameters

    self.hostfile=None
    self.nproc=1

    if "NSLOTS" in os.environ:
        self.nproc=int(os.environ["NSLOTS"])
        self.message("Running on",self.nproc,"CPUs")
        if self.nproc>1:
            self.hostfile=path.join(os.environ["TMP"],"machines")
            if config().getboolean("ClusterJob","useMachineFile"):
                self.message("Using the machinefile",self.hostfile)
                # Close the file right after reading it
                with open(self.hostfile) as machines:
                    machineLines=machines.readlines()
                self.message("Contents of the machinefile:",machineLines)
            else:
                self.message("No machinefile used because switched off with 'useMachineFile'")

    self.ordinaryEnd=True
    self.listenToTimer=False

    self.taskID=None
    self.arrayJob=arrayJob

    if self.arrayJob:
        self.taskID=int(os.environ["SGE_TASK_ID"])

    # NOTE(review): eval() executes whatever the configuration holds
    # for 'useFoamMPI'. Acceptable only as long as the configuration
    # files are trusted - consider ast.literal_eval
    foamMPIVersions=eval(config().get("ClusterJob","useFoamMPI",default='[]'))
    if not useFoamMPI and foamVersion not in foamMPIVersions:
        self.message("Adding Cluster-specific paths")
        os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
        # LD_LIBRARY_PATH is not necessarily set: only append the old
        # value if there is one
        clusterLdPath=config().get("ClusterJob","ldpath")
        oldLdPath=os.environ.get("LD_LIBRARY_PATH")
        if oldLdPath:
            clusterLdPath+=":"+oldLdPath
        os.environ["LD_LIBRARY_PATH"]=clusterLdPath

    self.isDecomposed=isDecomposed
142
144 """Return a string with the full job-ID"""
145 result=str(self.jobID)
146 if self.arrayJob:
147 result+=":"+str(self.taskID)
148 return result
149
156
163
165 """The file with the job information"""
166 jobfile="%s.%d" % (self.jobName,self.jobID)
167 if self.arrayJob:
168 jobfile+=".%d" % self.taskID
169 jobfile+=".pyFoam.clusterjob"
170 jobfile=path.join(path.dirname(self.basename),jobfile)
171
172 return jobfile
173
175 """The file that makes the job write a checkpoint"""
176 return self.jobFile()+".checkpoint"
177
179 """The file that makes the job write a checkpoint and end"""
180 return self.jobFile()+".stop"
181
249
251 """Returns the actual directory of the case
252 To be overridden if appropriate"""
253 if self.arrayJob:
254 return "%s.%05d" % (self.basename,self.taskID)
255 else:
256 return self.basename
257
259 """Returns just the name of the case"""
260 return path.basename(self.casedir())
261
263 """Execute a shell command in the case directory. No checking done
264 @param cmd: the command as a string"""
265 oldDir=os.getcwd()
266 self.message("Changing directory to",self.casedir())
267 os.chdir(self.casedir())
268 self.message("Executing",cmd)
269 try:
270 retcode = subprocess.call(cmd,shell=True)
271 if retcode < 0:
272 self.message(cmd,"was terminated by signal", -retcode)
273 else:
274 self.message(cmd,"returned", retcode)
275 except OSError:
276 e = sys.exc_info()[1]
277 self.message(cmd,"Execution failed:", e)
278
279 self.message("Executiong of",cmd,"ended")
280 self.message("Changing directory back to",oldDir)
281 os.chdir(oldDir)
282
284 """Looks for a template file and evaluates the template using
285 the usual parameters
286 @param fileName: the name of the file that will be
287 constructed. The template file is the same plus the extension '.template'"""
288
289 self.message("Building file",fileName,"from template with parameters",
290 self.parameters)
291
292 argList=["--output-file=%s" % path.join(self.casedir(),fileName),
293 "--dump-used-values"
294 ]
295
296 tmpl=FromTemplate(args=argList,
297 parameters=self.parameters)
298
def foamRun(self,application,
            args=[],
            foamArgs=[],
            steady=False,
            multiRegion=True,
            progress=False,
            compress=False,
            noLog=False):
    """Assembles the argument list for a Runner-object and
    executes a Foam-application on the case with it.
    If the case is marked as decomposed and there is more than one
    CPU the options for a parallel run are added
    @param application: the Foam-Application that is to be run
    @param args: A list with additional arguments for the Runner-object
    (the list itself is not modified)
    @param foamArgs: A list with the additional arguments for the
    Foam-Application
    @param steady: Use the SteadyRunner instead of the Runner
    @param multiRegion: Run this on all regions. Only has an effect if
    the job is a multi-region job. Otherwise a warning is issued
    @param progress: Only output the time and nothing else
    @param compress: Compress the log-file
    @param noLog: Do not generate a logfile"""

    # Work on a copy so that the list of the caller stays untouched
    runnerArgs=args[:]+["--job-id=%s" % self.fullJobId()]
    runnerArgs.extend("--parameter=%s:%s" % (str(name),str(value))
                      for name,value in iteritems(self.parameters))

    # Options for the parallel run
    if self.isDecomposed:
        if self.nproc>1:
            runnerArgs.append("--procnr=%d" % self.nproc)
            if config().getboolean("ClusterJob","useMachineFile"):
                runnerArgs.append("--machinefile=%s" % self.hostfile)

    runnerArgs.append("--echo-command-prefix='=== Executing'")

    # Simple switches that are passed on to the Runner
    for switchedOn,option in ((progress,"--progress"),
                              (noLog,"--no-log"),
                              (compress,"--compress")):
        if switchedOn:
            runnerArgs.append(option)

    if multiRegion:
        if self.multiRegion:
            runnerArgs.append("--all-regions")
        else:
            warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

    if self.restarted:
        runnerArgs.append("--restart")

    # The way the case is specified depends on the calling convention
    runnerArgs.append(application)
    if oldApp():
        caseSpec=[".",self.casename()]
    else:
        caseSpec=["-case",self.casename()]
    runnerArgs+=caseSpec

    runnerArgs+=foamArgs

    self.message("Executing",runnerArgs)

    if steady:
        self.message("Running Steady")
        runnerClass=SteadyRunner
    else:
        runnerClass=Runner
    # Constructing the object is what starts the run
    runner=runnerClass(args=runnerArgs)
364
366 """Automatically decomposes the grid with a metis-algorithm"""
367
368 if path.isdir(path.join(self.casedir(),"processor0")):
369 warning("A processor directory already exists. There might be a problem")
370
371 defaultMethod="metis"
372
373 if getFoamVersion()>=(1,6):
374 defaultMethod="scotch"
375
376 args=["--method="+defaultMethod,
377 "--clear",
378 self.casename(),
379 self.nproc,
380 "--job-id=%s" % self.fullJobId()]
381
382 if self.multiRegion:
383 args.append("--all-regions")
384
385 deco=Decomposer(args=args)
386
388 """Default reconstruction of a parallel run"""
389
390 if self.doAutoReconstruct:
391 self.isDecomposed=False
392
393 self.foamRun("reconstructPar",
394 args=["--logname=ReconstructPar"])
395 else:
396 self.message("No reconstruction (because asked to)")
397
398 - def setup(self,parameters):
399 """Set up the job. Called in the beginning if the
400 job has not been restarted
401
402 Usual tasks include grid conversion/setup, mesh decomposition etc
403
404 @param parameters: a dictionary with parameters"""
405
406 pass
407
def postDecomposeSetup(self,parameters):
    """Hook for additional setup that is executed when the grid is
    already decomposed

    Meant for tasks that can be done on a decomposed grid

    The default implementation does nothing

    @param parameters: a dictionary with parameters"""

    return None
416
def run(self,parameters):
    """Hook for the actual job. Usually this runs the solver.

    The default implementation does nothing

    @param parameters: a dictionary with parameters"""

    return None
422
424 """Additional cleanup, to be executed when the grid is still decomposed
425
426 Usually for tasks that can be done on a decomposed grid
427
428 @param parameters: a dictionary with parameters"""
429
430 pass
431
433 """Clean up after a job
434 @param parameters: a dictionary with parameters"""
435
436 pass
437
439 """Additional reconstruction of parallel runs (Stuff that the
440 OpenFOAM-reconstructPar doesn't do
441 @param parameters: a dictionary with parameters"""
442
443 pass
444
446 """Parameters for a specific task
447 @param id: the id of the task
448 @return: a dictionary with parameters for this task"""
449
450 error("taskParameter not implemented. Not a parameterized job")
451
452 return {}
453
455 """Additional parameters
456 @return: a dictionary with parameters for this task"""
457
458 warning("Method 'additionalParameters' not implemented. Not a problem. Just saying")
459
460 return {}
461
472
474 if self.listenToTimer:
475 self.ordinaryEnd=False
476 f=open(path.join(self.basename,"stop"),"w")
477 f.write("Geh z'haus")
478 f.close()
479 unlink(self.stopFile())
480 else:
481 warning("I'm not listening to your callbacks")
482
484 """A Cluster-Job that executes a solver. It implements the run-function.
485 If a template-case is specified, the case is copied"""
486
def __init__(self,basename,solver,
             template=None,
             cloneParameters=None,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             doAutoReconstruct=None,
             foamVersion=None,
             compileOption=None,
             useFoamMPI=False,
             steady=False,
             multiRegion=False,
             parameters=None,
             progress=False,
             solverProgress=False,
             solverNoLog=False,
             solverLogCompress=False,
             isDecomposed=False):
    """Initializes the job and - if a template is given and the job
    has not been restarted - clones the template-case to the
    case directory

    The parameters that are not listed here are passed on unchanged
    to the constructor of ClusterJob

    @param solver: the solver that is executed by run()
    @param template: Name of the template-case. It is assumed that
    it resides in the same directory as the actual case
    @param cloneParameters: a list with additional parameters for the
    CloneCase-object that copies the template. The list is not
    modified. None means no additional parameters
    @param steady: run the solver with the steady-runner
    @param parameters: Dictionary with parameters. None means a fresh
    empty dictionary
    @param progress: currently not used by this constructor
    @param solverProgress: Only writes the current time of the solver
    @param solverNoLog: the solver generates no logfile
    @param solverLogCompress: the logfile of the solver is compressed"""

    # Never hand None to the base class: it stores the dictionary as is
    # and other methods iterate over it
    if parameters is None:
        parameters={}

    ClusterJob.__init__(self,basename,
                        arrayJob=arrayJob,
                        hardRestart=hardRestart,
                        autoParallel=autoParallel,
                        doAutoReconstruct=doAutoReconstruct,
                        foamVersion=foamVersion,
                        compileOption=compileOption,
                        useFoamMPI=useFoamMPI,
                        multiRegion=multiRegion,
                        parameters=parameters,
                        isDecomposed=isDecomposed)
    self.solver=solver
    self.steady=steady
    if template is not None and not self.restarted:
        template=path.join(path.dirname(self.casedir()),template)
        if path.abspath(basename)==path.abspath(template):
            error("The basename",basename,"and the template",template,"are the same directory")
        # Build a private list. Appending to the argument in place
        # would change the list of the caller (or a shared default)
        # and accumulate "--parallel" over several jobs
        cloneArgs=list(cloneParameters) if cloneParameters else []
        if isDecomposed:
            cloneArgs.append("--parallel")
        # Constructing the object is what copies the case
        CloneCase(
            args=cloneArgs+[template,self.casedir(),"--follow-symlinks"])
    self.solverProgress=solverProgress
    self.solverNoLog=solverNoLog
    self.solverLogCompress=solverLogCompress
535
def run(self,parameters):
    """Runs the solver of this job via foamRun with the
    solver-specific settings that were stored by the constructor.
    The run is never done multi-region
    @param parameters: a dictionary with parameters (not used here)"""

    solverOptions=dict(steady=self.steady,
                       multiRegion=False,
                       progress=self.solverProgress,
                       noLog=self.solverNoLog,
                       compress=self.solverLogCompress)
    self.foamRun(self.solver,**solverOptions)
543
544
545