1
2 """Encapsulates all necessary things for a cluster-job, like setting up, running, restarting"""
3
4 import os,sys
5 from os import path,unlink
6 from threading import Thread,Lock,Timer
7
8 from PyFoam.Applications.Decomposer import Decomposer
9 from PyFoam.Applications.Runner import Runner
10 from PyFoam.Applications.SteadyRunner import SteadyRunner
11 from PyFoam.Applications.CloneCase import CloneCase
12 from PyFoam.FoamInformation import changeFoamVersion
13 from PyFoam.Error import error,warning
14 from PyFoam import configuration as config
15 from PyFoam.FoamInformation import oldAppConvention as oldApp
16
30
31
33 """ All Cluster-jobs are to be derived from this base-class
34
35 The actual jobs are implemented by overriding methods
36
37 There is a number of variables in this class that are used to
38 'communicate' information between the various stages"""
39
def __init__(self,basename,
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             foamVersion=None,
             useFoamMPI=False,
             multiRegion=False):
    """Initializes the Job

    Reads the SGE environment (JOB_ID, JOB_NAME, RESTARTED, NSLOTS, TMP
    and - for array jobs - SGE_TASK_ID), switches to the requested
    OpenFOAM-version and initializes the attributes that the other
    stages of the job use to exchange information

    @param basename: Basis name of the job
    @param arrayJob: this job is a parameter variation. The tasks
    are identified by their task-id
    @param hardRestart: treat the job as restarted
    @param autoParallel: Parallelization is handled by the base-class
    @param foamVersion: The foam-Version that is to be used. If None
    the entry "Version" in the section "OpenFOAM" of the configuration
    is used
    @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
    @param multiRegion: This job consists of multiple regions"""

    # 'in' works on Python 2 and 3 (dict.has_key is gone in Python 3)
    if "JOB_ID" not in os.environ:
        error("Not an SGE-job. Environment variable JOB_ID is missing")
    self.jobID=int(os.environ["JOB_ID"])
    self.jobName=os.environ["JOB_NAME"]

    self.basename=path.join(path.abspath(path.curdir),basename)

    # a non-zero RESTARTED marks the job as restarted, as does hardRestart
    sgeRestarted=int(os.environ.get("RESTARTED","0"))!=0
    self.restarted=bool(sgeRestarted or hardRestart)

    if foamVersion is None:
        foamVersion=config().get("OpenFOAM","Version")

    changeFoamVersion(foamVersion)

    if "WM_PROJECT_VERSION" not in os.environ:
        error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

    self.autoParallel=autoParallel
    self.multiRegion=multiRegion

    # defaults for a serial run; overwritten below if NSLOTS is set
    self.hostfile=None
    self.nproc=1

    if "NSLOTS" in os.environ:
        self.nproc=int(os.environ["NSLOTS"])
        self.message("Running on",self.nproc,"CPUs")
        if self.nproc>1:
            self.hostfile=path.join(os.environ["TMP"],"machines")
            self.message("Using the machinefile",self.hostfile)
            # close the file explicitly instead of leaking the handle
            machines=open(self.hostfile)
            try:
                hosts=machines.readlines()
            finally:
                machines.close()
            self.message("Contents of the machinefile:",hosts)

    self.ordinaryEnd=True
    self.listenToTimer=False

    self.taskID=None
    self.arrayJob=arrayJob

    if self.arrayJob:
        self.taskID=int(os.environ["SGE_TASK_ID"])

    # NOTE(review): eval() executes whatever text is stored in the
    # configuration entry. Only acceptable as long as the configuration
    # files are trusted - consider ast.literal_eval
    foamMPIVersions=[]
    if not useFoamMPI:
        foamMPIVersions=eval(config().get("ClusterJob","useFoamMPI",default='[]'))

    if not useFoamMPI and foamVersion not in foamMPIVersions:
        # prepend the cluster-specific paths
        self.message("Adding Cluster-specific paths")
        os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
        ldPath=config().get("ClusterJob","ldpath")
        # LD_LIBRARY_PATH may be unset: don't fail and don't add an
        # empty entry (which the loader treats as the current directory)
        oldLdPath=os.environ.get("LD_LIBRARY_PATH")
        if oldLdPath:
            ldPath+=":"+oldLdPath
        os.environ["LD_LIBRARY_PATH"]=ldPath

    self.isDecomposed=False
114
116 print "=== CLUSTERJOB: ",
117 for t in txt:
118 print t,
119 print " ==="
120 sys.stdout.flush()
121
123 self.message("Setting Job state to",txt)
124 fName=path.join(self.casedir(),"ClusterJobState")
125 f=open(fName,"w")
126 f.write(txt+"\n")
127 f.close()
128
130 """The file with the job information"""
131 jobfile="%s.%d" % (self.jobName,self.jobID)
132 if self.arrayJob:
133 jobfile+=".%d" % self.taskID
134 jobfile+=".pyFoam.clusterjob"
135 jobfile=path.join(path.dirname(self.basename),jobfile)
136
137 return jobfile
138
140 """The file that makes the job write a checkpoint"""
141 return self.jobFile()+".checkpoint"
142
144 """The file that makes the job write a checkpoint and end"""
145 return self.jobFile()+".stop"
146
213
215 """Returns the actual directory of the case
216 To be overridden if appropriate"""
217 if self.arrayJob:
218 return "%s.%05d" % (self.basename,self.taskID)
219 else:
220 return self.basename
221
223 """Returns just the name of the case"""
224 return path.basename(self.casedir())
225
def foamRun(self,application,
            args=[],
            foamArgs=[],
            steady=False,
            multiRegion=None,
            progress=False,
            noLog=False):
    """Runs a foam utility on the case.
    If it is a parallel job and the grid has
    already been decomposed (and not yet reconstructed) it is run in
    parallel
    @param application: the Foam-Application that is to be run
    @param foamArgs: A sequence with the additional arguments for the
    Foam-Application
    @param args: A sequence with additional arguments for the Runner-object
    @param steady: Use the steady-runner
    @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)
    @param progress: Only output the time and nothing else
    @param noLog: Do not generate a logfile"""

    # Work on a copy: neither the caller's sequence nor the shared
    # default is modified, and tuples are accepted as well
    arglist=list(args)

    if self.isDecomposed and self.nproc>1:
        arglist+=["--procnr=%d" % self.nproc,
                  "--machinefile=%s" % self.hostfile]
    if progress:
        arglist.append("--progress")
    if noLog:
        arglist.append("--no-log")

    if self.multiRegion:
        # an explicit False suppresses the multi-region run
        if multiRegion in (None,True):
            arglist.append("--all-regions")
    elif multiRegion:
        warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

    if self.restarted:
        arglist.append("--restart")

    arglist.append(application)
    # the old calling convention expects "<root> <case>" instead of "-case <case>"
    if oldApp():
        arglist+=[".",self.casename()]
    else:
        arglist+=["-case",self.casename()]

    arglist.extend(foamArgs)

    self.message("Executing",arglist)

    # the runner objects are only constructed here; the result is not kept
    if steady:
        self.message("Running Steady")
        SteadyRunner(args=arglist)
    else:
        Runner(args=arglist)
281 """Automatically decomposes the grid with a metis-algorithm"""
282
283 if path.isdir(path.join(self.casedir(),"processor0")):
284 warning("A processor directory already exists. There might be a problem")
285 args=["--method=metis",
286 "--clear",
287 self.casename(),
288 self.nproc]
289
290 if self.multiRegion:
291 args.append("--all-regions")
292
293 deco=Decomposer(args=args)
294
296 """Default reconstruction of a parallel run"""
297
298 self.foamRun("reconstructPar",
299 args=["--logname=ReconstructPar"])
300
301 - def setup(self,parameters):
302 """Set up the job. Called in the beginning if the
303 job has not been restarted
304
305 Usual tasks include grid conversion/setup, mesh decomposition etc
306
307 @param parameters: a dictionary with parameters"""
308
309 pass
310
def postDecomposeSetup(self,parameters):
    """Hook for additional setup that is executed when the grid is
    already decomposed

    Intended for tasks that can be done on a decomposed grid

    This implementation does nothing

    @param parameters: a dictionary with parameters"""

    return None
319
def run(self,parameters):
    """Hook for the actual job. Usually the solver.

    This implementation does nothing

    @param parameters: a dictionary with parameters"""

    return None
325
327 """Additional cleanup, to be executed when the grid is still decomposed
328
329 Usually for tasks that can be done on a decomposed grid
330
331 @param parameters: a dictionary with parameters"""
332
333 pass
334
336 """Clean up after a job
337 @param parameters: a dictionary with parameters"""
338
339 pass
340
342 """Additional reconstruction of parallel runs (Stuff that the
343 OpenFOAM-reconstructPar doesn't do
344 @param parameters: a dictionary with parameters"""
345
346 pass
347
349 """Parameters for a specific task
350 @param id: the id of the task
351 @return: a dictionary with parameters for this task"""
352
353 error("taskParameter not implemented. Not a parameterized job")
354
355 return {}
356
367
369 if self.listenToTimer:
370 self.ordinaryEnd=False
371 f=open(path.join(self.basename,"stop"),"w")
372 f.write("Geh z'haus")
373 f.close()
374 unlink(self.stopFile())
375 else:
376 warning("I'm not listening to your callbacks")
377
379 """A Cluster-Job that executes a solver. It implements the run-function.
380 If a template-case is specified, the case is copied"""
381
def __init__(self,basename,solver,
             template=None,
             cloneParameters=[],
             arrayJob=False,
             hardRestart=False,
             autoParallel=True,
             foamVersion=None,
             useFoamMPI=False,
             steady=False,
             multiRegion=False,
             progress=False,
             solverProgress=False,
             solverNoLog=False):
    """Initializes the job and - if a template is given and the job
    has not been restarted - clones the template to the case directory

    The parameters basename, arrayJob, hardRestart, autoParallel,
    foamVersion, useFoamMPI and multiRegion are passed on unchanged
    to ClusterJob.__init__

    @param solver: name of the solver that run() executes
    @param template: Name of the template-case. It is assumed that
    it resides in the same directory as the actual case
    @param cloneParameters: a sequence with additional parameters for the
    CloneCase-object that copies the template
    @param steady: run the solver with the steady-runner
    @param progress: accepted, but not used by this constructor
    @param solverProgress: Only writes the current time of the solver
    @param solverNoLog: the solver does not generate a logfile"""

    ClusterJob.__init__(self,basename,
                        arrayJob=arrayJob,
                        hardRestart=hardRestart,
                        autoParallel=autoParallel,
                        foamVersion=foamVersion,
                        useFoamMPI=useFoamMPI,
                        multiRegion=multiRegion)
    self.solver=solver
    self.steady=steady

    # a restarted job already has its case: never clone over it
    if template is not None and not self.restarted:
        template=path.join(path.dirname(self.casedir()),template)
        if path.abspath(basename)==path.abspath(template):
            error("The basename",basename,"and the template",template,"are the same directory")
        # list() makes a copy and lets callers pass a tuple as well;
        # constructing the CloneCase-object is all that is needed here
        CloneCase(
            args=list(cloneParameters)+[template,self.casedir(),"--follow-symlinks"])

    self.solverProgress=solverProgress
    self.solverNoLog=solverNoLog
418
def run(self,parameters):
    """Executes the configured solver via foamRun

    The solver is never run multi-region; steady-mode, progress-output
    and logfile-suppression are taken from the attributes set in the
    constructor

    @param parameters: a dictionary with parameters (not used here)"""

    runOptions={"steady":self.steady,
                "multiRegion":False,
                "progress":self.solverProgress,
                "noLog":self.solverNoLog}
    self.foamRun(self.solver,**runOptions)
425