Package PyFoam :: Package Basics :: Module SpreadsheetData
[hide private]
[frames] | no frames]

Source Code for Module PyFoam.Basics.SpreadsheetData

  1  #  ICE Revision: $Id: $  
  2  """ 
  3  Data that can go into a spreadsheet (title line and rectangular data) 
  4  """ 
  5   
  6  import numpy,copy 
  7   
  8  from PyFoam.Error import error,FatalErrorPyFoamException,warning 
  9   
class WrongDataSize(FatalErrorPyFoamException):
    """Raised when two data sets that should have the same number of rows
    have arrays of different sizes"""

    def __init__(self):
        # The message is fixed; callers raise this without arguments
        FatalErrorPyFoamException.__init__(self,"Size of the arrays differs")
13
class SpreadsheetData(object):
    """
    Collects data that could go into a spreadsheet. The focus of this class is on
    storing all the data at once

    The rows are kept in C{self.data}, a numpy structured array with one
    'f8' field per column. Unless a column name is passed explicitly the
    first column is used as the time.
    """

    def __init__(self,
                 csvName=None,
                 txtName=None,
                 data=None,
                 names=None,
                 title=None):
        """Either this is constructed from a file or from the data and the column headers

        @param csvName: name of the CSV-file the data should be constructed from,
        @param txtName: name of a file the data should be constructed from,
        @param data: the actual data to use (a sequence of rows)
        @param names: the names for the column header
        @param title: a name that is used to make unique header names. If
        given it is prepended (separated by a blank) to every column name
        except the first one"""

        self.title=title

        # Explicit None/length test: the truth value of a numpy array with
        # more than one element is ambiguous
        if csvName and data is not None and len(data)>0:
            error("SpreadsheetData is either constructed from data or from a file")

        if csvName:
            data,names=self._readTable(csvName,True)
        elif txtName:
            data,names=self._readTable(txtName,False)
        elif data is not None and names is None:
            error("No names given for the data")

        names=list(names)
        if self.title is not None:
            # the first column (usually the time) keeps its name
            names=[names[0]]+[self.title+" "+n for n in names[1:]]

        # build real lists: numpy accepts neither iterators as data nor as
        # dtype-specification. Everything is stored as float
        self.data=numpy.array([tuple(row) for row in data],
                              dtype=[(n,'f8') for n in names])

    def _readTable(self,fName,isCSV):
        """Read rows and column names from a file
        @param fName: name of the file
        @param isCSV: if True the file is comma-separated and the first line
        holds the names. Otherwise it is whitespace-separated and the first
        token of the first line is dropped from the names
        @return: a tuple (rows,names) where rows is a list of tuples"""
        try:
            if isCSV:
                rec=numpy.recfromcsv(fName)
            else:
                rec=numpy.recfromtxt(fName,names=True)
            # NOTE(review): a file with a single data row presumably yields a
            # 0-d record that can not be iterated - confirm
            rec=numpy.atleast_1d(rec)
            rows=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # for numpy-versions that do not offer recfromcsv/recfromtxt
            with open(fName) as f:
                header=f.readline().strip()
            if isCSV:
                names=header.split(',')
                raw=numpy.loadtxt(fName,delimiter=',',skiprows=1)
            else:
                names=header.split()[1:]
                raw=numpy.loadtxt(fName)
            # loadtxt squeezes single rows/columns to 1-d: restore the table shape
            rows=[tuple(r) for r in raw.reshape(-1,len(names))]

        return rows,names

    def _timeWeight(self,x,i):
        """Weight of sample i for time-weighted averages: half the distance
        to the previous plus half the distance to the next sample (if present)
        @param x: the time values
        @param i: index of the sample"""
        weight=0
        if i>0:
            weight+=(x[i]-x[i-1])/2
        if i<(len(x)-1):
            weight+=(x[i+1]-x[i])/2
        return weight

    def names(self):
        """Return a copy of the column names (the field names of the data array)"""
        return copy.copy(self.data.dtype.names)

    def size(self):
        """Return the size of the data array"""
        return self.data.size

    def writeCSV(self,fName,
                 delimiter=","):
        """Write data to a CSV-file
        @param fName: Name of the file
        @param delimiter: Delimiter to be used in the CSV-file"""

        # the with-statement makes sure the file is closed (and flushed)
        with open(fName,"w") as f:
            f.write(delimiter.join(self.names())+"\n")
            numpy.savetxt(f,self.data,delimiter=delimiter)

    def tRange(self,time=None):
        """Return the range of times as a tuple (first value,last value)
        @param time: name of the time. If None the first column is used"""
        if time is None:
            time=self.names()[0]
        t=self.data[time]

        return (t[0],t[-1])

    def join(self,other,time=None,prefix=None):
        """Join this object with another. Assume that they have the same
        amount of rows and that they have one column that designates the
        time and is called the same and has the same values
        @param other: the other array
        @param time: name of the time. If None the first column is used
        @param prefix: String that is added to the other names. If none is given then
        the title is used
        @return: a new SpreadsheetData with the columns of both objects.
        Columns of other whose name already exists here get the prefix
        @raise WrongDataSize: if the number of rows differs"""
        if time is None:
            time=self.names()[0]
        if prefix is None:
            prefix=other.title
            if prefix is None:
                prefix="other_"
            else:
                prefix+="_"

        t1=self.data[time]
        t2=other.data[time]
        if len(t1)!=len(t2):
            raise WrongDataSize()
        # the length-test keeps max() from failing on empty tables
        if len(t1)>0 and numpy.abs(t1-t2).max()>1e-10:
            error("Times do not have the same values")

        ownNames=self.names()
        names=list(ownNames)
        data=[self.data[n] for n in ownNames]

        for n in other.names():
            if n!=time:
                if n in ownNames:
                    names.append(prefix+n)
                else:
                    names.append(n)
                data.append(other.data[n])

        # data holds columns: transpose to get the rows the constructor expects
        return SpreadsheetData(names=names,
                               data=numpy.array(data).transpose())

    def __add__(self,other):
        """Convenience function for joining data"""
        return self.join(other)

    def append(self,
               name,
               data,
               allowDuplicates=False):
        """Add another column to the data. Assumes that the number of rows is right
        @param name: the name of the column
        @param data: the actual data
        @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ...
        Otherwise the name is handed to numpy unchanged (numpy is expected
        to reject a duplicate field name - TODO confirm)"""

        arr = numpy.asarray(data)
        ownNames=self.names()
        newname=name
        if newname in ownNames and allowDuplicates:
            cnt=1
            while newname in ownNames:
                newname="%s_%d" % (name,cnt)
                cnt+=1
            warning("Changing name",name,"to",newname,"bacause it already exists in the data")
        newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
        newrec = numpy.empty(self.data.shape, dtype=newdtype)
        for field in ownNames:
            newrec[field] = self.data[field]
        # store under the (possibly renamed) column. Using the original name
        # would overwrite the existing column and leave the new one unset
        newrec[newname] = arr

        self.data=newrec

    def __call__(self,
                 t,
                 name,
                 time=None,
                 invalidExtend=False,
                 noInterpolation=False):
        """'Evaluate' the data at a specific time by linear interpolation
        @param t: the time at which the data should be evaluated
        @param name: name of the data column to be evaluated. Assumes that that column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the first column is assumed
        @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
        @return: the stored value if t hits a time exactly, otherwise the
        interpolated value (or nan, see the parameters)"""

        if time is None:
            time=self.names()[0]

        x=self.data[time]
        y=self.data[name]

        # get extremes
        if t<x[0]:
            if invalidExtend:
                return y[0]
            else:
                return float('nan')
        elif t>x[-1]:
            if invalidExtend:
                return y[-1]
            else:
                return float('nan')

        # Exact hits on the end points return the stored value (like exact
        # hits inside the range). This also avoids 0/0 for a single row
        if t==x[0]:
            return y[0]
        elif t==x[-1]:
            return y[-1]

        # bisection for the interval [x[iLow],x[iHigh]] that contains t
        iLow=0
        iHigh=len(x)-1

        while (iHigh-iLow)>1:
            # integer division: the result is used as an index
            iNew = iLow + (iHigh-iLow)//2

            if x[iNew]==t:
                # we got lucky
                return y[iNew]
            elif t < x[iNew]:
                iHigh=iNew
            else:
                iLow=iNew
        if noInterpolation:
            return float('nan')
        else:
            return y[iLow] + (y[iHigh]-y[iLow])*(t-x[iLow])/(x[iHigh]-x[iLow])

    def addTimes(self,times,time=None,interpolate=False,invalidExtend=False):
        """Extend the data so that all new times are represented (add rows
        if they are not there)
        @param time: the name of the column with the time
        @param times: the times that should be there
        @param interpolate: interpolate the data in new rows. Otherwise
        insert 'nan'
        @param invalidExtend: if t is out of the valid range then use
        the smallest or the biggest value. If False use nan"""

        if time is None:
            time=self.names()[0]

        tCol=self.data[time]
        nOrig=len(tCol)

        if len(times)==nOrig:
            if all(times[i]==tCol[i] for i in range(nOrig)):
                # No difference between the times
                return

        ownNames=self.names()
        newData=[]
        otherI=0
        originalI=0
        while otherI<len(times):
            # copy the existing rows that lie before the next requested time
            while originalI<nOrig and times[otherI]>tCol[originalI]:
                newData.append(self.data[originalI])
                originalI+=1

            if originalI<nOrig and times[otherI]==tCol[originalI]:
                # the requested time is already there: keep the existing row
                newData.append(self.data[originalI])
                originalI+=1
            else:
                t=times[otherI]
                newRow=[]
                for n in ownNames:
                    if n==time:
                        newRow.append(t)
                    elif interpolate:
                        # self.data is still the original table here
                        newRow.append(self(t,n,time=time,invalidExtend=invalidExtend))
                    else:
                        newRow.append(float('nan'))
                newData.append(newRow)
            otherI+=1

        # existing rows after the last requested time
        while originalI<nOrig:
            newData.append(self.data[originalI])
            originalI+=1

        self.data=numpy.array([tuple(row) for row in newData],dtype=self.data.dtype)

    def resample(self,
                 other,
                 name,
                 time=None,
                 invalidExtend=False,
                 extendData=False,
                 noInterpolation=False):
        """Calculate values from another dataset at the same times as in this data-set
        @param other: the other data-set
        @param name: name of the data column to be evaluated. Assumes that that column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the first column is assumed
        @param invalidExtend: see __call__
        @param extendData: if the time range of other is bigger than the range
        of this data then extend this data (with rows that are 'nan' except
        for the time) before resampling. This modifies this object
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
        @return: list with one value of other for every time in this data-set"""
        if time is None:
            time=self.names()[0]

        oTimes=other.data[time]

        if extendData and (
            self.data[time][0] > oTimes[0] or
            self.data[time][-1] < oTimes[-1]):
            ownNames=self.names()
            first=self.data[time][0]
            last=self.data[time][-1]

            # times of other that lie before the first time of this data
            pre=[]
            for t in oTimes:
                if not t<first:
                    break
                pre.append(tuple([t if n==time else float('nan') for n in ownNames]))
            if len(pre)>0:
                self.data=numpy.concatenate((numpy.array(pre,dtype=self.data.dtype),self.data))

            # times of other that lie after the last time (collected backwards)
            post=[]
            for t in oTimes[::-1]:
                if not t>last:
                    break
                post.append(tuple([t if n==time else float('nan') for n in ownNames]))
            post.reverse()
            if len(post)>0:
                self.data=numpy.concatenate((self.data,numpy.array(post,dtype=self.data.dtype)))

        return [other(t,name,
                      time=time,
                      invalidExtend=invalidExtend,
                      noInterpolation=noInterpolation) for t in self.data[time]]

    def compare(self,other,name,time=None):
        """Compare this data-set with another. The time-points of this dataset are used as
        a reference. Returns a dictionary with a number of norms: maximum absolute
        difference, average absolute difference
        on all timepoints, average absolute difference weighted by time
        (the keys are "max", "average", "wAverage", "tMin" and "tMax").
        The weighted average divides by the difference between last and
        first time and is therefore not meaningful if these coincide
        @param other: the other data-set
        @param name: name of the data column to be evaluated. Assumes that that column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the first column is assumed"""

        if time is None:
            time=self.names()[0]

        x=self.data[time]
        y=self.data[name]
        y2=self.resample(other,name,time=time,invalidExtend=True)

        maxDiff=0
        sumDiff=0
        sumWeighted=0

        for i in range(len(x)):
            diff=abs(y[i]-y2[i])
            maxDiff=max(diff,maxDiff)
            sumDiff+=diff
            sumWeighted+=self._timeWeight(x,i)*diff

        return { "max" : maxDiff,
                 "average" : sumDiff/len(x),
                 "wAverage" : sumWeighted/(x[-1]-x[0]),
                 "tMin": x[0],
                 "tMax": x[-1]}

    def metrics(self,name,time=None):
        """Calculates the metrics for a data set. Returns a dictionary
        with a number of norms: minimum, maximum, average, average weighted by time
        (the keys are "max", "min", "average", "wAverage", "tMin" and "tMax").
        The weighted average divides by the difference between last and
        first time and is therefore not meaningful if these coincide
        @param name: name of the data column to be evaluated. Assumes that that column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the first column is assumed"""

        if time is None:
            time=self.names()[0]

        x=self.data[time]
        y=self.data[name]

        # infinite start values: every real value replaces them
        minVal=float('inf')
        maxVal=-float('inf')
        total=0
        sumWeighted=0

        for i in range(len(x)):
            val=y[i]
            maxVal=max(val,maxVal)
            minVal=min(val,minVal)
            total+=val
            sumWeighted+=self._timeWeight(x,i)*val

        return { "max" : maxVal,
                 "min" : minVal,
                 "average" : total/len(x),
                 "wAverage" : sumWeighted/(x[-1]-x[0]),
                 "tMin": x[0],
                 "tMax": x[-1]}
423