Package trunk :: Package BIP :: Package Bayes :: Module Melding
[hide private]

Source Code for Module trunk.BIP.Bayes.Melding

   1  # -*- coding:utf-8 -*- 
   2  #----------------------------------------------------------------------------- 
   3  # Name:        Melding.py 
   4  # Purpose:     The Bayesian melding Class provides 
   5  #              uncertainty analyses for simulation models. 
   6  # 
   7  # Author:      Flávio Codeço Coelho 
   8  # 
   9  # Created:     2003/08/10 
  10  # Copyright:   (c) 2003-2010 by the Author 
  11  # Licence:     GPL v3 
  12  #----------------------------------------------------------------------------- 
  13  import cPickle as CP 
  14  import copy 
  15  import os 
  16  import sqlite3 
  17  import sys 
  18  import xmlrpclib 
  19  from multiprocessing import Pool 
  20  from time import time 
  21   
  22  import numpy 
  23  import pylab as P 
  24  from numpy import array, nan_to_num, zeros, ones, mean, var, sqrt, floor, isnan,  nansum, median 
  25  from numpy.core.records import recarray 
  26  from numpy.random import randint, random, seed 
  27  from scipy import stats, optimize as optim 
  28  from scipy.stats import nanmean 
  29  from scipy.stats.kde import gaussian_kde 
  30   
  31  import PlotMeld as PM 
  32  import lhs 
  33  import like 
  34  from BIP.Bayes.Samplers import MCMC 
  35  import pdb 
  36   
  37   
  38  #try: 
  39  #    import psyco 
  40  #    psyco.full() 
  41  #except: 
  42  #    pass 
  43  try: 
  44      from BIP.Viz.realtime import RTplot 
  45      from liveplots.xmlrpcserver import rpc_plot 
  46      Viz=True 
  47  except: 
  48      Viz=False 
  49      print r"""Please install Gnuplot-py to enable realtime visualization. 
  50      http://gnuplot-py.sourceforge.net/ 
  51      """ 
  52   
  53  if Viz: 
  54      dtplot = RTplot();phiplot = RTplot();thplot = RTplot() 
  55   
  56  __docformat__ = "restructuredtext en" 
  57   
58 -class FitModel(object):
59 """ 60 Fit a model to data generating 61 Bayesian posterior distributions of input and 62 outputs of the model. 63 """
64 - def __init__(self, K,model, inits,tf,thetanames,phinames,wl=None ,nw=1,verbose=False,burnin=1000, constraints=[]):
65 """ 66 Initialize the model fitter. 67 68 :Parameters: 69 - `K`: Number of samples from the priors. On MCMC also the number of samples of the posterior. 70 - `model`: Callable (function) returning the output of the model, from a set of parameter values received as argument. 71 - `inits`: inits initial values for the model's variables. 72 - `tf`: Length of the simulation, in units of time. 73 - `phinames`: List of names (strings) with names of the model's variables 74 - `thetanames`: List of names (strings) with names of parameters included on the inference. 75 - `wl`: window lenght length of the inference window. 76 - `nw`: Number of windows to analyze on iterative inference mode 77 - `verbose`: Verbosity level: 0, 1 or 2. 78 - `burnin`: number of burnin samples, used in the case on mcmc method. 79 """ 80 try: 81 assert wl<=tf 82 except AssertionError: 83 sys.exit("Window Length cannot be larger that Length of the simulation(tf)" ) 84 assert isinstance(constraints, list) 85 self.K = K 86 self.L = .1*K if K>2000 else K 87 self.finits = inits #first initial values 88 self.ftf = tf 89 self.full_len = wl*nw if wl !=None else tf 90 self.inits = inits 91 self.tf = tf 92 self.ew = 0 #expanding windows? 93 self.totpop = sum(inits) 94 self.model = model 95 self.nphi = len(phinames) 96 self.ntheta = len(thetanames) 97 self.phinames = phinames 98 self.thetanames = thetanames 99 self.model.func_globals['inits'] = self.inits 100 self.model.func_globals['tf'] = self.tf 101 self.model.func_globals['thetanames'] = self.thetanames 102 self.wl = wl 103 self.nw = nw 104 self.done_running = False 105 self.prior_set = False 106 self.burnin = burnin 107 self.verbose = verbose 108 self.constraints = constraints 109 self.pool = False #this will be updated by the run method. 110 self.Me = Meld(K=self.K,L=self.L,model=self.model,ntheta=self.ntheta,nphi=self.nphi,verbose=self.verbose) 111 self.AIC = 0 112 self.BIC = 0 113 self.DIC = 0 114 # To be defined by self.set.priors 115 self.pdists = None 116 self.ppars = None 117 self.plims = None 118 self.tdists = None 119 self.tpars = None 120 self.tlims = None
121
122 - def _plot_MAP(self,data,pmap):
123 """ 124 Generates a plot of a full run of the model parameterized with the maximum a posteriori 125 estimates of the parameters. 126 127 :Parameters: 128 - `data`: data dictionary as passed to optimize 129 - `pmap`: MAP parameter values 130 """ 131 p = RTplot(persist=1) 132 self.model.func_globals['inits'] = self.finits; self.model.func_globals['tf'] = self.full_len 133 simseries = self.model(list(pmap)) 134 self.model.func_globals['inits'] = self.inits; self.model.func_globals['tf'] = self.tf 135 if 'time' in data: 136 data.pop('time') 137 for n,d in data.items(): 138 p.plotlines(nan_to_num(d).tolist(),range(len(d)),['Obs. %s'%n], '','points', 0) 139 v=self.phinames.index(n) 140 p.plotlines(simseries.T[v].tolist(),range(len(d)), data.keys(), "Simulation with MAP parameters %s=%s"%(self.thetanames,pmap))
141
142 - def AIC_from_RSS(self,):
143 """ 144 Calculates the Akaike information criterion from the residual sum of squares 145 of the best fitting run. 146 """ 147 pass
148
149 - def optimize(self, data, p0, optimizer='scipy', tol=0.0001, verbose=0, plot=0):
150 """ 151 Finds best parameters using an optimization approach 152 153 :Parameters: 154 - `data`: Dictionary of observed series 155 - `p0`: Sequence (list or tuple) of initial values for the parameters 156 - `optimizer`: Optimization library to use: 'scipy': fmin (Nelder-Mead) or 'oo':OpenOpt.NLP 157 - `tol`: Tolerance of the error 158 - `verbose`: If true show stats of the optimization run at the end 159 - `plot`: If true plots a run based on the optimized parameters. 160 """ 161 try: 162 import openopt 163 oo = True 164 except ImportError: 165 oo = False 166 assert isinstance(p0,(list,tuple)) 167 def mse(theta): 168 s1 = model_as_ra(theta, self.model, self.phinames) 169 return self._rms_error(s1, data)
170 if optimizer == "scipy": 171 potimo = optim.fmin(mse,p0,ftol=tol, disp=verbose) 172 elif optimizer == "oo": 173 if not oo: 174 potimo = optim.fmin(mse,p0,ftol=tol, disp=verbose) 175 else: 176 lb = array([l[0] for l in self.tlims]) if self.tlims else array([-numpy.inf]*self.ntheta) 177 ub = array([l[1] for l in self.tlims]) if self.tlims else array([numpy.inf]*self.ntheta) 178 p = openopt.NLP(mse, p0, lb=lb, ub=ub, ftol=tol, iprint=10) 179 p.solve('ralg') 180 potimo = p.xf 181 else:#use fmin as fallback method 182 potimo = optim.fmin(mse,p0,ftol=tol, disp=verbose) 183 if plot: 184 self._plot_MAP(data, potimo) 185 return potimo
186
187 - def _rms_error(self, s1, s2):
188 ''' 189 Calculates a the error between a model- 190 generated time series and a observed time series. 191 It uses a normalized RMS deviation. 192 193 :Parameters: 194 - `s1`: model-generated time series. 195 - `s2`: observed time series. dictionary with keys matching names of s1 196 :Types: 197 - `s1`: Record array or list. 198 - `s2`: Dictionary or list; must be a dictionary if s1 is a RA 199 200 s1 and s2 can also be both lists of lists or lists of arrays of the same length. 201 202 :Return: 203 The Root mean square deviation between `s1` and `s2`. 204 ''' 205 if isinstance(s1, recarray): 206 assert isinstance(s2, dict) 207 err = [] 208 for k in s2.keys(): 209 if k not in s1.dtype.names: 210 continue 211 ls1 = len(s1[k]) #handles the cases where data is slightly longer that simulated series. 212 # print s2[k] 213 # try: 214 # pdb.set_trace() 215 if len(s2[k].shape) >1: 216 s2[k] = s2[k].mean(axis=1) 217 dif = s1[k]-s2[k][:ls1].astype(float) 218 219 dif[isnan(dif)] = 0 220 e = sqrt(mean(dif**2)) 221 # except TypeError: 222 # print s1[k], s2[k] 223 224 err.append(e) 225 elif isinstance(s1, list): 226 assert isinstance(s2, list) and len(s1) ==len(s2) 227 s1 = array(s1).astype(float) 228 s2 = array(s2).astype(float) 229 err = [sqrt(nanmean((s-t)**2)) for s, t in zip(s1, s2) ] 230 #err = [sum((s-t)**2./t**2) for s, t in zip(s1, s2)] 231 rmsd = nan_to_num(mean(err)) 232 return rmsd
233
234 - def set_priors(self,tdists,tpars, tlims,pdists,ppars,plims):
235 """ 236 Set the prior distributions for Phi and Theta 237 238 :Parameters: 239 - `pdists`: distributions for the output variables. For example: [scipy.stats.uniform,scipy.stats.norm] 240 - `ppars`: paramenters for the distributions in pdists. For example: [(0,1),(0,1)] 241 - `plims`: Limits of the range of each phi. List of (min,max) tuples. 242 - `tdists`: same as pdists, but for input parameters (Theta). 243 - `tpars`: same as ppars, but for tdists. 244 - `tlims`: Limits of the range of each theta. List of (min,max) tuples. 245 """ 246 self.pdists = pdists 247 self.ppars = ppars 248 self.plims = plims 249 self.tdists = tdists 250 self.tpars = tpars 251 self.tlims = tlims 252 self._init_priors() 253 self.prior_set = True
254
255 - def prior_sample(self):
256 """ 257 Generates a set of sample from the starting theta prior distributions 258 for reporting purposes. 259 260 :Returns: 261 Dictionary with (name,sample) pairs 262 """ 263 s = {} 264 for i, n in enumerate(self.thetanames): 265 s[n]=self.tdists[i](*self.tpars[i]).rvs(self.K) 266 return s
267
268 - def _init_priors(self, prior=None):
269 """ 270 Initialize priors either from distributions or previous posteriors 271 """ 272 if prior!=None and prior['theta'] and prior['phi']: 273 self.Me.setThetaFromData(self.thetanames,prior['theta'],self.tlims) 274 self.Me.setPhiFromData(self.phinames,prior['phi'],self.plims) 275 else: 276 print "++++>" 277 self.Me.setTheta(self.thetanames,self.tdists,self.tpars, self.tlims) 278 self.Me.setPhi(self.phinames,self.pdists,self.ppars,self.plims)
279
280 - def do_inference(self, prior, data, predlen, method, likvar):
281 """ 282 """ 283 self._init_priors(prior) 284 succ=0 285 att = 1 286 for n in data.keys(): 287 if n not in self.phinames: 288 data.pop(n) 289 if method == "SIR": 290 while not succ: #run sir Until is able to get a fit 291 print 'attempt #',att 292 succ = self.Me.sir(data=data,variance=likvar,pool=self.pool,t=self.tf) 293 att += 1 294 pt,series = self.Me.getPosteriors(t=self.tf) 295 elif method == "MCMC": 296 while not succ: #run sir Until is able to get a fitd == "mcmc": 297 print 'attempt #',att 298 succ = self.Me.mcmc_run(data,t=self.tf,likvariance=likvar,burnin=self.burnin, method = 'MH') 299 pt = self.Me.post_theta 300 series = self.Me.post_phi 301 elif method == "DREAM": 302 while not succ: #run sir Until is able to get a fitd == "mcmc": 303 print 'attempt #',att 304 succ = self.Me.mcmc_run(data,t=self.tf,likvariance=likvar,burnin=self.burnin, method='dream', constraints=self.constraints) 305 pt = self.Me.post_theta 306 series = self.Me.post_phi 307 elif method == "ABC": 308 #TODO: allow passing of fitfun 309 self.Me.abcRun(data=data,fitfun=None,pool=self.pool, t=self.tf) 310 pt,series = self.Me.getPosteriors(t=self.tf) 311 if self.Me.stop_now: 312 # if fitting has been prematurely interrupted by user 313 return None,None,None, None, None 314 pp = series[:,-1] 315 # TODO: figure out what to do by default with inits 316 if self.nw >1 and self.adjinits and not self.ew: 317 adiff = array([abs(pp[vn]-data[vn][-1]) for vn in data.keys()]) 318 diff = adiff.sum(axis=0) #sum errors for all oserved variables 319 initind = diff.tolist().index(min(diff)) 320 self.inits = [pp[vn][initind] for vn in self.phinames] 321 for i, v in enumerate(self.phinames): 322 if v in data.keys(): 323 self.inits[i] = data[v][-1] 324 self.model.func_globals['inits'] = self.inits 325 326 if predlen: 327 predseries = self.Me.getPosteriors(predlen)[1] 328 return pt,pp, series,predseries,att
329
330 - def _save_to_db(self, dbname, data):
331 ''' 332 Saves data to a sqlite3 db. 333 334 :Parameters: 335 - `dbname`: name of the database file 336 - `data`: Data dictionary as created by `format_db_tables` method. 337 ''' 338 def row_generator(var): 339 ''' 340 var is a dictionary. 341 ''' 342 # if isinstance(var, numpy.recarray): 343 # for repl in var: 344 # if isinstance(repl, numpy.recarray): # this is the case of variables, where each replicate is a time series 345 # for t in repl: 346 # if not isinstance(t, numpy.core.records.record): 347 # t=(t,) 348 # yield tuple(t) #variable tuple at time t in replicate repl 349 # else: #this is the case of parameters 350 # yield tuple(repl) 351 # elif isinstance(var, dict): 352 # try: 353 for r in zip(*var.values()): 354 if not isinstance(r, tuple): 355 r = (r,) 356 t = [] 357 for i in r: 358 if isinstance(i, numpy.ndarray): 359 t+=i.tolist() 360 else: 361 t += [i] 362 r = tuple(t) 363 yield r
364 # except: 365 # print var.keys(), var.values() 366 367 create = True 368 if not dbname.endswith('.sqlite'): 369 dbname += '.sqlite' 370 if os.path.exists(dbname): 371 create = False 372 con = sqlite3.connect(dbname) 373 #create tables 374 for k, v in data.items(): 375 if isinstance(v, dict): 376 labs = [] 377 for k2, v2 in v.items(): 378 if isinstance(v2, numpy.ndarray) and len(v2.shape)>1: 379 labs+=[k2+str(i) for i in range(v2.shape[1])] 380 else: 381 labs.append(k2) 382 nv = len(labs)#+1 #variables plus primary key 383 tstrc = k+'(pk integer primary key asc autoincrement,'+','.join(labs)+')' 384 tstr = k+'('+','.join(labs)+')' 385 if create: 386 con.execute('create table '+tstrc) 387 # elif isinstance(v, numpy.recarray): 388 # nv = len(v.dtype.names) +1 #variables plus primary key 389 # tstr = k+"("+','.join(v.dtype.names)+')' 390 # if create: 391 # con.execute("create table "+ tstrc) 392 else: 393 raise TypeError("Non-valid data structure.") 394 #print "insert into "+tstr+" values("+",".join(['?']*nv)+")" 395 con.executemany("insert into "+tstr+" values("+",".join(['?']*nv)+")", row_generator(v)) 396 con.commit() 397 con.close()
398 - def run(self, data,method,likvar,pool=False,adjinits=True,ew=0, dbname='results', monitor=False, initheta=[]):
399 """ 400 Fit the model against data 401 402 :Parameters: 403 - `data`: dictionary with variable names and observed series, as Key and value respectively. 404 - `method`: Inference method: "ABC", "SIR", "MCMC" or "DREAM" 405 - `likvar`: Variance of the likelihood function in the SIR and MCMC method 406 - `pool`: Pool priors on model's outputs. 407 - `adjinits`: whether to adjust inits to data 408 - `ew`: Whether to use expanding windows instead of moving ones. 409 - `dbname`: name of the sqlite3 database 410 - `monitor`: Whether to monitor certains variables during the inference. If not False, should be a list of valid phi variable names. 411 - `initheta`: starting position in parameter space for the sampling to start. (only used by MCMC and DREAM) 412 """ 413 self.ew = ew 414 self.adjinits = adjinits 415 self.pool = pool 416 assert isinstance(initheta, list) #type checking 417 self.Me.initheta = initheta 418 if not self.prior_set: return 419 if monitor: 420 self._monitor_setup() 421 start = time() 422 d = data 423 prior = {'theta':[],'phi':[]} 424 os.system('rm %s_*.pickle'%dbname) 425 if self.wl == None: 426 self.wl = floor(len(d.values()[0])/self.nw) 427 wl = self.wl 428 for w in range(self.nw): 429 t0 = time() 430 print '==> Window # %s of %s!'%(w+1,self.nw) 431 if w>0: 432 rt = (self.nw-w+1)*tel 433 print "==> Remaining time: %s minutes and %s seconds."%(rt//60, rt%60) 434 self.tf=(w+1)*wl if ew else wl #setting tf according to window type 435 self.model.func_globals['tf'] = self.tf 436 d2 = {} 437 for k,v in d.items():#Slicing data to the current window 438 d2[k] = v[:self.tf] if ew else v[w*wl:w*wl+wl] 439 if w==0 and adjinits: 440 for n in d2.keys(): 441 if n not in self.phinames: 442 continue 443 i = self.phinames.index(n) 444 self.inits[i] = nan_to_num(d2[n][0]) if not isinstance(d2[n][0], numpy.ndarray ) else nan_to_num(nanmean(d2[n][0])) 445 #TODO: figure out how to balance the total pop 446 # self.inits[0] += self.totpop-sum(self.inits) #adjusting susceptibles 447 self.model.func_globals['inits'] = self.inits 448 pt,pp, series,predseries,att = self.do_inference(data=d2, prior=prior,predlen=wl, method=method,likvar=likvar) 449 if self.Me.stop_now: 450 return 451 self.AIC += 2. * (self.ntheta - self.Me.likmax) # 2k - 2 ln(L) 452 self.BIC += self.ntheta * numpy.log(self.wl*len(d2)) - 2. * self.Me.likmax # k ln(n) - 2 ln(L) 453 self.DIC = self.Me.DIC 454 # ===Saving results=== 455 f = open('%s_%s%s'%(dbname, w, ".pickle"),'w') 456 #save weekly posteriors of theta and phi, posteriors of series, data (d) and predictions(z) 457 CP.dump((pt,series,d,predseries, att*self.K),f) 458 f.close() 459 if dbname: 460 if os.path.exists(dbname+".sqlite") and w ==0: 461 os.remove(dbname+".sqlite") 462 self._format_db_tables(dbname, w, data, pt, series, predseries, self.AIC, self.BIC, self.DIC) 463 prior = {'theta':[],'phi':[]} 464 for n in pt.dtype.names: 465 prior['theta'].append(pt[n]) 466 #beta,alpha,sigma,Ri = median(pt.beta),median(pt.alpha),median(pt.sigma),median(pt.Ri 467 for n in pp.dtype.names: 468 #print compress(isinf(pp[n]),pp[n]) 469 prior['phi'].append(pp[n]) 470 if monitor: 471 self._monitor_plot(series,prior,d2,w,data,vars=monitor) 472 473 tel = time()-t0 474 self.AIC /=self.nw #averaging 475 self.BIC /=self.nw 476 self.Me.AIC = self.AIC 477 self.Me.BIC = self.BIC 478 self.DIC = self.Me.DIC 479 print "time: %s seconds"%(time()-start) 480 481 self.done_running = True
482
483 - def _format_db_tables(self, dbname, w, data, pt, series, predseries, AIC, BIC, DIC):
484 """ 485 Formats results for writing to database 486 """ 487 #TODO: Write tests for this 488 # data table does not require formatting 489 # Dates for this window 490 self.wl=int(self.wl) 491 if 'time' in data: 492 if isinstance(data['time'], numpy.ndarray): 493 ts = data['time'].tolist() 494 else: 495 ts = data['time'] 496 dates = ts[w*self.wl:w*self.wl+self.wl] 497 preddates = ts[(w+1)*self.wl:(w+1)*self.wl+self.wl] 498 else: 499 dates = range(w*self.wl, w*self.wl+self.wl) 500 preddates = range((w+1)*self.wl, (w+1)*self.wl+self.wl) 501 # Parameters table 502 ptd = {'time':[dates[-1]]*len(pt[pt.dtype.names[0]])} 503 for n in pt.dtype.names: 504 ptd[n] = pt[n] 505 # Series table 506 seriesd = {'time':dates*series.shape[0]} 507 predseriesd = {'time':preddates*series.shape[0]} 508 for n in series.dtype.names: 509 seriesd[n] = series[n].ravel() 510 for n in predseries.dtype.names: 511 predseriesd[n] = predseries[n].ravel() 512 # AIC and BIC table 513 gof = {'time':[dates[-1]], 'AIC':[AIC], 'BIC':[BIC], 'DIC':[DIC]} 514 self._save_to_db(dbname, {'post_theta':ptd, 515 'series':seriesd, 516 'data': data, 517 'predicted_series':predseriesd, 518 'GOF':gof, 519 })
520
521 - def _monitor_setup(self):
522 """ 523 Sets up realtime plotting for inference 524 """ 525 #theta histograms (for current window) 526 self.hst = xmlrpclib.ServerProxy('http://localhost:%s'%rpc_plot(hold=1), allow_none=1)#RTplot() 527 #full data and simulated series 528 self.fsp = xmlrpclib.ServerProxy('http://localhost:%s'%rpc_plot(hold=1), allow_none=1)#RTplot() 529 # phi time series (model output for the current window) 530 self.ser = xmlrpclib.ServerProxy('http://localhost:%s'%rpc_plot(hold=1), allow_none=1)#RTplot()
531
532 - def _get95_bands(self,series,vname):
533 ''' 534 Returns 95% bands for series of all variables in vname 535 536 :Parameters: 537 - `series`: record array containing the series 538 - `vname`: list of strings of variables in series for which we want to get the bands 539 ''' 540 i5 = array([stats.scoreatpercentile(t,2.5) for t in series[vname].T]) 541 i95 = array([stats.scoreatpercentile(t,97.5) for t in series[vname].T]) 542 return i5,i95
543
544 - def _long_term_prediction_plot(self, cpars,vind, w):
545 """ 546 Plots the simulated trajectory predicted from best fit parameters. 547 548 :Parameters: 549 - `cpars`: best fit parameter set 550 - `vind`: List with indices(in self.phinames) to variables to be plotted 551 - `w`: current window number. 552 """ 553 if self.full_len-(self.wl*(w+1)) == 0: 554 return 555 self.model.func_globals['tf'] = self.full_len if self.ew else self.full_len-(self.wl*(w+1)) 556 if self.ew: self.model.func_globals['inits'] = self.finits 557 simseries = self.model(cpars) 558 simseries = [simseries[:, i].tolist() for i in range(self.nphi) if i in vind] 559 snames = [n for n in self.phinames if i in vind] 560 self.model.func_globals['tf'] = self.tf 561 xinit = 0 if self.ew else self.wl*w+self.wl 562 # print xinit, xinit+len(simseries[0]) 563 self.fsp.lines(simseries,range(xinit,xinit+len(simseries[0])), snames, "Best fit simulation after window %s"%(w+1))
564 565 566
567 - def _monitor_plot(self, series, prior, d2,w,data, vars):
568 """ 569 Plots real time data 570 571 :Parameters: 572 - `series`: Record array with the simulated series. 573 - `prior`: Dctionary with the prior sample of Theta 574 - `d2`: Dictionary with data for the current fitting window. 575 - `w`: Integer id of the current fitting window. 576 - `data`: Dictionary with the full dataset. 577 - `vars`: List with variable names to be plotted. 578 """ 579 # diff = 0 580 for vn in d2.keys(): 581 #sum errors for all oserved variables 582 if isinstance(d2[vn], numpy.ndarray) and len(d2[vn].shape)>1: 583 diff = abs(series[vn][:, -1]-nanmean(d2[vn][-1])) 584 else: 585 diff = abs(series[vn][:, -1]-d2[vn][-1]) 586 # print diff, min(diff) 587 initind = diff.tolist().index(min(diff)) 588 vindices = [self.phinames.index(n) for n in vars] 589 for n in vars: 590 if n not in d2: 591 continue 592 i5,i95 = self._get95_bands(series,n) 593 self.ser.lines(series[n][initind].tolist(), None, ["Best run's %s"%n], 'Window %s'%(w+1)) 594 self.ser.lines(i5.tolist(),None, ['2.5%'], 'Window %s'%(w+1)) 595 self.ser.lines(i95.tolist(),None, ['97.5%'],'Window %s'%(w+1)) 596 self.ser.lines(d2[n].tolist(),None, ['Obs. %s'%n], 'Window %s'%(w+1), 'points') 597 self.fsp.lines(data[n].T.tolist(),None, ['Obs. %s'%n], 'Window %s'%(w+1), 'points') 598 self.hst.histogram(array(prior['theta']).tolist(), self.thetanames,'Window %s'%(w+1), 1) 599 cpars = [prior['theta'][i][initind] for i in range(self.ntheta)] 600 self._long_term_prediction_plot(cpars,vindices, w) 601 self.ser.clearFig() 602 self.hst.clearFig() 603 self.fsp.clearFig()
604
605 - def plot_results(self, names=[], dbname="results", savefigs=0):
606 """ 607 Plot the final results of the inference 608 """ 609 if not names: 610 names = self.phinames 611 try: #read the data files 612 pt,series,predseries,obs = self._read_results(dbname) 613 except: 614 if not self.done_running: 615 return 616 if obs.has_key('time'): 617 tim = numpy.array(obs['time']) 618 else: 619 tim = numpy.arange(self.nw*self.wl) 620 #PM.plot_par_series(range(len(pt)),pt) 621 priors = self.prior_sample() 622 PM.plot_par_violin(tim[self.wl-1::self.wl],pt, priors) 623 P.xlabel('windows') 624 if savefigs: 625 P.savefig(dbname+"_violin.svg") 626 P.figure() 627 PM.plot_series2(tim,obs,series,names=names, wl=self.wl) 628 if savefigs: 629 P.savefig(dbname+"_series.svg") 630 if self.nw > 1: 631 P.figure() 632 PM.pred_new_cases(obs,predseries,self.nw,names,self.wl) 633 P.gca().legend(loc=0) 634 P.xlabel('windows') 635 P.figure() 636 PM.plot_series2(tim,obs,predseries,names=names, 637 title='Predicted vs. Observed series',lag=True) 638 P.xlabel('windows') 639 if savefigs: 640 P.savefig(dbname+"_predseries.svg") 641 P.show()
642
643 - def _read_results(self, nam):
644 """ 645 read results from disk 646 """ 647 pt,series,predseries = [],[],[] 648 649 for w in range(self.nw): 650 fn = "%s_%s.pickle"%(nam, w) 651 print fn 652 f = open(fn,'r') 653 a,b,obs,pred, samples = CP.load(f) 654 f.close() 655 pt.append(a) 656 series.append(b) 657 predseries.append(pred) 658 return pt,series,predseries,obs
659 660
661 -class Meld(object):
662 """ 663 Bayesian Melding class 664 """
665 - def __init__(self, K, L, model, ntheta, nphi, alpha = 0.5, verbose = 0, viz=False ):
666 """ 667 Initializes the Melding class. 668 669 :Parameters: 670 - `K`: Number of replicates of the model run. Also determines the prior sample size. 671 - `L`: Number of samples from the Posterior distributions. Usually 10% of K. 672 - `model`: Callable taking theta as argument and returning phi = M(theta). 673 - `ntheta`: Number of inputs to the model (parameters). 674 - `nphi`: Number of outputs of the model (State-variables) 675 - `verbose`: 0,1, 2: whether to show more information about the computations 676 - `viz`: Boolean. Wether to show graphical outputs of the fitting process 677 """ 678 self.K = K 679 self.L = L 680 self.verbose = verbose 681 self.model = model 682 self.likelist = [] #list of likelihoods 683 self.q1theta = recarray(K,formats=['f8']*ntheta) #Theta Priors (record array) 684 self.post_theta = recarray(L,formats=['f8']*ntheta) #Theta Posteriors (record array) 685 self.q2phi = recarray(K,formats=['f8']*nphi) #Phi Priors (record array) 686 self.phi = recarray(K,formats=['f8']*nphi) #Phi model-induced Priors (record array) 687 self.q2type = [] #list of distribution types 688 self.post_phi = recarray(L,formats=['f8']*nphi) #Phi Posteriors (record array) 689 self.ntheta = ntheta 690 self.nphi = nphi 691 self.thetapars = [] 692 self.phipars = [] 693 self.alpha = alpha #pooling weight of user-provided phi priors 694 self.done_running = False 695 self.theta_dists = {}#parameterized rngs for each parameter 696 self.phi_dists = {}#parameterized rngs for each variable 697 self.likmax = -numpy.inf 698 self.initheta = [] 699 self.AIC = None 700 self.BIC = None 701 self.DIC = None 702 self.stop_now = False #Flag to interrupt fitting loop. inner loop checks this flag and exit when it is true. 703 self.current_step = 0 704 self.proposal_variance = 0.0000001 705 self.adaptscalefactor = 1 #adaptive variance. Used my metropolis hastings 706 self.salt_band = 0.1 #relaxation factor of the prior limits 707 if Viz: #Gnuplot installed 708 self.viz = viz 709 else: 710 self.viz = False 711 if self.verbose == 2: 712 self.every_run_plot = xmlrpclib.ServerProxy('http://localhost:%s'%rpc_plot(hold=1), allow_none=1) 713 self.po = Pool() #pool of processes for parallel processing
714
715 - def current_plot(self, series, data, idx, vars=[], step=0):
716 """ 717 Plots the last simulated series 718 719 :Parameters: 720 - `series`: Record array with the simulated series. 721 - `idx`: Integer index of the curve to plot . 722 - `data`: Dictionary with the full dataset. 723 - `vars`: List with variable names to be plotted. 724 - `step`: Step of the chain 725 """ 726 try: 727 if self.lastidx == idx: 728 return 729 except AttributeError: 730 pass 731 #print series.shape, idx 732 best_series = [series[k][idx].tolist() for k in data.keys()] 733 d = [data[k].tolist() for k in data.keys()] 734 self.every_run_plot.lines(d,[],data.keys(), "Best fit. Last updated on %s"%step, 'points' ) 735 self.every_run_plot.lines(best_series,[],data.keys(), "Best fit. Last updated on %s"%step, 'lines' ) 736 self.every_run_plot.clearFig() 737 self.lastidx = idx
738
739 - def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
740 """ 741 Setup the models Outputs, or Phi, and generate the samples from prior distributions 742 needed for the melding replicates. 743 744 :Parameters: 745 - `names`: list of string with the names of the variables. 746 - `dists`: is a list of RNG from scipy.stats 747 - `pars`: is a list of tuples of variables for each prior distribution, respectively. 748 - `limits`: lower and upper limits on the support of variables. 749 """ 750 if len(names) != self.nphi: 751 raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi)) 752 self.q2phi.dtype.names = names 753 self.phi.dtype.names = names 754 self.post_phi.dtype.names = names 755 self.plimits = limits 756 self.phipars = pars 757 for n,d,p in zip(names,dists,pars): 758 self.q2phi[n] = lhs.lhs(d,p,self.K).ravel() 759 self.q2type.append(d.name) 760 self.phi_dists[n]=d(*p)
761 762 763
764 - def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)], lims=[(0, 1)]):
765 """ 766 Setup the models inputs and generate the samples from prior distributions 767 needed for the dists the melding replicates. 768 769 :Parameters: 770 - `names`: list of string with the names of the parameters. 771 - `dists`: is a list of RNG from scipy.stats 772 - `pars`: is a list of tuples of parameters for each prior distribution, respectivelydists 773 """ 774 self.q1theta.dtype.names = names 775 self.post_theta.dtype.names = names 776 self.thetapars = pars 777 self.tlimits = lims 778 if os.path.exists('q1theta'): 779 self.q1theta = CP.load(open('q1theta','r')) 780 else: 781 for n,d,p in zip(names,dists,pars): 782 self.q1theta[n] = lhs.lhs(d,p,self.K).ravel() 783 self.theta_dists[n]=d(*p)
784
785 - def add_salt(self,dataset,band):
786 """ 787 Adds a few extra uniformly distributed data 788 points beyond the dataset range. 789 This is done by adding from a uniform dist. 790 791 :Parameters: 792 - `dataset`: vector of data 793 - `band`: Fraction of range to extend [0,1[ 794 795 :Returns: 796 Salted dataset. 797 """ 798 dmax = max(dataset) 799 dmin = min(dataset) 800 drange = dmax-dmin 801 hb = drange*band/2. 802 d = numpy.concatenate((dataset,stats.uniform(dmin-hb,dmax-dmin+hb).rvs(self.K*.05))) 803 return d
804
805 - def setThetaFromData(self,names,data, limits):
806 """ 807 Setup the model inputs and set the prior distributions from the vectors 808 in data. 809 This method is to be used when the prior distributions are available in 810 the form of a sample from an empirical distribution such as a bayesian 811 posterior. 812 In order to expand the samples provided, K samples are generated from a 813 kernel density estimate of the original sample. 814 815 :Parameters: 816 - `names`: list of string with the names of the parameters. 817 - `data`: list of vectors. Samples of a proposed distribution 818 - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits. 819 """ 820 self.q1theta.dtype.names = names 821 self.post_theta.dtype.names = names 822 self.tlimits = limits 823 tlimits = dict(zip(names,limits)) 824 class Proposal: 825 "class wrapping a kde adding similar interface to stats dists" 826 def __init__(self,name, dist): 827 self.name = name 828 self.dist = dist
829 def __call__(self): 830 smp = -numpy.inf 831 while not (smp>=tlimits[self.name][0] and smp<=tlimits[self.name][1]): 832 smp = self.dist.resample(1)[0][0] 833 return smp
834 def rvs(self, *args, **kwds): 835 if 'size' in kwds: 836 sz = kwds['size'] 837 else: 838 sz = 1 839 #TODO implement return of multiple samples here if necessary 840 smp = -numpy.inf 841 while not (smp>=tlimits[self.name][0] and smp<=tlimits[self.name][1]): 842 smp = self.dist.resample(sz)[0][0] 843 return smp 844 def stats(self, moments): 845 if isinstance(self.dist, (stats.rv_continuous, stats.rv_discrete)): 846 return self.dist.stats(moments='m') 847 else: 848 return self.dist.dataset.mean() 849 def pdf(self, x): 850 return self.dist.evaluate(x)[0] 851 def pmf(self, x): 852 return self.dist.evaluate(x)[0] 853 854 if os.path.exists('q1theta'): 855 self.q1theta = CP.load(open('q1theta','r')) 856 else: 857 for n,d,lim in zip(names,data,limits): 858 smp = [] 859 #add some points uniformly across the support 860 #to help MCMC to escape from prior bounds 861 salted = self.add_salt(d,self.salt_band) 862 863 dist = gaussian_kde(salted) 864 while len(smp)<self.K: 865 smp += [x for x in dist.resample(self.K)[0] if x >= lim[0] and x <= lim[1]] 866 #print self.q1theta[n].shape, array(smp[:self.K]).shape 867 self.q1theta[n] = array(smp[:self.K]) 868 self.theta_dists[n] = Proposal(copy.deepcopy(n),copy.deepcopy(dist)) 869
870 - def setPhiFromData(self,names,data,limits):
871 """ 872 Setup the model outputs and set their prior distributions from the 873 vectors in data. 874 This method is to be used when the prior distributions are available in 875 the form of a sample from an empirical distribution such as a bayesian 876 posterior. 877 In order to expand the samples provided, K samples are generated from a 878 kernel density estimate of the original sample. 879 880 :Parameters: 881 - `names`: list of string with the names of the variables. 882 - `data`: list of vectors. Samples of the proposed distribution. 883 - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables. 884 """ 885 self.q2phi.dtype.names = names 886 self.phi.dtype.names = names 887 self.post_phi.dtype.names = names 888 self.limits = limits 889 for n,d in zip(names,data): 890 i = 0 891 smp = [] 892 while len(smp)<self.K: 893 try: 894 smp += [x for x in gaussian_kde(d).resample(self.K)[0] if x >= limits[i][0] and x <= limits[i][1]] 895 except: 896 #d is has no variation, i.e., all elements are the same 897 #print d 898 #raise LinAlgError, "Singular matrix" 899 smp = ones(self.K)*d[0] #in this case return a constant sample 900 self.q2phi[n] = array(smp[:self.K]) 901 self.q2type.append('empirical') 902 i += 1
903 #self.q2phi = self.filtM(self.q2phi, self.q2phi, limits) 904 905
906 - def run(self,*args):
907 """ 908 Runs the model through the Melding inference.model 909 model is a callable which return the output of the deterministic model, 910 i.e. the model itself. 911 The model is run self.K times to obtain phi = M(theta). 912 """ 913 #TODO: implement calculation of AIC,BIC and DIC for SIR 914 for i in xrange(self.K): 915 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names] 916 r = self.po.apply_async(self.model, theta) 917 res = r.get() 918 self.phi[i]= res[-1]#self.model(*theta)[-1] #phi is the last point in the simulation 919 920 self.done_running = True
921
922 - def getPosteriors(self,t):
923 """ 924 Updates the posteriors of the model's output for the last t time steps. 925 Returns two record arrays: 926 - The posteriors of the Theta 927 - the posterior of Phi last t values of time-series. self.L by `t` arrays. 928 929 :Parameters: 930 - `t`: length of the posterior time-series to return. 931 """ 932 if not self.done_running: 933 print "Estimation has not yet been run" 934 return 935 if t > 1: 936 self.post_phi = recarray((self.L,t),formats=['f8']*self.nphi) 937 self.post_phi.dtype.names = self.phi.dtype.names 938 def cb(r): 939 ''' 940 callback function for the asynchronous model runs. 941 r: tuple with results of simulation (results, run#) 942 ''' 943 if t == 1: 944 self.post_phi[r[1]] = (r[0][-1],) 945 #self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]] 946 else: 947 self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]]
948 po = Pool() 949 #random indices for the marginal posteriors of theta 950 pti = lhs.lhs(stats.randint,(0,self.L),siz=(self.ntheta,self.L)) 951 for i in xrange(self.L):#Monte Carlo with values of the posterior of Theta 952 theta = [self.post_theta[n][pti[j,i]] for j,n in enumerate(self.post_theta.dtype.names)] 953 po.apply_async(enumRun, (self.model,theta,i), callback=cb) 954 if i%100 == 0 and self.verbose: 955 print "==> L = %s\r"%i 956 957 po.close() 958 po.join() 959 return self.post_theta, self.post_phi 960
961 - def filtM(self,cond,x,limits):
962 ''' 963 Multiple condition filtering. 964 Remove values in x[i], if corresponding values in 965 cond[i] are less than limits[i][0] or greater than 966 limits[i][1]. 967 968 :Parameters: 969 - `cond`: is an array of conditions. 970 - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`. 971 - `x`: array to be filtered. 972 ''' 973 # Deconstruct the record array, if necessary. 974 names = [] 975 if isinstance(cond, recarray): 976 names = list(cond.dtype.names) 977 cond = [cond[v] for v in cond.dtype.names] 978 x = [x[v] for v in x.dtype.names] 979 980 cond = array(cond) 981 cnd = ones(cond.shape[1],int) 982 for i,j in zip(cond,limits): 983 ll = j[0] 984 ul = j[1] 985 #print cond.shape,cnd.shape,i.shape,ll,ul 986 cnd = cnd & less(i,ul) & greater(i,ll) 987 f = compress(cnd,x, axis=1) 988 989 if names:#Reconstruct the record array 990 r = recarray((1,f.shape[1]),formats=['f8']*len(names),names=names) 991 for i,n in enumerate(names): 992 r[n]=f[i] 993 f=r 994 995 return f
996 997 998
999 - def logPooling(self,phi):
1000 """ 1001 Returns the probability associated with each phi[i] 1002 on the pooled pdf of phi and q2phi. 1003 1004 :Parameters: 1005 - `phi`: prior of Phi induced by the model and q1theta. 1006 """ 1007 1008 # Estimating the multivariate joint probability densities 1009 phidens = gaussian_kde(array([phi[n][:,-1] for n in phi.dtype.names])) 1010 1011 q2dens = gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names])) 1012 # Determining the pooled probabilities for each phi[i] 1013 # qtilphi = zeros(self.K) 1014 lastp = array([list(phi[i,-1]) for i in xrange(self.K)]) 1015 # print lastp,lastp.shape 1016 qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha 1017 return qtilphi/sum(qtilphi)
1018
1019 - def abcRun(self,fitfun=None, data={}, t=1,pool=False,savetemp=False):
1020 """ 1021 Runs the model for inference through Approximate Bayes Computation 1022 techniques. This method should be used as an alternative to the sir. 1023 1024 :Parameters: 1025 - `fitfun`: Callable which will return the goodness of fit of the model to data as a number between 0-1, with 1 meaning perfect fit 1026 - `t`: number of time steps to retain at the end of the of the model run for fitting purposes. 1027 - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items the number of state variables, with labels matching variables names. Unorbserved variables must have an empty list as value. 1028 - `pool`: if True, Pools the user provided priors on the model's outputs, with the model induced priors. 1029 - `savetemp`: Should temp results be saved. Useful for long runs. Alows for resuming the simulation from last sa 1030 """ 1031 seed() 1032 if not fitfun: 1033 fitfun = basicfit 1034 if savetemp: 1035 CP.dump(self.q1theta,open('q1theta','w')) 1036 # Running the model ========================== 1037 phi = self.runModel(savetemp,t) 1038 1039 print "==> Done Running the K replicates\n" 1040 # Do Log Pooling 1041 if not pool: 1042 qtilphi = ones(self.K) 1043 else: 1044 t0 = time() 1045 qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi 1046 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0) 1047 qtilphi = nan_to_num(qtilphi) 1048 #print 'max(qtilphi): ', max(qtilphi) 1049 if sum(qtilphi)==0: 1050 print 'Pooled prior on ouputs is null, please check your priors, and try again.' 1051 return 0 1052 # 1053 print "Calculating weights" 1054 po = Pool() 1055 jobs = [po.apply_async(fitfun, (phi[i],data)) for i in xrange(phi.shape[0])] 1056 w = [j.get() for j in jobs] 1057 po.close();po.join() 1058 w /=sum(w) 1059 w = 1-w 1060 1061 w = nan_to_num(w) 1062 w = array(w)*qtilphi 1063 w /=sum(w) 1064 w = nan_to_num(w) 1065 print 'max(w): %s\nmin:%s\nmean(w): %s\nvar(w): %s'%(max(w), min(w), mean(w), var(w)) 1066 if sum(w) == 0.0: 1067 print 'Resampling weights are all zero, please check your model or data.' 1068 return 0 1069 print "Resampling Thetas" 1070 t0 = time() 1071 j = 0 1072 while j < self.L: # Extract L samples from q1theta 1073 i=randint(0,w.size)# Random position of w and q1theta 1074 if random()<= w[i]: 1075 self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob. 1076 j+=1 1077 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)) 1078 1079 self.done_running = True 1080 return 1
1081
1082 - def imp_sample(self,n,data, w):
1083 """ 1084 Importance sampling 1085 1086 :Returns: 1087 returns a sample of size n 1088 """ 1089 #sanitizing weights 1090 print "Starting importance Sampling" 1091 w /=sum(w) 1092 w = nan_to_num(w) 1093 j=0 1094 k=0 1095 smp = copy.deepcopy(data[:n]) 1096 while j < n: 1097 i = randint(0,w.size)# Random position of w 1098 if random() <= w[i]: 1099 smp[j] = data[j] 1100 j += 1 1101 1102 k+=1 1103 print "Done imp samp." 1104 return smp
1105
1106 - def mcmc_run(self, data, t=1, likvariance=10,burnin=1000, nopool=False, method="MH" , constraints=[]):
1107 """ 1108 MCMC based fitting 1109 1110 :Parameters: 1111 - `data`: observed time series on the model's output 1112 - `t`: length of the observed time series 1113 - `likvariance`: variance of the Normal likelihood function 1114 - `nopool`: True if no priors on the outputs are available. Leads to faster calculations 1115 - `method`: Step method. defaults to Metropolis hastings 1116 """ 1117 #self.phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names) 1118 ta = True if self.verbose==1 else False 1119 tc = True if self.verbose==1 else False 1120 if method == "MH": 1121 sampler = MCMC.Metropolis(self, self.K,self.K*10, data, t, self.theta_dists, self.q1theta.dtype.names, self.tlimits, like.Normal, likvariance, burnin, trace_acceptance=ta, trace_convergence=tc, nchains=self.ntheta, constraints=[]) 1122 sampler.step() 1123 self.phi = sampler.phi 1124 #self.mh(self.K,t,data,like.Normal,likvariance,burnin) 1125 elif method == 'dream': 1126 sampler = MCMC.Dream(self, self.K,self.K*10, data, t, self.theta_dists, self.q1theta.dtype.names, self.tlimits, like.Normal, likvariance, burnin, trace_acceptance=ta, trace_convergence=tc, nchains=self.ntheta, constraints=[]) 1127 sampler.step() 1128 self.phi = sampler.phi 1129 else: 1130 sys.exit("Invalid MCMC method!\nTry 'MH'.") 1131 self.done_running = 1 1132 return 1
1133
1134 - def _output_loglike(self, prop, data, likfun=like.Normal,likvar=1e-1, po=None):
1135 """ 1136 Returns the log-likelihood of a simulated series 1137 1138 :Parameters: 1139 - `prop`: Proposed output 1140 - `data`: Data against which proposal will be measured 1141 - `likfun`: Likelihood function 1142 - `likvar`: Variance of the likelihood function 1143 - `po`: Pool of processes for parallel execution 1144 1145 :Types: 1146 - `prop`: array of shape (t,nphi) with series as columns. 1147 - `data`: Dictionary with keys being the names (as in phinames) of observed variables 1148 - `likfun`: Log likelihood function object 1149 """ 1150 if isinstance(prop, numpy.recarray): 1151 prop= numpy.array(prop.tolist()) 1152 t = prop.shape[0] #1 if model's output is a scalar, larger if it is a time series (or a set of them) 1153 lik=0 1154 for k in xrange(self.nphi): #loop on series 1155 if self.q2phi.dtype.names[k] not in data: 1156 continue#Only calculate liks of series for which we have data 1157 obs = array(data[self.q2phi.dtype.names[k]]) 1158 if len(obs.shape)>1:#in case of more than one dataset 1159 obs = clearNaN(obs).mean(axis=1) 1160 if po != None:# Parallel version 1161 liks = [po.apply_async(likfun,(obs[p],prop[p][k],1./likvar)) for p in xrange(t) if not isnan(obs[p])] 1162 lik = nansum([l.get() for l in liks]) 1163 else: 1164 liks = [likfun(obs[p],prop[p][k],1./likvar) for p in xrange(t)if not isnan(obs[p]) ] 1165 lik = nansum(liks) 1166 # if isnan(lik): 1167 # pdb.set_trace() 1168 return lik
1169
1170 - def sir(self, data={}, t=1,variance=0.1, pool=False,savetemp=False):
1171 """ 1172 Run the model output through the Sampling-Importance-Resampling algorithm. 1173 Returns 1 if successful or 0 if not. 1174 1175 :Parameters: 1176 - `data`: observed time series on the model's output 1177 - `t`: length of the observed time series 1178 - `variance`: variance of the Normal likelihood function 1179 - `pool`: False if no priors on the outputs are available. Leads to faster calculations 1180 - `savetemp`: Boolean. create a temp file? 1181 """ 1182 seed() 1183 phi = self.runModel(savetemp,t) 1184 # Do Log Pooling 1185 if not pool: 1186 qtilphi = ones(self.K) 1187 else: 1188 t0 = time() 1189 qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi 1190 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0) 1191 qtilphi = nan_to_num(qtilphi) 1192 print 'max(qtilphi): ', max(qtilphi) 1193 if sum(qtilphi)==0: 1194 print 'Pooled prior on ouputs is null, please check your priors, and try again.' 1195 return 0 1196 1197 # Calculating the likelihood of each phi[i] considering the observed data 1198 # lik =self._output_loglike() 1199 lik = zeros(self.K) 1200 t0=time() 1201 po = Pool() 1202 for i in xrange(self.K): 1203 p = phi[i] 1204 l =self._output_loglike(p,data, likvar=variance) 1205 # for n in data.keys(): 1206 # if isinstance(data[n],list) and data[n] == []: 1207 # continue #no observations for this variable 1208 # elif isinstance(data[n],numpy.ndarray) and (not data[n].any()): 1209 # continue #no observations for this variable 1210 # p = phi[n] 1211 # lik =self._output_loglike() 1212 # liklist=[po.apply_async(like.Normal,(data[n][m], j, 1./variance)) for m,j in enumerate(p[i])] 1213 # l=sum([p.get() for p in liklist]) 1214 if i%self.K/10. ==0: 1215 print "Likelihood calculation progress: %s of %s done."%(i, self.K) 1216 lik[i]=l 1217 po.close() 1218 po.join() 1219 if self.viz: 1220 dtplot.clearFig();phiplot.clearFig();thplot.clearFig() 1221 dtplot.gp.xlabel('observed') 1222 dtplot.gp.ylabel('simulated') 1223 obs = [];sim =[] 1224 for n in data.keys(): 1225 obs.append(data[n]) 1226 sim.append(phi[n].mean(axis=0).tolist()) 1227 dtplot.scatter(array(obs),array(sim),names=data.keys(),title='fit') 1228 phiplot.plotlines(array(sim),names=data.keys(),title='Model Output') 1229 thplot.plothist(self.q1theta, title='Input parameters',names=self.q1theta.dtype.names) 1230 print "==> Done Calculating Likelihoods (took %s seconds)"%(time()-t0) 1231 lr = nan_to_num(max(lik)/min(lik)) 1232 print '==> Likelihood (min,mean,max,sum): ',min(lik),mean(lik),max(lik), sum(lik) 1233 print "==> Likelihood ratio of best run/worst run: %s"%(lr,) 1234 # Calculating the weights 1235 w = nan_to_num(qtilphi*lik) 1236 w = nan_to_num(w/sum(w)) 1237 1238 if not sum(w) == 0.0: 1239 j = 0 1240 t0 = time() 1241 maxw = 0;minw = max(w) #keep track of goodness of fit of phi 1242 while j < self.L: # Extract L samples from q1theta 1243 i=randint(0,w.size)# Random position of w and q1theta 1244 if random()*max(w)<= w[i]: 1245 self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob. 1246 maxw = max(maxw,w[i]) 1247 minw = min(minw,w[i]) 1248 j+=1 1249 if not j%100 and self.verbose: 1250 print j, "of %s"%self.L 1251 self.done_running = True 1252 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)) 1253 wr = maxw/minw 1254 print "==> Likelihood ratio of best/worst retained runs: %s"%(wr,) 1255 if wr == 1: 1256 print "==> Flat likelihood, trying again..." 1257 return 0 1258 print "==> Improvement: %s percent"%(100-100*wr/lr,) 1259 else: 1260 print 'Resampling weights are all zero, please check your model or data, and try again.\n' 1261 print '==> Likelihood (min,mean,max): ',min(lik),mean(lik),max(lik) 1262 print '==> RMS deviation of outputs: %s'%(basicfit(phi, data),) 1263 return 0 1264 return 1
1265 1266
1267 - def runModel(self,savetemp,t=1, k=None):
1268 ''' 1269 Handles running the model k times keeping a temporary savefile for 1270 resuming calculation in case of interruption. 1271 1272 :Parameters: 1273 - `savetemp`: Boolean. create a temp file? 1274 - `t`: number of time steps 1275 1276 :Returns: 1277 - self.phi: a record array of shape (k,t) with the results. 1278 ''' 1279 if savetemp: 1280 CP.dump(self.q1theta,open('q1theta','w')) 1281 if not k: 1282 k = self.K 1283 # Running the model ========================== 1284 1285 1286 if os.path.exists('phi.temp'): 1287 self.phi,j = CP.load(open('phi.temp','r')) 1288 else: 1289 j=0 1290 self.phi = recarray((k,t),formats=['f8']*self.nphi, names = self.phi.dtype.names) 1291 def cb(r): 1292 ''' 1293 callback function for the asynchronous model runs 1294 ''' 1295 if t == 1: 1296 self.phi[r[1]] = (r[0][-1],) 1297 else: 1298 self.phi[r[1]] = [tuple(l) for l in r[0][-t:]]# #phi is the last t points in the simulation
1299 1300 po = Pool() 1301 t0=time() 1302 for i in xrange(j,k): 1303 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names] 1304 r = po.apply_async(enumRun,(self.model,theta,i),callback=cb) 1305 # r = po.apply_async(self.model,theta) 1306 # if t == 1: 1307 # phi[i] = (r.get()[-1],) 1308 # else: 1309 # phi[i] = [tuple(l) for l in r.get()[-t:]]# #phi is the last t points in the simulation 1310 if i%100 == 0 and self.verbose: 1311 print "==> K = %s"%i 1312 if savetemp: 1313 CP.dump((self.phi,i),open('phi.temp','w')) 1314 if savetemp: #If all replicates are done, clear temporary save files. 1315 os.unlink('phi.temp') 1316 os.unlink('q1theta') 1317 po.close() 1318 po.join() 1319 print "==> Done Running the K (%s) replicates (took %s seconds)\n"%(k,(time()-t0)) 1320 1321 return self.phi 1322
1323 -def basicfit(s1,s2):
1324 ''' 1325 Calculates a basic fitness calculation between a model- 1326 generated time series and a observed time series. 1327 it uses a Mean square error. 1328 1329 :Parameters: 1330 - `s1`: model-generated time series. record array. 1331 - `s2`: observed time series. dictionary with keys matching names of s1 1332 1333 :Return: 1334 Root mean square deviation between ´s1´ and ´s2´. 1335 ''' 1336 if isinstance(s1, recarray): 1337 # print "==> is recarray!" 1338 assert isinstance(s2, dict) 1339 err = [] 1340 for k in s2.keys(): 1341 if k not in s1.dtype.names: 1342 continue 1343 ls1 = len(s1[k]) #handles the cases where data is slightly longer that simulated series. 1344 # print s2[k] 1345 # try: 1346 # pdb.set_trace() 1347 1348 if len(s2[k].shape) >1: 1349 s2[k] = clearNaN(s2[k]).mean(axis=1)#nanmean(s2[k], axis=1) 1350 dif = s1[k]-s2[k][:ls1].astype(float) 1351 1352 dif[isnan(dif)] = 0 1353 e = nanmean(dif**2) 1354 # except TypeError: 1355 # print s1[k], s2[k] 1356 1357 err.append(e) 1358 elif isinstance(s1, list): 1359 # print "==> is List!" 1360 assert isinstance(s2, list) and len(s1) ==len(s2) 1361 s1 = array(s1).astype(float) 1362 s2 = array(s2).astype(float) 1363 if len(s2.shape) >1: 1364 s2 = clearNaN(s2).mean(axis=1) 1365 1366 err = [nanmean((s-t)**2) for s, t in zip(s1, s2) ] 1367 #err = [sum((s-t)**2./t**2) for s, t in zip(s1, s2)] 1368 mse = nan_to_num(mean(err)) 1369 return mse
1370
1371 -def clearNaN(obs):
1372 """ 1373 Loops through an array with data series as columns, and 1374 Replaces NaNs with the mean of the other series. 1375 1376 :Parameters: 1377 - `obs`: 2-dimensional numpy array 1378 1379 :Returns: 1380 array of the same shape as obs 1381 """ 1382 rows = obs.T.tolist() #tranpose to facilitate processing 1383 res = zeros(obs.T.shape) 1384 for i, r in enumerate(rows): 1385 a = array(r) 1386 if not isnan(a).any(): 1387 continue 1388 else: 1389 for j, e in enumerate(r): 1390 if isnan(e): 1391 c = obs.T[:, j].tolist() 1392 c.pop(i) 1393 r[j] = nanmean(c) 1394 r = nan_to_num(r) 1395 res[i, :] = r 1396 1397 return res.T
1398
1399 -def enumRun(model,theta,k):
1400 """ 1401 Returns model results plus run number. 1402 1403 :Parameters: 1404 - `model`: model callable 1405 - `theta`: model input list 1406 - `k`: run number 1407 1408 :Return: 1409 - res: result list 1410 - `k`: run number 1411 """ 1412 res =model(theta) 1413 return (res,k)
1414
1415 -def model_as_ra(theta, model, phinames):
1416 """ 1417 Does a single run of self.model and returns the results as a record array 1418 """ 1419 theta = list(theta) 1420 nphi = len(phinames) 1421 r = model(theta) 1422 res = recarray(r.shape[0],formats=['f8']*nphi, names = phinames) 1423 for i, n in enumerate(res.dtype.names): 1424 res[n] = r[:, i] 1425 return res
1426
1427 -def model(theta, n=1):
1428 """ 1429 Model (r,p0, n=1) 1430 Simulates the Population dynamic Model (PDM) Pt = rP0 1431 for n time steps. 1432 P0 is the initial population size. 1433 Example model for testing purposes. 1434 """ 1435 r, p0 = theta 1436 # print "oi" 1437 Pt = zeros((n, 1), float) # initialize the output vector 1438 P = p0 1439 for i in xrange(n): 1440 Pt[i] = r*P 1441 P = Pt[i] 1442 1443 return Pt
1444 1445
1446 -def plotRaHist(arr):
1447 ''' 1448 Plots a record array 1449 as a panel of histograms 1450 ''' 1451 nv = len(arr.dtype.names) 1452 fs = (numpy.ceil(numpy.sqrt(nv)),numpy.floor(numpy.sqrt(nv))+1) #figure size 1453 P.figure() 1454 for i,n in enumerate(arr.dtype.names): 1455 P.subplot(floor(sqrt(nv)),floor(sqrt(nv))+1,i+1) 1456 P.hist(arr[n],bins=50, normed=1, label=n) 1457 P.legend()
1458 1459
1460 -def main2():
1461 start = time() 1462 #Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False) 1463 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)], [(0, 10), (0, 10)]) 1464 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(0,10)]) 1465 #Me.add_data(normal(7.5,1,400),'normal',(6,9)) 1466 #Me.run() 1467 Me.sir(data ={'p':[7.5]} ) 1468 pt,pp = Me.getPosteriors(1) 1469 end = time() 1470 print end-start, ' seconds' 1471 plotRaHist(pt) 1472 plotRaHist(pp) 1473 P.show()
1474 1475
1476 -def mh_test():
1477 start = time() 1478 #Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False) 1479 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)], [(0, 10), (0, 10)]) 1480 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(0,10)]) 1481 Me.mcmc_run(data ={'p':[7.5]},burnin=1000) 1482 pt,pp = Me.getPosteriors(1) 1483 end = time() 1484 print end-start, ' seconds' 1485 plotRaHist(pt) 1486 plotRaHist(pp) 1487 P.show()
1488 1489 1490 if __name__ == '__main__': 1491 Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=True,viz=False) 1492 #mh_test() 1493 main2() 1494