Package trunk :: Package BIP :: Package Bayes :: Package Samplers :: Module MCMC
[hide private]

Source Code for Module trunk.BIP.Bayes.Samplers.MCMC

  1  # To change this template, choose Tools | Templates 
  2  # and open the template in the editor. 
  3  """ 
  4  Module implementing MCMC samplers  
  5   
  6      - Metropolis: Adaptive Metropolis Hastings sampler 
  7      - Dream: DiffeRential Evolution Adaptive Markov chain sampler 
  8  """ 
  9  import sys 
 10  import time 
 11  import pdb 
 12  import cython 
 13  import xmlrpclib 
 14  import logging 
 15  from multiprocessing import Pool 
 16  from random import sample 
 17   
 18  import numpy as np 
 19  from liveplots.xmlrpcserver import rpc_plot 
 20  from numpy import array, mean,isnan,  nan_to_num, var, sqrt, inf, exp, greater, less, identity, ones, zeros, floor, log, recarray, nan 
 21  from numpy.random import random,  multivariate_normal,  multinomial,  rand 
 22  from scipy.stats import cov,  uniform, norm, scoreatpercentile 
 23   
 24   
 25  __author__="fccoelho" 
 26  __date__ ="$09/12/2009 10:44:11$" 
 27  __docformat__ = "restructuredtext en" 
 28   
 29  logger = logging.getLogger('BIP.MCMC') 
def timeit(method):
    """
    Decorator to time methods.

    Prints the name of the decorated callable and the wall-clock time (in
    seconds) each call took, then returns the call's result unchanged.

    :Parameters:
        - `method`: callable to be timed.

    :Returns:
        A wrapper with the same name and docstring as `method`.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from functools import wraps

    @wraps(method)  # preserve __name__/__doc__ of the wrapped callable
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()

        print('%r %2.2f sec' % (method.__name__, te - ts))
        return result

    return timed
46 47 48 -class _Sampler(object):
49 ''' 50 Base classe for all samplers 51 Holds common logic and 52 ''' 53 _po = None 54 _dimensions = None #cache for dimensions 55 trace_acceptance = False 56 trace_convergence = False 57 seqhist = None 58 liklist = [] 59 e = 1e-20 #very small number used in the proposal covariance function calculation 60 _j=-1 61 _R = np.inf #Gelman Rubin Convergence
62 - def __init__(self, parpriors=[], parnames = []):
63 self.parpriors = parpriors 64 self.parnames = parnames
65 66 @property
67 - def best_prop_index(self):
68 ''' 69 Returns the index of the best fitting proposal, i.e., 70 the one which with max Likelihood 71 ''' 72 if not self.liklist: 73 return 0 74 return self.liklist.index(max(self.liklist))
75 76 @property
77 - def DIC(self):
78 """ 79 Calculates the deviance information criterion 80 """ 81 D = -2*array(self.liklist) 82 Dbar = nan_to_num(D).mean() 83 meanprop = array([self.meld.post_phi[i].mean(axis=0) for i in self.meld.post_phi.dtype.names]) 84 pd = Dbar+2*self.meld._output_loglike(meanprop.T, self.data, self.likfun, self.likvariance) 85 DIC = pd +Dbar 86 return DIC
87 88 @property
89 - def dimensions(self):
90 if not self._dimensions: 91 self._dimensions = len(self.parpriors) 92 return self._dimensions
93 94 @property
95 - def po(self):
96 ''' 97 Pool of processes for parallel execution of tasks 98 Remember to call self.term_pool() when done. 99 ''' 100 if self._po == None: 101 self._po = Pool() 102 else: 103 if self._po._state: 104 self._po = Pool() #Pool has been terminated 105 return self._po
106
107 - def shut_down(self,reason=''):
108 ''' 109 Finalizes the sampler, nicely closing the resources allocated 110 111 :Parameters: 112 - `reason`: comment stating why the sampling is being shutdown. 113 ''' 114 try: 115 self.term_pool() 116 except OSError: 117 pass 118 self.pserver.close_plot() 119 self.pserver2.close_plot() 120 if reason: 121 logger.info(reason)
122 123
124 - def term_pool(self):
125 if self._po == None: 126 return 127 if not self._po._state: #Pool needs terminating 128 self._po.close() 129 self._po.join() 130 self._po = None
131
132 - def gr_R(self, end, start):
133 if self._j == end: 134 return self._R 135 else: 136 self.gr_convergence(end, start) 137 self._j = end 138 return self._R
139
140 - def gr_convergence(self, relevantHistoryEnd, relevantHistoryStart):
141 """ 142 Gelman-Rubin Convergence 143 """ 144 start = relevantHistoryStart 145 end = relevantHistoryEnd 146 N = end - start 147 if N==0: 148 self._R = np.inf*np.ones(self.nchains) 149 return 150 N = min(min([len(self.seqhist[c]) for c in range(self.nchains)]), N) 151 seq = [self.seqhist[c][-N:] for c in range(self.nchains)] 152 sequences = array(seq) #this becomes an array (nchains,samples,dimensions) 153 variances = var(sequences,axis = 1)#array(nchains,dim) 154 means = mean(sequences, axis = 1)#array(nchains,dim) 155 withinChainVariances = mean(variances, axis = 0) 156 betweenChainVariances = var(means, axis = 0) * N 157 varEstimate = (1 - 1.0/N) * withinChainVariances + (1.0/N) * betweenChainVariances 158 self._R = sqrt(varEstimate/ withinChainVariances)
159 160 @np.vectorize
161 - def _accept(self, last_lik, lik):
162 """ 163 Decides whether to accept a proposal 164 """ 165 if last_lik == None: last_lik = -inf 166 # liks are logliks 167 if lik == -inf:#0: 168 return 0 169 if last_lik >-inf:#0: 170 alpha = min( exp(lik-last_lik), 1) 171 #alpha = min(lik-last_lik, 1) 172 elif last_lik == -inf:#0: 173 alpha = 1 174 else: 175 return 0 176 raise ValueError("Negative likelihood!?!") 177 # print "last_lik, lik, alpha: ", last_lik, lik, alpha 178 if random() < alpha: 179 return 1 180 else: 181 return 0
182
183 - def setup_xmlrpc_plotserver(self):
184 """ 185 Sets up the server for real-time chain watch 186 """ 187 p=0;p2=0 188 while p==0 or p2 == 0: 189 p = rpc_plot() 190 p2 = rpc_plot(hold=1) 191 self.pserver = xmlrpclib.ServerProxy('http://localhost:%s'%p) 192 self.pserver2 = xmlrpclib.ServerProxy('http://localhost:%s'%p2)
193
194 - def shutdown_xmlrpc_plotserver(self):
195 self.pserver.flush_queue() 196 self.pserver.shutdown() 197 self.pserver2.flush_queue() 198 self.pserver2.shutdown()
199
200 - def _every_plot(self):
201 """ 202 plotting function for generating a plot at every step 203 """ 204 pass
205
206 - def _watch_chain(self, j):
207 if j<100: 208 return 209 self.gr_convergence(j, j-100) 210 print "Gelman-Rubin's R: ", self._R 211 self.pserver.clearFig() 212 thin = j//500 if j//500 !=0 else 1 #history is thinned to show at most 500 points, equally spaced over the entire range 213 chaindata = self.history[:j:thin].T.tolist() 214 obs = [];lbs = [] 215 for k, d in self.data.items(): 216 if len(d.shape)>1: 217 obs += [nan_to_num(i).tolist() for i in d.T] 218 lbs += [k+str(i) for i in range(d.shape[1])] 219 else: 220 obs += nan_to_num(d).tolist() 221 lbs += [k] 222 self.pserver.lines(chaindata,range(j-(len(chaindata[0])), j), self.parnames, "Chain Progress.",'points' , 1) 223 self.pserver2.lines(obs,[],lbs, "Fit", 'points' ) 224 s = j-50 if 3*j//4<50 else 3*j//4 225 #series = [self.phi[k][s:j].mean(axis=0).tolist() for k in self.data.keys()] 226 series = [mean(self.phi[k][s:j], axis=0).tolist() for k in self.data.keys()] 227 bi = self.liklist.index(max(self.liklist[s:j])) #index of the best fit 228 series = [mean(self.phi[k][s:j], axis=0).tolist() for k in self.data.keys()] 229 best = [self.phi[k][bi].tolist() for k in self.data.keys()] 230 mlabels = ['Mean '+l for l in self.data.keys()] 231 blabels = ['Best '+l for l in self.data.keys()] 232 self.pserver2.lines(series,[],mlabels, "Mean fit of last %s samples"%(j-s), 'lines' ) 233 self.pserver2.lines(best,[],blabels, "mean and best fit of last %s samples"%(j-s), 'lines' ) 234 self.pserver2.clearFig()
235 #TODO: Implement plot of best fit simulation against data 236
237 - def _tune_likvar(self, ar):
238 try: 239 self.arhist.append(ar) 240 except AttributeError: 241 self.tsig = 1 242 self.tstep = .05 243 self.arhist = [ar] 244 dev = (0.35-ar)**2 245 if dev > 0.02: 246 self.likvariance *= 1+self.tsig *(.5*(np.tanh(8*dev-3)+1)) 247 else: return #ar at target, don't change anything 248 improv = (0.35-mean(self.arhist[-5:-1]))**2 - (0.35-ar)**2 249 if improv < 0: 250 self.tsig *= -1 #change signal if AR is not improving 251 self.tstep = .05 #reset to small steps if changing direction 252 elif improv > 0 and improv <.01: 253 if random() <.05: #1 in 20 chance to change direction if no improvements 254 self.tsig *= -1 #change signal if AR is not improving 255 elif improv > 0.01: 256 self.tstep *= 0.97 #reduce step if approacching sweet spot
257 258 # @np.vectorize
259 - def check_constraints(self, theta):
260 """ 261 Check if given theta vector complies with all constraints 262 263 :Parameters: 264 - `theta`: parameter vector 265 266 :Returns: 267 True if theta passes all constraints, False otherwise 268 """ 269 if not self.constraints: 270 return True 271 r = array([c(theta) for c in self.constraints]) 272 return r.all()
273
274 - def _propose(self, step, po=None):
275 """ 276 Generates proposals. 277 returns two lists 278 279 :Parameters: 280 - `step`: Position in the markov chain history. 281 - `po`: Process pool for parallel proposal generation 282 283 :Returns: 284 - `theta`: List of proposed self.dimensional points in parameter space 285 - `prop`: List of self.nchains proposed phis. 286 """ 287 thetalist = [] 288 proplist = [] 289 initcov = np.identity(self.dimensions) 290 for c in range(self.nchains): 291 if step <= 1 or self.seqhist[c] ==[]: 292 #sample from the priors 293 while 1: 294 theta = [self.parpriors[dist]() for dist in self.parnames] 295 if not self.check_constraints(theta): 296 continue 297 if sum ([int(greater(t, self.parlimits[i][0]) and less(t, self.parlimits[i][1])) for i, t in enumerate(theta)]) == self.dimensions: 298 break 299 self.lastcv = initcov #assume no covariance at the beginning 300 else: 301 #use gaussian proposal 302 if step%10==0 and len(self.seqhist[c]) >=10: #recalculate covariance matrix only every ten steps 303 cv = self.scaling_factor*cov(array(self.seqhist[c][-10:]))+self.scaling_factor*self.e*identity(self.dimensions) 304 self.lastcv = cv 305 else: 306 cv = self.lastcv 307 while 1: 308 theta = multivariate_normal(self.seqhist[c][-1],cv, size=1).tolist()[0] 309 if sum ([int(greater(t, self.parlimits[i][0]) and less(t, self.parlimits[i][1])) for i, t in enumerate(theta)]) == self.dimensions: 310 break 311 thetalist.append(theta) 312 if po: 313 proplis = [po.apply_async(model_as_ra, (t, self.meld.model, self.meld.phi.dtype.names)) for t in thetalist] 314 proplist = [job.get() for job in proplis] 315 else: 316 proplist = [model_as_ra(t, self.meld.model, self.meld.phi.dtype.names) for t in thetalist] 317 propl = [p[:self.t] for p in proplist] 318 return thetalist,propl
319
#TODO: remove dependency on the meld object
class Metropolis(_Sampler):
    """
    Standard random-walk Metropolis Hastings sampler class
    """
    def __init__(self, meldobj, samples, sampmax, data, t, parpriors, parnames, parlimits, likfun, likvariance, burnin, **kwargs):
        """
        MCMC based fitting

        :Parameters:
            - `meldobj`: Meld object providing the model and receiving the results.
            - `samples`: Number of samples to obtain
            - `sampmax`: Maximum number of samples drawn.
            - `data`: observed time series on the model's output
            - `t`: length of the observed time series
            - `parpriors`: Dictionary with frozen distributions objects as values and parnames as keys
            - `parnames`: List of parameter names
            - `parlimits`: list of tuples with (min,max) for every parameter.
            - `likfun`: Likelihood function
            - `likvariance`: variance of the Normal likelihood function
            - `burnin`: Number of burnin samples
            - `kwargs`: extra attributes, stored on the instance as given.
        """
        self.salt_band = 0.05
        self.samples = samples
        self.sampmax = sampmax
        self.parpriors = parpriors
        self.parnames = parnames
        self.parlimits = parlimits
        self.likfun = likfun
        self.likvariance = likvariance
        self.data = data
        self.meld = meldobj
        self.t = t
        self.burnin = burnin
        self.nchains = 1
        self.phi = np.recarray((self.samples + self.burnin, t), formats=['f8'] * self.meld.nphi, names=self.meld.phi.dtype.names)
        self.scaling_factor = (2.38 ** 2) / self.dimensions
        self.e = 1e-20
        # per-instance list: the class-level default would be shared by all samplers
        self.liklist = []
        # Values are stored as passed. (They used to be spliced into an
        # exec'd statement, which broke for anything whose repr is not a
        # valid expression and executed arbitrary text.)
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.nchains = 1  # this sampler always runs a single chain
        # Combined history of accepted samples
        self.history = np.zeros((self.nchains * (samples + self.burnin), self.dimensions))
        #complete history of all chains as a dictionary with keys as integer ids of the chains
        self.seqhist = dict((i, []) for i in range(self.nchains))
        self.setup_xmlrpc_plotserver()

    def _propose(self, step, po=None):
        """
        Generates proposals.
        returns two lists

        :Parameters:
            - `step`: Position in the markov chain history.
            - `po`: Process pool for parallel proposal generation
              (currently ignored, see below).

        :Returns:
            - `theta`: List of proposed self.dimensional points in parameter space
            - `prop`: List of self.nchains proposed phis.

        :Raises:
            ValueError if no valid starting point can be drawn from the
            priors and there is no accepted sample to fall back to.
        """
        def inside_limits(theta):
            # within the closed (min, max) interval of every parameter
            return all(self.parlimits[i][0] <= t <= self.parlimits[i][1]
                       for i, t in enumerate(theta))

        po = None  # parallel proposal generation is deliberately disabled here
        thetalist = []
        initcov = identity(self.dimensions)
        if self.meld.initheta and step <= 1:
            #start from user-defined point in parameter space.
            for i in range(self.nchains):
                thetalist.append(self.meld.initheta)
            self.lastcv = initcov  # assume no covariance at the beginning
        else:
            for c in range(self.nchains):
                off = 0  # number of out-of-limits draws so far
                if step <= 1 or not self.seqhist[c]:
                    #sample from the priors
                    while off < 50:
                        theta = [self.parpriors[par].rvs() for par in self.parnames]
                        if not self.check_constraints(theta):
                            continue
                        if inside_limits(theta):
                            break
                        off += 1
                    if off == 50:  # try a compromising proposal
                        if not self.seqhist[c]:
                            # nothing accepted yet: there is no sample to reuse
                            raise ValueError("Could not draw a starting point within parlimits from the priors.")
                        theta = self.seqhist[c][-1]  # last accepted proposal for this chain
                    self.lastcv = initcov  # assume no covariance at the beginning
                else:
                    #use gaussian proposal
                    if step % 10 == 0 and len(self.seqhist[c]) >= 10:
                        # recalculate covariance matrix only every ten steps
                        cv = (self.scaling_factor * cov(array(self.seqhist[c][-10:]))
                              + self.scaling_factor * self.e * identity(self.dimensions))
                        self.lastcv = cv
                    else:
                        cv = self.lastcv
                    while off < 50:
                        theta = multivariate_normal(self.seqhist[c][-1], cv, size=1).tolist()[0]
                        if inside_limits(theta):
                            break
                        off += 1
                    if off == 50:  # try a compromising proposal
                        theta = self.seqhist[c][-1]  # last accepted proposal for this chain
                thetalist.append(theta)
        phinames = self.meld.phi.dtype.names
        if po:
            jobs = [po.apply_async(model_as_ra, (t, self.meld.model, phinames)) for t in thetalist]
            proplist = [job.get() for job in jobs]
        else:
            proplist = [model_as_ra(t, self.meld.model, phinames) for t in thetalist]
        propl = [p[:self.t] for p in proplist]
        return thetalist, propl

    def step(self, nchains=1):
        """
        Does the actual sampling loop.

        Only accepted proposals are stored; the loop runs until
        ``samples + burnin`` of them have been collected or the meld object
        sets ``stop_now``.

        :Parameters:
            - `nchains`: unused, kept for interface compatibility.

        :Returns:
            1 when sampling finished normally; the result of `shut_down`
            when interrupted.
        """
        total = self.samples + self.burnin
        ptheta = recarray(total, formats=['f8'] * self.dimensions, names=self.parnames)
        #total samples, accepted samples, rejected proposals, acceptance rate
        i = 0
        j = 0
        rej = 0
        ar = 0
        last_lik = None
        while j < total:
            print(j)
            self.meld.current_step = j
            if self.meld.stop_now:
                return self.shut_down('user interrupted')
            #generate proposals
            theta, prop = self._propose(j, self.po)
            #calculate likelihoods
            lik = [self.meld._output_loglike(p, self.data, self.likfun, self.likvariance, self.po) for p in prop]
            # have to include self in the call because method is vectorized.
            accepted = self._accept(self, last_lik, lik)
            if last_lik is None:  # on first sample
                last_lik = list(lik)
                continue
            i += self.nchains
            n_accepted = sum(accepted)
            if n_accepted < self.nchains:
                rej += self.nchains - n_accepted  # adjust rejection counter
            if i % 100 == 0:
                ar = (i - rej) / float(i)
                self._tune_likvar(ar)
                if self.trace_acceptance:
                    print("--> %s: Acc. ratio: %s" % (rej, ar))
            # Store accepted values
            for c, t, pr, a in zip(range(self.nchains), theta, prop, accepted):  # Iterates over the results of each chain
                if not a:
                    continue
                self.history[j, :] = t
                self.seqhist[c].append(t)
                self.phi[j] = pr[0] if self.t == 1 else [tuple(point) for point in pr]
                ptheta[j] = tuple(t)
                self.liklist.append(lik[c])
                # the chain moved: its reference likelihood is now the accepted
                # one (a rejected proposal must not replace it)
                last_lik[c] = lik[c]
                if j == total:
                    break
                j += 1  # update accepted sample counter
                if j % 100 == 0 and j > 0:
                    if self.trace_acceptance:
                        print("++>%s,%s: Acc. ratio: %s" % (j, i, ar))
                    self._watch_chain(j)
                    if self.trace_convergence:
                        print("++> %s: Likvar: %s\nML:%s" % (j, self.likvariance, np.max(self.liklist)))
            ar = (i - rej) / float(i)
            if self.meld.verbose == 2 and j > 10:
                self.meld.current_plot(self.phi, self.data, self.best_prop_index, step=j)
        self.term_pool()
        self.meld.post_phi = self.phi[self.burnin:]
        # NOTE(review): post_theta keeps the burn-in samples while post_phi
        # drops them (unchanged behaviour) - confirm this is intended.
        self.meld.post_theta = ptheta
        self.meld.likmax = max(self.liklist)
        self.meld.DIC = self.DIC
        print("Total steps(i):  %s rej: %s j: %s" % (i, rej, j))
        print(">>> Acceptance rate: %s" % ar)
        self.shut_down('Finished normally')
        return 1

    def _rms_fit(self, s1, s2):
        '''
        Calculates a basic fitness calculation between a model-
        generated time series and a observed time series.
        It uses a normalized RMS variation.

        :Parameters:
            - `s1`: model-generated time series.
            - `s2`: observed time series. dictionary with keys matching names of s1
        :Types:
            - `s1`: Record array or list.
            - `s2`: Dictionary or list

        s1 and s2 can also be both lists of lists or lists of arrays of the same length.

        :Return:
            Inverse of the Root mean square deviation between `s1` and `s2`
            (infinity when the two series are identical).

        :Raises:
            TypeError for unsupported argument types, ValueError when two
            lists differ in length.
        '''
        if isinstance(s1, np.recarray):
            if not isinstance(s2, dict):
                raise TypeError("s2 must be a dictionary when s1 is a record array")
            err = [np.sqrt(np.mean((s1[k] - s2[k]) ** 2.)) for k in s2]
        elif isinstance(s1, list):
            if not isinstance(s2, list):
                raise TypeError("s2 must be a list when s1 is a list")
            if len(s1) != len(s2):
                raise ValueError("s1 and s2 must have the same length")
            err = [np.sqrt(np.mean((s - t) ** 2.)) for s, t in zip(s1, s2)]
        else:
            raise TypeError("s1 must be a record array or a list")
        rmsd = np.mean(err)
        if rmsd == 0:
            # perfect fit: report it instead of terminating the interpreter
            return np.inf
        return 1. / rmsd  # fitness measure

    def _imp_sample(self, n, data, w):
        """
        Importance sampling

        Rows of `data` are drawn with replacement, with probability
        proportional to their weight. `w` is not modified.

        :Parameters:
            - `n`: Number of samples to return
            - `data`: record array (containing on or more vectors of data) to be resampled
            - `w`: Weight vector
        :Returns:
            returns a sample of size n
        :Raises:
            ValueError if the weights do not add up to a positive finite number.
        """
        print("Starting importance Sampling")
        #sanitizing weights (on a copy, so the caller's array is left alone)
        weights = np.nan_to_num(np.array(w, dtype=float))
        total = weights.sum()
        if not (total > 0 and np.isfinite(total)):
            raise ValueError("Importance weights must add up to a positive finite number.")
        weights = weights / total
        nvar = len(data.dtype.names)
        smp = np.recarray(n, formats=[data.dtype.descr[0][1]] * nvar, names=data.dtype.names)
        chosen = np.random.choice(weights.size, size=n, p=weights)
        for j, i in enumerate(chosen):
            smp[j] = data[i]  # copy the row that was actually drawn
        print("Done importance sampling.")
        return smp

    def _add_salt(self, dataset, band):
        """
        Adds a few extra uniformly distributed data
        points beyond the dataset range.
        This is done by adding from a uniform dist.

        The number of extra points is 5% of ``self.K``.
        NOTE(review): ``self.K`` is not set anywhere in this class; it has to
        be provided by the caller (e.g. through the constructor kwargs).

        :Parameters:
            - `dataset`: vector of data
            - `band`: Fraction of range to extend: [0,1[
        :Returns:
            Salted dataset.
        """
        dmax = max(dataset)
        dmin = min(dataset)
        drange = dmax - dmin
        hb = drange * band / 2.  # half of the extension, added to each side
        # uniform(loc, scale) covers [loc, loc + scale] = [dmin - hb, dmax + hb]
        salt = uniform(dmin - hb, drange + 2 * hb).rvs(size=int(self.K * .05))
        return np.concatenate((dataset, salt))
592
def model_as_ra(theta, model, phinames):
    """
    Runs `model` once and packs its output into a record array.

    :Parameters:
        - `theta`: parameter values; converted to a list before the call.
        - `model`: callable taking the parameter list and returning a
          two-dimensional array with one column per output variable.
        - `phinames`: names of the output variables, in column order.

    :Returns:
        Record array with one 'f8' field per name and one row per row of the
        model output.
    """
    output = model(list(theta))
    record = np.recarray(output.shape[0], formats=['f8'] * len(phinames), names=phinames)
    for column, field in enumerate(record.dtype.names):
        record[field] = output[:, column]
    return record
605
class Dream(_Sampler):
    '''
    DiffeRential Evolution Adaptive Markov chain sampler
    '''
    def __init__(self, meldobj, samples, sampmax, data, t, parpriors, parnames, parlimits, likfun, likvariance, burnin, thin=5, convergenceCriteria=1.1, nCR=3, DEpairs=1, adaptationRate=.65, eps=5e-6, mConvergence=False, mAccept=False, **kwargs):
        """
        :Parameters:
            - `meldobj`: Meld object providing the model and receiving the results.
            - `samples`: Number of samples to obtain
            - `sampmax`: Maximum number of samples drawn.
            - `data`: observed time series on the model's output
            - `t`: length of the observed time series
            - `parpriors`: Dictionary with frozen distributions objects as values and parnames as keys
            - `parnames`: List of parameter names
            - `parlimits`: list of tuples with (min,max) for every parameter.
            - `likfun`: Likelihood function
            - `likvariance`: variance of the Normal likelihood function
            - `burnin`: Number of burnin samples
            - `nCR`: number of crossover values; 1/nCR is used as crossover probability.
            - `DEpairs`: number of chain pairs used to set the scaling factor.
            - `thin`, `convergenceCriteria`, `adaptationRate`, `eps`,
              `mConvergence`, `mAccept`: accepted but currently unused.
            - `kwargs`: extra attributes, stored on the instance as given.
        """
        self.meld = meldobj
        self.samples = samples
        self.sampmax = sampmax
        self.data = data
        self.t = t
        self.parpriors = parpriors
        self.parnames = parnames
        self.parlimits = parlimits
        self.likfun = likfun
        self.likvariance = likvariance
        self.burnin = burnin
        self.nchains = len(parpriors)  # one chain per parameter
        self.phi = np.recarray((self.samples + self.burnin, t), formats=['f8'] * self.meld.nphi, names=self.meld.phi.dtype.names)
        self.nCR = nCR
        self.DEpairs = DEpairs
        self.delayRej = 1
        # per-instance list: the class-level default would be shared by all samplers
        self.liklist = []
        # Values are stored as passed. (They used to be spliced into an
        # exec'd statement, which broke for anything whose repr is not a
        # valid expression and executed arbitrary text.)
        for k, v in kwargs.items():
            setattr(self, k, v)
        self._R = array([2] * self.nchains)  # initializing _R
        self.maxChainDraws = floor(samples / self.nchains)
        #initialize the history arrays
        # History of log posterior probs for all chains
        self.omega = zeros((self.samples + self.burnin, self.nchains))
        # Combined history of accepted samples
        self.history = zeros((self.nchains * (samples + self.burnin), self.dimensions))
        self.seqhist = dict((i, []) for i in range(self.nchains))
        # initialize the temporary storage vectors
        self.currentVectors = zeros((self.nchains, self.dimensions))
        self.currentLiks = ones(self.nchains) * -inf
        self.scaling_factor = 2.38 / sqrt(2 * DEpairs * self.dimensions)
        self.setup_xmlrpc_plotserver()

    def _det_outlier_chains(self, step):
        """
        Determine which chains are outliers

        A chain is flagged when the mean of its log posterior over the last
        half of the history is below Q1 - 2*IQR of that window.

        :Parameters:
            - `step`: current position in the history.

        :Returns:
            Boolean array with one entry per chain.
        """
        window = self.omega[step // 2:step, :]
        means = window.mean(axis=0)
        q1 = scoreatpercentile(window, 25)
        q3 = scoreatpercentile(window, 75)
        iqr = q3 - q1
        return means < q1 - 2 * iqr

    def delayed_rejection(self, xi, zi, pxi, zprob):
        """
        Generates a second proposal based on rejected proposal xi

        :Parameters:
            - `xi`: current theta of the chain.
            - `zi`: the proposal that was rejected (unused).
            - `pxi`: log posterior probability of `xi`.
            - `zprob`: log posterior probability of the rejected proposal.

        :Returns:
            Tuple (theta, accepted, loglik, logpost, phi). When the second
            proposal is not accepted, theta is `xi` and the remaining items
            are 0.
        """
        def inside_limits(theta):
            return all(self.parlimits[i][0] <= t <= self.parlimits[i][1]
                       for i, t in enumerate(theta))

        k = .3  # Deflation factor for the second proposal
        cv = self.scaling_factor * cov(xi) + self.scaling_factor * self.e * identity(self.dimensions)
        o = 0
        while o < 50:
            zdr = multivariate_normal(xi, k * cv, 1).tolist()[0]
            if not self.check_constraints(zdr):
                continue
            if inside_limits(zdr):
                break
            o += 1
        if not inside_limits(zdr):
            return xi, 0, 0, 0, 0
        propphi_zdr = self._prop_phi([zdr])
        zdrprob, zdrlik = self._get_post_prob([zdr], propphi_zdr)
        # NOTE(review): the operands are log-probabilities but are combined
        # as if they were plain probabilities; kept as found, please verify
        # against the delayed-rejection acceptance ratio.
        alpha2 = min(zdrprob[0] * (1 - self._alpha1(self, zdrprob[0], zprob)) / pxi * (1 - self._alpha1(self, pxi, zprob)), 1)
        acc = 0
        lik = 0
        pr = 0
        prop = 0
        if random() < alpha2:
            xi = zdr
            acc = 1
            # single proposal: unpack the one-element results so the caller
            # gets the same kind of values it stores for regular proposals
            lik = zdrlik[0]
            pr = zdrprob[0]
            prop = propphi_zdr[0]
        return xi, acc, lik, pr, prop

    @np.vectorize
    def _alpha1(self, p1, p2):
        """
        Returns the Metropolis acceptance probability:
        alpha1(p1,p2) = min(1, exp(p1-p2)) if p2 > -inf else 1

        The method is vectorized, so it must be invoked as
        ``self._alpha1(self, p1, p2)``.

        :Parameters:
            - `p1`: log probability
            - `p2`: log probability
        """
        if p2 is None:
            p2 = -inf
        # ps are log probabilities
        if p2 > -inf:
            alpha = min(exp(p1 - p2), 1)
        elif p2 == -inf:
            alpha = 1
        else:
            # only reachable when p2 is NaN
            print("proposal's logP:  %s" % (p2,))
            alpha = 0
        return alpha

    def update_CR_dist(self):
        """
        Draws a crossover value for every chain.

        Unfinished: the drawn values are not stored or used yet.
        """
        pm = 1. / self.nCR
        for i in range(self.nchains):
            m = multinomial(1, [pm] * self.nCR).nonzero()[0][0] + 1
            CR = float(m) / self.nCR
        #TODO: finish implementing this

    def _prop_initial_theta(self, step):
        """
        Generate initial Theta proposals.

        Every chain starts at the user-defined point ``meld.initheta`` when
        it is set, otherwise at the means of the priors.

        :Parameters:
            - `step`: position in the chain (unused).

        :Returns:
            List with one theta per chain.
        """
        if self.meld.initheta:
            #start from user-defined point in parameter space.
            return [self.meld.initheta for i in range(self.nchains)]

        prior_means = array([self.parpriors[par].stats(moments='m') for par in self.parnames])
        if self.nchains > 0:
            self.lastcv = identity(self.dimensions)  # assume no covariance at the beginning
        # one independent list per chain
        return [prior_means.tolist() for c in range(self.nchains)]

    def _prop_phi(self, thetalist, po=None):
        """
        Returns proposed Phi derived from theta

        :Parameters:
            - `thetalist`: list of thetas to run the model with.
            - `po`: optional process pool used to run the model in parallel.

        :Returns:
            List with the first ``self.t`` rows of each model output.
        """
        phinames = self.meld.phi.dtype.names
        if po:
            jobs = [po.apply_async(model_as_ra, (t, self.meld.model, phinames)) for t in thetalist]
            return [job.get()[:self.t] for job in jobs]
        return [model_as_ra(t, self.meld.model, phinames)[:self.t] for t in thetalist]

    def _chain_evolution(self, proptheta, propphi, pps, liks):
        """
        Chain evolution as describe in ter Braak's Dream algorithm.

        :Parameters:
            - `proptheta`: current theta of every chain.
            - `propphi`: current phi of every chain.
            - `pps`: current log posterior probability of every chain.
            - `liks`: current log-likelihood of every chain.

        :Returns:
            Tuple (thetas, phis, log posteriors, log-likelihoods, accepted)
            with one entry per chain.
        """
        CR = 1. / self.nCR
        b = [(l[1] - l[0]) / 10. for l in self.parlimits]
        delta = (self.nchains - 1) // 2 if self.nchains > 2 else 1
        gam = 2.38 / sqrt(2 * delta * self.dimensions)
        zis = []
        for c in range(self.nchains):
            current = proptheta[c]
            others = [x for i, x in enumerate(proptheta) if i != c]
            while 1:  # repeat until the candidate satisfies the constraints
                e = [uniform(-i, 2 * i).rvs() for i in b]
                eps = [norm(0, i).rvs() for i in b]
                dif = zeros(self.dimensions)
                for d in range(delta):
                    d1, d2 = sample(others, 2)
                    dif += array(d1) - array(d2)
                zi = array(current) + (ones(self.dimensions) + e) * gam * dif + eps
                #revert offlimits proposals
                for i in range(len(zi)):
                    if zi[i] <= self.parlimits[i][0] or zi[i] >= self.parlimits[i][1]:
                        zi[i] = current[i]
                #Cross over
                for i in range(len(zi)):
                    zi[i] = current[i] if rand() < 1 - CR else zi[i]
                if self.check_constraints(zi):
                    break
            # exactly one candidate per chain (failed ones are discarded)
            zis.append(zi)

        #get the associated Phi's
        if isnan(zis).any():
            pdb.set_trace()
        propphi_z = self._prop_phi(zis, self.po)
        zprobs, zliks = self._get_post_prob(zis, propphi_z)

        # all result buffers are indexed by chain
        evolved = [0] * self.nchains  # evolved Theta
        prop_evo = [0] * self.nchains
        liks_evo = [0] * self.nchains
        pps_evo = zeros(self.nchains)  # posterior probabilities
        accepted = self._accept(self, pps, zprobs)  # have to pass self because method is vectorized

        # Do Delayed rejection with the chains that got rejected
        # and store results.
        for i, (z, x) in enumerate(zip(zis, proptheta)):
            if accepted[i]:
                evolved[i] = z
                prop_evo[i] = propphi_z[i]
                pps_evo[i] = zprobs[i]
                liks_evo[i] = zliks[i]
            else:
                th2, acc, lk, pr, prop = self.delayed_rejection(x, z, pps[i], zprobs[i])
                if acc:
                    accepted[i] = 1
                evolved[i] = th2
                prop_evo[i] = prop if acc else propphi[i]
                liks_evo[i] = lk if acc else liks[i]
                try:
                    pps_evo[i] = pr if acc else pps[i]
                except TypeError:  # when pps == None
                    pps_evo[i] = -inf
        return evolved, prop_evo, pps_evo, liks_evo, accepted

    def _get_post_prob(self, theta, prop, po=None):
        '''
        Calculates the posterior probability for the proposal of each chain

        :Parameters:
            - `theta`: list of nchains thetas
            - `prop`: list of nchains phis
            - `po`: Pool of processes

        :Returns:
            - `posts`: list of log posterior probabilities of length self.nchains
            - `listoliks`: list of log-likelihoods of length self.nchains
        '''
        pris = []
        for chain_theta in theta:  # iterate over chains
            pri = 1  # restart the product for every chain
            for i, value in enumerate(chain_theta):  # iterate over parameters
                prior = self.parpriors[self.parnames[i]]
                try:
                    pri *= prior.pdf(value)
                except AttributeError:  # in case distribution is discrete
                    pri *= prior.pmf(value)
            pris.append(pri)
        if po:
            jobs = [po.apply_async(self.meld._output_loglike, (p, self.data, self.likfun, self.likvariance)) for p in prop]
            listoliks = [job.get() for job in jobs]
            self.term_pool()
        else:
            listoliks = [self.meld._output_loglike(p, self.data, self.likfun, self.likvariance) for p in prop]
        # Multiply by prior values to obtain posterior probs
        # Actually sum the logs
        posts = (log(array(pris)) + array(listoliks)).tolist()

        if isnan(posts).any():
            print("\nLikelihoods returned some NaNs. Dropping to debug mode:\n")
            pdb.set_trace()
        return posts, listoliks

    def step(self):
        """
        Does the actual sampling loop.

        Runs until ``samples + burnin`` accepted proposals have been stored
        or the meld object sets ``stop_now``.

        :Returns:
            1 when sampling finished normally; the result of `shut_down`
            when interrupted.
        """
        total = self.samples + self.burnin
        ptheta = recarray(total, formats=['f8'] * self.dimensions, names=self.parnames)
        #total samples, accepted samples, rejected proposals, acceptance rate
        i = 0
        j = 0
        rej = 0
        ar = 0
        last_pps = None
        t0 = time.time()
        while j < total:
            self.meld.current_step = j
            if self.meld.stop_now:
                return self.shut_down('user interrupted')
            #generate proposals
            if j == 0:
                theta = self._prop_initial_theta(j)
                prop = self._prop_phi(theta, self.po)
                pps, liks = self._get_post_prob(theta, prop)
            else:
                # pps and liks carry over from the previous iteration
                theta = [self.seqhist[c][-1] for c in range(self.nchains)]
                prop = self._prop_phi(theta, self.po)
            # Evolve chains
            theta, prop, pps, liks, accepted = self._chain_evolution(theta, prop, pps, liks)
            #storing log post probs
            self.omega[j, :] = pps
            #Compute GR R
            # NOTE(review): the start argument is negative here, which makes
            # the requested window longer than j - confirm this is intended.
            self.gr_R(j, -j // 2)
            # identity test: pps is an array, so `== None` would be elementwise
            if last_pps is None:  # on first sample
                last_pps = pps
                continue
            i += self.nchains
            n_accepted = sum(accepted)
            if n_accepted < self.nchains:
                rej += self.nchains - n_accepted  # adjust rejection counter
                ar = (i - rej) / float(i)
            if i % 100 == 0:
                self._tune_likvar(ar)
                if self.trace_acceptance:
                    print("--> %s rejected. Acc. ratio: %2.2f" % (rej, ar))

            # Store accepted values
            for c, t, pr, acc in zip(range(self.nchains), theta, prop, accepted):  # Iterates over the results of each chain
                if not acc:
                    #Add something to the seqhist so that they all have the same length
                    if not self.seqhist[c]:
                        self.seqhist[c].append(t)
                    else:
                        self.seqhist[c].append(self.seqhist[c][-1])
                else:
                    self.history[j, :] = t
                    self.seqhist[c].append(t)
                    try:
                        self.phi[j] = pr[0] if self.t == 1 else [tuple(point) for point in pr]
                        ptheta[j] = tuple(t)
                    except IndexError:
                        print("index error %s %s" % (j, self.phi.shape))
                    self.liklist.append(liks[c])
                    if j == total:
                        break
                    j += 1  # update accepted samples counter

            # Remove Outlier Chains
            if j > 0 and j < self.burnin:
                outl = self._det_outlier_chains(j)
                imax = pps.tolist().index(pps.max())
                for n, is_outlier in enumerate(outl):
                    if is_outlier:
                        # replace the outlier by the best chain
                        theta[n] = theta[imax]
                        prop[n] = prop[imax]
                        pps[n] = pps[imax]
                        liks[n] = liks[imax]

            el = time.time() - t0
            if int(el) % 10 == 0 and el > 1 and j > 100:
                if self.trace_acceptance:
                    print("++>Acc. %s out of %s. Acc. ratio: %1.3f" % (j, i, ar))
                self._watch_chain(j)
                if self.trace_convergence:
                    print("++> Likvar: %s\nBest run Likelihood:%s" % (self.likvariance, np.max(self.liklist)))
                t0 = time.time()
            last_pps = pps
            ar = (i - rej) / float(i)
            if self.meld.verbose == 2 and j > 10:
                self.meld.current_plot(self.phi, self.data, self.best_prop_index, step=j)
        self.term_pool()
        self.meld.post_phi = self.phi[self.burnin:]
        # NOTE(review): post_theta keeps the burn-in samples while post_phi
        # drops them (unchanged behaviour) - confirm this is intended.
        self.meld.post_theta = ptheta
        self.meld.likmax = max(self.liklist)
        self.meld.DIC = self.DIC
        print("Total steps(i):  %s rej: %s j: %s" % (i, rej, j))
        print(">>> Acceptance rate: %1.3f" % ar)
        self.shut_down('Finished normally.')
        return 1
if __name__ == "__main__":
    # Nothing to run: this module only defines the sampler classes.
    pass