Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control fdist. 
 10   
 11  This will allow to call fdist and associated program (cplot, datacal, pv). 
 12   
 13  http://www.rubic.rdg.ac.uk/~mab/software.html 
 14  """ 
 15   
 16  import os 
 17  import tempfile 
 18  from sys import platform, maxint 
 19  from shutil import copyfile 
 20  from random import randint, random 
 21  from time import strftime, clock 
 22  #from logging import debug 
 23   
24 -class FDistController:
25 - def __init__(self, fdist_dir = '', ext = None):
26 """Initializes the controller. 27 28 fdist_dir is the directory where fdist2 is. 29 ext is the extension of binaries (.exe on windows, 30 none on Unix) 31 32 """ 33 self.tmp_idx = 0 34 self.fdist_dir = fdist_dir 35 self.os_name = os.name 36 if self.os_name=='nt' or platform=='cygwin': 37 py_ext = '.exe' 38 else: 39 py_ext = '' 40 if ext == None: 41 self.ext = py_ext 42 else: 43 self.ext = ext 44 exec_counts = 0
45
46 - def _get_path(self, app):
47 """Returns the path to an fdist application. 48 49 Includes Path where fdist can be found plus executable extension. 50 """ 51 if self.fdist_dir == '': 52 return app + self.ext 53 else: 54 return os.sep.join([self.fdist_dir, app]) + self.ext
55
56 - def _get_temp_file(self):
57 """Gets a temporary file name. 58 59 Returns a temporary file name, if executing inside jython 60 tries to replace unexisting tempfile.mkstemp(). 61 """ 62 self.tmp_idx += 1 63 return strftime("%H%M%S") + str(int(clock()*100)) + str(randint(0,1000)) + str(self.tmp_idx)
64
65 - def run_datacal(self, data_dir='.'):
66 """Executes datacal. 67 68 data_dir - Where the data is found. 69 """ 70 in_name = self._get_temp_file() 71 out_name = self._get_temp_file() 72 f = open(data_dir + os.sep + in_name, 'w') 73 f.write('a\n') 74 f.close() 75 curr_dir = os.getcwd() 76 os.system('cd ' + data_dir + ' && ' + 77 self._get_path('datacal') + ' < ' + in_name + ' > ' + out_name) 78 f = open(data_dir + os.sep + out_name) 79 fst_line = f.readline().rstrip().split(' ') 80 fst = float(fst_line[4]) 81 sample_line = f.readline().rstrip().split(' ') 82 sample = int(sample_line[9]) 83 f.close() 84 os.remove(data_dir + os.sep + in_name) 85 os.remove(data_dir + os.sep + out_name) 86 return fst, sample
87
88 - def _generate_intfile(self, data_dir):
89 """Generates an INTFILE. 90 91 Parameter: 92 data_dir - data directory 93 """ 94 inf = open(data_dir + os.sep + 'INTFILE', 'w') 95 for i in range(98): 96 inf.write(str(randint(-maxint+1,maxint-1)) + '\n') 97 inf.write('8\n') 98 inf.close()
99
100 - def run_fdist(self, npops, nsamples, fst, sample_size, 101 mut = 0, num_sims = 20000, data_dir='.'):
102 """Executes fdist. 103 104 Parameters: 105 npops - Number of populations 106 nsamples - Number of populations sampled 107 fst - expected Fst 108 sample_size - Sample size per population 109 mut - 1=Stepwise, 0=Infinite allele 110 num_sims - number of simulations 111 data_dir - Where the data is found 112 113 Returns: 114 fst - Average Fst 115 116 Important Note: This can take quite a while to run! 117 """ 118 if fst >= 0.9: 119 #Lets not joke 120 fst = 0.899 121 if fst <= 0.0: 122 #0 will make fdist run forever 123 fst = 0.001 124 in_name = 'input.fd' 125 out_name = 'output.fd' 126 #print 'writing', data_dir + os.sep + in_name 127 f = open(data_dir + os.sep + in_name, 'w') 128 f.write('y\n\n') 129 f.close() 130 f = open(data_dir + os.sep + 'fdist_params2.dat', 'w') 131 f.write(str(npops) + '\n') 132 f.write(str(nsamples) + '\n') 133 f.write(str(fst) + '\n') 134 f.write(str(sample_size) + '\n') 135 f.write(str(mut) + '\n') 136 f.write(str(num_sims) + '\n') 137 f.close() 138 self._generate_intfile(data_dir) 139 140 os.system('cd ' + data_dir + ' && ' + 141 self._get_path('fdist2') + ' < ' + in_name + ' > ' + out_name) 142 f = open(data_dir + os.sep + out_name) 143 lines = f.readlines() 144 f.close() 145 for line in lines: 146 if line.startswith('average Fst'): 147 fst = float(line.rstrip().split(' ')[-1]) 148 os.remove(data_dir + os.sep + in_name) 149 os.remove(data_dir + os.sep + out_name) 150 return fst
151
152 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 153 mut = 0, num_sims = 20000, data_dir='.', try_runs = 5000, limit=0.001):
154 """Exectues fdist trying to force Fst. 155 156 Parameters: 157 try_runs - Number of simulations on the part trying to get 158 Fst correct 159 limit - Interval limit 160 Other parameters can be seen on run_fdist. 161 """ 162 max_run_fst = 1 163 min_run_fst = 0 164 current_run_fst = fst 165 old_fst = fst 166 while True: 167 #debug('testing fst ' + str(current_run_fst)) 168 real_fst = self.run_fdist(npops, nsamples, current_run_fst, sample_size, 169 mut, try_runs, data_dir) 170 #debug('got real fst ' + str(real_fst)) 171 if abs(real_fst - fst) < limit: 172 #debug('We are OK') 173 return self.run_fdist(npops, nsamples, current_run_fst, sample_size, 174 mut, num_sims, data_dir) 175 old_fst = current_run_fst 176 if real_fst > fst: 177 max_run_fst = current_run_fst 178 if current_run_fst < min_run_fst + limit: 179 #we can do no better 180 #debug('Lower limit is ' + str(min_run_fst)) 181 return self.run_fdist(npops, nsamples, current_run_fst, 182 sample_size, mut, num_sims, data_dir) 183 current_run_fst = (min_run_fst + current_run_fst)/2 184 else: 185 min_run_fst = current_run_fst 186 if current_run_fst > max_run_fst - limit: 187 #we can do no better 188 #debug('Upper limit is ' + str(max_run_fst)) 189 return self.run_fdist(npops, nsamples, current_run_fst, 190 sample_size, mut, num_sims, data_dir) 191 current_run_fst = (max_run_fst + current_run_fst)/2
192
193 - def run_cplot(self, ci= 0.95, data_dir='.'):
194 """Executes cplot. 195 196 ci - Confidence interval. 197 data_dir - Where the data is found. 198 """ 199 in_name = self._get_temp_file() 200 out_name = self._get_temp_file() 201 f = open(data_dir + os.sep + in_name, 'w') 202 f.write('out.dat out.cpl\n' + str(ci) + '\n') 203 f.close() 204 curr_dir = os.getcwd() 205 self._generate_intfile(data_dir) 206 os.system('cd ' + data_dir + ' && ' + 207 self._get_path('cplot') + ' < ' + in_name + ' > ' + out_name) 208 os.remove(data_dir + os.sep + in_name) 209 os.remove(data_dir + os.sep + out_name) 210 f = open(data_dir + os.sep + 'out.cpl') 211 conf_lines = [] 212 l = f.readline() 213 try: 214 while l!='': 215 conf_lines.append( 216 tuple(map(lambda x : float(x), l.rstrip().split(' '))) 217 ) 218 l = f.readline() 219 except ValueError: 220 f.close() 221 return [] 222 f.close() 223 return conf_lines
224
225 - def run_pv(self, out_file='probs.dat', data_dir='.'):
226 """Executes pv. 227 228 out_file - Name of output file. 229 data_dir - Where the data is found. 230 """ 231 in_name = self._get_temp_file() 232 out_name = self._get_temp_file() 233 f = open(data_dir + os.sep + in_name, 'w') 234 f.write('data_fst_outfile ' + out_file + ' out.dat\n') 235 f.close() 236 self._generate_intfile(data_dir) 237 os.system('cd ' + data_dir + ' && ' + 238 self._get_path('pv') + ' < ' + in_name + ' > ' + out_name) 239 pvf = open(data_dir + os.sep + out_file, 'r') 240 result = map(lambda x: tuple(map(lambda y: float(y), x.rstrip().split(' '))), 241 pvf.readlines()) 242 pvf.close() 243 os.remove(data_dir + os.sep + in_name) 244 os.remove(data_dir + os.sep + out_name) 245 return result
246