Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control GenePop. 
 10   
 11  """ 
 12   
 13  import os 
 14  import re 
 15  import shutil 
 16  import subprocess 
 17  import sys 
 18  import tempfile 
 19   
 20  from Bio.Application import AbstractCommandline, _Argument, _Option 
 21   
22 -def _gp_float(tok):
23 """Gets a float from a token, if it fails, returns the string. 24 """ 25 try: 26 return float(tok) 27 except ValueError: 28 return str(tok)
29
30 -def _gp_int(tok):
31 """Gets a int from a token, if it fails, returns the string. 32 """ 33 try: 34 return int(tok) 35 except ValueError: 36 return str(tok)
37 38
39 -def _read_allele_freq_table(f):
40 l = f.readline() 41 while l.find(" --")==-1: 42 if l == "": 43 raise StopIteration 44 l = f.readline() 45 alleles = filter(lambda x: x != '', f.readline().rstrip().split(" ")) 46 alleles = map(lambda x: _gp_int(x), alleles) 47 l = f.readline().rstrip() 48 table = [] 49 while l != "": 50 line = filter(lambda x: x != '', l.split(" ")) 51 try: 52 table.append( 53 (line[0], 54 map(lambda x: _gp_float(x), line[1:-1]), 55 _gp_int(line[-1]))) 56 except ValueError: 57 table.append( 58 (line[0], 59 [None] * len(alleles), 60 0)) 61 l = f.readline().rstrip() 62 return alleles, table
63
64 -def _read_table(f, funs):
65 table = [] 66 l = f.readline().rstrip() 67 while l.find("---")==-1: 68 l = f.readline().rstrip() 69 l = f.readline().rstrip() 70 while l.find("===")==-1 and l.find("---")==-1 and l != "": 71 toks = filter(lambda x: x != "", l.split(" ")) 72 line = [] 73 for i in range(len(toks)): 74 try: 75 line.append(funs[i](toks[i])) 76 except ValueError: 77 line.append(toks[i]) # Could not cast 78 table.append(tuple(line)) 79 l = f.readline().rstrip() 80 return table
81
82 -def _read_triangle_matrix(f):
83 matrix = [] 84 l = f.readline().rstrip() 85 while l != "": 86 matrix.append( 87 map(lambda x: _gp_float(x), 88 filter(lambda y: y != "", l.split(" ")))) 89 l = f.readline().rstrip() 90 return matrix
91
92 -def _read_headed_triangle_matrix(f):
93 matrix = {} 94 header = f.readline().rstrip() 95 if header.find("---")>-1 or header.find("===")>-1: 96 header = f.readline().rstrip() 97 nlines = len(filter(lambda x:x != '', header.split(' '))) - 1 98 for line_pop in range(nlines): 99 l = f.readline().rstrip() 100 vals = filter(lambda x:x != '', l.split(' ')[1:]) 101 clean_vals = [] 102 for val in vals: 103 try: 104 clean_vals.append(_gp_float(val)) 105 except ValueError: 106 clean_vals.append(None) 107 for col_pop in range(len(clean_vals)): 108 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 109 return matrix
110
111 -def _hw_func(stream, is_locus, has_fisher = False):
112 l = stream.readline() 113 if is_locus: 114 hook = "Locus " 115 else: 116 hook = " Pop : " 117 while l != "": 118 if l.startswith(hook): 119 stream.readline() 120 stream.readline() 121 stream.readline() 122 table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str]) 123 #loci might mean pop if hook="Locus " 124 loci = {} 125 for entry in table: 126 if len(entry) < 3: 127 loci[entry[0]] = None 128 else: 129 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 130 if se == "-": se = None 131 loci[locus] = p, se, fis_wc, fis_rh, steps 132 return loci 133 l = stream.readline() 134 #self.done = True 135 raise StopIteration 136
137 -class _FileIterator:
138 """Iterator which crawls over a stream of lines with a function. 139 140 The generator function is expected to yield a tuple, while 141 consuming input 142 """
143 - def __init__(self, func, stream, fname):
144 self.func = func 145 self.stream = stream 146 self.fname = fname 147 self.done = False
148
149 - def __iter__(self):
150 if self.done: 151 self.done = True 152 raise StopIteration 153 return self
154
155 - def next(self):
156 return self.func(self)
157
158 - def __del__(self):
159 self.stream.close() 160 os.remove(self.fname)
161
162 -class _GenePopCommandline(AbstractCommandline):
163 """ Command Line Wrapper for GenePop. 164 """
165 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
166 self.parameters = [ 167 _Argument(["command"], 168 ["INTEGER(.INTEGER)*"], 169 None, 170 True, 171 "GenePop option to be called"), 172 _Argument(["mode"], 173 ["Dont touch this"], 174 None, 175 True, 176 "Should allways be batch"), 177 _Argument(["input"], 178 ["input"], 179 None, 180 True, 181 "Input file"), 182 _Argument(["Dememorization"], 183 ["input"], 184 None, 185 False, 186 "Dememorization step"), 187 _Argument(["BatchNumber"], 188 ["input"], 189 None, 190 False, 191 "Number of MCMC batches"), 192 _Argument(["BatchLength"], 193 ["input"], 194 None, 195 False, 196 "Length of MCMC chains"), 197 _Argument(["HWtests"], 198 ["input"], 199 None, 200 False, 201 "Enumeration or MCMC"), 202 _Argument(["IsolBDstatistic"], 203 ["input"], 204 None, 205 False, 206 "IBD statistic (a or e)"), 207 _Argument(["MinimalDistance"], 208 ["input"], 209 None, 210 False, 211 "Minimal IBD distance"), 212 _Argument(["GeographicScale"], 213 ["input"], 214 None, 215 False, 216 "Log or Linear"), 217 ] 218 AbstractCommandline.__init__(self, cmd, **kwargs) 219 self.set_parameter("mode", "Mode=Batch")
220
221 - def set_menu(self, option_list):
222 """Sets the menu option. 223 224 Example set_menu([6,1]) = get all F statistics (menu 6.1) 225 """ 226 self.set_parameter("command", "MenuOptions="+ 227 ".".join(map(lambda x:str(x),option_list)))
228
229 - def set_input(self, fname):
230 """Sets the input file name. 231 """ 232 self.set_parameter("input", "InputFile="+fname)
233
234 -class GenePopController:
235 - def __init__(self, genepop_dir = None):
236 """Initializes the controller. 237 238 genepop_dir is the directory where GenePop is. 239 240 The binary should be called Genepop (capital G) 241 242 """ 243 self.controller = _GenePopCommandline(genepop_dir)
244
245 - def _remove_garbage(self, fname_out):
246 try: 247 if fname_out != None: os.remove(fname_out) 248 except OSError: 249 pass # safe 250 try: 251 os.remove("genepop.txt") 252 except OSError: 253 pass # safe 254 try: 255 os.remove("fichier.in") 256 except OSError: 257 pass # safe 258 try: 259 os.remove("cmdline.txt") 260 except OSError: 261 pass # safe
262
263 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
264 opts = {} 265 opts["Dememorization"]=dememorization 266 opts["BatchNumber"]=batches 267 opts["BatchLength"]=iterations 268 if enum_test != None: 269 if enum_test == True: 270 opts["HWtests"]="Enumeration" 271 else: 272 opts["HWtests"]="MCMC" 273 return opts
274
275 - def _run_genepop(self, extensions, option, fname, opts={}):
276 for extension in extensions: 277 self._remove_garbage(fname + extension) 278 self.controller.set_menu(option) 279 self.controller.set_input(fname) 280 for opt in opts: 281 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 282 child = subprocess.Popen(str(self.controller), 283 stdin=subprocess.PIPE, 284 stdout=subprocess.PIPE, 285 stderr=subprocess.PIPE, 286 shell=(sys.platform!="win32")) 287 r_out, e_out = child.communicate() 288 # capture error code: 289 ret = child.returncode 290 291 self._remove_garbage(None) 292 if ret != 0: raise IOError("GenePop not found") 293 return
294 295
296 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 297 dememorization = 10000, batches = 20, iterations = 5000):
298 """Hardy-Weinberg test for heterozygote deficiency/excess. 299 300 Returns a population iterator containg 301 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 302 Some loci have a None if the info is not available 303 SE might be none (for enumerations) 304 """ 305 opts = self._get_opts(dememorization, batches, iterations, enum_test) 306 self._run_genepop([ext], [1, type], fname, opts) 307 f = open(fname + ext) 308 def hw_func(self): 309 return _hw_func(self.stream, False)
310 return _FileIterator(hw_func, f, fname + ext)
311
312 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 313 dememorization = 10000, batches = 20, iterations = 5000):
314 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 315 316 Returns a triple with: 317 A list per population containg 318 (pop_name, P-val, SE, switches) 319 Some pops have a None if the info is not available 320 SE might be none (for enumerations) 321 A list per loci containg 322 (locus_name, P-val, SE, switches) 323 Some loci have a None if the info is not available 324 SE might be none (for enumerations) 325 Overall results (P-val, SE, switches) 326 327 """ 328 opts = self._get_opts(dememorization, batches, iterations, enum_test) 329 self._run_genepop([ext], [1, type], fname, opts) 330 def hw_pop_func(self): 331 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
332 f1 = open(fname + ext) 333 l = f1.readline() 334 while l.find("by population") == -1: 335 l = f1.readline() 336 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 337 f2 = open(fname + ext) 338 l = f2.readline() 339 while l.find("by locus") == -1: 340 l = f2.readline() 341 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 342 f = open(fname + ext) 343 l = f.readline() 344 while l.find("all locus") == -1: 345 l = f.readline() 346 f.readline() 347 f.readline() 348 f.readline() 349 f.readline() 350 l = f.readline().rstrip() 351 p, se, switches = tuple(map(lambda x: _gp_float(x), 352 filter(lambda y: y != "",l.split(" ")))) 353 f.close() 354 return pop_p, loc_p, (p, se, switches) 355 356 #1.1
357 - def test_pop_hz_deficiency(self, fname, enum_test = True, 358 dememorization = 10000, batches = 20, iterations = 5000):
359 """Hardy-Weinberg test for heterozygote deficiency. 360 361 Returns a population iterator containg 362 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 363 Some loci have a None if the info is not available 364 SE might be none (for enumerations) 365 """ 366 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 367 dememorization, batches, iterations)
368 369 #1.2
370 - def test_pop_hz_excess(self, fname, enum_test = True, 371 dememorization = 10000, batches = 20, iterations = 5000):
372 """Hardy-Weinberg test for heterozygote deficiency. 373 374 Returns a population iterator containg 375 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 376 Some loci have a None if the info is not available 377 SE might be none (for enumerations) 378 """ 379 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 380 dememorization, batches, iterations)
381 382 #1.3 P file
383 - def test_pop_hz_prob(self, fname, ext, enum_test = False, 384 dememorization = 10000, batches = 20, iterations = 5000):
385 """Hardy-Weinberg test based on probability. 386 387 Returns 2 iterators and a final tuple: 388 389 1. Returns a loci iterator containing 390 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 391 Some pops have a None if the info is not available 392 SE might be none (for enumerations) 393 c. Result of Fisher's test (Chi2, deg freedom, prob) 394 2. Returns a population iterator containg 395 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 396 Some loci have a None if the info is not available 397 SE might be none (for enumerations) 398 b. Result of Fisher's test (Chi2, deg freedom, prob) 399 3. (Chi2, deg freedom, prob) 400 """ 401 opts = self._get_opts(dememorization, batches, iterations, enum_test) 402 self._run_genepop([ext], [1, 3], fname, opts) 403 def hw_prob_loci_func(self): 404 return _hw_func(self.stream, True, True)
405 def hw_prob_pop_func(self): 406 return _hw_func(self.stream, False, True) 407 shutil.copyfile(fname+".P", fname+".P2") 408 f1 = open(fname + ".P") 409 f2 = open(fname + ".P2") 410 return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2") 411 412 #1.4
413 - def test_global_hz_deficiency(self, fname, enum_test = True, 414 dememorization = 10000, batches = 20, iterations = 5000):
415 """Global Hardy-Weinberg test for heterozygote deficiency. 416 417 Returns a triple with: 418 An list per population containg 419 (pop_name, P-val, SE, switches) 420 Some pops have a None if the info is not available 421 SE might be none (for enumerations) 422 An list per loci containg 423 (locus_name, P-val, SE, switches) 424 Some loci have a None if the info is not available 425 SE might be none (for enumerations) 426 Overall results (P-val, SE, switches) 427 """ 428 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 429 dememorization, batches, iterations)
430 431 432 #1.5
433 - def test_global_hz_excess(self, fname, enum_test = True, 434 dememorization = 10000, batches = 20, iterations = 5000):
435 """Global Hardy-Weinberg test for heterozygote excess. 436 437 Returns a triple with: 438 An list per population containg 439 (pop_name, P-val, SE, switches) 440 Some pops have a None if the info is not available 441 SE might be none (for enumerations) 442 An list per loci containg 443 (locus_name, P-val, SE, switches) 444 Some loci have a None if the info is not available 445 SE might be none (for enumerations) 446 Overall results (P-val, SE, switches) 447 """ 448 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 449 dememorization, batches, iterations)
450 451 #2.1
452 - def test_ld(self, fname, 453 dememorization = 10000, batches = 20, iterations = 5000):
454 opts = self._get_opts(dememorization, batches, iterations) 455 self._run_genepop([".DIS"], [2, 1], fname, opts) 456 def ld_pop_func(self): 457 current_pop = None 458 l = self.stream.readline().rstrip() 459 if l == "": 460 self.done = True 461 raise StopIteration 462 toks = filter(lambda x: x != "", l.split(" ")) 463 pop, locus1, locus2 = toks[0], toks[1], toks[2] 464 if not hasattr(self, "start_locus1"): 465 start_locus1, start_locus2 = locus1, locus2 466 current_pop = -1 467 if locus1 == start_locus1 and locus2 == start_locus2: 468 current_pop += 1 469 if toks[3] == "No": 470 return current_pop, pop, (locus1, locus2), None 471 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 472 return current_pop, pop, (locus1, locus2), (p, se, switches)
473 def ld_func(self): 474 l = self.stream.readline().rstrip() 475 if l == "": 476 self.done = True 477 raise StopIteration 478 toks = filter(lambda x: x != "", l.split(" ")) 479 locus1, locus2 = toks[0], toks[2] 480 try: 481 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 482 except ValueError: 483 return (locus1, locus2), None 484 return (locus1, locus2), (chi2, df, p) 485 f1 = open(fname + ".DIS") 486 l = f1.readline() 487 while l.find("----")==-1: 488 l = f1.readline() 489 shutil.copyfile(fname + ".DIS", fname + ".DI2") 490 f2 = open(fname + ".DI2") 491 l = f2.readline() 492 while l.find("Locus pair")==-1: 493 l = f2.readline() 494 while l.find("----")==-1: 495 l = f2.readline() 496 return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2") 497 498 #2.2
499 - def create_contingency_tables(self, fname):
500 raise NotImplementedError
501 502 #3.1 PR/GE files
503 - def test_genic_diff_all(self, fname, 504 dememorization = 10000, batches = 20, iterations = 5000):
505 raise NotImplementedError
506 507 #3.2 PR2/GE2 files
508 - def test_genic_diff_pair(self, fname, 509 dememorization = 10000, batches = 20, iterations = 5000):
510 raise NotImplementedError
511 512 #3.3 G files
513 - def test_genotypic_diff_all(self, fname, 514 dememorization = 10000, batches = 20, iterations = 5000):
515 raise NotImplementedError
516 517 #3.4 2G2 files
518 - def test_genotypic_diff_pair(self, fname, 519 dememorization = 10000, batches = 20, iterations = 5000):
520 raise NotImplementedError
521 522 #4
523 - def estimate_nm(self, fname):
524 self._run_genepop(["PRI"], [4], fname) 525 f = open(fname + ".PRI") 526 lines = f.readlines() # Small file, it is ok 527 f.close() 528 for line in lines: 529 m = re.search("Mean sample size: ([.0-9]+)", line) 530 if m != None: 531 mean_sample_size = _gp_float(m.group(1)) 532 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 533 if m != None: 534 mean_priv_alleles = _gp_float(m.group(1)) 535 m = re.search("N=10: ([.0-9]+)", line) 536 if m != None: 537 mig10 = _gp_float(m.group(1)) 538 m = re.search("N=25: ([.0-9]+)", line) 539 if m != None: 540 mig25 = _gp_float(m.group(1)) 541 m = re.search("N=50: ([.0-9]+)", line) 542 if m != None: 543 mig50 = _gp_float(m.group(1)) 544 m = re.search("for size= ([.0-9]+)", line) 545 if m != None: 546 mig_corrected = _gp_float(m.group(1)) 547 os.remove(fname + ".PRI") 548 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
549 550 #5.1
551 - def calc_allele_genotype_freqs(self, fname):
552 """Calculates allele and genotype frequencies per locus and per sample. 553 554 Parameters: 555 fname - file name 556 557 Returns tuple with 2 elements: 558 Population iterator with 559 population name 560 Locus dictionary with key = locus name and content tuple as 561 Genotype List with 562 (Allele1, Allele2, observed, expected) 563 (expected homozygotes, observed hm, 564 expected heterozygotes, observed ht) 565 Allele frequency/Fis dictionary with allele as key and 566 (count, frequency, Fis Weir & Cockerham) 567 Totals as a pair 568 count 569 Fis Weir & Cockerham, 570 Fis Robertson & Hill 571 Locus iterator with 572 Locus name 573 allele list 574 Population list with a triple 575 population name 576 list of allele frequencies in the same order as allele list above 577 number of genes 578 579 580 Will create a file called fname.INF 581 """ 582 self._run_genepop(["INF"], [5,1], fname) 583 #First pass, general information 584 #num_loci = None 585 #num_pops = None 586 #f = open(fname + ".INF") 587 #l = f.readline() 588 #while (num_loci == None or num_pops == None) and l != '': 589 # m = re.search("Number of populations detected : ([0-9+])", l) 590 # if m != None: 591 # num_pops = _gp_int(m.group(1)) 592 # m = re.search("Number of loci detected : ([0-9+])", l) 593 # if m != None: 594 # num_loci = _gp_int(m.group(1)) 595 # l = f.readline() 596 #f.close() 597 def pop_parser(self): 598 if hasattr(self, "old_line"): 599 l = self.old_line 600 del self.old_line 601 else: 602 l = self.stream.readline() 603 loci_content = {} 604 while l<>'': 605 l = l.rstrip() 606 if l.find("Tables of allelic frequencies for each locus")>-1: 607 return self.curr_pop, loci_content 608 match = re.match(".*Pop: (.+) Locus: (.+)", l) 609 if match != None: 610 pop = match.group(1) 611 locus = match.group(2) 612 if not hasattr(self, "first_locus"): 613 self.first_locus = locus 614 if hasattr(self, "curr_pop"): 615 if self.first_locus == locus: 616 old_pop = self.curr_pop 617 #self.curr_pop = pop 618 self.old_line = l 619 del self.first_locus 620 del self.curr_pop 621 return old_pop, loci_content 622 self.curr_pop = pop 623 else: 624 l = self.stream.readline() 625 continue 626 geno_list = [] 627 l = self.stream.readline() 628 if l.find("No data")>-1: continue 629 630 while l.find("Genotypes Obs.")==-1: 631 l = self.stream.readline() 632 633 while l<>"\n": 634 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l) 635 if m2 != None: 636 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 637 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 638 else: 639 l = self.stream.readline() 640 continue 641 l = self.stream.readline() 642 643 while l.find("Expected number of ho")==-1: 644 l = self.stream.readline() 645 expHo = _gp_float(l[38:]) 646 l = self.stream.readline() 647 obsHo = _gp_int(l[38:]) 648 l = self.stream.readline() 649 expHe = _gp_float(l[38:]) 650 l = self.stream.readline() 651 obsHe = _gp_int(l[38:]) 652 l = self.stream.readline() 653 654 while l.find("Sample count")==-1: 655 l = self.stream.readline() 656 l = self.stream.readline() 657 freq_fis={} 658 overall_fis = None 659 while l.find("----")==-1: 660 vals = filter(lambda x: x!='', 661 l.rstrip().split(' ')) 662 if vals[0]=="Tot": 663 overall_fis = _gp_int(vals[1]), \ 664 _gp_float(vals[2]), _gp_float(vals[3]) 665 else: 666 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 667 _gp_float(vals[2]), _gp_float(vals[3]) 668 l = self.stream.readline() 669 loci_content[locus] = geno_list, \ 670 (expHo, obsHo, expHe, obsHe), \ 671 freq_fis, overall_fis 672 self.done = True 673 raise StopIteration
674 def locus_parser(self): 675 l = self.stream.readline() 676 while l != "": 677 l = l.rstrip() 678 match = re.match(" Locus: (.+)", l) 679 if match != None: 680 locus = match.group(1) 681 alleles, table = _read_allele_freq_table(self.stream) 682 return locus, alleles, table 683 l = self.stream.readline() 684 self.done = True 685 raise StopIteration 686 687 popf = open(fname + ".INF") 688 shutil.copyfile(fname + ".INF", fname + ".IN2") 689 locf = open(fname + ".IN2") 690 pop_iter = _FileIterator(pop_parser, popf, fname + ".INF") 691 locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2") 692 return (pop_iter, locus_iter) 693
694 - def _calc_diversities_fis(self, fname, ext):
695 self._run_genepop([ext], [5,2], fname) 696 f = open(fname + ext) 697 l = f.readline() 698 while l<>"": 699 l = l.rstrip() 700 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 701 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 702 avg_Qintra = _read_table(f, [str, _gp_float]) 703 l = f.readline() 704 f.close() 705 def fis_func(self): 706 l = self.stream.readline() 707 while l != "": 708 l = l.rstrip() 709 m = re.search("Locus: (.+)", l) 710 if m != None: 711 locus = m.group(1) 712 self.stream.readline() 713 if self.stream.readline().find("No complete")>-1: return locus, None 714 self.stream.readline() 715 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 716 self.stream.readline() 717 avg_qinter, avg_fis = tuple(map (lambda x: _gp_float(x), 718 filter(lambda y:y != "", self.stream.readline().split(" ")))) 719 return locus, fis_table, avg_qinter, avg_fis 720 l = self.stream.readline() 721 self.done = True 722 raise StopIteration
723 dvf = open(fname + ext) 724 return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra 725 726 #5.2
727 - def calc_diversities_fis_with_identity(self, fname):
728 return self._calc_diversities_fis(fname, ".DIV")
729 730 #5.3
731 - def calc_diversities_fis_with_size(self, fname):
732 raise NotImplementedError
733 734 #6.1 Less genotype frequencies
735 - def calc_fst_all(self, fname):
736 """Executes GenePop and gets Fst/Fis/Fit (all populations) 737 738 Parameters: 739 fname - file name 740 741 Returns: 742 (multiLocusFis, multiLocusFst, multiLocus Fit), 743 Iterator of tuples 744 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 745 746 Will create a file called fname.FST . 747 748 This does not return the genotype frequencies. 749 750 """ 751 self._run_genepop([".FST"], [6,1], fname) 752 f = open(fname + ".FST") 753 l = f.readline() 754 while l<>'': 755 if l.startswith(' All:'): 756 toks=filter(lambda x:x!="", l.rstrip().split(' ')) 757 try: 758 allFis = _gp_float(toks[1]) 759 except ValueError: 760 allFis = None 761 try: 762 allFst = _gp_float(toks[2]) 763 except ValueError: 764 allFst = None 765 try: 766 allFit = _gp_float(toks[3]) 767 except ValueError: 768 allFit = None 769 l = f.readline() 770 f.close() 771 f = open(fname + ".FST") 772 def proc(self): 773 if hasattr(self, "last_line"): 774 l = self.last_line 775 del self.last_line 776 else: 777 l = self.stream.readline() 778 locus = None 779 fis = None 780 fst = None 781 fit = None 782 qintra = None 783 qinter = None 784 while l<>'': 785 l = l.rstrip() 786 if l.startswith(' Locus:'): 787 if locus != None: 788 self.last_line = l 789 return locus, fis, fst, fit, qintra, qinter 790 else: 791 locus = l.split(':')[1].lstrip() 792 elif l.startswith('Fis^='): 793 fis = _gp_float(l.split(' ')[1]) 794 elif l.startswith('Fst^='): 795 fst = _gp_float(l.split(' ')[1]) 796 elif l.startswith('Fit^='): 797 fit = _gp_float(l.split(' ')[1]) 798 elif l.startswith('1-Qintra^='): 799 qintra = _gp_float(l.split(' ')[1]) 800 elif l.startswith('1-Qinter^='): 801 qinter = _gp_float(l.split(' ')[1]) 802 return locus, fis, fst, fit, qintra, qinter 803 l = self.stream.readline() 804 if locus != None: 805 return locus, fis, fst, fit, qintra, qinter 806 self.stream.close() 807 self.done = True 808 raise StopIteration
809 return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST") 810 811 #6.2
812 - def calc_fst_pair(self, fname):
813 self._run_genepop([".ST2", ".MIG"], [6,2], fname) 814 f = open(fname + ".ST2") 815 l = f.readline() 816 while l<>"": 817 l = l.rstrip() 818 if l.startswith("Estimates for all loci"): 819 avg_fst = _read_headed_triangle_matrix(f) 820 l = f.readline() 821 f.close() 822 def loci_func(self): 823 l = self.stream.readline() 824 while l != "": 825 l = l.rstrip() 826 m = re.search(" Locus: (.+)", l) 827 if m != None: 828 locus = m.group(1) 829 matrix = _read_headed_triangle_matrix(self.stream) 830 return locus, matrix 831 l = self.stream.readline() 832 self.done = True 833 raise StopIteration
834 stf = open(fname + ".ST2") 835 os.remove(fname + ".MIG") 836 return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst 837 838 #6.3
839 - def calc_rho_all(self, fname):
840 raise NotImplementedError
841 842 #6.4
843 - def calc_rho_pair(self, fname):
844 raise NotImplementedError
845
846 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
847 """Calculates isolation by distance statistics 848 """ 849 self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub], 850 fname, opts = { 851 "MinimalDistance" : min_dist, 852 "GeographicScale" : scale, 853 "IsolBDstatistic" : stat, 854 }) 855 f = open(fname + ".ISO") 856 f.readline() 857 f.readline() 858 f.readline() 859 f.readline() 860 estimate = _read_triangle_matrix(f) 861 f.readline() 862 f.readline() 863 distance = _read_triangle_matrix(f) 864 f.readline() 865 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 866 a = _gp_float(match.group(1)) 867 b = _gp_float(match.group(2)) 868 f.readline() 869 f.readline() 870 match = re.match(" b=(.+)", f.readline().rstrip()) 871 bb = _gp_float(match.group(1)) 872 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 873 bblow = _gp_float(match.group(1)) 874 bbhigh = _gp_float(match.group(2)) 875 f.close() 876 os.remove(fname + ".MIG") 877 os.remove(fname + ".GRA") 878 os.remove(fname + ".ISO") 879 return estimate, distance, (a, b), (bb, bblow, bbhigh)
880 881 #6.5
882 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
883 """Calculates isolation by distance statistics for diploid data. 884 885 See _calc_ibd for parameter details. 886 Note that each pop can only have a single individual and 887 the individual name has to be the sample coordinates. 888 """ 889 return self._calc_ibd(fname, 5, stat, scale, min_dist)
890 891 #6.6
892 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
893 """Calculates isolation by distance statistics for haploid data. 894 895 See _calc_ibd for parameter details. 896 Note that each pop can only have a single individual and 897 the individual name has to be the sample coordinates. 898 """ 899 return self._calc_ibd(fname, 6, stat, scale, min_dist)
900