Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14   
 15  Functions: 
 16  read             Parses a GenePop record (file) into a Record object. 
 17   
 18   
 19  Obsolete classes: 
 20  RecordParser     Parses a GenePop record (file) into a Record object. 
 21   
 22  _Scanner         Scans a GenePop record. 
 23  _RecordConsumer  Consumes GenePop data to a Record object. 
 24   
 25  Partially inspired by the MedLine code.
 26   
 27  """ 
 28  from copy import deepcopy 
 29   
 30   
def get_indiv(line):
    """Parse the line describing one individual in a population section.

    The line is expected to hold the individual name, a comma, and then
    one whitespace-separated marker per locus, e.g. ``Ind1, 0101 0203``.

    Arguments:
     - line - the text of a single individual line.

    Returns a tuple ``(indiv_name, allele_list, marker_len)`` where
    allele_list has one tuple of ints per marker (two alleles, or one
    allele when the markers are parsed as haploid) and marker_len is the
    number of digits used per allele (2 or 3), deduced from the length of
    the first marker.

    Raises ValueError if the line does not contain exactly one comma or
    if a marker cannot be converted to integers, and IndexError if no
    marker follows the comma.
    """
    indiv_name, marker_line = line.split(',')
    # split() with no argument treats any run of whitespace (spaces, tabs,
    # a trailing newline or carriage return) as a separator and never
    # yields empty strings. This keeps stray line endings out of the
    # markers, so the length test below sees only the digits.
    markers = marker_line.split()
    if len(markers[0]) in (2, 4):  # 2 digits per allele
        marker_len = 2
    else:
        marker_len = 3
    try:
        allele_list = [(int(marker[:marker_len]), int(marker[marker_len:]))
                       for marker in markers]
    except ValueError:
        # Haploid: there is no second allele, so int('') failed above.
        allele_list = [(int(marker[:marker_len]),) for marker in markers]
    return indiv_name, allele_list, marker_len
47
def read(handle):
    """Parse a handle containing a GenePop file.

    handle is a file-like object (an iterator over text lines) that
    contains a GenePop record: a comment line, the loci names (all on one
    line and/or one per line), and then one or more sections introduced
    by a line reading ``Pop`` (in any letter case), each followed by one
    line per individual.

    Returns a Record. Alleles coded as 0 in the file are stored as None.

    Raises ValueError if no ``Pop`` line is found or if an individual
    line cannot be parsed, and IndexError if a population contains no
    individuals or an individual has fewer markers than there are loci.
    """
    record = Record()
    # The builtin next() works on both Python 2.6+ and Python 3 iterators;
    # the .next() method only exists on Python 2 file objects.
    record.comment_line = next(handle).rstrip()
    # We can now have one loci per line or all loci in a single line
    # separated by either space or comma+space...
    # We will remove all commas on loci... that should not be a problem
    sample_loci_line = next(handle).rstrip().replace(',', '')
    all_loci = sample_loci_line.split(' ')
    record.loci_list.extend(all_loci)
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            break
        record.loci_list.append(line)
    else:
        # The loop ran out of lines without ever seeing a "Pop" marker.
        raise ValueError('No population data found, file probably not GenePop related')
    record.populations.append([])
    for line in handle:
        line = line.rstrip()
        if not line:
            # Blank lines (typically at the end of the file) carry no
            # individual; skip them rather than failing to parse them.
            continue
        if line.upper() == 'POP':
            record.populations.append([])
        else:
            indiv_name, allele_list, record.marker_len = get_indiv(line)
            record.populations[-1].append((indiv_name, allele_list))
    loci = record.loci_list
    for pop in record.populations:
        # The name of the last individual is used as the population name.
        record.pop_list.append(pop[-1][0])
        for indiv in pop:
            markers = indiv[1]
            for mk_i in range(len(loci)):
                # 0 is the missing-data code in the file; expose it as None.
                markers[mk_i] = tuple([None if al == 0 else al
                                       for al in markers[mk_i]])
    return record
90 91
class Record:
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """

    def __init__(self):
        """Create an empty record (no loci, no populations)."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The loci are written one per line, each population is introduced
        by a 'Pop' line, and every allele is left-padded with zeros to
        marker_len digits. Missing alleles (None) are written as zeros.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = '0'
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, indexed like self.populations.

        The loci list and the population data are deep-copied into the
        new records. Raises IndexError if pop_names has fewer entries
        than there are populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused; accepted (and now optional) only so that existing
             callers passing an argument keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Keep every individual, but only its marker for locus i.
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        Only self.populations is modified; pop_list is left untouched.
        """
        del self.populations[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the marker at that
        position is deleted from every individual of every population.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. Nothing happens
        (and no error is raised) if the name is not in loci_list.
        """
        for i, locus in enumerate(self.loci_list):
            if locus == name:
                self.remove_locus_by_position(i)
                return
        # If here then the locus does not exist... Maybe raise exception?
        # Although it should be Ok... Just a boolean return, maybe?