Package Bio :: Package Compass
[hide private]
[frames] | no frames]

Source Code for Package Bio.Compass

  1  # Copyright 2004 by James Casbon.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  Code to deal with COMPASS output, a program for profile/profile comparison. 
  8   
  9  Compass is described in: 
 10   
 11  Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 
 12  alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 
 13  7;326(1):317-36. 
 14   
 15  Tested with COMPASS 1.24. 
 16   
 17  Functions: 
 18  read          Reads a COMPASS file containing one COMPASS record 
 19  parse         Iterates over records in a COMPASS file. 
 20   
 21  Classes: 
 22  Record        One result of a COMPASS file 
 23   
 24  OBSOLETE CLASSES: 
 25   
 26  _Scanner      Scan compass results 
 27  _Consumer     Consume scanner events 
 28  RecordParser  Parse one compass record 
 29  Iterator      Iterate through a number of compass records 
 30  """ 
 31  import re 
 32   
 33   
 34   
35 -def read(handle):
36 record = None 37 try: 38 line = handle.next() 39 record = Record() 40 __read_names(record, line) 41 line = handle.next() 42 __read_threshold(record, line) 43 line = handle.next() 44 __read_lengths(record, line) 45 line = handle.next() 46 __read_profilewidth(record, line) 47 line = handle.next() 48 __read_scores(record, line) 49 except StopIteration: 50 if not record: 51 raise ValueError("No record found in handle") 52 else: 53 raise ValueError("Unexpected end of stream.") 54 for line in handle: 55 if is_blank_line(line): 56 continue 57 __read_query_alignment(record, line) 58 try: 59 line = handle.next() 60 __read_positive_alignment(record, line) 61 line = handle.next() 62 __read_hit_alignment(record, line) 63 except StopIteration: 64 raise ValueError("Unexpected end of stream.") 65 return record
66
67 -def parse(handle):
68 record = None 69 try: 70 line = handle.next() 71 except StopIteration: 72 return 73 while True: 74 try: 75 record = Record() 76 __read_names(record, line) 77 line = handle.next() 78 __read_threshold(record, line) 79 line = handle.next() 80 __read_lengths(record, line) 81 line = handle.next() 82 __read_profilewidth(record, line) 83 line = handle.next() 84 __read_scores(record, line) 85 except StopIteration: 86 raise ValueError("Unexpected end of stream.") 87 for line in handle: 88 if not line.strip(): 89 continue 90 if "Ali1:" in line: 91 yield record 92 break 93 __read_query_alignment(record, line) 94 try: 95 line = handle.next() 96 __read_positive_alignment(record, line) 97 line = handle.next() 98 __read_hit_alignment(record, line) 99 except StopIteration: 100 raise ValueError("Unexpected end of stream.") 101 else: 102 yield record 103 break
104
105 -class Record:
106 """ 107 Hold information from one compass hit. 108 Ali1 one is the query, Ali2 the hit. 109 """ 110
111 - def __init__(self):
112 self.query='' 113 self.hit='' 114 self.gap_threshold=0 115 self.query_length=0 116 self.query_filtered_length=0 117 self.query_nseqs=0 118 self.query_neffseqs=0 119 self.hit_length=0 120 self.hit_filtered_length=0 121 self.hit_nseqs=0 122 self.hit_neffseqs=0 123 self.sw_score=0 124 self.evalue=-1 125 self.query_start=-1 126 self.hit_start=-1 127 self.query_aln='' 128 self.hit_aln='' 129 self.positives=''
130 131
132 - def query_coverage(self):
133 """Return the length of the query covered in alignment""" 134 s = self.query_aln.replace("=", "") 135 return len(s)
136
137 - def hit_coverage(self):
138 """Return the length of the hit covered in the alignment""" 139 s = self.hit_aln.replace("=", "") 140 return len(s)
141 142 # Everything below is private 143 144 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 145 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 146 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 147 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 148 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 149 "start": re.compile("(\d+)"), 150 "align": re.compile("^.{15}(\S+)"), 151 "positive_alignment": re.compile("^.{15}(.+)"), 152 } 153
154 -def __read_names(record, line):
155 """ 156 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 157 ------query----- -------hit------------- 158 """ 159 if not "Ali1:" in line: 160 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 161 m = __regex["names"].search(line) 162 record.query = m.group(1) 163 record.hit = m.group(2)
164
165 -def __read_threshold(record,line):
166 if not line.startswith("Threshold"): 167 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 168 m = __regex["threshold"].search(line) 169 record.gap_threshold = float(m.group(1))
170
171 -def __read_lengths(record, line):
172 if not line.startswith("length1="): 173 raise ValueError("Line does not start with 'length1=':\n%s" % line) 174 m = __regex["lengths"].search(line) 175 record.query_length = int(m.group(1)) 176 record.query_filtered_length = float(m.group(2)) 177 record.hit_length = int(m.group(3)) 178 record.hit_filtered_length = float(m.group(4))
179
180 -def __read_profilewidth(record, line):
181 if not "Nseqs1" in line: 182 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 183 m = __regex["profilewidth"].search(line) 184 record.query_nseqs = int(m.group(1)) 185 record.query_neffseqs = float(m.group(2)) 186 record.hit_nseqs = int(m.group(3)) 187 record.hit_neffseqs = float(m.group(4))
188
189 -def __read_scores(record, line):
190 if not line.startswith("Smith-Waterman"): 191 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 192 m = __regex["scores"].search(line) 193 if m: 194 record.sw_score = int(m.group(1)) 195 record.evalue = float(m.group(2)) 196 else: 197 record.sw_score = 0 198 record.evalue = -1.0
199
200 -def __read_query_alignment(record, line):
201 m = __regex["start"].search(line) 202 if m: 203 record.query_start = int(m.group(1)) 204 m = __regex["align"].match(line) 205 assert m!=None, "invalid match" 206 record.query_aln += m.group(1)
207
208 -def __read_positive_alignment(record, line):
209 m = __regex["positive_alignment"].match(line) 210 assert m!=None, "invalid match" 211 record.positives += m.group(1)
212
213 -def __read_hit_alignment(record, line):
214 m = __regex["start"].search(line) 215 if m: 216 record.hit_start = int(m.group(1)) 217 m = __regex["align"].match(line) 218 assert m!=None, "invalid match" 219 record.hit_aln += m.group(1)
220 221 # Everything below is obsolete 222 223 from Bio import File 224 from Bio.ParserSupport import * 225 226
227 -class _Scanner:
228 """Reads compass output and generate events (OBSOLETE)""" 229
230 - def feed(self, handle, consumer):
231 """Feed in COMPASS ouput""" 232 233 if isinstance(handle, File.UndoHandle): 234 pass 235 else: 236 handle = File.UndoHandle(handle) 237 238 239 assert isinstance(handle, File.UndoHandle), \ 240 "handle must be an UndoHandle" 241 if handle.peekline(): 242 self._scan_record(handle, consumer)
243 244
245 - def _scan_record(self,handle,consumer):
246 self._scan_names(handle, consumer) 247 self._scan_threshold(handle, consumer) 248 self._scan_lengths(handle,consumer) 249 self._scan_profilewidth(handle, consumer) 250 self._scan_scores(handle,consumer) 251 self._scan_alignment(handle,consumer)
252
253 - def _scan_names(self,handle,consumer):
254 """ 255 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 256 """ 257 read_and_call(handle, consumer.names, contains="Ali1:")
258
259 - def _scan_threshold(self,handle, consumer):
260 """ 261 Threshold of effective gap content in columns: 0.5 262 """ 263 read_and_call(handle, consumer.threshold, start="Threshold")
264
265 - def _scan_lengths(self,handle, consumer):
266 """ 267 length1=388 filtered_length1=386 length2=145 filtered_length2=137 268 """ 269 read_and_call(handle, consumer.lengths, start="length1=")
270
271 - def _scan_profilewidth(self,handle,consumer):
272 """ 273 Nseqs1=399 Neff1=12.972 Nseqs2=1 Neff2=6.099 274 """ 275 read_and_call(handle, consumer.profilewidth, contains="Nseqs1")
276
277 - def _scan_scores(self,handle, consumer):
278 """ 279 Smith-Waterman score = 37 Evalue = 5.75e+02 280 """ 281 read_and_call(handle, consumer.scores, start="Smith-Waterman")
282
283 - def _scan_alignment(self,handle, consumer):
284 """ 285 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 286 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 287 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 288 289 290 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 291 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 292 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 293 294 """ 295 while 1: 296 line = handle.readline() 297 if not line: 298 break 299 if is_blank_line(line): 300 continue 301 else: 302 consumer.query_alignment(line) 303 read_and_call(handle, consumer.positive_alignment) 304 read_and_call(handle, consumer.hit_alignment)
305
306 -class _Consumer:
307 # all regular expressions used -- compile only once 308 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+") 309 _re_threshold = \ 310 re.compile("Threshold of effective gap content in columns: (\S+)") 311 _re_lengths = \ 312 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)" 313 + "\s+filtered_length2=(\S+)") 314 _re_profilewidth = \ 315 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)") 316 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)") 317 _re_start = re.compile("(\d+)") 318 _re_align = re.compile("^.{15}(\S+)") 319 _re_positive_alignment = re.compile("^.{15}(.+)") 320
321 - def __init__(self):
322 self.data = None
323
324 - def names(self, line):
325 """ 326 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 327 ------query----- -------hit------------- 328 """ 329 self.data = Record() 330 m = self.__class__._re_names.search(line) 331 self.data.query = m.group(1) 332 self.data.hit = m.group(2)
333
334 - def threshold(self,line):
335 m = self.__class__._re_threshold.search(line) 336 self.data.gap_threshold = float(m.group(1))
337
338 - def lengths(self,line):
339 m = self.__class__._re_lengths.search(line) 340 self.data.query_length = int(m.group(1)) 341 self.data.query_filtered_length = float(m.group(2)) 342 self.data.hit_length = int(m.group(3)) 343 self.data.hit_filtered_length = float(m.group(4))
344
345 - def profilewidth(self,line):
346 m = self.__class__._re_profilewidth.search(line) 347 self.data.query_nseqs = int(m.group(1)) 348 self.data.query_neffseqs = float(m.group(2)) 349 self.data.hit_nseqs = int(m.group(3)) 350 self.data.hit_neffseqs = float(m.group(4))
351
352 - def scores(self, line):
353 m = self.__class__._re_scores.search(line) 354 if m: 355 self.data.sw_score = int(m.group(1)) 356 self.data.evalue = float(m.group(2)) 357 else: 358 self.data.sw_score = 0 359 self.data.evalue = -1.0
360
361 - def query_alignment(self, line):
362 m = self.__class__._re_start.search(line) 363 if m: 364 self.data.query_start = int(m.group(1)) 365 m = self.__class__._re_align.match(line) 366 assert m!=None, "invalid match" 367 self.data.query_aln = self.data.query_aln + m.group(1)
368
369 - def positive_alignment(self,line):
370 m = self.__class__._re_positive_alignment.match(line) 371 assert m!=None, "invalid match" 372 self.data.positives = self.data.positives + m.group(1)
373
374 - def hit_alignment(self,line):
375 m = self.__class__._re_start.search(line) 376 if m: 377 self.data.hit_start = int(m.group(1)) 378 m = self.__class__._re_align.match(line) 379 assert m!=None, "invalid match" 380 self.data.hit_aln = self.data.hit_aln + m.group(1)
381
382 -class RecordParser(AbstractParser):
383 """Parses compass results into a Record object (OBSOLETE). 384 """
385 - def __init__(self):
386 self._scanner = _Scanner() 387 self._consumer = _Consumer()
388 389
390 - def parse(self, handle):
391 if isinstance(handle, File.UndoHandle): 392 uhandle = handle 393 else: 394 uhandle = File.UndoHandle(handle) 395 self._scanner.feed(uhandle, self._consumer) 396 return self._consumer.data
397
398 -class Iterator:
399 """Iterate through a file of compass results (OBSOLETE)."""
400 - def __init__(self, handle):
401 self._uhandle = File.UndoHandle(handle) 402 self._parser = RecordParser()
403
404 - def next(self):
405 lines = [] 406 while 1: 407 line = self._uhandle.readline() 408 if not line: 409 break 410 if line[0:4] == "Ali1" and lines: 411 self._uhandle.saveline(line) 412 break 413 414 lines.append(line) 415 416 417 if not lines: 418 return None 419 420 data = ''.join(lines) 421 return self._parser.parse(File.StringHandle(data))
422