Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2010 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Align import MultipleSeqAlignment 
 17  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 18   
19 -class EmbossWriter(SequentialAlignmentWriter):
20 """Emboss alignment writer (WORK IN PROGRESS). 21 22 Writes a simplified version of the EMBOSS pairs/simple file format. 23 A lot of the information their tools record in their headers is not 24 available and is omitted. 25 """ 26
def write_header(self):
    """Write the file-level banner that precedes the alignments.

    The banner names Biopython as the generating program.  When the
    output handle has a ``name`` attribute it is recorded as the report
    file; a handle without one simply gets no Report_file line.
    """
    out = self.handle
    rule = "########################################\n"

    out.write(rule)
    out.write("# Program: Biopython\n")
    try:
        out.write("# Report_file: %s\n" % out.name)
    except AttributeError:
        # No usable name on this handle - leave the line out.
        pass
    out.write(rule)
36 41
def write_alignment(self, alignment):
    """Use this to write (another) single alignment to an open file.

    Only the per-alignment header (number of sequences, their
    identifiers and the alignment length) is written at present.

    Arguments:
     - alignment - object supporting len(), iteration over records
       with an ``id`` attribute, and get_alignment_length().

    Raises NotImplementedError once the header has been written,
    because output of the aligned sequence rows is not implemented yet.
    """
    handle = self.handle
    handle.write("#=======================================\n")
    handle.write("#\n")
    handle.write("# Aligned_sequences: %i\n" % len(alignment))
    for i, record in enumerate(alignment):
        # EMBOSS numbers the sequences from one.
        handle.write("# %i: %s\n" % (i + 1, record.id))
    handle.write("#\n")
    handle.write("# Length: %i\n" % alignment.get_alignment_length())
    handle.write("#\n")
    handle.write("#=======================================\n")
    handle.write("\n")
    # TODO: write the aligned sequence rows themselves.
    # An explicit exception (rather than "assert False") so that running
    # under "python -O" cannot silently produce a header-only file.
    raise NotImplementedError("Writing the aligned sequences in EMBOSS "
                              "format is not implemented yet")
57
class EmbossIterator(AlignmentIterator):
    """Emboss alignment iterator.

    For reading the (pairwise) alignments from EMBOSS tools in what they
    call the "pairs" and "simple" formats.

    Each call to next() consumes one alignment from self.handle and
    returns it, or returns None once no further alignment header is found.
    """

    # Line that opens (and closes) the "#" header of each alignment.
    _ALIGNMENT_MARKER = "#======================================="
    # Either marker, met after the sequence rows, ends the current alignment.
    _FOOTER_MARKER = "#---------------------------------------"
    # A sequence row keeps its identifier and start position in this many
    # leading characters; the residues and the end position follow.
    _ID_COLUMNS = 21

    def next(self):
        """Parse and return the next alignment, or None if there is none.

        Raises ValueError if the header lacks the sequence count or the
        alignment length, if the count differs from
        self.records_per_alignment (when that is not None), or if the
        sequence rows disagree with the header or with each other.
        """
        handle = self.handle

        try:
            # Marker line we saved from when we were parsing
            # the previous alignment.
            line = self._header
            del self._header
        except AttributeError:
            line = handle.readline()
        if not line:
            return None

        # Skip everything up to the start of the next alignment header.
        while line.rstrip() != self._ALIGNMENT_MARKER:
            line = handle.readline()
            if not line:
                return None

        ids, length_of_seqs, line = self._read_header(line)

        expected = self.records_per_alignment
        if expected is not None and expected != len(ids):
            raise ValueError("Found %i records in this alignment, told to expect %i"
                             % (len(ids), expected))

        seqs = self._read_sequences(line, ids)

        alignment = MultipleSeqAlignment(self.alphabet)
        for name, seq in zip(ids, seqs):
            if len(seq) != length_of_seqs:
                #EMBOSS 2.9.0 is known to use spaces instead of minus signs
                #for leading gaps, and thus fails to parse.  This old version
                #is still used as of Dec 2008 behind the EBI SOAP webservice:
                #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl
                raise ValueError("Error parsing alignment - sequences of "
                                 "different length? You could be using an "
                                 "old version of EMBOSS.")
            alignment.add_sequence(name, seq)
        return alignment

    def _read_header(self, line):
        """Read the "#" header of one alignment, starting at the given line.

        Returns a tuple (ids, length_of_seqs, line) where ids is the list
        of record identifiers, length_of_seqs the declared alignment
        length and line the first line read after the header.

        Raises ValueError if the count or the length is missing, or if
        the identifier lines are malformed.
        """
        handle = self.handle
        number_of_seqs = None
        length_of_seqs = None
        ids = []

        # startswith() rather than line[0], so that a file which ends
        # inside the header (readline() giving "") stops the loop instead
        # of raising IndexError.
        while line.startswith("#"):
            parts = line[1:].split(":", 1)
            key = parts[0].lower().strip()
            if key in ("aligned_sequences", "length") and len(parts) != 2:
                raise ValueError("Missing value in header line: %r" % line)
            if key == "aligned_sequences":
                if ids:
                    raise ValueError("Repeated Aligned_sequences header "
                                     "line: %r" % line)
                number_of_seqs = int(parts[1].strip())
                # Should now expect the record identifiers, numbered
                # from one, each on its own line...
                for number in range(1, number_of_seqs + 1):
                    line = handle.readline()
                    parts = line[1:].strip().split(":", 1)
                    if len(parts) != 2 or int(parts[0].strip()) != number:
                        raise ValueError("Expected the identifier of "
                                         "sequence %i, found: %r"
                                         % (number, line))
                    ids.append(parts[1].strip())
                if len(ids) != number_of_seqs:
                    # Only possible for a negative count.
                    raise ValueError("Invalid number of sequences: %i"
                                     % number_of_seqs)
            elif key == "length":
                length_of_seqs = int(parts[1].strip())

            #And read in another line...
            line = handle.readline()

        if number_of_seqs is None:
            raise ValueError("Number of sequences missing!")
        if length_of_seqs is None:
            raise ValueError("Length of sequences missing!")
        return ids, length_of_seqs, line

    def _read_sequences(self, line, ids):
        """Read the interleaved sequence rows; return one string per id.

        Reading starts with the given line and stops at end of file or at
        a marker line, which is saved in self._header for the next call.

        Raises ValueError for an unrecognised line, or when the start and
        end positions of a row do not match the residues read so far.
        """
        handle = self.handle
        number_of_seqs = len(ids)
        end_markers = (self._FOOTER_MARKER, self._ALIGNMENT_MARKER)
        seqs = [""] * number_of_seqs
        seq_starts = []
        index = 0

        while line:
            if len(line) > self._ID_COLUMNS:
                id_start = line[:self._ID_COLUMNS].strip().split(None, 1)
                seq_end = line[self._ID_COLUMNS:].strip().split(None, 1)
                if len(id_start) == 2 and len(seq_end) == 2:
                    #identifier, seq start position, seq, seq end position
                    #(an aligned seq is broken up into multiple lines)
                    name, start = id_start
                    seq, end = seq_end
                    has_letters = seq.replace("-", "") != ""
                    # Compared as text, before conversion: equal values
                    # mean a single letter or no letters at all.
                    if start != end and not has_letters:
                        raise ValueError("No letters found on a line "
                                         "spanning several positions:\n%s"
                                         % line)
                    if has_letters:
                        start = int(start) - 1  # python counting
                    else:
                        start = int(start)
                    end = int(end)

                    if not 0 <= index < number_of_seqs:
                        raise ValueError("Expected index %i in range [0,%i)"
                                         % (index, number_of_seqs))
                    #The identifier is truncated...
                    if not ids[index].startswith(name):
                        raise ValueError("Identifier %r does not match "
                                         "expected %r" % (name, ids[index]))

                    if len(seq_starts) == index:
                        #Record the start
                        seq_starts.append(start)

                    #Check the start...
                    letters_so_far = len(seqs[index].replace("-", ""))
                    if start == end:
                        if has_letters:
                            raise ValueError(line)
                    elif start - seq_starts[index] != letters_so_far:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s"
                            % (letters_so_far, index, name,
                               repr(seqs[index]), start, line))

                    seqs[index] += seq

                    #Check the end ...
                    letters_so_far = len(seqs[index].replace("-", ""))
                    if end != seq_starts[index] + letters_so_far:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s"
                            % (letters_so_far, index, name,
                               repr(seqs[index]), seq_starts[index],
                               end, line))

                    # Rows cycle through the sequences in header order.
                    index = (index + 1) % number_of_seqs
                #else: no start/end pair either side, so this is just
                #alignment annotation (?) and is skipped.
            elif line.strip():
                # Too short to be a sequence row and not a blank spacer.
                raise ValueError("Unrecognised EMBOSS pairwise line: %r"
                                 % line)

            line = handle.readline()
            if line.rstrip() in end_markers:
                #End of alignment
                self._header = line
                break

        if index != 0:
            raise ValueError("Alignment ended part way through a set of "
                             "sequence rows")
        return seqs
217 218 if __name__ == "__main__": 219 print "Running a quick self-test" 220 221 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 222 simple_example = \ 223 """######################################## 224 # Program: alignret 225 # Rundate: Wed Jan 16 17:16:13 2002 226 # Report_file: stdout 227 ######################################## 228 #======================================= 229 # 230 # Aligned_sequences: 4 231 # 1: IXI_234 232 # 2: IXI_235 233 # 3: IXI_236 234 # 4: IXI_237 235 # Matrix: EBLOSUM62 236 # Gap_penalty: 10.0 237 # Extend_penalty: 0.5 238 # 239 # Length: 131 240 # Identity: 95/131 (72.5%) 241 # Similarity: 127/131 (96.9%) 242 # Gaps: 25/131 (19.1%) 243 # Score: 100.0 244 # 245 # 246 #======================================= 247 248 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 249 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 250 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 251 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 252 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 253 254 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 255 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 256 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 257 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 258 ||:||||||||||||||||||||:::::::::::||||||||||||| | 259 260 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 261 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 262 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 263 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 264 |||:||||:|||||:|||||||::||||||| 265 266 267 #--------------------------------------- 268 #--------------------------------------- 269 270 """ 271 272 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 273 pair_example = \ 274 """######################################## 275 # Program: water 276 # Rundate: Wed Jan 16 17:23:19 2002 277 # 
Report_file: stdout 278 ######################################## 279 #======================================= 280 # 281 # Aligned_sequences: 2 282 # 1: IXI_234 283 # 2: IXI_235 284 # Matrix: EBLOSUM62 285 # Gap_penalty: 10.0 286 # Extend_penalty: 0.5 287 # 288 # Length: 131 289 # Identity: 112/131 (85.5%) 290 # Similarity: 112/131 (85.5%) 291 # Gaps: 19/131 (14.5%) 292 # Score: 591.5 293 # 294 # 295 #======================================= 296 297 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 298 ||||||||||||||| |||||||||||||||||||||||||| 299 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 300 301 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 302 |||||||||||||||||||||||| |||||||||||||||| 303 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 304 305 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 306 ||||||||||||||||||||||||||||||| 307 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 308 309 310 #--------------------------------------- 311 #--------------------------------------- 312 313 314 """ 315 316 pair_example2 = \ 317 """######################################## 318 # Program: needle 319 # Rundate: Sun 27 Apr 2007 17:20:35 320 # Commandline: needle 321 # [-asequence] Spo0F.faa 322 # [-bsequence] paired_r.faa 323 # -sformat2 pearson 324 # Align_format: srspair 325 # Report_file: ref_rec .needle 326 ######################################## 327 328 #======================================= 329 # 330 # Aligned_sequences: 2 331 # 1: ref_rec 332 # 2: gi|94968718|receiver 333 # Matrix: EBLOSUM62 334 # Gap_penalty: 10.0 335 # Extend_penalty: 0.5 336 # 337 # Length: 124 338 # Identity: 32/124 (25.8%) 339 # Similarity: 64/124 (51.6%) 340 # Gaps: 17/124 (13.7%) 341 # Score: 112.0 342 # 343 # 344 #======================================= 345 346 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 347 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 348 gi|94968718|r 1 
-VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 349 350 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 351 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 352 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 353 354 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 355 :..| ..|:|.|: || 356 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 357 358 359 #======================================= 360 # 361 # Aligned_sequences: 2 362 # 1: ref_rec 363 # 2: gi|94968761|receiver 364 # Matrix: EBLOSUM62 365 # Gap_penalty: 10.0 366 # Extend_penalty: 0.5 367 # 368 # Length: 119 369 # Identity: 34/119 (28.6%) 370 # Similarity: 58/119 (48.7%) 371 # Gaps: 9/119 ( 7.6%) 372 # Score: 154.0 373 # 374 # 375 #======================================= 376 377 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 378 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 379 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 380 381 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 382 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 383 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 384 385 ref_rec 101 PFDIDEIRDAV-------- 111 386 |...|::...| 387 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 388 389 390 #======================================= 391 # 392 # Aligned_sequences: 2 393 # 1: ref_rec 394 # 2: gi|94967506|receiver 395 # Matrix: EBLOSUM62 396 # Gap_penalty: 10.0 397 # Extend_penalty: 0.5 398 # 399 # Length: 120 400 # Identity: 29/120 (24.2%) 401 # Similarity: 53/120 (44.2%) 402 # Gaps: 9/120 ( 7.5%) 403 # Score: 121.0 404 # 405 # 406 #======================================= 407 408 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 409 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 410 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 411 412 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 413 
|:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 414 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 415 416 ref_rec 100 KPFDIDEIRDAV-------- 111 417 ||.|||.:.:.. 418 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 419 420 421 #======================================= 422 # 423 # Aligned_sequences: 2 424 # 1: ref_rec 425 # 2: gi|94970045|receiver 426 # Matrix: EBLOSUM62 427 # Gap_penalty: 10.0 428 # Extend_penalty: 0.5 429 # 430 # Length: 118 431 # Identity: 30/118 (25.4%) 432 # Similarity: 64/118 (54.2%) 433 # Gaps: 9/118 ( 7.6%) 434 # Score: 126.0 435 # 436 # 437 #======================================= 438 439 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 440 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 441 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 442 443 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 444 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 445 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 446 447 ref_rec 99 -AKPFDID----EIRDAV 111 448 .|||.:| :||:.: 449 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 450 451 452 #======================================= 453 # 454 # Aligned_sequences: 2 455 # 1: ref_rec 456 # 2: gi|94970041|receiver 457 # Matrix: EBLOSUM62 458 # Gap_penalty: 10.0 459 # Extend_penalty: 0.5 460 # 461 # Length: 125 462 # Identity: 35/125 (28.0%) 463 # Similarity: 70/125 (56.0%) 464 # Gaps: 18/125 (14.4%) 465 # Score: 156.5 466 # 467 # 468 #======================================= 469 470 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 471 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 472 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 473 474 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 475 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 
476 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 477 478 ref_rec 99 A----KPFDIDEIRDAV-------- 111 479 | |||..|.:...| 480 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 481 482 483 #--------------------------------------- 484 #--------------------------------------- 485 486 """ 487 488 pair_example3 = """######################################## 489 # Program: needle 490 # Rundate: Mon 14 Jul 2008 11:45:42 491 # Commandline: needle 492 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 493 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 494 # -filter 495 # Align_format: srspair 496 # Report_file: stdout 497 ######################################## 498 499 #======================================= 500 # 501 # Aligned_sequences: 2 502 # 1: asis 503 # 2: asis 504 # Matrix: EDNAFULL 505 # Gap_penalty: 10.0 506 # Extend_penalty: 0.5 507 # 508 # Length: 667 509 # Identity: 210/667 (31.5%) 510 # Similarity: 210/667 (31.5%) 511 # Gaps: 408/667 (61.2%) 512 # Score: 561.0 513 # 514 # 515 #======================================= 516 517 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 518 
519 asis 0 -------------------------------------------------- 0 520 521 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 522 523 asis 0 -------------------------------------------------- 0 524 525 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 526 527 asis 0 -------------------------------------------------- 0 528 529 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 530 .|||||| 531 asis 1 ------------TTATTAA------------------------------- 7 532 533 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 534 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 535 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 536 537 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 538 ||.|.|||||.|||.||||.|||| | ||||||||| 539 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 540 541 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 542 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 543 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 544 545 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 546 ||| |||||| .||||...||||..|||||. 
547 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 548 549 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 550 .| ||||||||||| || ||.||.||.|| 551 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 552 553 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 554 ||..||.||.|.|||..||||.|| ||||| | 555 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 556 557 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 558 ||| |.||| |||||||||.||| .||||||...||||||||| 559 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 560 561 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 562 ||||||||..| |||||||| 563 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 564 565 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 566 567 asis 311 -------------------------------------------------- 311 568 569 asis 599 GAAAGGGGGTTAATAGC 615 570 571 asis 311 ----------------- 311 572 573 574 #--------------------------------------- 575 #---------------------------------------""" 576 577 from StringIO import StringIO 578 579 alignments = list(EmbossIterator(StringIO(pair_example))) 580 assert len(alignments) == 1 581 assert len(alignments[0]) == 2 582 assert [r.id for r in alignments[0]] \ 583 == ["IXI_234", "IXI_235"] 584 585 alignments = list(EmbossIterator(StringIO(simple_example))) 586 assert len(alignments) == 1 587 assert len(alignments[0]) == 4 588 assert [r.id for r in alignments[0]] \ 589 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 590 591 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 592 assert len(alignments) == 2 593 assert len(alignments[0]) == 2 594 assert len(alignments[1]) == 4 595 assert [r.id for r in alignments[0]] \ 596 == ["IXI_234", "IXI_235"] 597 assert [r.id for r in alignments[1]] \ 598 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 599 600 alignments = list(EmbossIterator(StringIO(pair_example2))) 601 assert 
len(alignments) == 5 602 assert len(alignments[0]) == 2 603 assert [r.id for r in alignments[0]] \ 604 == ["ref_rec", "gi|94968718|receiver"] 605 assert [r.id for r in alignments[4]] \ 606 == ["ref_rec", "gi|94970041|receiver"] 607 608 609 alignments = list(EmbossIterator(StringIO(pair_example3))) 610 assert len(alignments) == 1 611 assert len(alignments[0]) == 2 612 assert [r.id for r in alignments[0]] \ 613 == ["asis","asis"] 614 615 print "Done" 616