1
2
3
4
5
6
7
8 """Alphabets used in Seq objects etc to declare sequence type and letters.
9
10 This is used by sequences which contain a finite number of similar words.
11 """
12
# Class-level defaults; both start out as None.
# NOTE(review): the enclosing class header is not visible in this extract.
# The case-conversion code further down reads `self.letters` and returns
# `self` unchanged when it is falsy - presumably the same class; confirm.
14 size = None
15 letters = None
16
17
18
19
20
# Builds "<ClassName>()" from the instance's own class name, so subclasses
# report their own name.
# NOTE(review): the `def` line for this body is not present in this extract.
22 return self.__class__.__name__ + "()"
23
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads `self` and `other`.
25 """Does this alphabet 'contain' the other (OBSOLETE?).
26
27 Returns a boolean. This relies on the Alphabet subclassing
28 hierarchy only, and does not check the letters property.
29 This isn't ideal, and doesn't seem to work as intended
30 with the AlphabetEncoder classes."""
# True when `other` is an instance of this object's own class or a subclass.
31 return isinstance(other, self.__class__)
32
48
# NOTE(review): the `def` line for this body is not present in this extract;
# other code in this file calls it as `<alphabet>._upper()`.
50 """Return an upper case variant of the current alphabet (PRIVATE)."""
51 if not self.letters or self.letters==self.letters.upper():
52
# Nothing to convert: `letters` is falsy or already equals its upper-cased form.
53 return self
54 else:
55
# Otherwise defer to self._case_less(), which is defined outside this extract.
56 return self._case_less()
57
# NOTE(review): the `def` line for this body is not present in this extract;
# other code in this file calls it as `<alphabet>._lower()`.
59 """Return a lower case variant of the current alphabet (PRIVATE)."""
60 if not self.letters or self.letters==self.letters.lower():
61
# Nothing to convert: `letters` is falsy or already equals its lower-cased form.
62 return self
63 else:
64
# Otherwise defer to self._case_less(), which is defined outside this extract.
65 return self._case_less()
66
# Module-level shared instances, one per alphabet class.
# NOTE(review): the class definitions themselves are not visible in this extract.

# Returned by the base-alphabet consensus code when the inputs share nothing
# more specific (and when no alphabets are given at all).
67 generic_alphabet = Alphabet()
68
72
# Consensus result when both sides are SingleLetterAlphabet instances but
# neither class is an instance of the other's.
73 single_letter_alphabet = SingleLetterAlphabet()
74
75
76
79
# Shared ProteinAlphabet instance.
80 generic_protein = ProteinAlphabet()
81
82
85
# Consensus result when both sides are NucleotideAlphabet instances but
# neither class is an instance of the other's.
86 generic_nucleotide = NucleotideAlphabet()
87
90
# Shared DNAAlphabet instance.
91 generic_dna = DNAAlphabet()
92
93
94
95
98
# Shared RNAAlphabet instance.
99 generic_rna = RNAAlphabet()
100
101
102
103
104
107
# Table of 24 three-character codes; `size` is 3, presumably the width of
# each entry - confirm against the enclosing class, whose header is not
# visible in this extract.
# NOTE(review): `letters` is a list here, while the case helpers earlier in
# this file call `self.letters.upper()` / `.lower()`, which a list does not
# provide. If this class inherits those helpers they would raise
# AttributeError - verify.
109 size = 3
110 letters = [
111 "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
112 "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
113 "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
114 ]
115
116
117
118
119
121 - def __init__(self, alphabet, new_letters):
# Attribute forwarding to the wrapped alphabet.
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads `self` and `key`.
# Names that both start and end with a double underscore are never forwarded.
129 if key[:2] == "__" and key[-2:] == "__":
130 raise AttributeError(key)
# Every other name is looked up on the wrapped `self.alphabet`.
131 return getattr(self.alphabet, key)
132
# Builds "<ClassName>(<repr of alphabet>, <repr of new_letters>)".
# NOTE(review): the `def` line for this body is not present in this extract.
134 return "%s(%r, %r)" % (self.__class__.__name__, self.alphabet,
135 self.new_letters)
136
# NOTE(review): the `def` line for this body is not present in this extract.
138 """Does this alphabet 'contain' the other (OBSOLETE?).
139
140 This isn't implemented for the base AlphabetEncoder,
141 which will always return 0 (False)."""
# Returns the integer 0 rather than the literal False; no argument is examined.
142 return 0
143
147
151
152
153 -class Gapped(AlphabetEncoder):
154 - def __init__(self, alphabet, gap_char = "-"):
157
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads `self` and `other`.
159 """Does this alphabet 'contain' the other (OBSOLETE?).
160
161 Returns a boolean. This relies on the Alphabet subclassing
162 hierarchy, and attempts to check the gap character. This fails
163 if the other alphabet does not have a gap character!
164 """
# `other.gap_char` is read unconditionally, so an `other` without that
# attribute raises AttributeError (the failure the docstring mentions).
# The wrapped alphabets are only compared when the gap characters are equal.
165 return other.gap_char == self.gap_char and \
166 self.alphabet.contains(other.alphabet)
167
# NOTE(review): the `def` line for this body is not present in this extract.
169 """Return an upper case variant of the current alphabet (PRIVATE)."""
# Upper-cases both the wrapped alphabet and the gap character, then wraps
# them in a new Gapped instance; `self` is not modified.
170 return Gapped(self.alphabet._upper(), self.gap_char.upper())
171
# NOTE(review): the `def` line for this body is not present in this extract.
173 """Return a lower case variant of the current alphabet (PRIVATE)."""
# Lower-cases both the wrapped alphabet and the gap character, then wraps
# them in a new Gapped instance; `self` is not modified.
174 return Gapped(self.alphabet._lower(), self.gap_char.lower())
175
176
178 - def __init__(self, alphabet, stop_symbol = "*"):
181
187
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads `self` and `other`.
189 """Does this alphabet 'contain' the other (OBSOLETE?).
190
191 Returns a boolean. This relies on the Alphabet subclassing
192 hierarchy, and attempts to check the stop symbol. This fails
193 if the other alphabet does not have a stop symbol!
194 """
# `other.stop_symbol` is read unconditionally, so an `other` without that
# attribute raises AttributeError (the failure the docstring mentions).
# The wrapped alphabets are only compared when the stop symbols are equal.
195 return other.stop_symbol == self.stop_symbol and \
196 self.alphabet.contains(other.alphabet)
197
201
205
206
def _get_base_alphabet(alphabet):
    """Return the plain Alphabet underneath any AlphabetEncoder layers (PRIVATE).

    Follows the ``.alphabet`` attribute of each AlphabetEncoder wrapper until
    an object that is not an AlphabetEncoder is reached, and returns that
    object.

    Raises AssertionError (when assertions are enabled) if the object found
    at the bottom is not an Alphabet instance.
    """
    base = alphabet
    while True:
        if not isinstance(base, AlphabetEncoder):
            break
        # Peel off one encoder layer and look again.
        base = base.alphabet
    # Sanity check only: being an ``assert``, it is skipped under ``python -O``.
    assert isinstance(base, Alphabet), "Invalid alphabet found, %s" % repr(base)
    return base
215
229
def _consensus_base_alphabet(alphabets):
    """Return one base alphabet that covers all the given alphabets (PRIVATE).

    Each input is first reduced to its base alphabet, so AlphabetEncoder
    information (e.g. from Gapped alphabets) is discarded.

    Mixing never raises: DNA + RNA gives the generic nucleotide alphabet,
    nucleotide + protein gives the generic single letter alphabet, and
    anything with no common ground gives the generic alphabet. An empty
    input also gives the generic alphabet.
    """
    consensus = None
    for candidate in alphabets:
        base = _get_base_alphabet(candidate)

        if consensus is None:
            # First alphabet seen: it is the consensus so far.
            consensus = base
            continue

        if consensus == base or isinstance(base, consensus.__class__):
            # Same alphabet, or one already covered by the current
            # consensus class: keep the consensus as it is.
            continue

        if isinstance(consensus, base.__class__):
            # The newcomer's class covers the consensus: adopt the newcomer.
            consensus = base
            continue

        if isinstance(base, NucleotideAlphabet) \
                and isinstance(consensus, NucleotideAlphabet):
            # Two different nucleotide alphabets, e.g. DNA and RNA.
            consensus = generic_nucleotide
            continue

        if isinstance(base, SingleLetterAlphabet) \
                and isinstance(consensus, SingleLetterAlphabet):
            # Two different single letter alphabets, e.g. nucleotide and protein.
            consensus = single_letter_alphabet
            continue

        # Nothing in common at all; no later alphabet can change that.
        return generic_alphabet

    return generic_alphabet if consensus is None else consensus
263
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads a single input named `alphabets`.
# NOTE(review): `alphabets` is traversed twice - once inside
# _consensus_base_alphabet() and once by the loop below - so a one-shot
# iterator would be exhausted before the gap/stop/new-letter scan. Confirm
# that callers always pass a re-iterable sequence.
265 """Returns a common but often generic alphabet object (PRIVATE).
266
267 Note that DNA+RNA -> Nucleotide, and Nucleotide+Protein-> generic single
268 letter. These DO NOT raise an exception!
269
270 This is aware of Gapped and HasStopCodon and new letters added by
271 other AlphabetEncoders. This WILL raise an exception if more than
272 one gap character or stop symbol is present."""
273 base = _consensus_base_alphabet(alphabets)
274 gap = None
275 stop = None
276 new_letters = ""
277 for alpha in alphabets:
278
# Gap character: remember the first one seen; a different one later is an error.
279 if not hasattr(alpha, "gap_char"):
280 pass
281 elif gap is None:
282 gap = alpha.gap_char
283 elif gap == alpha.gap_char:
284 pass
285 else:
286 raise ValueError("More than one gap character present")
287
# Stop symbol: same rule as for the gap character.
288 if not hasattr(alpha, "stop_symbol"):
289 pass
290 elif stop is None:
291 stop = alpha.stop_symbol
292 elif stop == alpha.stop_symbol:
293 pass
294 else:
295 raise ValueError("More than one stop symbol present")
296
# Collect each extra letter once, skipping whatever is currently recorded as
# the gap character or stop symbol (those are re-applied by their own wrappers).
297 if hasattr(alpha, "new_letters"):
298 for letter in alpha.new_letters:
299 if letter not in new_letters \
300 and letter != gap and letter != stop:
301 new_letters += letter
302
# Rebuild the result from the inside out: base alphabet, then extra letters,
# then gap, then stop symbol. Each wrapper is added only when its value is truthy.
303 alpha = base
304 if new_letters:
305 alpha = AlphabetEncoder(alpha, new_letters)
306 if gap:
307 alpha = Gapped(alpha, gap_char=gap)
308 if stop:
309 alpha = HasStopCodon(alpha, stop_symbol=stop)
310 return alpha
311
# NOTE(review): the `def` line for this body is not present in this extract;
# the code reads a single input named `alphabets`.
313 """Returns True except for DNA+RNA or Nucleotide+Protein (PRIVATE).
314
315 This relies on the Alphabet subclassing hierarchy. It does not
316 check things like gap characters or stop symbols."""
# Flags recording which kinds of base alphabet have been seen so far.
317 dna, rna, nucl, protein = False, False, False, False
318 for alpha in alphabets:
# Compare base alphabets only, ignoring any encoder wrappers.
319 a = _get_base_alphabet(alpha)
# The DNA and RNA tests come before the general nucleotide test; both also
# set `nucl`. The first conflicting combination returns False immediately.
320 if isinstance(a, DNAAlphabet):
321 dna = True
322 nucl = True
323 if rna or protein : return False
324 elif isinstance(a, RNAAlphabet):
325 rna = True
326 nucl = True
327 if dna or protein : return False
328 elif isinstance(a, NucleotideAlphabet):
329 nucl = True
330 if protein : return False
331 elif isinstance(a, ProteinAlphabet):
332 protein = True
333 if nucl : return False
# Reached when no conflict was found, including for an empty input; alphabets
# matching none of the four tests never affect the result.
334 return True
335