1
2
3
4
5
6
7 """
8 This module provides code to work with the KEGG Enzyme database.
9
10 Functions:
11 parse - Returns an iterator giving Record objects.
12
13 Classes:
14 Record -- Holds the information from a KEGG Enzyme record.
15 """
16
17 from Bio.KEGG import _write_kegg
18 from Bio.KEGG import _wrap_kegg
19
20
21
# Wrap rules passed to Bio.KEGG._wrap_kegg through its ``wrap_rule`` argument.
# NOTE(review): each rule looks like
#     [indent, connector, (split_string, connector, flag, flag), ...]
# but the meaning of the individual fields is defined by _wrap_kegg, which is
# not part of this file -- confirm there before changing any value.
rxn_wrap = [
    0,
    "",
    (" + ", "", 1, 1),
    (" = ", "", 1, 1),
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]
name_wrap = [
    0,
    "",
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]


def id_wrap(indent):
    """Return a fresh wrap rule with the given indent.

    Used in this module when formatting the PATHWAY and GENES fields.
    """
    return [indent, "", (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return a fresh wrap rule with the given indent.

    Used in this module when formatting the STRUCTURES field.
    """
    return [indent, "", (" ", "", 1, 1)]
34
class Record:
    """Holds info from a KEGG Enzyme record.

    Members:
    entry       The EC number (without the 'EC ').
    name        A list of the enzyme names.
    classname   A list of the classification terms.
    sysname     A list of the systematic name strings.
    reaction    A list of the reaction description strings.
    substrate   A list of the substrates.
    product     A list of the products.
    inhibitor   A list of the inhibitors.
    cofactor    A list of the cofactors.
    effector    A list of the effectors.
    comment     A list of the comment strings.
    pathway     A list of 3-tuples: (database, id, pathway)
    genes       A list of 2-tuples: (organism, list of gene ids)
    disease     A list of 3-tuples: (database, id, disease)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of db ids)
    """

    def __init__(self):
        """Create a new, empty Record.

        ``entry`` starts as an empty string; every other member starts as
        its own empty list.
        """
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    # NOTE(review): the "def" lines of the formatter methods below were not
    # present in the text this class was rebuilt from; their names are
    # reconstructed from the KEGG keyword each one emits -- confirm them
    # against whatever calls these methods before relying on them.

    def _entry(self):
        """Return the ENTRY field, with the 'EC ' prefix put back on the number."""
        return _write_kegg("ENTRY", ["EC " + self.entry])

    def _pathway(self):
        """Return the PATHWAY field, one wrapped line per pathway tuple."""
        lines = [
            entry[0] + ": " + entry[1] + " " + entry[2]
            for entry in self.pathway
        ]
        return _write_kegg(
            "PATHWAY",
            [_wrap_kegg(text, wrap_rule=id_wrap(16)) for text in lines],
        )

    def _genes(self):
        """Return the GENES field, one wrapped line per organism."""
        lines = [
            entry[0] + ": " + " ".join(entry[1])
            for entry in self.genes
        ]
        return _write_kegg(
            "GENES",
            [_wrap_kegg(text, wrap_rule=id_wrap(5)) for text in lines],
        )

    def _structures(self):
        """Return the STRUCTURES field, one wrapped line per database."""
        lines = [
            entry[0] + ": " + " ".join(entry[1]) + " "
            for entry in self.structures
        ]
        return _write_kegg(
            "STRUCTURES",
            [_wrap_kegg(text, wrap_rule=struct_wrap(5)) for text in lines],
        )

    def _dblinks(self):
        """Return the DBLINKS field, one unwrapped line per database."""
        lines = [
            entry[0] + ": " + " ".join(entry[1])
            for entry in self.dblinks
        ]
        return _write_kegg("DBLINKS", lines)
179
180
181
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase

    Arguments:
    handle -- an iterable of text lines (for example an open file).

    A Record is yielded each time a line starting with "///" is read, so
    data following the last "///" line is not returned.  The first 12
    columns of a line hold the field keyword; a line whose first 12 columns
    are all spaces continues the most recent field of the current record.
    A continuation line for a tuple-valued field (DBLINKS, DISEASE, GENES,
    PATHWAY, STRUCTURES) that has no preceding entry raises IndexError.
    """
    keyword_width = 12
    blank_keyword = " " * keyword_width
    # Fields stored line by line exactly as read.
    verbatim_fields = {
        "CLASS": "classname",
        "COFACTOR": "cofactor",
        "COMMENT": "comment",
    }
    # Fields stored line by line with leading/trailing ";" removed.
    semicolon_fields = {
        "EFFECTOR": "effector",
        "INHIBITOR": "inhibitor",
        "NAME": "name",
        "PRODUCT": "product",
        "REACTION": "reaction",
        "SUBSTRATE": "substrate",
        "SYSNAME": "sysname",
    }

    record = Record()
    field = None  # no field seen yet: stray continuation lines are ignored
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            field = None  # a field never carries over into the next record
            continue

        header = line[:keyword_width]
        if header != blank_keyword:
            # A keyword only counts when it is space-padded to the full
            # width; this is equivalent to comparing against the padded
            # literal (e.g. "ENTRY" followed by seven spaces).
            if len(header) == keyword_width:
                field = header.rstrip(" ")
            else:
                field = None
        data = line[keyword_width:].strip()

        if field == "ENTRY":
            # The second word is kept as the entry, e.g. "EC 1.1.1.1 ...".
            record.entry = data.split()[1]
        elif field in verbatim_fields:
            getattr(record, verbatim_fields[field]).append(data)
        elif field in semicolon_fields:
            getattr(record, semicolon_fields[field]).append(data.strip(";"))
        elif field == "DBLINKS":
            if ":" in data:
                # Split on the first colon only, so identifiers that
                # themselves contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                key, values = record.dblinks[-1]
                values.extend(data.split())
                record.dblinks[-1] = (key, values)
        elif field == "DISEASE":
            if ":" in data:
                # First colon only: the disease name may contain ":".
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif field == "GENES":
            if data[3:5] == ": ":
                key, values = data.split(":", 1)
                # Drop any "(...)" suffix attached to a gene identifier.
                genes = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, genes))
            else:
                key, genes = record.genes[-1]
                genes.extend(value.split("(")[0] for value in data.split())
                record.genes[-1] = (key, genes)
        elif field == "PATHWAY":
            if data[:5] == "PATH:":
                path, map_id, name = data.split(None, 2)
                # path is "PATH:"; the trailing colon is dropped.
                record.pathway.append((path[:-1], map_id, name))
            else:
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif field == "STRUCTURES":
            if data[:4] == "PDB:":
                record.structures.append((data[:3], data[4:].split()))
            else:
                database, accessions = record.structures[-1]
                accessions.extend(data.split())
                record.structures[-1] = (database, accessions)
295
def _test():
    """Run the Bio.KEGG.Enzyme module's doctests.

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    Nothing is run (and nothing is printed) when that directory is not found
    relative to the current working directory.
    """
    import doctest
    import os

    tests_dir = os.path.join("..", "..", "..", "Tests")
    if not os.path.isdir(tests_dir):
        return
    # Single-argument print(...) behaves the same as a statement on
    # Python 2 and as a function call on Python 3.
    print("Running doctests...")
    cur_dir = os.path.abspath(os.curdir)
    os.chdir(tests_dir)
    try:
        doctest.testmod()
    finally:
        # Restore the caller's working directory even if the run fails.
        os.chdir(cur_dir)
    print("Done")


if __name__ == "__main__":
    _test()
315