Package Bio :: Package KEGG :: Package Compound
[hide private]
[frames | no frames]

Source Code for Package Bio.KEGG.Compound

  1  # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved. 
  2  # Copyright 2007 by Michiel de Hoon.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """ 
  8  This module provides code to work with the KEGG Ligand/Compound database. 
  9   
 10  Functions: 
 11  parse - Returns an iterator giving Record objects. 
 12   
 13  Classes: 
 14  Record - A representation of a KEGG Ligand/Compound. 
 15  """ 
 16   
 17  # other Biopython stuff 
 18  from Bio.KEGG import _write_kegg 
 19  from Bio.KEGG import _wrap_kegg 
 20   
 21   
 22  # Set up line wrapping rules (see Bio.KEGG._wrap_kegg) 
 23  name_wrap = [0, "", 
 24               (" ","$",1,1), 
 25               ("-","$",1,1)] 
 26  id_wrap = lambda indent : [indent, "", 
 27                             (" ","",1,0)] 
 28  struct_wrap = lambda indent : [indent, "", 
 29                                 ("  ","",1,1)] 
 30   
class Record:
    """Holds info from a KEGG Ligand/Compound record.

    Members:
    entry       The entry identifier.
    name        A list of the compound names.
    formula     The chemical formula for the compound
    mass        The molecular weight for the compound
    pathway     A list of 3-tuples: (database, id, pathway)
    enzyme      A list of 2-tuples: (enzyme id, role)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of link ids)

    """

    def __init__(self):
        """__init__(self)

        Create a new Record with every field empty.
        """
        self.entry = ""
        self.name = []
        self.formula = ""
        self.mass = ""
        self.pathway = []
        self.enzyme = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """__str__(self)

        Returns a string representation of this Record: each field
        formatted in turn, followed by the "///" record terminator.
        """
        return self._entry() + \
               self._name() + \
               self._formula() + \
               self._mass() + \
               self._pathway() + \
               self._enzyme() + \
               self._structures() + \
               self._dblinks() + \
               "///"

    def _entry(self):
        """Format the ENTRY field."""
        return _write_kegg("ENTRY",
                           [self.entry])

    def _name(self):
        """Format the NAME field, wrapping each name with name_wrap."""
        # A list comprehension (rather than map) keeps this a real list on
        # every Python version and matches the other field formatters.
        return _write_kegg("NAME",
                           [_wrap_kegg(line, wrap_rule=name_wrap)
                            for line in self.name])

    def _formula(self):
        """Format the FORMULA field."""
        return _write_kegg("FORMULA",
                           [self.formula])

    def _mass(self):
        """Format the MASS field."""
        return _write_kegg("MASS",
                           [self.mass])

    def _pathway(self):
        """Format the PATHWAY field, one wrapped line per pathway tuple."""
        lines = []
        for entry in self.pathway:
            lines.append(entry[0] + ": " + entry[1] + " " + entry[2])
        return _write_kegg("PATHWAY",
                           [_wrap_kegg(line, wrap_rule=id_wrap(16))
                            for line in lines])

    def _enzyme(self):
        """Format the ENZYME field as 16-character columns on one line."""
        cells = []
        for entry in self.enzyme:
            if entry[1]:
                cell = entry[0] + " (" + entry[1] + ")"
            else:
                cell = entry[0]
            # Each enzyme occupies a fixed 16-character column.
            cells.append(cell.ljust(16))
        return _write_kegg("ENZYME",
                           [_wrap_kegg("".join(cells), wrap_rule=id_wrap(0))])

    def _structures(self):
        """Format the STRUCTURES field, one wrapped line per database."""
        # NOTE(review): struct_wrap breaks on a two-space separator; confirm
        # that the single-space separators used here are what is intended.
        lines = []
        for entry in self.structures:
            lines.append(entry[0] + ": " + " ".join(entry[1]) + " ")
        return _write_kegg("STRUCTURES",
                           [_wrap_kegg(line, wrap_rule=struct_wrap(5))
                            for line in lines])

    def _dblinks(self):
        """Format the DBLINKS field, one wrapped line per database.

        __str__ calls this method, so it must exist for str(record) to work.
        """
        # NOTE(review): the continuation indent of 9 is assumed; confirm it
        # against the expected DBLINKS layout.
        lines = []
        for entry in self.dblinks:
            lines.append(entry[0] + ": " + " ".join(entry[1]))
        return _write_kegg("DBLINKS",
                           [_wrap_kegg(line, wrap_rule=id_wrap(9))
                            for line in lines])
119 120
def parse(handle):
    """Parse a KEGG Ligand/Compound file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/compound.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    C00023 Iron
    C00017 Protein
    C00099 beta-Alanine
    C00294 Inosine
    C00298 Trypsin
    C00348 Undecaprenyl phosphate
    C00349 2-Methyl-3-oxopropanoate
    C01386 NH2Mec
    >>> handle.close()

    Arguments:
    handle - An iterable of text lines (typically an open file).

    Each line is split into a 12-character header column and the data
    that follows it.  A line whose header column is entirely blank is a
    continuation of the most recent keyword.  A record is yielded each
    time a line starting with "///" is seen; lines after the last "///"
    are not yielded.
    """
    blank_header = " " * 12
    record = Record()
    # No keyword is active until the first header line has been read.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        header = line[:12]
        if header != blank_header:
            # Compare on the bare keyword so the match does not depend on
            # the exact amount of padding in the header column.
            keyword = header.rstrip()
        data = line[12:].strip()
        if keyword == "ENTRY":
            words = data.split()
            if words:
                record.entry = words[0]
        elif keyword == "NAME":
            # Names are separated by a trailing ";" that is not part of
            # the name itself.
            record.name.append(data.strip(";"))
        elif keyword == "ENZYME":
            # Enzymes are laid out in fixed 16-character columns.
            for start in range(0, len(data), 16):
                column = data[start:start + 16]
                if "(" in column:
                    entry = column.split()
                    # Drop the surrounding parentheses from the role.
                    enzyme = (entry[0], entry[1][1:-1])
                else:
                    enzyme = (column.strip(), "")
                record.enzyme.append(enzyme)
        elif keyword == "PATHWAY":
            if data[:5] == "PATH:":
                path, map_id, name = data.split(None, 2)
                # path[:-1] removes the trailing ":" from "PATH:".
                record.pathway.append((path[:-1], map_id, name))
            else:
                # Continuation line: extend the previous pathway's name.
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "FORMULA":
            record.formula = data
        elif keyword == "MASS":
            record.mass = data
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only so that link ids which
                # themselves contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation line: more ids for the previous database.
                key, values = record.dblinks[-1]
                values.extend(data.split())
                record.dblinks[-1] = (key, values)
193
def _test():
    """Run the Bio.KEGG.Compound module's doctests.

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    If the directory cannot be found, nothing is run.
    """
    import doctest
    import os
    tests_dir = os.path.join("..", "..", "..", "Tests")
    if os.path.isdir(tests_dir):
        # Single-argument print(...) behaves the same as a statement and as
        # a function call, so this works on both Python 2 and Python 3.
        print("Runing doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(tests_dir)
        try:
            doctest.testmod()
        finally:
            # Always return to the starting directory, even if the
            # doctest run raises.
            os.chdir(cur_dir)
        print("Done")


# Run the doctests when this module is executed as a script.
if __name__ == "__main__":
    _test()