Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  import os 
  7  import numpy 
  8   
  9  from Bio.SVDSuperimposer import SVDSuperimposer 
 10  from Bio.PDB import * 
 11  from Bio.PDB.PDBExceptions import PDBException 
 12   
 13  __doc__=""" 
 14  Classify protein backbone structure according to Kolodny et al's fragment 
 15  libraries. It can be regarded as a form of objective secondary structure 
 16  classification. Only fragments of length 5 or 7 are supported (ie. there is a 
 17  'central' residue). 
 18   
 19  Full reference: 
 20   
 21  Kolodny R, Koehl P, Guibas L, Levitt M. 
 22  Small libraries of protein fragments model native protein structures accurately. 
 23  J Mol Biol. 2002 323(2):297-307. 
 24   
 25  The definition files of the fragments can be obtained from: 
 26   
 27  U{http://csb.stanford.edu/~rachel/fragments/} 
 28   
 29  You need these files to use this module. 
 30   
 31  The following example uses the library with 10 fragments of length 5. 
 32  The library files can be found in directory 'fragment_data'. 
 33   
 34      >>> model=structure[0] 
 35      >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data") 
 36      >>> fm.map(model) 
 37      >>> fragment=fm[residue] 
 38  """ 
 39   
 40   
 41  # fragment file (lib_SIZE_z_LENGTH.txt) 
 42  # SIZE=number of fragments 
 43  # LENGTH=length of fragment (4,5,6,7) 
 44  _FRAGMENT_FILE="lib_%s_z_%s.txt" 
 45   
 46   
47 -def _read_fragments(size, length, dir="."):
48 """ 49 Read a fragment spec file (available from 50 U{http://csb.stanford.edu/rachel/fragments/} 51 and return a list of Fragment objects. 52 53 @param size: number of fragments in the library 54 @type size: int 55 56 @param length: length of the fragments 57 @type length: int 58 59 @param dir: directory where the fragment spec files can be found 60 @type dir: string 61 """ 62 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) 63 fp=open(filename, "r") 64 flist=[] 65 # ID of fragment=rank in spec file 66 fid=0 67 for l in fp.readlines(): 68 # skip comment and blank lines 69 if l[0]=="*" or l[0]=="\n": 70 continue 71 sl=l.split() 72 if sl[1]=="------": 73 # Start of fragment definition 74 f=Fragment(length, fid) 75 flist.append(f) 76 # increase fragment id (rank) 77 fid+=1 78 continue 79 # Add CA coord to Fragment 80 coord=numpy.array(map(float, sl[0:3])) 81 # XXX= dummy residue name 82 f.add_residue("XXX", coord) 83 fp.close() 84 return flist
85 86
87 -class Fragment:
88 """ 89 Represent a polypeptide C-alpha fragment. 90 """
91 - def __init__(self, length, fid):
92 """ 93 @param length: length of the fragment 94 @type length: int 95 96 @param fid: id for the fragment 97 @type fid: int 98 """ 99 # nr of residues in fragment 100 self.length=length 101 # nr of residues added 102 self.counter=0 103 self.resname_list=[] 104 # CA coordinate matrix 105 self.coords_ca=numpy.zeros((length, 3), "d") 106 self.fid=fid
107
108 - def get_resname_list(self):
109 """ 110 @return: the residue names 111 @rtype: [string, string,...] 112 """ 113 return self.resname_list
114
115 - def get_id(self):
116 """ 117 @return: id for the fragment 118 @rtype: int 119 """ 120 return self.fid
121
122 - def get_coords(self):
123 """ 124 @return: the CA coords in the fragment 125 @rtype: Numeric (Nx3) array 126 """ 127 return self.coords_ca
128
129 - def add_residue(self, resname, ca_coord):
130 """ 131 @param resname: residue name (eg. GLY). 132 @type resname: string 133 134 @param ca_coord: the c-alpha coorinates of the residues 135 @type ca_coord: Numeric array with length 3 136 """ 137 if self.counter>=self.length: 138 raise PDBException("Fragment boundary exceeded.") 139 self.resname_list.append(resname) 140 self.coords_ca[self.counter]=ca_coord 141 self.counter=self.counter+1
142
143 - def __len__(self):
144 """ 145 @return: length of fragment 146 @rtype: int 147 """ 148 return self.length
149
150 - def __sub__(self, other):
151 """ 152 Return rmsd between two fragments. 153 154 Example: 155 >>> rmsd=fragment1-fragment2 156 157 @return: rmsd between fragments 158 @rtype: float 159 """ 160 sup=SVDSuperimposer() 161 sup.set(self.coords_ca, other.coords_ca) 162 sup.run() 163 return sup.get_rms()
164
165 - def __repr__(self):
166 """ 167 Returns <Fragment length=L id=ID> where L=length of fragment 168 and ID the identifier (rank in the library). 169 """ 170 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
171 172
173 -def _make_fragment_list(pp, length):
174 """ 175 Dice up a peptide in fragments of length "length". 176 177 @param pp: a list of residues (part of one peptide) 178 @type pp: [L{Residue}, L{Residue}, ...] 179 180 @param length: fragment length 181 @type length: int 182 """ 183 frag_list=[] 184 for i in range(0, len(pp)-length+1): 185 f=Fragment(length, -1) 186 for j in range(0, length): 187 residue=pp[i+j] 188 resname=residue.get_resname() 189 if residue.has_id("CA"): 190 ca=residue["CA"] 191 else: 192 raise PDBException("CHAINBREAK") 193 if ca.is_disordered(): 194 raise PDBException("CHAINBREAK") 195 ca_coord=ca.get_coord() 196 f.add_residue(resname, ca_coord) 197 frag_list.append(f) 198 return frag_list
199 200
201 -def _map_fragment_list(flist, reflist):
202 """ 203 Map all frgaments in flist to the closest 204 (in RMSD) fragment in reflist. 205 206 Returns a list of reflist indices. 207 208 @param flist: list of protein fragments 209 @type flist: [L{Fragment}, L{Fragment}, ...] 210 211 @param reflist: list of reference (ie. library) fragments 212 @type reflist: [L{Fragment}, L{Fragment}, ...] 213 """ 214 mapped=[] 215 for f in flist: 216 rank=[] 217 for i in range(0, len(reflist)): 218 rf=reflist[i] 219 rms=f-rf 220 rank.append((rms, rf)) 221 rank.sort() 222 fragment=rank[0][1] 223 mapped.append(fragment) 224 return mapped
225 226
227 -class FragmentMapper:
228 """ 229 Map polypeptides in a model to lists of representative fragments. 230 """
231 - def __init__(self, model, lsize=20, flength=5, fdir="."):
232 """ 233 @param model: the model that will be mapped 234 @type model: L{Model} 235 236 @param lsize: number of fragments in the library 237 @type lsize: int 238 239 @param flength: length of fragments in the library 240 @type flength: int 241 242 @param fdir: directory where the definition files are 243 found (default=".") 244 @type fdir: string 245 """ 246 if flength==5: 247 self.edge=2 248 elif flength==7: 249 self.edge=3 250 else: 251 raise PDBException("Fragment length should be 5 or 7.") 252 self.flength=flength 253 self.lsize=lsize 254 self.reflist=_read_fragments(lsize, flength, fdir) 255 self.model=model 256 self.fd=self._map(self.model)
257
258 - def _map(self, model):
259 """ 260 @param model: the model that will be mapped 261 @type model: L{Model} 262 """ 263 ppb=PPBuilder() 264 ppl=ppb.build_peptides(model) 265 fd={} 266 for pp in ppl: 267 try: 268 # make fragments 269 flist=_make_fragment_list(pp, self.flength) 270 # classify fragments 271 mflist=_map_fragment_list(flist, self.reflist) 272 for i in range(0, len(pp)): 273 res=pp[i] 274 if i<self.edge: 275 # start residues 276 continue 277 elif i>=(len(pp)-self.edge): 278 # end residues 279 continue 280 else: 281 # fragment 282 index=i-self.edge 283 assert(index>=0) 284 fd[res]=mflist[index] 285 except "CHAINBREAK": 286 # Funny polypeptide - skip 287 pass 288 return fd
289
290 - def has_key(self, res):
291 """ 292 @type res: L{Residue} 293 """ 294 return self.fd.has_key(res)
295
296 - def __getitem__(self, res):
297 """ 298 @type res: L{Residue} 299 300 @return: fragment classification 301 @rtype: L{Fragment} 302 """ 303 return self.fd[res]
304 305 306 if __name__=="__main__": 307 308 import sys 309 310 p=PDBParser() 311 s=p.get_structure("X", sys.argv[1]) 312 313 m=s[0] 314 fm=FragmentMapper(m, 10, 5, "levitt_data") 315 316 317 for r in Selection.unfold_entities(m, "R"): 318 319 print r, 320 if fm.has_key(r): 321 print fm[r] 322 else: 323 print 324