1
2
3
4
5
6 import os
7 import numpy
8
9 from Bio.SVDSuperimposer import SVDSuperimposer
10 from Bio.PDB import *
11 from Bio.PDB.PDBExceptions import PDBException
12
13 __doc__="""
14 Classify protein backbone structure according to Kolodny et al's fragment
15 libraries. It can be regarded as a form of objective secondary structure
16 classification. Only fragments of length 5 or 7 are supported (ie. there is a
17 'central' residue).
18
19 Full reference:
20
21 Kolodny R, Koehl P, Guibas L, Levitt M.
22 Small libraries of protein fragments model native protein structures accurately.
23 J Mol Biol. 2002 323(2):297-307.
24
25 The definition files of the fragments can be obtained from:
26
27 U{http://csb.stanford.edu/~rachel/fragments/}
28
29 You need these files to use this module.
30
31 The following example uses the library with 10 fragments of length 5.
32 The library files can be found in directory 'fragment_data'.
33
34 >>> model=structure[0]
35 >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data")
36 >>> fm.map(model)
37 >>> fragment=fm[residue]
38 """
39
40
41
42
43
44 _FRAGMENT_FILE="lib_%s_z_%s.txt"
45
46
48 """
49 Read a fragment spec file (available from
50 U{http://csb.stanford.edu/rachel/fragments/}
51 and return a list of Fragment objects.
52
53 @param size: number of fragments in the library
54 @type size: int
55
56 @param length: length of the fragments
57 @type length: int
58
59 @param dir: directory where the fragment spec files can be found
60 @type dir: string
61 """
62 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
63 fp=open(filename, "r")
64 flist=[]
65
66 fid=0
67 for l in fp.readlines():
68
69 if l[0]=="*" or l[0]=="\n":
70 continue
71 sl=l.split()
72 if sl[1]=="------":
73
74 f=Fragment(length, fid)
75 flist.append(f)
76
77 fid+=1
78 continue
79
80 coord=numpy.array(map(float, sl[0:3]))
81
82 f.add_residue("XXX", coord)
83 fp.close()
84 return flist
85
86
88 """
89 Represent a polypeptide C-alpha fragment.
90 """
92 """
93 @param length: length of the fragment
94 @type length: int
95
96 @param fid: id for the fragment
97 @type fid: int
98 """
99
100 self.length=length
101
102 self.counter=0
103 self.resname_list=[]
104
105 self.coords_ca=numpy.zeros((length, 3), "d")
106 self.fid=fid
107
109 """
110 @return: the residue names
111 @rtype: [string, string,...]
112 """
113 return self.resname_list
114
116 """
117 @return: id for the fragment
118 @rtype: int
119 """
120 return self.fid
121
123 """
124 @return: the CA coords in the fragment
125 @rtype: Numeric (Nx3) array
126 """
127 return self.coords_ca
128
130 """
131 @param resname: residue name (eg. GLY).
132 @type resname: string
133
134 @param ca_coord: the c-alpha coorinates of the residues
135 @type ca_coord: Numeric array with length 3
136 """
137 if self.counter>=self.length:
138 raise PDBException("Fragment boundary exceeded.")
139 self.resname_list.append(resname)
140 self.coords_ca[self.counter]=ca_coord
141 self.counter=self.counter+1
142
144 """
145 @return: length of fragment
146 @rtype: int
147 """
148 return self.length
149
151 """
152 Return rmsd between two fragments.
153
154 Example:
155 >>> rmsd=fragment1-fragment2
156
157 @return: rmsd between fragments
158 @rtype: float
159 """
160 sup=SVDSuperimposer()
161 sup.set(self.coords_ca, other.coords_ca)
162 sup.run()
163 return sup.get_rms()
164
166 """
167 Returns <Fragment length=L id=ID> where L=length of fragment
168 and ID the identifier (rank in the library).
169 """
170 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
171
172
174 """
175 Dice up a peptide in fragments of length "length".
176
177 @param pp: a list of residues (part of one peptide)
178 @type pp: [L{Residue}, L{Residue}, ...]
179
180 @param length: fragment length
181 @type length: int
182 """
183 frag_list=[]
184 for i in range(0, len(pp)-length+1):
185 f=Fragment(length, -1)
186 for j in range(0, length):
187 residue=pp[i+j]
188 resname=residue.get_resname()
189 if residue.has_id("CA"):
190 ca=residue["CA"]
191 else:
192 raise PDBException("CHAINBREAK")
193 if ca.is_disordered():
194 raise PDBException("CHAINBREAK")
195 ca_coord=ca.get_coord()
196 f.add_residue(resname, ca_coord)
197 frag_list.append(f)
198 return frag_list
199
200
202 """
203 Map all frgaments in flist to the closest
204 (in RMSD) fragment in reflist.
205
206 Returns a list of reflist indices.
207
208 @param flist: list of protein fragments
209 @type flist: [L{Fragment}, L{Fragment}, ...]
210
211 @param reflist: list of reference (ie. library) fragments
212 @type reflist: [L{Fragment}, L{Fragment}, ...]
213 """
214 mapped=[]
215 for f in flist:
216 rank=[]
217 for i in range(0, len(reflist)):
218 rf=reflist[i]
219 rms=f-rf
220 rank.append((rms, rf))
221 rank.sort()
222 fragment=rank[0][1]
223 mapped.append(fragment)
224 return mapped
225
226
228 """
229 Map polypeptides in a model to lists of representative fragments.
230 """
231 - def __init__(self, model, lsize=20, flength=5, fdir="."):
232 """
233 @param model: the model that will be mapped
234 @type model: L{Model}
235
236 @param lsize: number of fragments in the library
237 @type lsize: int
238
239 @param flength: length of fragments in the library
240 @type flength: int
241
242 @param fdir: directory where the definition files are
243 found (default=".")
244 @type fdir: string
245 """
246 if flength==5:
247 self.edge=2
248 elif flength==7:
249 self.edge=3
250 else:
251 raise PDBException("Fragment length should be 5 or 7.")
252 self.flength=flength
253 self.lsize=lsize
254 self.reflist=_read_fragments(lsize, flength, fdir)
255 self.model=model
256 self.fd=self._map(self.model)
257
258 - def _map(self, model):
259 """
260 @param model: the model that will be mapped
261 @type model: L{Model}
262 """
263 ppb=PPBuilder()
264 ppl=ppb.build_peptides(model)
265 fd={}
266 for pp in ppl:
267 try:
268
269 flist=_make_fragment_list(pp, self.flength)
270
271 mflist=_map_fragment_list(flist, self.reflist)
272 for i in range(0, len(pp)):
273 res=pp[i]
274 if i<self.edge:
275
276 continue
277 elif i>=(len(pp)-self.edge):
278
279 continue
280 else:
281
282 index=i-self.edge
283 assert(index>=0)
284 fd[res]=mflist[index]
285 except "CHAINBREAK":
286
287 pass
288 return fd
289
291 """
292 @type res: L{Residue}
293 """
294 return self.fd.has_key(res)
295
297 """
298 @type res: L{Residue}
299
300 @return: fragment classification
301 @rtype: L{Fragment}
302 """
303 return self.fd[res]
304
305
306 if __name__=="__main__":
307
308 import sys
309
310 p=PDBParser()
311 s=p.get_structure("X", sys.argv[1])
312
313 m=s[0]
314 fm=FragmentMapper(m, 10, 5, "levitt_data")
315
316
317 for r in Selection.unfold_entities(m, "R"):
318
319 print r,
320 if fm.has_key(r):
321 print fm[r]
322 else:
323 print
324