Package Bio :: Package PDB :: Module Polypeptide
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.Polypeptide

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from types import StringType 
  7   
  8  from Bio.Alphabet import generic_protein 
  9  from Bio.Seq import Seq 
 10  from Bio.SCOP.Raf import to_one_letter_code 
 11  from Bio.PDB.PDBExceptions import PDBException 
 12  from Bio.PDB.Residue import Residue, DisorderedResidue 
 13  from Vector import calc_dihedral, calc_angle 
 14   
 15  __doc__=""" 
 16  Polypeptide related classes (construction and representation). 
 17   
 18  Example: 
 19   
 20      >>> ppb=PPBuilder() 
 21      >>> for pp in ppb.build_peptides(structure): 
 22      >>>     print pp.get_sequence() 
 23  """ 
 24   
 25  standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",  
 26                     "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", 
 27                     "TRP", "TYR"] 
 28   
 29   
 30  aa1="ACDEFGHIKLMNPQRSTVWY" 
 31  aa3=standard_aa_names 
 32   
 33  d1_to_index={} 
 34  dindex_to_1={} 
 35  d3_to_index={} 
 36  dindex_to_3={} 
 37   
 38  # Create some lookup tables 
 39  for i in range(0, 20): 
 40      n1=aa1[i] 
 41      n3=aa3[i] 
 42      d1_to_index[n1]=i 
 43      dindex_to_1[i]=n1 
 44      d3_to_index[n3]=i 
 45      dindex_to_3[i]=n3 
 46   
47 -def index_to_one(index):
48 """ 49 Index to corresponding one letter amino acid name. 50 For example: 0 to A. 51 """ 52 return dindex_to_1[index]
53
54 -def one_to_index(s):
55 """ 56 One letter code to index. 57 For example: A to 0. 58 """ 59 return d1_to_index[s]
60
61 -def index_to_three(i):
62 """ 63 Index to corresponding three letter amino acid name. 64 For example: 0 to ALA. 65 """ 66 return dindex_to_3[i]
67
68 -def three_to_index(s):
69 """ 70 Three letter code to index. 71 For example: ALA to 0. 72 """ 73 return d3_to_index[s]
74
75 -def three_to_one(s):
76 """ 77 Three letter code to one letter code. 78 For example: ALA to A. 79 """ 80 i=d3_to_index[s] 81 return dindex_to_1[i]
82
83 -def one_to_three(s):
84 """ 85 One letter code to three letter code. 86 For example: A to ALA. 87 """ 88 i=d1_to_index[s] 89 return dindex_to_3[i]
90
91 -def is_aa(residue, standard=0):
92 """ 93 Return 1 if residue object/string is an amino acid. 94 95 @param residue: a L{Residue} object OR a three letter amino acid code 96 @type residue: L{Residue} or string 97 98 @param standard: flag to check for the 20 AA (default false) 99 @type standard: boolean 100 """ 101 if not type(residue)==StringType: 102 residue=residue.get_resname() 103 residue=residue.upper() 104 if standard: 105 return d3_to_index.has_key(residue) 106 else: 107 return to_one_letter_code.has_key(residue)
108 109
110 -class Polypeptide(list):
111 """ 112 A polypeptide is simply a list of L{Residue} objects. 113 """
114 - def get_ca_list(self):
115 """ 116 @return: the list of C-alpha atoms 117 @rtype: [L{Atom}, L{Atom}, ...] 118 """ 119 ca_list=[] 120 for res in self: 121 ca=res["CA"] 122 ca_list.append(ca) 123 return ca_list
124
125 - def get_phi_psi_list(self):
126 """ 127 Return the list of phi/psi dihedral angles 128 """ 129 ppl=[] 130 lng=len(self) 131 for i in range(0, lng): 132 res=self[i] 133 try: 134 n=res['N'].get_vector() 135 ca=res['CA'].get_vector() 136 c=res['C'].get_vector() 137 except: 138 # Some atoms are missing 139 # Phi/Psi cannot be calculated for this residue 140 ppl.append((None, None)) 141 res.xtra["PHI"]=None 142 res.xtra["PSI"]=None 143 continue 144 # Phi 145 if i>0: 146 rp=self[i-1] 147 try: 148 cp=rp['C'].get_vector() 149 phi=calc_dihedral(cp, n, ca, c) 150 except: 151 phi=None 152 else: 153 # No phi for residue 0! 154 phi=None 155 # Psi 156 if i<(lng-1): 157 rn=self[i+1] 158 try: 159 nn=rn['N'].get_vector() 160 psi=calc_dihedral(n, ca, c, nn) 161 except: 162 psi=None 163 else: 164 # No psi for last residue! 165 psi=None 166 ppl.append((phi, psi)) 167 # Add Phi/Psi to xtra dict of residue 168 res.xtra["PHI"]=phi 169 res.xtra["PSI"]=psi 170 return ppl
171
172 - def get_tau_list(self):
173 """ 174 Return list of tau torsions angles for all 4 consecutive 175 Calpha atoms. 176 """ 177 ca_list=self.get_ca_list() 178 tau_list=[] 179 for i in range(0, len(ca_list)-3): 180 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]] 181 vector_list=map(lambda a: a.get_vector(), atom_list) 182 v1, v2, v3, v4=vector_list 183 tau=calc_dihedral(v1, v2, v3, v4) 184 tau_list.append(tau) 185 # Put tau in xtra dict of residue 186 res=ca_list[i+2].get_parent() 187 res.xtra["TAU"]=tau 188 return tau_list
189
190 - def get_theta_list(self):
191 """ 192 Return list of theta angles for all 3 consecutive 193 Calpha atoms. 194 """ 195 theta_list=[] 196 ca_list=self.get_ca_list() 197 for i in range(0, len(ca_list)-2): 198 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]] 199 vector_list=map(lambda a: a.get_vector(), atom_list) 200 v1, v2, v3=vector_list 201 theta=calc_angle(v1, v2, v3) 202 theta_list.append(theta) 203 # Put tau in xtra dict of residue 204 res=ca_list[i+1].get_parent() 205 res.xtra["THETA"]=theta 206 return theta_list
207
208 - def get_sequence(self):
209 """ 210 Return the AA sequence. 211 212 @return: polypeptide sequence 213 @rtype: L{Seq} 214 """ 215 s="" 216 for res in self: 217 resname=res.get_resname() 218 if to_one_letter_code.has_key(resname): 219 resname=to_one_letter_code[resname] 220 else: 221 resname='X' 222 s=s+resname 223 seq=Seq(s, generic_protein) 224 return seq
225
226 - def __repr__(self):
227 """ 228 Return <Polypeptide start=START end=END>, where START 229 and END are sequence identifiers of the outer residues. 230 """ 231 start=self[0].get_id()[1] 232 end=self[-1].get_id()[1] 233 s="<Polypeptide start=%s end=%s>" % (start, end) 234 return s
235
236 -class _PPBuilder:
237 """ 238 Base class to extract polypeptides. 239 It checks if two consecutive residues in a chain 240 are connected. The connectivity test is implemented by a 241 subclass. 242 """
243 - def __init__(self, radius):
244 """ 245 @param radius: distance 246 @type radius: float 247 """ 248 self.radius=radius
249
250 - def _accept(self, residue):
251 "Check if the residue is an amino acid." 252 if is_aa(residue): 253 return 1 254 else: 255 if "CA" in residue.child_dict: 256 #It has an alpha carbon... 257 #We probably need to update the hard coded list of 258 #non-standard residues, see function is_aa for details. 259 import warnings 260 warnings.warn("Assuming residue %s is an unknown modified " 261 "amino acid" % residue.get_resname()) 262 return 1 263 # not a standard AA so skip 264 return 0
265
266 - def build_peptides(self, entity, aa_only=1):
267 """ 268 Build and return a list of Polypeptide objects. 269 270 @param entity: polypeptides are searched for in this object 271 @type entity: L{Structure}, L{Model} or L{Chain} 272 273 @param aa_only: if 1, the residue needs to be a standard AA 274 @type aa_only: int 275 """ 276 is_connected=self._is_connected 277 accept=self._accept 278 level=entity.get_level() 279 # Decide wich entity we are dealing with 280 if level=="S": 281 model=entity[0] 282 chain_list=model.get_list() 283 elif level=="M": 284 chain_list=entity.get_list() 285 elif level=="C": 286 chain_list=[entity] 287 else: 288 raise PDBException("Entity should be Structure, Model or Chain.") 289 pp_list=[] 290 for chain in chain_list: 291 chain_it=iter(chain) 292 prev=chain_it.next() 293 pp=None 294 for next in chain_it: 295 if aa_only and not accept(prev): 296 prev=next 297 continue 298 if is_connected(prev, next): 299 if pp is None: 300 pp=Polypeptide() 301 pp.append(prev) 302 pp_list.append(pp) 303 pp.append(next) 304 else: 305 pp=None 306 prev=next 307 return pp_list
308 309
310 -class CaPPBuilder(_PPBuilder):
311 """ 312 Use CA--CA distance to find polypeptides. 313 """
314 - def __init__(self, radius=4.3):
315 _PPBuilder.__init__(self, radius)
316
317 - def _is_connected(self, prev, next):
318 for r in [prev, next]: 319 if not r.has_id("CA"): 320 return 0 321 n=next["CA"] 322 p=prev["CA"] 323 # Unpack disordered 324 if n.is_disordered(): 325 nlist=n.disordered_get_list() 326 else: 327 nlist=[n] 328 if p.is_disordered(): 329 plist=p.disordered_get_list() 330 else: 331 plist=[p] 332 for nn in nlist: 333 for pp in plist: 334 if (nn-pp)<self.radius: 335 return 1 336 return 0
337 338
339 -class PPBuilder(_PPBuilder):
340 """ 341 Use C--N distance to find polypeptides. 342 """
343 - def __init__(self, radius=1.8):
344 _PPBuilder.__init__(self, radius)
345
346 - def _is_connected(self, prev, next):
347 if not prev.has_id("C"): 348 return 0 349 if not next.has_id("N"): 350 return 0 351 test_dist=self._test_dist 352 c=prev["C"] 353 n=next["N"] 354 # Test all disordered atom positions! 355 if c.is_disordered(): 356 clist=c.disordered_get_list() 357 else: 358 clist=[c] 359 if n.is_disordered(): 360 nlist=n.disordered_get_list() 361 else: 362 nlist=[n] 363 for nn in nlist: 364 for cc in clist: 365 # To form a peptide bond, N and C must be 366 # within radius and have the same altloc 367 # identifier or one altloc blank 368 n_altloc=nn.get_altloc() 369 c_altloc=cc.get_altloc() 370 if n_altloc==c_altloc or n_altloc==" " or c_altloc==" ": 371 if test_dist(nn, cc): 372 # Select the disordered atoms that 373 # are indeed bonded 374 if c.is_disordered(): 375 c.disordered_select(c_altloc) 376 if n.is_disordered(): 377 n.disordered_select(n_altloc) 378 return 1 379 return 0
380
381 - def _test_dist(self, c, n):
382 "Return 1 if distance between atoms<radius" 383 if (c-n)<self.radius: 384 return 1 385 else: 386 return 0
387 388 389 if __name__=="__main__": 390 391 import sys 392 393 from Bio.PDB.PDBParser import PDBParser 394 395 p=PDBParser(PERMISSIVE=1) 396 397 s=p.get_structure("scr", sys.argv[1]) 398 399 ppb=PPBuilder() 400 401 print "C-N" 402 for pp in ppb.build_peptides(s): 403 print pp.get_sequence() 404 for pp in ppb.build_peptides(s[0]): 405 print pp.get_sequence() 406 for pp in ppb.build_peptides(s[0]["A"]): 407 print pp.get_sequence() 408 409 for pp in ppb.build_peptides(s): 410 for phi, psi in pp.get_phi_psi_list(): 411 print phi, psi 412 413 ppb=CaPPBuilder() 414 415 print "CA-CA" 416 for pp in ppb.build_peptides(s): 417 print pp.get_sequence() 418 for pp in ppb.build_peptides(s[0]): 419 print pp.get_sequence() 420 for pp in ppb.build_peptides(s[0]["A"]): 421 print pp.get_sequence() 422