Ticket #4177: ligand_utils.py

File ligand_utils.py, 8.2 KB (added by Tristan Croll, 5 years ago)

Added by email2trac

Line 
1def recluster_ligands(session, model):
2 from chimerax.atomic import Residue, Residues, concatenate
3 m = model
4 polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
5 non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
6 # Ligands bound to polymeric residues should keep their existing chain IDs
7 unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
8 chain_map, unclassified = cluster_unbound_ligands(m, unbound)
9 non_polymer.chain_ids = 'Xtmp'
10 m.renumber_residues(non_polymer, 0)
11 assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
12 assign_unbound_residues(m, chain_map, unclassified)
13
14def assign_unbound_residues(m, chain_map, unclassified):
15 from chimerax.atomic import Residue, concatenate
16 for cid, residues in chain_map.items():
17 first_ligand_number = next_available_ligand_number(m, cid)
18 residues.chain_ids = cid+'tmp'
19 residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
20 m.renumber_residues(residues, first_ligand_number)
21 residues.chain_ids=cid
22 if len(unclassified):
23 # Try reclassifying with a more permissive cutoff, now that all the other ligands
24 # are assigned to chains
25 chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
26 assign_unbound_residues(m, chain_map, [])
27 if len(unclassified):
28 session = m.session
29 warn_str = ('{} residues could not be automatically assigned to chains. '
30 'These have been given the chain ID UNK.').format(len(unclassified))
31 session.logger.warning(warn_str)
32 unclassified.chain_ids='UNK'
33 m.renumber_residues(unclassified, 1)
34
35
36
37
38def cluster_unbound_ligands(model, unbound, cutoff=5):
39 from chimerax.geometry import find_close_points
40 from chimerax.atomic import Residue, Residues
41 from collections import Counter, defaultdict
42 m = model
43 chain_ids = m.residues.unique_chain_ids
44 other_residues = m.residues.subtract(unbound)
45 #polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
46 ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
47 chain_map = {}
48 for cid in chain_ids:
49 cres = other_residues[other_residues.chain_ids==cid]
50 catoms = cres.atoms[cres.atoms.element_names!='H']
51 ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
52 close_ligand_atoms = ligand_atoms[li]
53 chain_map[cid] = Counter([a.residue for a in close_ligand_atoms])
54 unclassified = []
55 closest_chain_map = defaultdict(list)
56 for r in unbound:
57 max_atoms = 0
58 closest = None
59 for cid in chain_ids:
60 close = chain_map[cid].get(r, None)
61 if close is not None:
62 if close > max_atoms:
63 closest = cid
64 max_atoms = close
65 if closest is not None:
66 closest_chain_map[cid].append(r)
67 else:
68 unclassified.append(r)
69 return {cid: Residues(residues) for cid, residues in chain_map.items()}, Residues(unclassified)
70
71
72def next_available_ligand_number(m, chain_id):
73 from chimerax.atomic import Residue
74 cres = m.residues[m.residues.chain_ids==chain_id]
75 lres = cres[cres.polymer_types==Residue.PT_NONE]
76 if len(lres):
77 return max(lres.numbers)+1
78 pres = cres[cres.polymer_types!=Residue.PT_NONE]
79 if len(pres):
80 max_polymeric_residue_number = max(pres.numbers)
81 else:
82 return 0
83 first_ligand_number = round(max_polymeric_residue_number+1000,-3)
84 if first_ligand_number - max_polymeric_residue_number < 100:
85 first_ligand_number += 1000
86 return first_ligand_number
87
88known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
89def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
90 # The wwPDB steering committee has dictated that for protein-linked glycans,
91 # the following rules apply:
92 # - if the modelled portion of the glycan is a single residue, it should have
93 # the same chain ID as the protein.
94 # - if more than one residue, it should have a unique chain ID.
95 from chimerax.atomic import Residues, Residue, concatenate
96 import numpy
97 for cid, bound_residues in bound_map.items():
98 first_ligand_number = next_available_ligand_number(m, cid)
99 new_glycan_cid = []
100 assign_to_chain = []
101 groups = independent_groupings(bound_residues)
102 for g in groups:
103 if len(g)==1:
104 assign_to_chain.append(g)
105 elif any (r.name in known_sugars for r in g):
106 new_glycan_cid.append(g)
107 else:
108 assign_to_chain.append(g)
109 new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
110 assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
111 for i,g in enumerate(new_glycan_cid):
112 gid = cid+'glyc'+str(i)
113 residues = sort_glycan_residues(g)
114 residues.chain_ids=gid
115 m.renumber_residues(residues, 1)
116 if len(assign_to_chain):
117 assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
118 assign_to_chain.chain_ids = 'XXtmp'
119 m.renumber_residues(assign_to_chain, first_ligand_number)
120 assign_to_chain.chain_ids = cid
121
122
123def sort_glycan_residues(residues):
124 from chimerax.atomic import Residues
125 polymer_stem_res = linked_polymer_residue(residues)
126 for r in polymer_stem_res.neighbors:
127 if r in residues:
128 break
129 order = [r]
130 def _sort_walk(r, residues, order):
131 bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
132 ordered_bonds = []
133 for b in bonds:
134 atoms = b.atoms
135 if atoms[0].residue == r:
136 ordered_bonds.append(atoms)
137 else:
138 ordered_bonds.append(atoms[::-1])
139 ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
140 (int(b[0].name[-1]))))
141 for b in ordered_bonds:
142 next_r = b[1].residue
143 order.append(next_r)
144 _sort_walk(next_r, residues, order)
145 _sort_walk(r, residues, order)
146 return Residues(order)
147
148
149
150
151def linked_polymer_residue(residue_group):
152 from chimerax.atomic import Residue
153 for r in residue_group:
154 for n in r.neighbors:
155 if n.polymer_type != Residue.PT_NONE:
156 return n
157 return None
158
159def independent_groupings(residues):
160 residues = list(residues)
161 groups = []
162 while len(residues) > 0:
163 r = residues[0]
164 connected = set([r])
165 walk_tree(r, residues, connected)
166 groups.append(connected)
167 for r in connected:
168 residues.remove(r)
169
170 return groups
171
172def walk_tree(r, residues, connected):
173 for n in r.neighbors:
174 if n in connected or n not in residues:
175 continue
176 connected.add(n)
177 walk_tree(n, residues, connected)
178
179def sort_bound_and_unbound_ligands(residues):
180 unbound = []
181 from collections import defaultdict
182 bound_map = defaultdict(set)
183 for r in residues:
184 if not len(r.neighbors):
185 unbound.append(r)
186 cid = bound_to_polymer(r)
187 if cid is not None:
188 bound_map[cid].add(r)
189 else:
190 unbound.append(r)
191 from chimerax.atomic import Residues
192 return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
193
194def bound_to_polymer(residue, seen=None):
195 from chimerax.atomic import Residue
196 if seen is None:
197 seen=set()
198 seen.add(residue)
199 for n in residue.neighbors:
200 if n in seen:
201 continue
202 if n.polymer_type!=Residue.PT_NONE:
203 return n.chain_id
204 seen.add(n)
205 return bound_to_polymer(n, seen)
206 return None
207
208