Context Navigation

Back to Ticket #4177

Ticket #4177: ligand_utils.py

File ligand_utils.py, 8.2 KB (added by Tristan Croll, 5 years ago)
Added by email2trac

Line
1	def recluster_ligands(session, model):
2	from chimerax.atomic import Residue, Residues, concatenate
3	m = model
4	polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
5	non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
6	# Ligands bound to polymeric residues should keep their existing chain IDs
7	unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
8	chain_map, unclassified = cluster_unbound_ligands(m, unbound)
9	non_polymer.chain_ids = 'Xtmp'
10	m.renumber_residues(non_polymer, 0)
11	assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
12	assign_unbound_residues(m, chain_map, unclassified)
13
14	def assign_unbound_residues(m, chain_map, unclassified):
15	from chimerax.atomic import Residue, concatenate
16	for cid, residues in chain_map.items():
17	first_ligand_number = next_available_ligand_number(m, cid)
18	residues.chain_ids = cid+'tmp'
19	residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
20	m.renumber_residues(residues, first_ligand_number)
21	residues.chain_ids=cid
22	if len(unclassified):
23	# Try reclassifying with a more permissive cutoff, now that all the other ligands
24	# are assigned to chains
25	chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
26	assign_unbound_residues(m, chain_map, [])
27	if len(unclassified):
28	session = m.session
29	warn_str = ('{} residues could not be automatically assigned to chains. '
30	'These have been given the chain ID UNK.').format(len(unclassified))
31	session.logger.warning(warn_str)
32	unclassified.chain_ids='UNK'
33	m.renumber_residues(unclassified, 1)
34
35
36
37
38	def cluster_unbound_ligands(model, unbound, cutoff=5):
39	from chimerax.geometry import find_close_points
40	from chimerax.atomic import Residue, Residues
41	from collections import Counter, defaultdict
42	m = model
43	chain_ids = m.residues.unique_chain_ids
44	other_residues = m.residues.subtract(unbound)
45	#polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
46	ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
47	chain_map = {}
48	for cid in chain_ids:
49	cres = other_residues[other_residues.chain_ids==cid]
50	catoms = cres.atoms[cres.atoms.element_names!='H']
51	ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
52	close_ligand_atoms = ligand_atoms[li]
53	chain_map[cid] = Counter([a.residue for a in close_ligand_atoms])
54	unclassified = []
55	closest_chain_map = defaultdict(list)
56	for r in unbound:
57	max_atoms = 0
58	closest = None
59	for cid in chain_ids:
60	close = chain_map[cid].get(r, None)
61	if close is not None:
62	if close > max_atoms:
63	closest = cid
64	max_atoms = close
65	if closest is not None:
66	closest_chain_map[cid].append(r)
67	else:
68	unclassified.append(r)
69	return {cid: Residues(residues) for cid, residues in chain_map.items()}, Residues(unclassified)
70
71
72	def next_available_ligand_number(m, chain_id):
73	from chimerax.atomic import Residue
74	cres = m.residues[m.residues.chain_ids==chain_id]
75	lres = cres[cres.polymer_types==Residue.PT_NONE]
76	if len(lres):
77	return max(lres.numbers)+1
78	pres = cres[cres.polymer_types!=Residue.PT_NONE]
79	if len(pres):
80	max_polymeric_residue_number = max(pres.numbers)
81	else:
82	return 0
83	first_ligand_number = round(max_polymeric_residue_number+1000,-3)
84	if first_ligand_number - max_polymeric_residue_number < 100:
85	first_ligand_number += 1000
86	return first_ligand_number
87
88	known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
89	def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
90	# The wwPDB steering committee has dictated that for protein-linked glycans,
91	# the following rules apply:
92	# - if the modelled portion of the glycan is a single residue, it should have
93	# the same chain ID as the protein.
94	# - if more than one residue, it should have a unique chain ID.
95	from chimerax.atomic import Residues, Residue, concatenate
96	import numpy
97	for cid, bound_residues in bound_map.items():
98	first_ligand_number = next_available_ligand_number(m, cid)
99	new_glycan_cid = []
100	assign_to_chain = []
101	groups = independent_groupings(bound_residues)
102	for g in groups:
103	if len(g)==1:
104	assign_to_chain.append(g)
105	elif any (r.name in known_sugars for r in g):
106	new_glycan_cid.append(g)
107	else:
108	assign_to_chain.append(g)
109	new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
110	assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
111	for i,g in enumerate(new_glycan_cid):
112	gid = cid+'glyc'+str(i)
113	residues = sort_glycan_residues(g)
114	residues.chain_ids=gid
115	m.renumber_residues(residues, 1)
116	if len(assign_to_chain):
117	assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
118	assign_to_chain.chain_ids = 'XXtmp'
119	m.renumber_residues(assign_to_chain, first_ligand_number)
120	assign_to_chain.chain_ids = cid
121
122
123	def sort_glycan_residues(residues):
124	from chimerax.atomic import Residues
125	polymer_stem_res = linked_polymer_residue(residues)
126	for r in polymer_stem_res.neighbors:
127	if r in residues:
128	break
129	order = [r]
130	def _sort_walk(r, residues, order):
131	bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
132	ordered_bonds = []
133	for b in bonds:
134	atoms = b.atoms
135	if atoms[0].residue == r:
136	ordered_bonds.append(atoms)
137	else:
138	ordered_bonds.append(atoms[::-1])
139	ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
140	(int(b[0].name[-1]))))
141	for b in ordered_bonds:
142	next_r = b[1].residue
143	order.append(next_r)
144	_sort_walk(next_r, residues, order)
145	_sort_walk(r, residues, order)
146	return Residues(order)
147
148
149
150
151	def linked_polymer_residue(residue_group):
152	from chimerax.atomic import Residue
153	for r in residue_group:
154	for n in r.neighbors:
155	if n.polymer_type != Residue.PT_NONE:
156	return n
157	return None
158
159	def independent_groupings(residues):
160	residues = list(residues)
161	groups = []
162	while len(residues) > 0:
163	r = residues[0]
164	connected = set([r])
165	walk_tree(r, residues, connected)
166	groups.append(connected)
167	for r in connected:
168	residues.remove(r)
169
170	return groups
171
172	def walk_tree(r, residues, connected):
173	for n in r.neighbors:
174	if n in connected or n not in residues:
175	continue
176	connected.add(n)
177	walk_tree(n, residues, connected)
178
179	def sort_bound_and_unbound_ligands(residues):
180	unbound = []
181	from collections import defaultdict
182	bound_map = defaultdict(set)
183	for r in residues:
184	if not len(r.neighbors):
185	unbound.append(r)
186	cid = bound_to_polymer(r)
187	if cid is not None:
188	bound_map[cid].add(r)
189	else:
190	unbound.append(r)
191	from chimerax.atomic import Residues
192	return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
193
194	def bound_to_polymer(residue, seen=None):
195	from chimerax.atomic import Residue
196	if seen is None:
197	seen=set()
198	seen.add(residue)
199	for n in residue.neighbors:
200	if n in seen:
201	continue
202	if n.polymer_type!=Residue.PT_NONE:
203	return n.chain_id
204	seen.add(n)
205	return bound_to_polymer(n, seen)
206	return None
207
208

Download in other formats:

Original Format