| 1 | def recluster_ligands(session, model):
|
|---|
| 2 | from chimerax.atomic import Residue, Residues, concatenate
|
|---|
| 3 | m = model
|
|---|
| 4 | polymer = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
|
|---|
| 5 | non_polymer = m.residues[m.residues.polymer_types==Residue.PT_NONE]
|
|---|
| 6 | # Ligands bound to polymeric residues should keep their existing chain IDs
|
|---|
| 7 | unbound, bound_map = sort_bound_and_unbound_ligands(non_polymer)
|
|---|
| 8 | chain_map, unclassified = cluster_unbound_ligands(m, unbound)
|
|---|
| 9 | non_polymer.chain_ids = 'Xtmp'
|
|---|
| 10 | m.renumber_residues(non_polymer, 0)
|
|---|
| 11 | assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map)
|
|---|
| 12 | assign_unbound_residues(m, chain_map, unclassified)
|
|---|
| 13 |
|
|---|
| 14 | def assign_unbound_residues(m, chain_map, unclassified):
|
|---|
| 15 | from chimerax.atomic import Residue, concatenate
|
|---|
| 16 | for cid, residues in chain_map.items():
|
|---|
| 17 | first_ligand_number = next_available_ligand_number(m, cid)
|
|---|
| 18 | residues.chain_ids = cid+'tmp'
|
|---|
| 19 | residues = concatenate([residues[residues.names!='HOH'], residues[residues.names=='HOH']])
|
|---|
| 20 | m.renumber_residues(residues, first_ligand_number)
|
|---|
| 21 | residues.chain_ids=cid
|
|---|
| 22 | if len(unclassified):
|
|---|
| 23 | # Try reclassifying with a more permissive cutoff, now that all the other ligands
|
|---|
| 24 | # are assigned to chains
|
|---|
| 25 | chain_map, unclassified = cluster_unbound_ligands(m, unclassified, cutoff=8)
|
|---|
| 26 | assign_unbound_residues(m, chain_map, [])
|
|---|
| 27 | if len(unclassified):
|
|---|
| 28 | session = m.session
|
|---|
| 29 | warn_str = ('{} residues could not be automatically assigned to chains. '
|
|---|
| 30 | 'These have been given the chain ID UNK.').format(len(unclassified))
|
|---|
| 31 | session.logger.warning(warn_str)
|
|---|
| 32 | unclassified.chain_ids='UNK'
|
|---|
| 33 | m.renumber_residues(unclassified, 1)
|
|---|
| 34 |
|
|---|
| 35 |
|
|---|
| 36 |
|
|---|
| 37 |
|
|---|
| 38 | def cluster_unbound_ligands(model, unbound, cutoff=5):
|
|---|
| 39 | from chimerax.geometry import find_close_points
|
|---|
| 40 | from chimerax.atomic import Residue, Residues
|
|---|
| 41 | from collections import Counter, defaultdict
|
|---|
| 42 | m = model
|
|---|
| 43 | chain_ids = m.residues.unique_chain_ids
|
|---|
| 44 | other_residues = m.residues.subtract(unbound)
|
|---|
| 45 | #polymeric = m.residues[m.residues.polymer_types!=Residue.PT_NONE]
|
|---|
| 46 | ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']
|
|---|
| 47 | chain_map = {}
|
|---|
| 48 | for cid in chain_ids:
|
|---|
| 49 | cres = other_residues[other_residues.chain_ids==cid]
|
|---|
| 50 | catoms = cres.atoms[cres.atoms.element_names!='H']
|
|---|
| 51 | ci, li = find_close_points(catoms.coords, ligand_atoms.coords, cutoff)
|
|---|
| 52 | close_ligand_atoms = ligand_atoms[li]
|
|---|
| 53 | chain_map[cid] = Counter([a.residue for a in close_ligand_atoms])
|
|---|
| 54 | unclassified = []
|
|---|
| 55 | closest_chain_map = defaultdict(list)
|
|---|
| 56 | for r in unbound:
|
|---|
| 57 | max_atoms = 0
|
|---|
| 58 | closest = None
|
|---|
| 59 | for cid in chain_ids:
|
|---|
| 60 | close = chain_map[cid].get(r, None)
|
|---|
| 61 | if close is not None:
|
|---|
| 62 | if close > max_atoms:
|
|---|
| 63 | closest = cid
|
|---|
| 64 | max_atoms = close
|
|---|
| 65 | if closest is not None:
|
|---|
| 66 | closest_chain_map[cid].append(r)
|
|---|
| 67 | else:
|
|---|
| 68 | unclassified.append(r)
|
|---|
| 69 | return {cid: Residues(residues) for cid, residues in chain_map.items()}, Residues(unclassified)
|
|---|
| 70 |
|
|---|
| 71 |
|
|---|
| 72 | def next_available_ligand_number(m, chain_id):
|
|---|
| 73 | from chimerax.atomic import Residue
|
|---|
| 74 | cres = m.residues[m.residues.chain_ids==chain_id]
|
|---|
| 75 | lres = cres[cres.polymer_types==Residue.PT_NONE]
|
|---|
| 76 | if len(lres):
|
|---|
| 77 | return max(lres.numbers)+1
|
|---|
| 78 | pres = cres[cres.polymer_types!=Residue.PT_NONE]
|
|---|
| 79 | if len(pres):
|
|---|
| 80 | max_polymeric_residue_number = max(pres.numbers)
|
|---|
| 81 | else:
|
|---|
| 82 | return 0
|
|---|
| 83 | first_ligand_number = round(max_polymeric_residue_number+1000,-3)
|
|---|
| 84 | if first_ligand_number - max_polymeric_residue_number < 100:
|
|---|
| 85 | first_ligand_number += 1000
|
|---|
| 86 | return first_ligand_number
|
|---|
| 87 |
|
|---|
| 88 | known_sugars = ["NGC","NGA","RM4","FCB","GLC","GCS","GTR","GAL","BDR","RIB","BDF","BGC","BXX","XYZ","FUL","FUB","Z6H","PSV","A2G","LDY","RIP","SHD","NDG","ARB","SOE","SLB","BM3","LFR","Z6J","GIV","PA1","ABE","AHR","XXR","ARA","AFD","ADA","IDR","MAN","RAM","32O","NAG","GUP","T6T","G6D","FUC","Z9N","BMA","XYP","FCA","BDP","TYV","BM7","GCU","GXL","XYS","HSY","LXC","FRU","WOO","ALL","0MK","SIA","BXY","64K","GL0","GLA"]
|
|---|
| 89 | def assign_bound_ligand_chain_ids_and_residue_numbers(m, bound_map):
|
|---|
| 90 | # The wwPDB steering committee has dictated that for protein-linked glycans,
|
|---|
| 91 | # the following rules apply:
|
|---|
| 92 | # - if the modelled portion of the glycan is a single residue, it should have
|
|---|
| 93 | # the same chain ID as the protein.
|
|---|
| 94 | # - if more than one residue, it should have a unique chain ID.
|
|---|
| 95 | from chimerax.atomic import Residues, Residue, concatenate
|
|---|
| 96 | import numpy
|
|---|
| 97 | for cid, bound_residues in bound_map.items():
|
|---|
| 98 | first_ligand_number = next_available_ligand_number(m, cid)
|
|---|
| 99 | new_glycan_cid = []
|
|---|
| 100 | assign_to_chain = []
|
|---|
| 101 | groups = independent_groupings(bound_residues)
|
|---|
| 102 | for g in groups:
|
|---|
| 103 | if len(g)==1:
|
|---|
| 104 | assign_to_chain.append(g)
|
|---|
| 105 | elif any (r.name in known_sugars for r in g):
|
|---|
| 106 | new_glycan_cid.append(g)
|
|---|
| 107 | else:
|
|---|
| 108 | assign_to_chain.append(g)
|
|---|
| 109 | new_glycan_cid = list(sorted(new_glycan_cid, key=lambda g:linked_polymer_residue(g).number))
|
|---|
| 110 | assign_to_chain = list(sorted(assign_to_chain, key=lambda g:linked_polymer_residue(g).number))
|
|---|
| 111 | for i,g in enumerate(new_glycan_cid):
|
|---|
| 112 | gid = cid+'glyc'+str(i)
|
|---|
| 113 | residues = sort_glycan_residues(g)
|
|---|
| 114 | residues.chain_ids=gid
|
|---|
| 115 | m.renumber_residues(residues, 1)
|
|---|
| 116 | if len(assign_to_chain):
|
|---|
| 117 | assign_to_chain = concatenate([Residues(g) for g in assign_to_chain])
|
|---|
| 118 | assign_to_chain.chain_ids = 'XXtmp'
|
|---|
| 119 | m.renumber_residues(assign_to_chain, first_ligand_number)
|
|---|
| 120 | assign_to_chain.chain_ids = cid
|
|---|
| 121 |
|
|---|
| 122 |
|
|---|
| 123 | def sort_glycan_residues(residues):
|
|---|
| 124 | from chimerax.atomic import Residues
|
|---|
| 125 | polymer_stem_res = linked_polymer_residue(residues)
|
|---|
| 126 | for r in polymer_stem_res.neighbors:
|
|---|
| 127 | if r in residues:
|
|---|
| 128 | break
|
|---|
| 129 | order = [r]
|
|---|
| 130 | def _sort_walk(r, residues, order):
|
|---|
| 131 | bonds = [r.bonds_between(n)[0] for n in r.neighbors if n in residues and n not in order]
|
|---|
| 132 | ordered_bonds = []
|
|---|
| 133 | for b in bonds:
|
|---|
| 134 | atoms = b.atoms
|
|---|
| 135 | if atoms[0].residue == r:
|
|---|
| 136 | ordered_bonds.append(atoms)
|
|---|
| 137 | else:
|
|---|
| 138 | ordered_bonds.append(atoms[::-1])
|
|---|
| 139 | ordered_bonds = list(sorted(ordered_bonds, key=lambda b:
|
|---|
| 140 | (int(b[0].name[-1]))))
|
|---|
| 141 | for b in ordered_bonds:
|
|---|
| 142 | next_r = b[1].residue
|
|---|
| 143 | order.append(next_r)
|
|---|
| 144 | _sort_walk(next_r, residues, order)
|
|---|
| 145 | _sort_walk(r, residues, order)
|
|---|
| 146 | return Residues(order)
|
|---|
| 147 |
|
|---|
| 148 |
|
|---|
| 149 |
|
|---|
| 150 |
|
|---|
| 151 | def linked_polymer_residue(residue_group):
|
|---|
| 152 | from chimerax.atomic import Residue
|
|---|
| 153 | for r in residue_group:
|
|---|
| 154 | for n in r.neighbors:
|
|---|
| 155 | if n.polymer_type != Residue.PT_NONE:
|
|---|
| 156 | return n
|
|---|
| 157 | return None
|
|---|
| 158 |
|
|---|
| 159 | def independent_groupings(residues):
|
|---|
| 160 | residues = list(residues)
|
|---|
| 161 | groups = []
|
|---|
| 162 | while len(residues) > 0:
|
|---|
| 163 | r = residues[0]
|
|---|
| 164 | connected = set([r])
|
|---|
| 165 | walk_tree(r, residues, connected)
|
|---|
| 166 | groups.append(connected)
|
|---|
| 167 | for r in connected:
|
|---|
| 168 | residues.remove(r)
|
|---|
| 169 |
|
|---|
| 170 | return groups
|
|---|
| 171 |
|
|---|
| 172 | def walk_tree(r, residues, connected):
|
|---|
| 173 | for n in r.neighbors:
|
|---|
| 174 | if n in connected or n not in residues:
|
|---|
| 175 | continue
|
|---|
| 176 | connected.add(n)
|
|---|
| 177 | walk_tree(n, residues, connected)
|
|---|
| 178 |
|
|---|
| 179 | def sort_bound_and_unbound_ligands(residues):
|
|---|
| 180 | unbound = []
|
|---|
| 181 | from collections import defaultdict
|
|---|
| 182 | bound_map = defaultdict(set)
|
|---|
| 183 | for r in residues:
|
|---|
| 184 | if not len(r.neighbors):
|
|---|
| 185 | unbound.append(r)
|
|---|
| 186 | cid = bound_to_polymer(r)
|
|---|
| 187 | if cid is not None:
|
|---|
| 188 | bound_map[cid].add(r)
|
|---|
| 189 | else:
|
|---|
| 190 | unbound.append(r)
|
|---|
| 191 | from chimerax.atomic import Residues
|
|---|
| 192 | return (Residues(unbound), {cid: Residues(rset) for cid, rset in bound_map.items()})
|
|---|
| 193 |
|
|---|
| 194 | def bound_to_polymer(residue, seen=None):
|
|---|
| 195 | from chimerax.atomic import Residue
|
|---|
| 196 | if seen is None:
|
|---|
| 197 | seen=set()
|
|---|
| 198 | seen.add(residue)
|
|---|
| 199 | for n in residue.neighbors:
|
|---|
| 200 | if n in seen:
|
|---|
| 201 | continue
|
|---|
| 202 | if n.polymer_type!=Residue.PT_NONE:
|
|---|
| 203 | return n.chain_id
|
|---|
| 204 | seen.add(n)
|
|---|
| 205 | return bound_to_polymer(n, seen)
|
|---|
| 206 | return None
|
|---|
| 207 |
|
|---|
| 208 |
|
|---|