#
# Find each protein-protein interface patch and see if I can extract a segment of each
# protein sequence to run ColabFold on to predict that interface. This is to handle
# the large NALCN complex of ~7000 amino acids, using the human cryoEM structure to test
# if the cryptococcus homolog has the same protein-protein interfaces.
#
def patches(session, residues, distance = 5.0, cluster_distance = 20.0, verbose = False):
patches = all_interface_patches(residues, distance, cluster_distance)
patches.sort(key = lambda pair: len(pair[0]) + len(pair[1]), reverse = True)
pspecs = []
rspec = _concise_res_spec(residues)
r1specs, r2specs = [], []
for r1, r2 in patches:
r1spec, r2spec = _concise_res_spec(r1), _concise_res_spec(r2)
r1specs.append(r1spec)
r2specs.append(r2spec)
cid1, cid2 = r1[0].chain_id, r2[0].chain_id
r1min,r1max = r1.numbers.min(), r1.numbers.max()
d1spec = f'#{r1[0].structure.id_string}/{cid1}:{r1min}-{r1max}'
r2min,r2max = r2.numbers.min(), r2.numbers.max()
d2spec = f'#{r2[0].structure.id_string}/{cid2}:{r2min}-{r2max}'
cmd = f'color {rspec} gray ; color {d1spec} lightblue ; color {r1spec} blue ; color {d2spec} lightgreen ; color {r2spec} lime'
line = f'{cid1} with {cid2}, {len(r1)} with {len(r2)} residues'
pspecs.append(line)
all_cmd = f'color {"".join(r1specs)} blue ; color {"".join(r2specs)} lime'
pinfo = '\n'.join(pspecs)
msg = f'{len(patches)} patches, color
{pinfo}'
session.logger.info(msg, is_html = True)
if verbose:
sizes = ', '.join(str(len(r1)+len(r2)) for r1,r2 in patches)
pspecs = '\n'.join(f'{_concise_res_spec(r1)}{_concise_res_spec(r2)}' for r1,r2 in patches)
msg = f'Found {len(patches)} patches with residue counts {sizes}\n{pspecs}'
session.logger.info(msg)
def all_interface_patches(residues, distance, cluster_distance):
patches = []
res_by_chain = residues.by_chain
for i, (structure1, chain_id1, chain_res1) in enumerate(res_by_chain):
for structure2, chain_id2, chain_res2 in res_by_chain[i+1:]:
patches.extend(interface_patches(chain_res1, chain_res2, distance, cluster_distance))
return patches
def interface_patches(residues1, residues2, distance, cluster_distance):
r1close, r2close = _find_close_residues(residues1, residues2, distance)
r1groups = _residue_clusters(r1close, cluster_distance)
r2groups = _residue_clusters(r2close, cluster_distance)
close_groups = [(i1, i2)
for i1,res1 in enumerate(r1groups)
for i2,res2 in enumerate(r2groups)
if _any_close_residues(res1, res2, distance)]
jpairs = _join_pairs(close_groups)
from chimerax.atomic import concatenate
paired_patches = [(concatenate([r1groups[i1] for i1 in i1list]),
concatenate([r2groups[i2] for i2 in i2list]))
for i1list, i2list in jpairs]
return paired_patches # list of (Residues,Residues)
def _find_close_residues(residues1, residues2, distance):
a1 = residues1.atoms
axyz1 = a1.scene_coords
a2 = residues2.atoms
axyz2 = a2.scene_coords
from chimerax.geometry import find_close_points
ai1, ai2 = find_close_points(axyz1, axyz2, distance)
r1near = a1[ai1].unique_residues
r2near = a2[ai2].unique_residues
return r1near, r2near
def _any_close_residues(res1, res2, distance):
axyz1 = res1.atoms.scene_coords
axyz2 = res2.atoms.scene_coords
from chimerax.geometry import find_close_points
ai1, ai2 = find_close_points(axyz1, axyz2, distance)
return len(ai1) > 0
def _residue_clusters(residues, cluster_distance):
atoms = residues.find_existing_atoms('CA')
if len(atoms) == 0:
return []
if len(atoms) < len(residues):
ca_atoms = residues.find_atoms('CA')
res_no_ca = [residues[i] for i,a in enumerate(ca_atoms) if a is None]
rspec = _concise_res_spec(res_no_ca)
print(f'Residues {rspec} does not have a CA atom. Cannot cluster them.')
residues = residues.filter([i for i,a in enumerate(ca_atoms) if a])
xyz = atoms.scene_coords
from chimerax.foldseek.cluster import _cluster_by_distance
cnums = _cluster_by_distance(xyz, cluster_distance)
nc = cnums.max()
rclusters = [residues[cnums == i] for i in range(1,nc+1)]
return rclusters
def _concise_res_spec(residues):
session = residues[0].structure.session
from chimerax.atomic import concise_residue_spec
rspec = concise_residue_spec(session, residues)
return rspec
def _join_pairs(pairs):
jpairs = []
unused = set(pairs)
for (i1,i2) in pairs:
if (i1,i2) in unused:
i1list,i2list = [i1],[i2]
unused.remove((i1,i2))
o1 = o2 = 0
while len(i1list) > o1 or len(i2list) > o2:
for i1 in i1list[o1:]:
near_i1 = [j2 for j1,j2 in unused if j1 == i1]
i2list.extend(near_i1)
for j2 in near_i1:
unused.remove((i1,j2))
o1 = len(i1list)
for i2 in i2list[o2:]:
near_i2 = [j1 for j1,j2 in unused if j2 == i2]
i1list.extend(near_i2)
for j1 in near_i2:
unused.remove((j1,i2))
o2 = len(i2list)
jpairs.append((i1list, i2list))
return jpairs
def register_command(logger):
from chimerax.core.commands import CmdDesc, register, FloatArg, BoolArg
from chimerax.atomic import ResiduesArg
desc = CmdDesc(
required = [('residues', ResiduesArg)],
keyword = [('distance', FloatArg),
('cluster_distance', FloatArg),
('verbose', BoolArg),
],
synopsis = 'Compute sets of interface residues making contacts across chains'
)
register('patches', desc, patches, logger=logger)
register_command(session.logger)