# # Find each protein-protein interface patch and see if I can extract a segment of each # protein sequence to run ColabFold on to predict that interface. This is to handle # the large NALCN complex of ~7000 amino acids, using the human cryoEM structure to test # if the cryptococcus homolog has the same protein-protein interfaces. # def patches(session, residues, distance = 5.0, cluster_distance = 20.0, verbose = False): patches = all_interface_patches(residues, distance, cluster_distance) patches.sort(key = lambda pair: len(pair[0]) + len(pair[1]), reverse = True) pspecs = [] rspec = _concise_res_spec(residues) r1specs, r2specs = [], [] for r1, r2 in patches: r1spec, r2spec = _concise_res_spec(r1), _concise_res_spec(r2) r1specs.append(r1spec) r2specs.append(r2spec) cid1, cid2 = r1[0].chain_id, r2[0].chain_id r1min,r1max = r1.numbers.min(), r1.numbers.max() d1spec = f'#{r1[0].structure.id_string}/{cid1}:{r1min}-{r1max}' r2min,r2max = r2.numbers.min(), r2.numbers.max() d2spec = f'#{r2[0].structure.id_string}/{cid2}:{r2min}-{r2max}' cmd = f'color {rspec} gray ; color {d1spec} lightblue ; color {r1spec} blue ; color {d2spec} lightgreen ; color {r2spec} lime' line = f'{cid1} with {cid2}, {len(r1)} with {len(r2)} residues
' pspecs.append(line) all_cmd = f'color {"".join(r1specs)} blue ; color {"".join(r2specs)} lime' pinfo = '\n'.join(pspecs) msg = f'{len(patches)} patches, color
{pinfo}' session.logger.info(msg, is_html = True) if verbose: sizes = ', '.join(str(len(r1)+len(r2)) for r1,r2 in patches) pspecs = '\n'.join(f'{_concise_res_spec(r1)}{_concise_res_spec(r2)}' for r1,r2 in patches) msg = f'Found {len(patches)} patches with residue counts {sizes}\n{pspecs}' session.logger.info(msg) def all_interface_patches(residues, distance, cluster_distance): patches = [] res_by_chain = residues.by_chain for i, (structure1, chain_id1, chain_res1) in enumerate(res_by_chain): for structure2, chain_id2, chain_res2 in res_by_chain[i+1:]: patches.extend(interface_patches(chain_res1, chain_res2, distance, cluster_distance)) return patches def interface_patches(residues1, residues2, distance, cluster_distance): r1close, r2close = _find_close_residues(residues1, residues2, distance) r1groups = _residue_clusters(r1close, cluster_distance) r2groups = _residue_clusters(r2close, cluster_distance) close_groups = [(i1, i2) for i1,res1 in enumerate(r1groups) for i2,res2 in enumerate(r2groups) if _any_close_residues(res1, res2, distance)] jpairs = _join_pairs(close_groups) from chimerax.atomic import concatenate paired_patches = [(concatenate([r1groups[i1] for i1 in i1list]), concatenate([r2groups[i2] for i2 in i2list])) for i1list, i2list in jpairs] return paired_patches # list of (Residues,Residues) def _find_close_residues(residues1, residues2, distance): a1 = residues1.atoms axyz1 = a1.scene_coords a2 = residues2.atoms axyz2 = a2.scene_coords from chimerax.geometry import find_close_points ai1, ai2 = find_close_points(axyz1, axyz2, distance) r1near = a1[ai1].unique_residues r2near = a2[ai2].unique_residues return r1near, r2near def _any_close_residues(res1, res2, distance): axyz1 = res1.atoms.scene_coords axyz2 = res2.atoms.scene_coords from chimerax.geometry import find_close_points ai1, ai2 = find_close_points(axyz1, axyz2, distance) return len(ai1) > 0 def _residue_clusters(residues, cluster_distance): atoms = residues.find_existing_atoms('CA') if len(atoms) == 0: return [] if len(atoms) < len(residues): ca_atoms = residues.find_atoms('CA') res_no_ca = [residues[i] for i,a in enumerate(ca_atoms) if a is None] rspec = _concise_res_spec(res_no_ca) print(f'Residues {rspec} does not have a CA atom. Cannot cluster them.') residues = residues.filter([i for i,a in enumerate(ca_atoms) if a]) xyz = atoms.scene_coords from chimerax.foldseek.cluster import _cluster_by_distance cnums = _cluster_by_distance(xyz, cluster_distance) nc = cnums.max() rclusters = [residues[cnums == i] for i in range(1,nc+1)] return rclusters def _concise_res_spec(residues): session = residues[0].structure.session from chimerax.atomic import concise_residue_spec rspec = concise_residue_spec(session, residues) return rspec def _join_pairs(pairs): jpairs = [] unused = set(pairs) for (i1,i2) in pairs: if (i1,i2) in unused: i1list,i2list = [i1],[i2] unused.remove((i1,i2)) o1 = o2 = 0 while len(i1list) > o1 or len(i2list) > o2: for i1 in i1list[o1:]: near_i1 = [j2 for j1,j2 in unused if j1 == i1] i2list.extend(near_i1) for j2 in near_i1: unused.remove((i1,j2)) o1 = len(i1list) for i2 in i2list[o2:]: near_i2 = [j1 for j1,j2 in unused if j2 == i2] i1list.extend(near_i2) for j1 in near_i2: unused.remove((j1,i2)) o2 = len(i2list) jpairs.append((i1list, i2list)) return jpairs def register_command(logger): from chimerax.core.commands import CmdDesc, register, FloatArg, BoolArg from chimerax.atomic import ResiduesArg desc = CmdDesc( required = [('residues', ResiduesArg)], keyword = [('distance', FloatArg), ('cluster_distance', FloatArg), ('verbose', BoolArg), ], synopsis = 'Compute sets of interface residues making contacts across chains' ) register('patches', desc, patches, logger=logger) register_command(session.logger)