# Load AlphaFold PDB dimer predictions and PAE files and score the confidence of interactions between # the two chains. def dimer_confidence(session, pdb_path, pae_path, distance = 4, max_pae = 5): # from chimerax.core.commands import run # m = run(session, f'open {pdb_path}', log = False)[0] from chimerax.pdb import open_pdb m = open_pdb(session, pdb_path, log_info = False)[0][0] chains = m.chains if len(chains) == 2: res1 = chains[0].existing_residues res2 = chains[1].existing_residues elif len(chains) == 3: from chimerax.atomic import concatenate res1 = concatenate((chains[0].existing_residues, chains[1].existing_residues)) res2 = chains[2].existing_residues else: raise ValueError(f'Got {len(chains)} chains, expected 2 or 3') n1, n2 = len(res1), len(res2) # r1, r2 = contacting_residues(res1, res2, distance) # ni1, ni2 = len(r1), len(r2) r1, r2, ni1, ni2 = contacting_residue_pairs(res1, res2, distance) npair = len(r1) allres = m.residues r1i = allres.indices(r1) r2i = allres.indices(r2) from chimerax.alphafold.pae import AlphaFoldPAE pae = AlphaFoldPAE(pae_path, m).pae_matrix # pae_command = f'alphafold pae #{m.id_string} file {pae_path} plot false' # run(session, pae_command, log = False) # pae = m.alphafold_pae.pae_matrix from numpy import minimum # pae12 = minimum(pae[r1i,:][:,r2i], pae[r2i,:][:,r1i].transpose()) pae12 = minimum(pae[r1i,r2i], pae[r2i,r1i]) # print (pae12) nconf = (pae12 <= max_pae).sum() # session.models.close([m]) # Make residue specifier for interface residues for aligning to other models. low_pae = (pae12 <= max_pae) r1nums, r2nums = r1[low_pae].numbers, r2[low_pae].numbers m.delete() return (n1, n2, ni1, ni2, npair, nconf, r1nums, r2nums) def contacting_residues(res1, res2, distance): a1, a2 = res1.atoms, res2.atoms from chimerax.geometry import find_close_points i1, i2 = find_close_points(a1.coords, a2.coords, distance) return a1[i1].unique_residues, a2[i2].unique_residues def contacting_residue_pairs(res1, res2, distance): a1, a2 = res1.atoms, res2.atoms from chimerax.geometry import find_close_points i1, i2 = find_close_points(a1.coords, a2.coords, distance) r1, r2 = a1[i1].unique_residues, a2[i2].unique_residues ra2 = r2.atoms axyz2 = ra2.coords rpairs = [] for r in r1: i1, i2 = find_close_points(r.atoms.coords, axyz2, distance) rpairs.extend((r,rc) for rc in ra2[i2].unique_residues) from chimerax.atomic import Residues rpair1 = Residues([rp1 for rp1, rp2 in rpairs]) rpair2 = Residues([rp2 for rp1, rp2 in rpairs]) return rpair1, rpair2, len(r1), len(r2) def test(session): pdb_path = '/Users/goddard/Downloads/ChimeraX/AlphaFold/prediction_86/af182_unrelaxed_rank_001_alphafold2_multimer_v3_model_1_seed_000.pdb' pae_path = '/Users/goddard/Downloads/ChimeraX/AlphaFold/prediction_86/af182_scores_rank_001_alphafold2_multimer_v3_model_1_seed_000.json' n1, n2, nconf = dimer_confidence(session, pdb_path, pae_path) from os.path import basename name = basename(pdb_path) print (f'{name} {n1} {n2} {nconf}') def interface_confidence(directory): from os import listdir pdb_files = [f for f in listdir(directory) if f.endswith('.pdb')] pdb_files.sort() print (len(pdb_files), 'pdb files') lines = [] for file in pdb_files: from os.path import join pdb_path = join(directory, file) pae_file = file.replace('unrelaxed', 'scores').replace('.pdb','.json') pae_path = join(directory, pae_file) n1, n2, ni1, ni2, npair, nconf, r1nums, r2nums = dimer_confidence(session, pdb_path, pae_path) # parts = file.split('.') # genes = '-'.join(p.split('_')[2] for p in parts[:-1]) # rank = parts[-2].split('_')[5] pconf = '%.3g' % (0 if npair == 0 else nconf/npair) r1list, r2list = ' '.join(str(i) for i in r1nums), ' '.join(str(i) for i in r2nums) lines.append(f'{file},{n1},{n2},{ni1},{ni2},{npair},{nconf},{pconf},{r1list},{r2list}') # if len(lines) >= 50: # break lines.sort() return lines from sys import argv directory = argv[1] if len(argv) >= 2 else '.' lines = interface_confidence(directory) print('\n'.join(lines))