# Filter the dimer confidence score list output by dimer_confidence.py
# to list just those with confidence at or above a specified fraction of
# interface residue pairs with good PAE values, listing only the highest
# scoring of the 5 alphafold models for each dimer.


def high_confidence_dimers(lines, cutoff = 0.10):
    """Return the best-scoring line of each dimer that meets the cutoff.

    Each non-blank line is comma-separated; the confidence score is read
    from the 8th field (index 7).  Lines are grouped by the text that
    precedes the first '_unrelaxed' in the line, and for each group only
    the line with the highest score is kept (the first one wins on ties).

    Args:
        lines: Iterable of text lines, with or without trailing newlines.
        cutoff: Minimum score (inclusive) a line needs to be reported.

    Returns:
        The selected lines, stripped of surrounding whitespace and sorted
        as plain strings.

    Raises:
        IndexError: If a non-blank line has fewer than 8 fields.
        ValueError: If the 8th field is not a number.
    """
    best = {}  # group name -> (score, stripped line)
    for line in lines:
        # Blank lines (e.g. a trailing newline at end of file) carry no
        # record; skip them instead of failing on the field lookup.
        if not line.strip():
            continue
        fields = line.split(',')
        pconf = float(fields[7])
        name_end = line.find('_unrelaxed')
        # find() returns -1 when the marker is missing, which would slice
        # off the last character of the line; group by the first field
        # in that case instead.
        genes = line[:name_end] if name_end >= 0 else fields[0]
        if pconf >= cutoff and (genes not in best or best[genes][0] < pconf):
            best[genes] = (pconf, line.strip())
    return sorted(entry for _, entry in best.values())


def main():
    """Read dimer_confidence.csv and print the high-confidence dimers."""
    with open('dimer_confidence.csv', 'r') as file:
        lines = file.readlines()
    print('\n'.join(high_confidence_dimers(lines, cutoff = 0.1)))


# Only run the file I/O when executed as a script, so that importing
# high_confidence_dimers does not require the CSV file to exist.
if __name__ == '__main__':
    main()