#!/usr/bin/env python3
"""Create a multimer FASTA input file for running predictions.

Each command-line argument is a FASTA file.  One sequence is taken from each
file in every possible way, combinations that use the same set of sequence
names (in any order) are collapsed to a single entry, and the result is
printed to standard output with the chains of each multimer separated by ':'.

Data layout shared by the functions below: a "sequence entry" is a pair
``(names, chains)`` where ``names`` is a tuple of title strings and
``chains`` is a tuple with one list of sequence lines per name.
"""

import sys


def multimers(seq_lists):
    """Return every combination that takes one entry from each list.

    seq_lists is a list of lists of ``(names, chains)`` entries.  The result
    is a list of entries whose ``names`` and ``chains`` tuples are the
    concatenation, in list order, of the chosen entries' tuples.  An empty
    ``seq_lists`` yields an empty list; a single list is returned unchanged.
    """
    if not seq_lists:
        # Nothing to combine (e.g. the script was given no input files).
        return []
    if len(seq_lists) == 1:
        return seq_lists[0]

    # Build the combinations of the remaining lists once rather than once
    # per entry of the first list.
    tail = multimers(seq_lists[1:])
    return [
        (name1 + name2, seqs1 + seqs2)
        for name1, seqs1 in seq_lists[0]
        for name2, seqs2 in tail
    ]


def read_fasta_sequences(path):
    """Read a FASTA file and return its records as sequence entries.

    Each record becomes ``((title,), (lines,))`` where ``title`` is the
    header text after '>' with surrounding whitespace removed and ``lines``
    is the list of stripped sequence lines.  Blank lines are ignored, and a
    header that is not followed by any sequence line produces no entry.
    """
    seqs = []
    title = ''
    lines = []
    # The context manager guarantees the file is closed, even on error.
    with open(path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                if lines:
                    seqs.append(((title,), (lines,)))
                title = line[1:].strip()
                lines = []
            else:
                residues = line.strip()
                # Skip blank lines: an empty sequence line would otherwise
                # put the ':' chain separator on a line of its own.
                if residues:
                    lines.append(residues)
    if lines:
        seqs.append(((title,), (lines,)))
    return seqs


def unique_multimers(multimer_seqs):
    """Drop multimers whose names are a reordering of an earlier one.

    Two entries are considered the same when their sorted name tuples are
    equal.  The first occurrence is kept and input order is preserved.
    """
    keep = []
    found = set()
    for names, seqs in multimer_seqs:
        key = tuple(sorted(names))
        if key not in found:
            keep.append((names, seqs))
            found.add(key)
    return keep


def alphafold_seqs(seqs):
    """Format sequence entries as multimer FASTA text.

    Every entry contributes a header line, '>' followed by its names joined
    with '.', and then its chains.  The lines of one chain are joined with
    newlines and consecutive chains are separated by ':' at the end of the
    preceding chain's last line.  Returns a single string without a trailing
    newline.
    """
    lines = []
    # Distinct loop names avoid rebinding the 'seqs' parameter mid-iteration.
    for names, chains in seqs:
        lines.append('>' + '.'.join(names))
        lines.append(':\n'.join('\n'.join(chain_lines)
                                for chain_lines in chains))
    return '\n'.join(lines)


def main():
    """Combine the FASTA files named on the command line and print the result."""
    fasta_paths = sys.argv[1:]
    if not fasta_paths:
        sys.exit('usage: {} file1.fasta [file2.fasta ...]'.format(sys.argv[0]))
    seq_lists = [read_fasta_sequences(path) for path in fasta_paths]
    seqs = multimers(seq_lists)
    useqs = unique_multimers(seqs)
    print(alphafold_seqs(useqs))


if __name__ == '__main__':
    main()