from chimerax.webservices.cxservices_job import CxServicesJob


class BlastProtein(CxServicesJob):
    """Web-service job that BLASTs a single protein sequence.

    The constructor normalizes the query sequence and assembles the request
    parameters; :meth:`blast` submits the job and returns the parsed hits.
    """

    inet_error = "Could not start BLAST job. Please check your internet connection and try again."
    service_name = "blast"

    def __init__(self, session, sequence, database='pdb', cutoff=1e-3,
                 matrix='BLOSUM62', max_seqs=10000, version=None):
        """Prepare a BLAST request for *sequence*.

        Parameters:
            session: session object handed to the ``CxServicesJob`` base class.
            sequence: query sequence string; every ``'?'`` is replaced by ``'X'``.
            database: name of the database to search.
            cutoff: E-value cutoff, sent to the service as a string.
            matrix: name of the scoring matrix.
            max_seqs: maximum number of sequences, sent to the service as a string.
            version: database version; when ``None`` it is looked up in
                ``CurrentDBVersions`` using *database* as the key.

        Raises:
            UserError: if the sequence consists only of ``'X'`` after the
                replacement (this includes the empty sequence).
        """
        super().__init__(session)
        self.sequence = sequence.replace('?', 'X')   # string
        if self.sequence.count('X') == len(self.sequence):
            from chimerax.core.errors import UserError
            raise UserError("Sequence consists entirely of unknown amino acids.")
        self.database = database                     # string
        self.cutoff = cutoff                         # float
        self.matrix = matrix                         # string
        self.max_seqs = max_seqs                     # int
        if version is None:
            from chimerax.blastprotein.data_model import CurrentDBVersions
            version = CurrentDBVersions[self.database]
        self.version = version                       # DB Version
        self.params = {
            "db": self.database,
            "evalue": str(self.cutoff),
            "matrix": self.matrix,
            "blimit": str(self.max_seqs),
            "input_seq": self.sequence,
            "version": self.version,
        }

    def blast(self):
        """Run the BLAST job and return its hits.

        The job is started with ``blocking=True``; its results are then parsed
        by the database object returned from ``get_database``.

        Returns:
            A list of ``(name, evalue, score, description)`` tuples, one per
            parsed match whose name is not ``'query'``.

        Raises:
            UserError: if the job could not be started (``MaxRetryError``) or
                if it did not exit normally.
        """
        from chimerax.core.errors import UserError
        from urllib3.exceptions import MaxRetryError
        try:
            super().start(self.service_name, self.params, blocking=True)
        except MaxRetryError as err:
            self.session.logger.warning(self.inet_error)
            # The job never started, so there is nothing to poll or fetch.
            # Stop here with the connectivity message instead of falling
            # through to the status check below.
            raise UserError(self.inet_error) from err
        if not self.exited_normally():
            raise UserError(f"BLAST job {self.id} failed")
        results = self.get_results()
        from chimerax.blastprotein.data_model import get_database
        blast_results = get_database(self.database)
        blast_results.parse("query", self.sequence, results)
        # Prefer a match's ``match`` attribute as its name and fall back to
        # ``name`` when it is empty; the entry named 'query' is skipped.
        return [
            (m.match or m.name, m.evalue, m.score, m.description)
            for m in blast_results.parser.matches
            if (m.match or m.name) != 'query'
        ]


def _parse_fasta(stream):
    """Collect ``(title, sequence)`` pairs from an iterable of FASTA lines."""
    seqs = []
    title = ''
    lines = []
    for line in stream:
        if line.startswith('>'):
            # A new header closes the previous record, if it had any lines.
            if lines:
                seqs.append((title, ''.join(lines)))
            title = line[1:].strip()
            lines = []
        else:
            lines.append(line.strip())
    if lines:
        seqs.append((title, ''.join(lines)))
    return seqs


def read_fasta_sequences(path):
    """Read all sequences from a FASTA file.

    Parameters:
        path: a file path (``str`` or ``os.PathLike``) or an already-open
            text stream. A stream passed in by the caller is left open.

    Returns:
        A list of ``(title, sequence)`` tuples in file order. The title is
        the header line without the leading ``'>'`` and surrounding
        whitespace; the sequence is the concatenation of the stripped lines
        that follow. A header with no following sequence lines yields no
        entry.
    """
    import os
    if isinstance(path, (str, os.PathLike)):
        # Opened here, so closed here.
        with open(path, 'r') as f:
            return _parse_fasta(f)
    return _parse_fasta(path)   # File stream


def mblast(session, fasta_path, json_output):
    """BLAST every sequence in a FASTA file and save the hits as JSON.

    Parameters:
        session: session object passed on to ``BlastProtein``.
        fasta_path: FASTA file to read the query sequences from.
        json_output: path of the JSON file to write; it maps each sequence
            title to the list of hits returned by ``BlastProtein.blast``.
    """
    import json
    seqs = read_fasta_sequences(fasta_path)
    pdbs = {}
    for title, sequence in seqs:
        pdbs[title] = BlastProtein(session, sequence).blast()
    # Context manager so the output is flushed and closed deterministically.
    with open(json_output, 'w') as out:
        out.write(json.dumps(pdbs))


def register_command(session):
    """Register the ``mblast`` command, using *session*'s logger."""
    from chimerax.core.commands import CmdDesc, register, OpenFileNameArg, SaveFileNameArg
    desc = CmdDesc(required=[('fasta_path', OpenFileNameArg),
                             ('json_output', SaveFileNameArg)],
                   synopsis='BLAST PDB database with several sequences')
    register('mblast', desc, mblast, logger=session.logger)


# NOTE(review): `session` is not defined anywhere in this file; presumably it
# is supplied by the environment that executes this script -- confirm.
register_command(session)