# Rewrite a fasta file with shortened header lines so the header line text can be used
# in file names for MSAs and structure predictions.
#
# Example file line
#
# >lcl|L43967.2_prot_AAC71217.2_1 [gene=dnaN] [locus_tag=MG_001] [protein=DNA polymerase III, beta subunit] [protein_id=AAC71217.2] [location=686..1828] [gbkey=CDS]
#
# change to
#
# >MG_001_dnaN
#
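# by taking the values of the fields named on the command line (here locus_tag and gene)
# and joining them with underscores in the order given.  A field that is missing from a
# header, such as the gene name, is skipped.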
#
def shorten_fasta_headers(fasta_input, fasta_output, field_names):
    """Copy a FASTA stream, replacing each header line with a short name.

    Lines are read from ``fasta_input`` one at a time with ``readline()``
    until it returns an empty value.  A line starting with ``>`` is replaced
    by ``>`` followed by the values returned by ``extract_fields`` for
    ``field_names``, joined with underscores and terminated by a newline.
    Every other line is written to ``fasta_output`` unchanged.
    """
    while True:
        record = fasta_input.readline()
        if not record:
            # readline() yields an empty value only at end of input.
            break
        if not record.startswith('>'):
            # Sequence data passes through untouched.
            fasta_output.write(record)
            continue
        name_parts = extract_fields(record, field_names)
        label = '_'.join(name_parts)
        fasta_output.write(f'>{label}\n')

def extract_fields(text, field_names):
    """Return the values of the named ``[name=value]`` fields found in ``text``.

    Values are returned in the order of ``field_names``; a name with no
    matching field in ``text`` is skipped, so the result may be shorter than
    ``field_names``.  Only the first occurrence of each field is used.

    The search is for the full ``[name=`` marker rather than the bare name, so
    a name that merely appears inside another field's name or value (for
    example ``gene`` inside ``[protein=gene product]``, or ``tag`` inside
    ``[locus_tag=...]``) is not mistaken for the field itself.
    """
    fields = []
    for name in field_names:
        marker = f'[{name}='
        start = text.find(marker)
        if start >= 0:
            value_start = start + len(marker)
            # The value runs up to the closing bracket of the field (or to the
            # end of the text if the bracket is missing).
            fields.append(text[value_start:].split(']', maxsplit=1)[0])
    return fields

from sys import argv, stdin, stdout

# Run only when executed as a script, so that importing this module (e.g. to
# reuse extract_fields) does not start reading from standard input.
# The command-line arguments are the field names to keep, in output order.
if __name__ == '__main__':
    shorten_fasta_headers(stdin, stdout, argv[1:])

