| 1 | #!/usr/bin/env python3
|
|---|
| 2 | import os
|
|---|
| 3 | import shutil
|
|---|
| 4 | import argparse
|
|---|
| 5 |
|
|---|
def is_well_formatted(pdb_path):
    """Check that a PDB file has no repeated atom serial numbers.

    Every ATOM/HETATM record is scanned and its serial number is parsed
    from columns 7-11 (``line[6:11]``).

    Args:
        pdb_path: Path of the PDB file to inspect.

    Returns:
        True if every ATOM/HETATM serial parses as an integer and no value
        occurs twice; False as soon as a serial is unparsable or repeated.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # NOTE(review): serials are tracked across the whole file, so a
    # multi-MODEL file that restarts numbering in each model is reported
    # as malformed -- confirm this is intended.
    seen = set()
    # Decode as ASCII with replacement so a stray non-ASCII byte (e.g. in a
    # REMARK line) cannot abort the scan with UnicodeDecodeError. A replaced
    # character inside the serial field still fails int() below, so such a
    # file is rejected rather than crashing the caller.
    with open(pdb_path, 'r', encoding='ascii', errors='replace') as f:
        for line in f:
            if not line.startswith(('ATOM ', 'HETATM')):
                continue
            try:
                serial = int(line[6:11])
            except ValueError:
                # Truncated record or non-integer serial field: reject.
                return False
            if serial in seen:
                return False
            seen.add(serial)
    return True
| 24 |
|
|---|
def main():
    """Move PDB files that fail the serial-number check into ``bad/``.

    Parses one positional command-line argument, ``pdb_dir``. Every regular
    file directly inside it whose name ends in ``.pdb`` (case-insensitive)
    is checked with ``is_well_formatted``. Files that fail are moved to
    ``<pdb_dir>/bad``; one status line is printed per file.

    Exits through ``parser.error`` if ``pdb_dir`` is not an existing
    directory.
    """
    parser = argparse.ArgumentParser(
        description="Keep only well-formed PDBs (no duplicate atom serials)."
    )
    parser.add_argument('pdb_dir',
                        help="Directory containing .pdb files to filter")
    args = parser.parse_args()
    pdb_dir = os.path.abspath(args.pdb_dir)

    # Validate before makedirs(): otherwise a mistyped path would be created
    # on disk (together with its bad/ sub-directory) and the run would
    # report success without having looked at any file.
    if not os.path.isdir(pdb_dir):
        parser.error(f"not a directory: {pdb_dir}")

    bad_dir = os.path.join(pdb_dir, 'bad')
    os.makedirs(bad_dir, exist_ok=True)

    # Sorted so the per-file report is in a deterministic order.
    for fn in sorted(os.listdir(pdb_dir)):
        if not fn.lower().endswith('.pdb'):
            continue
        full = os.path.join(pdb_dir, fn)
        # A directory (or other non-file) that merely has a .pdb name cannot
        # be opened as a PDB; skip it instead of crashing the whole run.
        if not os.path.isfile(full):
            continue
        # NOTE(review): the leading marker character in the two messages
        # below looks mis-encoded -- confirm the intended symbol.
        if not is_well_formatted(full):
            print(f"â malformed: {fn}")
            shutil.move(full, os.path.join(bad_dir, fn))
        else:
            print(f"â OK: {fn}")
| 46 |
|
|---|
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 49 |
|
|---|