参考

https://wap.sciencenet.cn/blog-1094241-1127394.html?mobile=1

  #!/usr/bin/env python3
  """Report runs of N in a FASTA file as tab-separated GFF3 feature lines.

  This code was changed based on biostar "https://www.biostars.org/p/152592/".

  Every maximal run of N (upper or lower case) in every record is printed to
  standard output as one feature of type "gap" with 1-based, inclusive
  coordinates. Gaps are numbered consecutively across the whole file.

  Use the following at CMD:
      FILENAME.py FILENAME.fasta >> FILENAME.gff3
  """
  # Import necessary packages
  import argparse
  import re

  # Compiled once and reused for every record. Case-insensitive so that gaps
  # written as lower-case "n" are reported as well as upper-case "N".
  GAP_PATTERN = re.compile(r"N+", re.IGNORECASE)


  def find_gaps(sequence):
      """Return the (start, end) coordinates of every run of N in *sequence*.

      Coordinates are 1-based and inclusive, as GFF3 expects. An empty list is
      returned when the sequence contains no N at all.
      """
      # match.start() is 0-based; match.end() is exclusive, which is numerically
      # the same as the 1-based inclusive end position.
      return [
          (match.start() + 1, match.end())
          for match in GAP_PATTERN.finditer(sequence)
      ]


  def format_gap(seq_id, start, end, index):
      """Return one GFF3 line (without trailing newline) describing a gap.

      *start* and *end* are 1-based inclusive coordinates; *index* is the
      running gap number used to build the ``Name`` attribute.
      """
      size = end - start + 1
      columns = (
          seq_id,  # seqid
          ".",  # source
          "gap",  # type
          start,
          end,
          ".",  # score
          ".",  # strand
          ".",  # phase
          "Name=gap" + str(index) + ";size=" + str(size),
      )
      return "\t".join(str(column) for column in columns)


  def main():
      """Parse the command line and print one GFF3 line per gap found."""
      # Imported here so the helpers above stay importable without Biopython.
      from Bio import SeqIO

      # Parse command-line arguments
      parser = argparse.ArgumentParser()
      parser.add_argument("fasta", help="path of the FASTA file to scan")
      args = parser.parse_args()

      # Open FASTA, search for masked regions, print in GFF3 format
      with open(args.fasta) as handle:
          gap_number = 0
          for record in SeqIO.parse(handle, "fasta"):
              for start, end in find_gaps(str(record.seq)):
                  gap_number += 1
                  print(format_gap(record.id, start, end, gap_number))


  if __name__ == "__main__":
      main()