参考
https://wap.sciencenet.cn/blog-1094241-1127394.html?mobile=1
#!/usr/bin/env python3
"""Report runs of ``N`` in a FASTA file as GFF3 ``gap`` features.

Adapted from a Biostars answer: https://www.biostars.org/p/152592/

Usage (appends the features to an existing or new GFF3 file):

    FILENAME.py FILENAME.fasta >> FILENAME.gff3
"""

import argparse
import re

# Only upper-case ``N`` runs are reported; lower-case ``n`` is not matched.
GAP_PATTERN = re.compile(r"N+")


def gap_rows(seq_id, sequence, first_index=1):
    """Yield one tab-separated GFF3 line per run of ``N`` in *sequence*.

    Args:
        seq_id: Identifier written to the first GFF3 column.
        sequence: The sequence text to scan.
        first_index: Number used in the ``Name=gap<N>`` attribute of the
            first gap found; later gaps count up from it.

    Yields:
        A string with nine tab-separated columns: sequence id, ``.``,
        ``gap``, start, end, ``.``, ``.``, ``.`` and the attribute column
        ``Name=gap<N>;size=<length>``. Start and end are 1-based and
        inclusive.
    """
    for offset, match in enumerate(GAP_PATTERN.finditer(sequence)):
        attributes = "Name=gap{};size={}".format(
            first_index + offset, match.end() - match.start()
        )
        fields = (
            seq_id,
            ".",
            "gap",
            # re offsets are 0-based and end-exclusive, so only the start
            # needs shifting to obtain 1-based inclusive coordinates.
            match.start() + 1,
            match.end(),
            ".",
            ".",
            ".",
            attributes,
        )
        yield "\t".join(str(field) for field in fields)


def main(argv=None):
    """Parse the command line and print a GFF3 line for every gap found.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("fasta")
    args = parser.parse_args(argv)

    # Imported here rather than at module level so that ``gap_rows`` can be
    # imported and used without Biopython installed.
    from Bio import SeqIO

    # The counter is shared by all records, so gap names stay unique
    # across the whole file.
    gap_count = 0
    with open(args.fasta) as handle:
        for record in SeqIO.parse(handle, "fasta"):
            for row in gap_rows(record.id, str(record.seq), gap_count + 1):
                gap_count += 1
                print(row)


if __name__ == "__main__":
    main()
