# Alternative annotation path, currently disabled:
# TEannogff=../../23.EDTA/Aly/genome.fa.mod.EDTA.TEanno.gff3
# TE annotation in GFF3 format that is searched for intact LTR retrotransposons.
TEannogff=/pfs/proj/nobackup/fs/projnb10/hpc2nstor2024-021/shwzhao/01_research/02_T89/06_evaluate3/edta/genome.all.polished.fa.mod.EDTA.TEanno.gff3
# Genome FASTA handed to `bedtools getfasta -fi` further down.
# NOTE(review): the GFF3 file name refers to genome.all.polished.fa -- confirm
# that genome.fa in the working directory is that same assembly.
ref=genome.fa

# select_intact_ltr GFF3
#   Print the records of GFF3 whose feature type (column 3) contains
#   "LTR_retrotransposon" and whose attributes (column 9) contain
#   "Method=structural".
#   Fields are split on tabs only, so a space inside the attributes column
#   cannot truncate column 9 and hide the Method tag.
select_intact_ltr() {
  awk -F '\t' '$3 ~ /LTR_retrotransposon/ && $9 ~ /Method=structural/' "$1"
}

# Single pass over the annotation (the earlier awk | grep variant wrote the
# same output file and was immediately overwritten).
select_intact_ltr "${TEannogff}" > intact.LTR.gff3
# gff3_to_bed6 GFF3
#   Convert GFF3 records to BED6 (chrom, start, end, name, score, strand).
#   - start is shifted from 1-based (GFF3) to 0-based (BED): $4 - 1
#   - name is the first attribute of column 9 with a leading "ID=" removed
#   - score is always "."
#   - features without strand information ("?", "." or anything other than
#     "+" / "-") are reported on the "+" strand
#   Fields are split on tabs only, so spaces inside column 9 are harmless.
gff3_to_bed6() {
  awk 'BEGIN { FS = OFS = "\t" }
  {
    id = $9
    sub(/;.*/, "", id)      # keep only the first attribute
    sub(/^ID=/, "", id)     # drop the key, keep the value
    strand = ($7 == "+" || $7 == "-") ? $7 : "+"
    print $1, $4 - 1, $5, id, ".", strand
  }' "$1"
}

gff3_to_bed6 intact.LTR.gff3 > intact.LTR.bed
# Extract the sequence of every interval in intact.LTR.bed from the genome
# and classify the resulting elements with TEsorter (rexdb database).
#   bedtools getfasta -name -s : name the records after BED column 4 and
#                                take the strand column into account
#   awk -F '::'                : keep only the text before the first "::" on
#                                each line (headers are expected to look like
#                                ">ID::chrom:start-end(strand)" -- confirm for
#                                the installed bedtools version); lines without
#                                "::" pass through unchanged
#   seqtk seq -l 60            : re-wrap the sequence lines at 60 columns
bedtools getfasta \
  -fi "${ref}" \
  -bed intact.LTR.bed \
  -fo - -name -s \
  | awk -F '::' '{print $1}' \
  | seqtk seq -l 60 \
  > intact.LTR.fa

# Only classify when sequences were actually extracted; running TEsorter on
# an empty FASTA just hides the real failure further up the pipeline.
if [ -s intact.LTR.fa ]; then
  TEsorter -db rexdb intact.LTR.fa
else
  printf '%s\n' "intact.LTR.fa is empty: check ${ref}, intact.LTR.bed and the bedtools/seqtk installation" >&2
  # Leave a non-zero status without using exit, in case this file is sourced.
  false
fi