# Input paths for the intact-LTR extraction steps that follow.
# Alternative annotation from another run, kept for reference:
# TEannogff=../../23.EDTA/Aly/genome.fa.mod.EDTA.TEanno.gff3
# TE annotation in GFF3 format (file name indicates EDTA "TEanno" output).
TEannogff="/pfs/proj/nobackup/fs/projnb10/hpc2nstor2024-021/shwzhao/01_research/02_T89/06_evaluate3/edta/genome.all.polished.fa.mod.EDTA.TEanno.gff3"
# Reference FASTA the annotation coordinates are applied to.
# NOTE(review): the annotation file name refers to "genome.all.polished.fa.mod";
# confirm that genome.fa carries the same sequence names and coordinates.
ref="genome.fa"
    # Keep only the structurally intact LTR retrotransposons of a TE annotation.
    #   $1 - input GFF3 annotation
    #   $2 - output GFF3 (overwritten)
    # A record is kept when its type column (3) contains "LTR_retrotransposon"
    # and its attribute column (9) contains "Method=structural".
    # Returns 1, leaving the output untouched, when the input is not readable.
    extract_intact_ltr() {
      if [ ! -r "$1" ]; then
        printf 'extract_intact_ltr: cannot read GFF3 file: %s\n' "$1" >&2
        return 1
      fi
      # GFF3 is tab-delimited: splitting on tabs keeps column 9 whole even when
      # an attribute value contains spaces. One awk pass replaces the former
      # awk|grep pipeline and its duplicate that wrote the same file twice.
      awk -F'\t' '$3 ~ /LTR_retrotransposon/ && $9 ~ /Method=structural/' \
        "$1" > "$2"
    }
    extract_intact_ltr "${TEannogff}" intact.LTR.gff3
    # Convert GFF3 records into 6-column BED (chrom, start, end, name, score, strand).
    #   $1 - input GFF3
    #   $2 - output BED (overwritten)
    # Records without strand information are written with "+".
    # Returns 1, leaving the output untouched, when the input is not readable.
    ltr_gff_to_bed() {
      if [ ! -r "$1" ]; then
        printf 'ltr_gff_to_bed: cannot read GFF3 file: %s\n' "$1" >&2
        return 1
      fi
      awk 'BEGIN { FS = OFS = "\t" }
        # Skip comment/directive lines and anything that is not a full record.
        /^#/ || NF < 9 { next }
        {
          # Name = first attribute of column 9 with its "ID=" key removed.
          name = $9
          sub(/;.*/, "", name)
          sub(/^ID=/, "", name)
          # Anything other than an explicit +/- ("?", ".") means no strand
          # information; fall back to "+".
          strand = ($7 == "+" || $7 == "-") ? $7 : "+"
          # GFF3 starts are 1-based inclusive, BED starts are 0-based.
          print $1, $4 - 1, $5, name, ".", strand
        }' "$1" > "$2"
    }
    ltr_gff_to_bed intact.LTR.gff3 intact.LTR.bed
    # Extract the sequence of every BED interval from the reference FASTA.
    #   $1 - reference FASTA
    #   $2 - BED intervals (name in column 4, strand in column 6)
    #   $3 - output FASTA (overwritten)
    # Returns 1, leaving the output untouched, when an input is not readable.
    extract_ltr_fasta() {
      if [ ! -r "$1" ] || [ ! -r "$2" ]; then
        printf 'extract_ltr_fasta: cannot read reference (%s) or BED (%s)\n' \
          "$1" "$2" >&2
        return 1
      fi
      # -name labels each record with the BED name column, -s makes the
      # extraction strand-aware. The awk step trims everything from the first
      # "::" in header lines (the coordinate suffix bedtools may append to the
      # name), and seqtk re-wraps sequence lines at 60 characters.
      bedtools getfasta -fi "$1" -bed "$2" -fo - -name -s \
        | awk '/^>/ { sub(/::.*/, "") } { print }' \
        | seqtk seq -l 60 \
        > "$3"
    }
    extract_ltr_fasta "${ref}" intact.LTR.bed intact.LTR.fa
    1. TEsorter -db rexdb intact.LTR.fa