质控

# Quality control of the paired-end reads with fastp.
# Input : ${sample}_1.fastq / ${sample}_2.fastq
# Output: filtered read pairs (*.fp.R1.fastq / *.fp.R2.fastq) plus an HTML
#         and a JSON report, all named after the sample accession.
sample=SRR16841689
fastp \
  --in1 "${sample}_1.fastq" \
  --in2 "${sample}_2.fastq" \
  --out1 "${sample}.fp.R1.fastq" \
  --out2 "${sample}.fp.R2.fastq" \
  --html "${sample}.html" \
  --json "${sample}.json"

比对

HISAT2
链特异性
samtools
将 SAM 转换为 BAM（通过管道交给 samtools sort，直接输出排序后的 BAM，不保留中间 SAM 文件）

# Alignment: build a HISAT2 index for the reference genome, align the
# filtered read pairs, and write a sorted, indexed BAM.
ref=genome.fa
gtf=genome.gtf

# Prefix for every alignment output. The commands below name their files
# with ${i}, which was never assigned; bind it to the sample so the BAM and
# logs are not written as hidden, sample-less ".hisat2.*" files.
i=${sample}

# Build the index for the reference genome (index basename: "genome").
# Options are placed before the positional arguments, as in the documented
# usage: hisat2-build [options] <reference_in> <ht2_base>.
hisat2-build \
  -p 20 \
  "${ref}" \
  genome \
  &> hisat2-build.log

# Align with the library declared as strand-specific (RF) and stream the
# SAM output straight into samtools sort, so no intermediate SAM file is
# kept on disk.
# NOTE: the 1>/2> redirections at the end belong to samtools sort (the last
# pipeline stage); hisat2's own stderr is not captured by them. The
# alignment statistics go to the --summary-file.
hisat2 \
  -x genome \
  -1 "${sample}.fp.R1.fastq" \
  -2 "${sample}.fp.R2.fastq" \
  -p 20 \
  --new-summary \
  --summary-file "${sample}.hisat2.summary" \
  --rna-strandness RF \
  | samtools sort \
      -o "${i}.hisat2.sorted.bam" \
      - \
      1> "${i}.hisat2.log" \
      2> "${i}.hisat2.err"

# Index the sorted BAM.
samtools index "${i}.hisat2.sorted.bam"

StringTie 转录本重构

StringTie


# Transcript reconstruction: assemble transcripts for the sample, guided by
# the reference annotation, with the library declared as strand-specific (--rf).
# NOTE(review): `i` is not assigned in this block; it must hold the same
# prefix the alignment step used when it wrote the sorted BAM.
stringtie \
  "${i}.hisat2.sorted.bam" \
  --rf \
  -G "${gtf}" \
  -o "${sample}.gtf" \
  -p 10 \
  &> "${sample}.stringtie.log"

# Merge the per-sample assemblies into one non-redundant annotation.
# A bare "*gtf" glob would also pick up the reference annotation itself and
# the merged/filtered GTFs left over from a previous run, so the input list
# is built explicitly and those files are skipped.
assemblies=()
for assembly in *.gtf; do
  [[ -e "${assembly}" ]] || continue   # glob matched nothing
  case "${assembly}" in
    "${gtf}" | merged.gtf | candidate_lncRNA.gtf) continue ;;
  esac
  assemblies+=("${assembly}")
done
if (( ${#assemblies[@]} == 0 )); then
  printf 'no per-sample StringTie assemblies (*.gtf) found to merge\n' >&2
else
  stringtie --merge -o merged.gtf -G "${gtf}" "${assemblies[@]}"
fi

# Filter the merged transcripts against the reference annotation:
# minimum size 200 (-s), monoexonic filter set to -1 (--monoex),
# minimal overlap fraction 0 (-f), 40 processes (-p).
# The filtered GTF is written to stdout, messages to stderr.
FEELnc_filter.pl \
  -i merged.gtf \
  -a "${gtf}" \
  --monoex=-1 \
  -s 200 \
  -f 0 \
  -p 40 \
  1> candidate_lncRNA.gtf \
  2> FEELnc_filter.log

# Extract the spliced transcript sequences of the candidates.
# The input must be the candidate GTF (not the reference annotation), and the
# genome FASTA is ${ref}; ${genome} was never defined.
gffread \
  -g "${ref}" \
  -w candidate_lncRNA.fa \
  candidate_lncRNA.gtf
提取 ID

过滤

根据长度
过滤掉mRNA

编码能力预测

# Coding-potential prediction with FEELnc.
#   -i  candidate transcripts to classify
#   -a  reference annotation used as the mRNA training set
#   -g  genome FASTA; needed because the inputs are GTF, not FASTA
#   -l  known lncRNA set for training, used only when ${known_lncRNA} is set;
#       otherwise FEELnc simulates the non-coding training set
#       (--mode=shuffle)
# NOTE(review): depending on the annotation source, a biotype filter
# (e.g. -b transcript_biotype=protein_coding) may be needed so that only
# protein-coding transcripts of ${gtf} are used as mRNAs — confirm.
codpot_args=(
  -i candidate_lncRNA.gtf
  -a "${gtf}"
  -g "${ref}"
)
if [[ -n "${known_lncRNA:-}" ]]; then
  codpot_args+=(-l "${known_lncRNA}")
else
  codpot_args+=(--mode=shuffle)
fi
FEELnc_codpot.pl "${codpot_args[@]}"

# Coding-potential prediction with CPC2.
# CPC2 takes nucleotide sequences in FASTA format, so it is given the
# extracted candidate sequences rather than the GTF.
CPC2.py \
  -i candidate_lncRNA.fa \
  -o cpc2output

组学分析

lncRNA | 鉴定

质控

比对

StringTie 转录本重构

过滤

编码能力预测

参考