# Data download
# Remember the directory the pipeline was started from; every later step
# builds its paths on top of it.
wkdir=$(pwd)
mkdir -p "$wkdir/data"
cd "$wkdir/data" || exit 1
# Fetch all twelve runs; "--max-size u" asks prefetch not to apply its
# download size cap (see `prefetch --help`).
prefetch \
  SRR13195578 SRR13195579 SRR13195580 SRR13195581 \
  SRR13195582 SRR13195583 SRR13195584 SRR13195585 \
  SRR13195586 SRR13195587 SRR13195588 SRR13195589 \
  --max-size u
# Only SRR13195578 is converted to FASTQ here; the other runs are downloaded
# but left as .sra files.
cd SRR13195578/ || exit 1
fasterq-dump SRR13195578.sra
# Data source:
#   https://www.ncbi.nlm.nih.gov/bioproject/682443
#   https://www.ncbi.nlm.nih.gov/bioproject/?term=GSE162640
# Quality control (fastp)
# Expects $wkdir to be set to the pipeline root directory.
mkdir -p "$wkdir/1_qc"
# Abort if the directory is unavailable so the outputs below are never
# written somewhere unexpected.
cd "$wkdir/1_qc" || exit 1
sample="SRR13195578"
# -i/-I: paired-end inputs, -o/-O: filtered paired-end outputs,
# -j/-h: JSON and HTML reports. All console output goes to the log file.
fastp \
  -i "$wkdir/data/${sample}/${sample}_1.fastq" \
  -I "$wkdir/data/${sample}/${sample}_2.fastq" \
  -o "${sample}_1.fp.fq.gz" \
  -O "${sample}_2.fp.fq.gz" \
  -j "${sample}.json" \
  -h "${sample}.html" \
  &> "${sample}.fp.log"
# Alignment
# Method 1 (Bowtie2)
# Build the index
# Expects $wkdir to be set to the pipeline root directory.
mkdir -p "$wkdir/2_mapping_bowtie2"
cd "$wkdir/2_mapping_bowtie2" || exit 1
mkdir -p ref
# The index files are written with the basename ref/genome, relative to
# 2_mapping_bowtie2; all console output goes to bowtie2-build.log.
bowtie2-build \
  "$wkdir/ncbi_dataset/data/GCF_000001735.4/GCF_000001735.4_TAIR10.1_genomic.fna" \
  ref/genome \
  &> bowtie2-build.log
# Align
# Expects $wkdir and $sample to be set, and the current directory to be
# $wkdir/2_mapping_bowtie2 (the SAM and log files are written relative to it).
# The index basename must match where bowtie2-build wrote it:
# 2_mapping_bowtie2/ref/genome (the previous path, 2_mapping/ref/genome,
# is never created by this pipeline).
bowtie2 \
  -x "$wkdir/2_mapping_bowtie2/ref/genome" \
  -1 "$wkdir/1_qc/${sample}_1.fp.fq.gz" \
  -2 "$wkdir/1_qc/${sample}_2.fp.fq.gz" \
  -S "${sample}.sam" \
  -p 4 \
  1> "${sample}_bowtie2_align.log" \
  2> "${sample}_bowtie2_align.err"
# Method 2 (BWA)
# Build the index
# Expects $wkdir and $sample to be set.
mkdir -p "$wkdir/2_mapping_bwa"
cd "$wkdir/2_mapping_bwa" || exit 1
mkdir -p ref
# Index the same reference FASTA used for the Bowtie2 method; -p sets the
# prefix of the generated index files (ref/genome.*).
bwa index \
  -p ref/genome \
  "$wkdir/ncbi_dataset/data/GCF_000001735.4/GCF_000001735.4_TAIR10.1_genomic.fna" \
  &> bwa-index.log
# Align
# Inputs are the fastp-filtered read pairs from 1_qc. -M marks shorter split
# hits as secondary, which Picard expects. The SAM is written to stdout and
# captured as ${sample}.sam, mirroring the Bowtie2 method's output name.
bwa mem \
  -M \
  ref/genome \
  "$wkdir/1_qc/${sample}_1.fp.fq.gz" \
  "$wkdir/1_qc/${sample}_2.fp.fq.gz" \
  > "${sample}.sam"
# Filtering
# Method 1 (picard)
# Sort
# Expects $wkdir and $sample to be set.
mkdir -p "$wkdir/3_filter_picard"
cd "$wkdir/3_filter_picard" || exit 1
# The SAM file lives in the mapping directory, not in 3_filter_picard.
# Defaults to the Bowtie2 output; set mapping_dir="$wkdir/2_mapping_bwa"
# beforehand to filter the BWA alignment instead.
mapping_dir="${mapping_dir:-$wkdir/2_mapping_bowtie2}"
# Keep alignments with MAPQ >= 20, emit BAM, then coordinate-sort with
# 4 extra threads. samtools auto-detects SAM input, so the legacy -S flag is
# not needed. The log captures the output of the sort stage only.
samtools view \
  -b \
  -q 20 \
  "${mapping_dir}/${sample}.sam" \
  | samtools sort \
    -@ 4 \
    -o "${sample}_sorted.bam" \
    - \
    &> "${sample}_sort_bam.log"
# Filter
# Remove PCR duplicates
# Expects $sample to be set and ${sample}_sorted.bam to exist in the current
# directory.
# Locate picard.jar: an explicit $PICARD_JAR wins; otherwise use
# $EBROOTPICARD/picard.jar when that variable is set (e.g. by a loaded picard
# module); otherwise fall back to picard.jar in the current directory.
picard_jar="${PICARD_JAR:-${EBROOTPICARD:+$EBROOTPICARD/}picard.jar}"
# REMOVE_DUPLICATES=true drops duplicate reads from the output instead of
# only flagging them; duplication statistics go to the metrics file.
java -jar "$picard_jar" MarkDuplicates \
  I="${sample}_sorted.bam" \
  O="${sample}_aligned_dedup.bam" \
  M="${sample}_duplicate_metrics.txt" \
  REMOVE_DUPLICATES=true \
  &> "${sample}_picard.log"
# Method 2 (deepTools)
# Expects $wkdir to be set to the pipeline root directory.
mkdir -p "$wkdir/3_filter_deeptools"
cd "$wkdir/3_filter_deeptools" || exit 1