数据下载
# Remember the directory the pipeline is started from; every later step
# builds its paths relative to $wkdir.
wkdir=$(pwd)
mkdir -p "$wkdir/data"
cd "$wkdir/data"

# Download the 12 SRA runs of this project.
# "--max-size u" is intended to lift prefetch's download size cap
# (u = unlimited) -- confirm against the installed sra-tools version.
prefetch \
  SRR13195578 SRR13195579 SRR13195580 SRR13195581 \
  SRR13195582 SRR13195583 SRR13195584 SRR13195585 \
  SRR13195586 SRR13195587 SRR13195588 SRR13195589 \
  --max-size u

# Convert one run to FASTQ inside its own prefetch directory. The QC step
# reads SRR13195578_1.fastq / SRR13195578_2.fastq from this directory.
cd SRR13195578/
fasterq-dump SRR13195578.sra
https://www.ncbi.nlm.nih.gov/bioproject/682443
https://www.ncbi.nlm.nih.gov/bioproject/?term=GSE162640
质控
# Quality control with fastp; all outputs land in $wkdir/1_qc.
mkdir -p "$wkdir/1_qc"
cd "$wkdir/1_qc"

# Run accession processed by this and the following steps.
sample="SRR13195578"

# -i/-I: paired-end input reads, -o/-O: filtered paired-end output,
# -j/-h: JSON and HTML reports. stdout and stderr both go to the log file.
fastp \
  -i "$wkdir/data/${sample}/${sample}_1.fastq" \
  -I "$wkdir/data/${sample}/${sample}_2.fastq" \
  -o "${sample}_1.fp.fq.gz" \
  -O "${sample}_2.fp.fq.gz" \
  -j "${sample}.json" \
  -h "${sample}.html" \
  &> "${sample}.fp.log"
比对
方法一(Bowtie2)
构建索引
# Build the Bowtie2 index under $wkdir/2_mapping_bowtie2/ref.
mkdir -p "$wkdir/2_mapping_bowtie2"
cd "$wkdir/2_mapping_bowtie2"
mkdir -p ref

# First argument: reference FASTA (TAIR10.1 assembly from the NCBI dataset
# download). Second argument: index prefix, so the index files are written
# as ref/genome.*. stdout and stderr both go to the log file.
bowtie2-build \
  "$wkdir/ncbi_dataset/data/GCF_000001735.4/GCF_000001735.4_TAIR10.1_genomic.fna" \
  ref/genome \
  &> bowtie2-build.log
比对
# Align the fastp-filtered read pairs with Bowtie2 using 4 threads.
# -x must be the index prefix created by bowtie2-build, i.e.
# $wkdir/2_mapping_bowtie2/ref/genome (the notes previously pointed at
# $wkdir/2_mapping/ref/genome, a directory no step creates).
# The SAM is written to the current directory; stdout and stderr are
# captured in separate files.
bowtie2 \
  -x "$wkdir/2_mapping_bowtie2/ref/genome" \
  -1 "$wkdir/1_qc/${sample}_1.fp.fq.gz" \
  -2 "$wkdir/1_qc/${sample}_2.fp.fq.gz" \
  -S "${sample}.sam" \
  -p 4 \
  1> "${sample}_bowtie2_align.log" \
  2> "${sample}_bowtie2_align.err"
方法二(BWA)
构建索引
# Prepare the working directory for the BWA alternative.
mkdir -p "$wkdir/2_mapping_bwa"
cd "$wkdir/2_mapping_bwa"
mkdir -p ref
# NOTE(review): no index-building command is recorded for this step.
# bwa mem needs an index created with `bwa index <reference.fa>` -- add the
# exact command once the reference location is confirmed.
比对
# Align paired-end reads with BWA-MEM; the SAM output is taken from stdout.
# -M marks shorter split hits as secondary.
# NOTE(review): reference_genome.fa and the sample1_* names look like
# template placeholders; they do not match the ${sample} naming used by the
# other steps -- substitute the real paths before running.
bwa mem \
  -M reference_genome.fa \
  sample1_R1_trimmed.fq \
  sample1_R2_trimmed.fq \
  > sample1_aligned.sam
过滤
方法一(picard)
排序
# Filter and sort the alignments; outputs land in $wkdir/3_filter_picard.
mkdir -p "$wkdir/3_filter_picard"
cd "$wkdir/3_filter_picard"

# samtools view: -S SAM input, -b BAM output, -q 20 drops reads with
# MAPQ < 20. The result is piped into samtools sort (4 extra threads).
# The input is addressed by absolute path because the SAM is written in the
# Bowtie2 mapping directory, not in the directory entered above.
# NOTE(review): adjust the path if the BWA alignment should be used instead.
# Only the messages of samtools sort are captured in the log file.
samtools view \
  -Sb \
  -q 20 \
  "$wkdir/2_mapping_bowtie2/${sample}.sam" |
  samtools sort \
    -o "${sample}_sorted.bam" \
    -@ 4 \
    &> "${sample}_sort_bam.log"
过滤
去除PCR重复
# Remove PCR duplicates with Picard MarkDuplicates.
# I: sorted input BAM, O: de-duplicated BAM, M: duplication metrics file.
# REMOVE_DUPLICATES=true drops duplicate reads instead of only flagging them.
# stdout and stderr both go to the log file.
# picard.jar is resolved relative to the current directory; on systems that
# provide Picard as a module the jar may be at $EBROOTPICARD/picard.jar.
java -jar picard.jar MarkDuplicates \
  I="${sample}_sorted.bam" \
  O="${sample}_aligned_dedup.bam" \
  M="${sample}_duplicate_metrics.txt" \
  REMOVE_DUPLICATES=true \
  &> "${sample}_picard.log"
方法二(deepTools)
# Prepare the working directory for the deepTools-based filtering alternative.
mkdir -p "$wkdir/3_filter_deeptools"
cd "$wkdir/3_filter_deeptools"
