质控

fastp: https://github.com/OpenGene/fastp

  # Build run.qc.sh: one fastp command line per sample in Sample.Group.txt.
  # The loop only runs if cd succeeds, so run.qc.sh is never written into the
  # wrong directory.
  cd "$workspace/1_qc" &&
    while read -r sample _; do
      # NOTE(review): takes the first whitespace-separated field of each line
      # as the sample name and ignores the rest -- confirm this matches the
      # layout of Sample.Group.txt.
      [ -n "$sample" ] || continue # skip blank lines
      # %q shell-quotes each argument, so the generated command stays valid
      # even if $workspace or a sample name contains spaces or glob characters.
      # Plain paths are emitted unchanged.
      printf 'fastp -i %q -I %q -o %q -O %q -h %q -j %q -w 16\n' \
        "$workspace/0_raw_data/${sample}.1.fq.gz" \
        "$workspace/0_raw_data/${sample}.2.fq.gz" \
        "${sample}.1.qc.fq.gz" \
        "${sample}.2.qc.fq.gz" \
        "${sample}.html" \
        "${sample}.json"
    done < "$workspace/0_raw_data/Sample.Group.txt" > run.qc.sh
  # Inspect the generated commands before running them.
  cat run.qc.sh
  # Example output (one fastp command per sample):
  ## fastp -i /home/shwzhao/test/0_raw_data/SY0_1.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_1.2.fq.gz -o SY0_1.1.qc.fq.gz -O SY0_1.2.qc.fq.gz -h SY0_1.html -j SY0_1.json -w 16
  ## fastp -i /home/shwzhao/test/0_raw_data/SY0_2.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_2.2.fq.gz -o SY0_2.1.qc.fq.gz -O SY0_2.2.qc.fq.gz -h SY0_2.html -j SY0_2.json -w 16
  ## fastp -i /home/shwzhao/test/0_raw_data/SY0_3.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_3.2.fq.gz -o SY0_3.1.qc.fq.gz -O SY0_3.2.qc.fq.gz -h SY0_3.html -j SY0_3.json -w 16
  ## fastp -i /home/shwzhao/test/0_raw_data/SY6_1.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY6_1.2.fq.gz -o SY6_1.1.qc.fq.gz -O SY6_1.2.qc.fq.gz -h SY6_1.html -j SY6_1.json -w 16
  ## fastp -i /home/shwzhao/test/0_raw_data/SY6_2.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY6_2.2.fq.gz -o SY6_2.1.qc.fq.gz -O SY6_2.2.qc.fq.gz -h SY6_2.html -j SY6_2.json -w 16
  ## ......
  # Run the commands in run.qc.sh; stdout and stderr both go to the log file.
  bash run.qc.sh > run.qc.sh.log 2>&1

比对

定量

http://subread.sourceforge.net/

  # Run featureCounts once per sample, writing <sample>.counts.txt into
  # $workspace/3_counts. genome.gtf is resolved relative to that directory.
  # The loop only runs if cd succeeds.
  #
  # Options: -T 10 threads, -p paired-end input, -t exon feature type,
  # -g gene_id grouping attribute, -a annotation file, -o output table.
  # NOTE(review): since Subread 2.0.2, -p only declares the input as
  # paired-end; counting fragments instead of reads additionally needs
  # --countReadPairs. Confirm against the installed version before adding it,
  # because older releases reject that option.
  cd "$workspace/3_counts" &&
    # The sample list is read on fd 3 so featureCounts cannot consume it
    # through stdin.
    while read -r sample _ <&3; do
      # NOTE(review): first whitespace-separated field of each line is taken
      # as the sample name -- confirm against Sample.Group.txt.
      [ -n "$sample" ] || continue # skip blank lines
      featureCounts \
        -T 10 \
        -p \
        -t exon \
        -g gene_id \
        -a genome.gtf \
        -o "${sample}.counts.txt" \
        "$workspace/2_mapping/${sample}.sort.bam"
    done 3< "$workspace/0_raw_data/Sample.Group.txt"

2. counts 矩阵

  # Merge all per-sample featureCounts tables in the current directory into
  # one matrix, Counts.Length.tsv:
  #   Geneid <TAB> Length <TAB> count(file 1) <TAB> count(file 2) ...
  # Count columns follow the order in which the shell expands *.counts.txt.
  # No header line is written. Rows keep the gene order of first appearance.
  # Input columns used: 1 = Geneid, 6 = Length, 7 = count.
  awk -F'\t' -v OFS='\t' '
    # Skip the leading "#" comment line and the header row of each table.
    /^#/ || $1 == "Geneid" { next }
    # First time a gene is seen: remember its position and start its row
    # with the gene id and length.
    !($1 in row) { order[++n] = $1; row[$1] = $1 OFS $6 }
    # Append the count of this file to the row of the gene.
    { row[$1] = row[$1] OFS $7 }
    END { for (k = 1; k <= n; k++) print row[order[k]] }
  ' *.counts.txt > Counts.Length.tsv

参考