质控
fastp: https://github.com/OpenGene/fastp
# Print one paired-end fastp command line per sample.
# Arguments: $1 - sample list file; the first whitespace-separated field of
#                 each non-empty line is taken as the sample name
#                 (NOTE(review): assumes one sample per line - confirm against
#                 the real Sample.Group.txt)
#            $2 - directory holding <sample>.1.fq.gz / <sample>.2.fq.gz
# Outputs:   one fastp command per sample on stdout; paths are shell-quoted
#            with %q so the generated script survives spaces in directory names
#            (plain paths are emitted unchanged).
gen_fastp_cmds() {
  local list=$1 raw_dir=$2 i
  # "|| [[ -n $i ]]" keeps the last sample when the list lacks a trailing newline
  while read -r i _ || [[ -n "$i" ]]; do
    [[ -n "$i" ]] || continue
    printf 'fastp -i %q -I %q -o %q -O %q -h %q -j %q -w 16\n' \
      "${raw_dir}/${i}.1.fq.gz" "${raw_dir}/${i}.2.fq.gz" \
      "${i}.1.qc.fq.gz" "${i}.2.qc.fq.gz" "${i}.html" "${i}.json"
  done < "$list"
}

# Only write and show run.qc.sh if the QC directory could actually be entered;
# otherwise the script would be created in whatever directory we happen to be in.
cd "$workspace/1_qc" \
  && gen_fastp_cmds "$workspace/0_raw_data/Sample.Group.txt" "$workspace/0_raw_data" > run.qc.sh \
  && cat run.qc.sh
## fastp -i /home/shwzhao/test/0_raw_data/SY0_1.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_1.2.fq.gz -o SY0_1.1.qc.fq.gz -O SY0_1.2.qc.fq.gz -h SY0_1.html -j SY0_1.json -w 16
## fastp -i /home/shwzhao/test/0_raw_data/SY0_2.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_2.2.fq.gz -o SY0_2.1.qc.fq.gz -O SY0_2.2.qc.fq.gz -h SY0_2.html -j SY0_2.json -w 16
## fastp -i /home/shwzhao/test/0_raw_data/SY0_3.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY0_3.2.fq.gz -o SY0_3.1.qc.fq.gz -O SY0_3.2.qc.fq.gz -h SY0_3.html -j SY0_3.json -w 16
## fastp -i /home/shwzhao/test/0_raw_data/SY6_1.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY6_1.2.fq.gz -o SY6_1.1.qc.fq.gz -O SY6_1.2.qc.fq.gz -h SY6_1.html -j SY6_1.json -w 16
## fastp -i /home/shwzhao/test/0_raw_data/SY6_2.1.fq.gz -I /home/shwzhao/test/0_raw_data/SY6_2.2.fq.gz -o SY6_2.1.qc.fq.gz -O SY6_2.2.qc.fq.gz -h SY6_2.html -j SY6_2.json -w 16
## ......
# Execute the generated fastp commands one after another, capturing both
# stdout and stderr in run.qc.sh.log
bash run.qc.sh > run.qc.sh.log 2>&1
比对
定量
featureCounts (Subread): http://subread.sourceforge.net/
# Run featureCounts once per sample, writing <sample>.counts.txt into 3_counts.
# Options: -T 10 threads, -p paired-end input, -t exon feature type,
#          -g gene_id as the attribute used to group features into genes.
# NOTE(review): genome.gtf is resolved relative to 3_counts after the cd -
#   confirm the annotation really lives there.
# NOTE(review): on Subread >= 2.0.2, -p only declares paired-end input and
#   --countReadPairs is needed to count fragments instead of reads - confirm
#   against the installed version before relying on these counts.
# The loop is skipped entirely if 3_counts cannot be entered, so outputs never
# land in the wrong directory. The sample list is read on fd 3 so featureCounts
# cannot swallow it via stdin; only the first whitespace-separated field of
# each non-empty line is used as the sample name.
cd "$workspace/3_counts" &&
  while read -r -u 3 i _ || [[ -n "$i" ]]; do
    [[ -n "$i" ]] || continue
    featureCounts \
      -T 10 \
      -p \
      -t exon \
      -g gene_id \
      -a genome.gtf \
      -o "${i}.counts.txt" \
      "$workspace/2_mapping/${i}.sort.bam"
  done 3< "$workspace/0_raw_data/Sample.Group.txt"
2. counts 矩阵
# Merge per-sample featureCounts tables into one matrix.
# Arguments: $@ - featureCounts output files (tab-delimited; column 1 = Geneid,
#                 column 6 = Length, column 7 = count)
# Outputs:   on stdout, one headerless row per gene:
#              Geneid <TAB> Length <TAB> count(file 1) <TAB> count(file 2) ...
#            Count columns follow the order of the file arguments; rows follow
#            the order in which genes are first seen.
build_counts_matrix() {
  awk -F'\t' '
    # Skip the leading "# Program..." comment line and the column-header line.
    # Anchored tests, so a gene ID that merely contains "#" is not dropped.
    /^#/ || $1 == "Geneid" { next }
    # First sighting of a gene: remember its position and seed the row
    !($1 in row) { order[++n] = $1; row[$1] = $1 "\t" $6 }
    # Every file contributes one count column
    { row[$1] = row[$1] "\t" $7 }
    END { for (k = 1; k <= n; k++) print row[order[k]] }
  ' "$@"
}

build_counts_matrix *.counts.txt > Counts.Length.tsv