#!/bin/bash
analysis_dir=$1
config_file=$2
number1=$3
number2=$4
conda activate chip
mkdir -p {raw,clean,align,peaks,motif,qc,log}
index=/home/yb77613/reference/index/bowtie/hg38
# for config.txt
# sampleName and fastq files(for single end)
# input /home/vip28/data/public/wen_chipseq/raw/SRR3356385.fastq.gz
# MYCN /home/vip28/data/public/wen_chipseq/raw/SRR3356386.fastq.gz
# TWIST /home/vip28/data/public/wen_chipseq/raw/SRR3356388.fastq.gz
cat $config_file |while read id
do
echo $id
arr=($id)
fq=${arr[1]}
sample=${arr[0]}
if((i%$number1==$number2))
then
## step1: basic QC for fastq files
if [ ! -f ok.fastp.$sample.status ]; then
fastp -i $fq -o clean/${sample}.fq.gz \
--thread=4 --length_required=36 --n_base_limit=6 \
--compression=6 -R ${sample} \
1>log/log.fastp.${sample}.txt 2>&1
fi ## end for if files
if [ $? -eq 0 ]; then
touch ok.fastp.$sample.status
else
echo "fastp failed" $sample
fi
fastqc $fq -O qc
fastqc clean/${sample}.fq.gz -O qc
## step2: alignment
if [ ! -f ok.bowtie2.$sample.status ]; then
bowtie2 -p 4 -x $index -U clean/${sample}.fq.gz | \
samtools sort -O bam -@ 4 -o - > align/${sample}.bam
fi ## end for if files
if [ $? -eq 0 ]; then
touch ok.bowtie2.$sample.status
else
echo "bowtie2 failed" $sample
fi
## step3: QC for bam files
if [ ! -f ok.sambamba.$sample.status ]; then
sambamba markdup -r align/${sample}.bam align/${sample}_rmd.bam
fi ## end for if files
if [ $? -eq 0 ]; then
touch ok.sambamba.$sample.status
else
echo "sambamba failed" $sample
fi
samtools flagstat align/${sample}.bam > qc/${sample}.flagstat
samtools flagstat align/${sample}_rmd.bam > qc/${sample}_rmd.flagstat
## step4: call peaks
if [ ! -f ok.macs2.$sample.status ]; then
macs2 callpeak -t align/${sample}_rmd.bam -f BAM -g hs -n ${sample}_rmd -B -q 0.01 --outdir peaks
macs2 callpeak -t align/${sample}.bam -f BAM -g hs -n ${sample}_raw -B -q 0.01 --outdir peaks
fi ## end for if files
if [ $? -eq 0 ]; then
touch ok.macs2.$sample.status
else
echo "macs2 failed" $sample
fi
fi # check the number1 number2
i=$((i+1))
done