# Quality control: run fastp on the paired-end raw reads of sample SP1.
#   -i / -I   raw R1 / R2 input
#   -o / -O   cleaned R1 / R2 output (used by the later alignment step)
#   -j / -h   JSON and HTML report files
# fastp's stderr is captured in SP1.fastp.log.
# Every option line except the last must end with a backslash; otherwise the
# command is cut short and the remaining lines run as a separate command.
fastp --thread 4 \
  -i ../data/SP1_1.fastq.gz \
  -I ../data/SP1_2.fastq.gz \
  -o SP1_1.fq.gz \
  -O SP1_2.fq.gz \
  -j SP1.fastp.json \
  -h SP1.fastp.html \
  2> SP1.fastp.log
# Assembly
# Build the bowtie2 index of the reference genome (genome.fa). The index is
# written with the basename "genome", which the alignment step passes to -x.
bowtie2-build genome.fa genome
# Align the clean reads of SP1 to the reference genome and keep only the reads
# that did not map.
#   bowtie2        no -S is given, so the SAM stream goes to stdout; its stderr
#                  (alignment summary) is saved in SP1.bowtie2.log.
#   samtools view  -f 4 keeps records with the "read unmapped" flag set;
#                  -h keeps the SAM header for the next stage.
#   samtools fastq converts the records back to FASTQ: -1 / -2 receive the
#                  R1 / R2 reads, -s receives singleton reads.
# The explicit "-" operand makes both samtools stages read from stdin instead
# of relying on implicit stdin detection.
bowtie2 -p 4 \
  -x ./genome \
  -1 SP1_1.fq.gz \
  -2 SP1_2.fq.gz \
  2> SP1.bowtie2.log \
  | samtools view -h -f 4 - \
  | samtools fastq \
    -1 un.SP1_1.fq.gz \
    -2 un.SP1_2.fq.gz \
    -s un.SP1_single.fq.gz \
    -
# Assemble the reads that did not align to the reference, using MEGAHIT.
#   -1 / -2   comma-separated paired-end R1 / R2 files
#   -r        comma-separated single-end files
# Contigs are written to megahit/assembly.contigs.fa (--out-dir + --out-prefix).
# NOTE(review): the un.SP2_* inputs are not produced in the visible part of
# this file; presumably the QC and alignment steps are repeated for sample
# SP2 -- confirm before running.
# NOTE(review): --memory 0.4 looks like a fraction of total RAM rather than an
# absolute size -- confirm against the MEGAHIT usage text.
megahit \
  -1 un.SP1_1.fq.gz,un.SP2_1.fq.gz \
  -2 un.SP1_2.fq.gz,un.SP2_2.fq.gz \
  -r un.SP1_single.fq.gz,un.SP2_single.fq.gz \
  --min-contig-len 200 \
  --tmp-dir ./ \
  --memory 0.4 \
  --num-cpu-threads 10 \
  --out-dir megahit \
  --out-prefix assembly

# Drop contigs shorter than 500 bp (seqtk seq -L 500) and prefix every
# sequence name with "novel." by rewriting the leading ">" of header lines.
seqtk seq -L 500 megahit/assembly.contigs.fa \
  | sed 's/^>/>novel./' \
  > megahit.fasta

# Summarise the filtered assembly.
assembly-stats megahit.fasta > megahit.fasta.stat
# Alternative assembly with SOAPdenovo, followed by gap closing.
# -p makes the directory creation safe to repeat when the script is re-run.
mkdir -p soapdenovo

# Run the whole SOAPdenovo pipeline ("all") with the library description in
# soapdenovo_config.txt; outputs use the prefix soapdenovo/assembly.
# stdout and stderr are kept in separate log files.
# NOTE(review): -K 41 is presumably the k-mer size and -p 10 the thread count;
# see the SOAPdenovo usage text for -d, -F, -M and -R.
SOAPdenovo-63mer all \
  -s ./soapdenovo_config.txt \
  -o soapdenovo/assembly \
  -K 41 -p 10 -d 1 -F -M 3 -R \
  1> soapdenovo.log \
  2> soapdenovo.err

# Close gaps in the scaffolds, reusing the same library config file.
# NOTE(review): -l 151 is presumably the maximum read length and -t 10 the
# thread count -- confirm against the GapCloser usage text.
GapCloser -l 151 -t 10 \
  -a soapdenovo/assembly.scafSeq \
  -b soapdenovo_config.txt \
  -o soapdenovo/assembly.scafSeq.gapclo.fa

# Drop sequences shorter than 500 bp (seqtk seq -L 500) and prefix every
# sequence name with "novel." by rewriting the leading ">" of header lines.
seqtk seq -L 500 soapdenovo/assembly.scafSeq.gapclo.fa \
  | sed 's/^>/>novel./' \
  > soapdenovo.fasta