质控

  1. fastp --thread 4 \
  2. -i ../data/SP1_1.fastq.gz \
  3. -I ../data/SP1_2.fastq.gz \
  4. -o SP1_1.fq.gz \
  5. -O SP1_2.fq.gz \
  6. -j SP1.fastp.json \
  7. -h SP1.fastp.html \
  8. 2> SP1.fastp.log

组装

构建参考基因组index

  1. bowtie2-build genome.fa genome

将clean data比对到参考基因组

  1. bowtie2 -p 4 \
  2. -x ./genome \
  3. -1 SP1_1.fq.gz \
  4. -2 SP1_2.fq.gz \
  5. 2> SP1.bowtie2.log | \
  6. samtools view -h -f 4 | \
  7. samtools fastq \
  8. -1 un.SP1_1.fq.gz \
  9. -2 un.SP1_2.fq.gz \
  10. -s un.SP1_single.fq.gz

组装未比对的reads

  • 使用megahit进行组装
  1. megahit \
  2. -1 un.SP1_1.fq.gz,un.SP2_1.fq.gz \
  3. -2 un.SP1_2.fq.gz,un.SP2_2.fq.gz \
  4. -r un.SP1_single.fq.gz,un.SP2_single.fq.gz \
  5. --min-contig-len 200 \
  6. --tmp-dir ./ \
  7. --memory 0.4 \
  8. --num-cpu-threads 10 \
  9. --out-dir megahit \
  10. --out-prefix assembly
  11. seqtk seq -L 500 megahit/assembly.contigs.fa | sed 's/^>/>novel./' > megahit.fasta
  12. assembly-stats megahit.fasta > megahit.fasta.stat
  • 使用soapdenovo进行组装
  1. mkdir soapdenovo
  2. SOAPdenovo-63mer all \
  3. -s ./soapdenovo_config.txt \
  4. -o soapdenovo/assembly \
  5. -K 41 -p 10 -d 1 -F -M 3 -R \
  6. 1> soapdenovo.log \
  7. 2> soapdenovo.err
  8. GapCloser -l 151 -t 10 \
  9. -a soapdenovo/assembly.scafSeq \
  10. -b soapdenovo_config.txt \
  11. -o soapdenovo/assembly.scafSeq.gapclo.fa
  12. seqtk seq -L 500 soapdenovo/assembly.scafSeq.gapclo.fa | sed 's/^>/>novel./' > soapdenovo.fasta