https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#wildcards
expand
#normal
expand(["{dataset}/a.{ext}", "{dataset}/b.{ext}"], dataset=DATASETS, ext=FORMATS)
#leads to
["ds1/a.txt", "ds1/b.txt", "ds2/a.txt", "ds2/b.txt", "ds1/a.csv", "ds1/b.csv", "ds2/a.csv", "ds2/b.csv"]
#zip
expand(["{dataset}/a.{ext}", "{dataset}/b.{ext}"], zip, dataset=DATASETS, ext=FORMATS)
#leads to
["ds1/a.txt", "ds1/b.txt", "ds2/a.csv", "ds2/b.csv"]
#mask from rule all
expand("{{dataset}}/a.{ext}", ext=FORMATS)
#simplest format: multiext
# multiext() is the simplest way to declare several outputs that share a
# common prefix: this expands to "some/plot.pdf", "some/plot.svg",
# "some/plot.png".
rule plot:
output:
multiext("some/plot", ".pdf", ".svg", ".png")
##Error
###Not all output, log and benchmark files of rule bam_to_tbi_ribo contain the same wildcards. This is crucial though, in order to avoid that two or more jobs write to the same file
###Fix: when inputs with fewer wildcards map to outputs with more wildcards, combine expand() with {{...}} masking to pin the results down; otherwise, after Snakemake iterates the wildcards, it cannot tell which outputs correspond to which inputs.
###Also watch whether the masked variable is written later in the pattern, since that affects how many outputs are generated (expand iterates like nested loops).
# Converts a ribo-seq BAM into per-read-length tabix-indexed files (fwd/rev).
# Key point (the subject of the error quoted above): expand() iterates only
# read_length, while {{merge}} and {{mpsf}} are double-braced ("masked") so
# they survive expansion as wildcards. That way the output and benchmark
# files all carry the same wildcard set ({merge}, {mpsf}), which Snakemake
# requires so that no two jobs write the same file.
rule bam_to_tbi_ribo:
input:
ribo_bam = 'ribohmm/ribo_bam_tbi/{merge}_{mpsf}.bam',
lengths = 'ribohmm/configs/{merge}_{mpsf}.txt'
output:
# NOTE(review): READ_LENGTHS is defined elsewhere in the Snakefile; each
# value produces one fwd and one rev .gz.tbi for a given (merge, mpsf).
fwd = expand('ribohmm/ribo_bam_tbi/{{merge}}_{{mpsf}}_fwd.{read_length}.gz.tbi', read_length=READ_LENGTHS),
rev = expand('ribohmm/ribo_bam_tbi/{{merge}}_{{mpsf}}_rev.{read_length}.gz.tbi', read_length=READ_LENGTHS)
benchmark: 'time_benchmarks/ribohmm/bam_to_tbi/RIBO_{merge}_{mpsf}.txt'
shell:
# The script is expected to emit the .gz.tbi outputs itself; the shell
# command does not reference {output} directly.
r''' {PYTHON3} {RIBOHMM_SRC_DIR}/bam_to_tbi.py --dtype riboseq --length {input.lengths} {input.ribo_bam}'''
Constrain wildcards
#First, a wildcard can be constrained within the file pattern, by appending a regular expression separated by a comma:
output: "{dataset,\d+}.{group}.txt"
#Second, a wildcard can be constrained within the rule via the keyword wildcard_constraints:
# Example: constraining a wildcard per-rule with the wildcard_constraints
# keyword (the second method described above).
rule complex_conversion:
input:
"{dataset}/inputfile"
output:
"{dataset}/file.{group}.txt"
wildcard_constraints:
# {dataset} must be all digits; {group} stays unconstrained.
# NOTE(review): "\d" in a non-raw string raises a DeprecationWarning on
# newer Pythons — r"\d+" would be safer; confirm before reusing.
dataset="\d+"
shell:
"somecommand --group {wildcards.group} < {input} > {output}"
#global constraining wildcards
# A top-level wildcard_constraints directive applies to every rule in the
# Snakefile: here {dataset} must match \d+ (digits only) wherever it is used.
wildcard_constraints:
dataset="\d+"
# "..." below are placeholders for real rule bodies, not runnable code.
rule a:
...
rule b:
...