Install Docker and Docker Compose
# https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository
sudo apt-get update
sudo apt-get install \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg \
    lsb-release
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose
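# Optional sanity check (an assumed step, not required): confirm both tools run.
sudo docker run --rm hello-world
docker-compose --version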
Install Miniconda
# By convention, Miniconda is installed under /opt/local/cobweb
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh  # when prompted, set the install prefix to /opt/local/cobweb
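# Optional check, assuming the /opt/local/cobweb prefix chosen above:
source /opt/local/cobweb/etc/profile.d/conda.sh
conda --version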
Install the customized Cromwell
cat << EOF > ~/.condarc
channels:
  - conda-forge
  - defaults
  - bioconda
  - anaconda
  - fastai
  - pytorch
show_channel_urls: true
auto_activate_base: false
anaconda_upload: true
EOF
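# Optional: confirm the channel configuration was picked up.
conda config --show channels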
conda create -n cromwell-35 java-jdk=8.0.112
# Obtain the customized cromwell-35 build from the CO developers; the cromwell-35.tar.gz file is under /home/ubuntu
mkdir -p /opt/local/cobweb/envs/cromwell-35/share/cromwell
tar -xzvf cromwell-35.tar.gz -C /opt/local/cobweb/envs/cromwell-35/share/cromwell
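# Optional check, assuming the env and jar paths above; the last command should print the version.
conda activate cromwell-35
java -version
java -jar /opt/local/cobweb/envs/cromwell-35/share/cromwell/cromwell.jar --version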
# Save the systemd service unit (quote the heredoc delimiter so $MAINPID is written literally,
# and use sudo tee since /lib/systemd/system is not writable by a regular user)
cat << 'EOF' | sudo tee /lib/systemd/system/cromwell-35.service > /dev/null
[Unit]
Description=Cromwell server daemon
After=network.target

[Service]
Type=simple
ExecStart=/opt/local/cobweb/envs/cromwell-35/bin/java -Xms512m -Xmx1g -Dconfig.file=/etc/cromwell-35.conf -jar /opt/local/cobweb/envs/cromwell-35/share/cromwell/cromwell.jar server
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
RestartSec=42s

[Install]
WantedBy=multi-user.target
EOF
# Cromwell configuration file (see the code block below)
sudo cp cromwell-35.conf /etc/cromwell-35.conf
sudo systemctl daemon-reload
sudo systemctl restart cromwell-35
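# Optional sanity check: the service should be active, and the engine API should answer
# on port 8000 (set in the webservice section of the config below).
sudo systemctl status cromwell-35
curl http://localhost:8000/engine/v1/version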
# cromwell-35.conf
include required(classpath("application"))
call-caching {
  # Allows re-use of existing results for jobs you've already run
  # (default: false)
  enabled = false

  # Whether to invalidate a cache result forever if we cannot reuse them. Disable this if you expect some cache copies
  # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users):
  # (default: true)
  invalidate-bad-cache-results = true
}
docker {
  hash-lookup {
    enabled = true

    # How should docker hashes be looked up. Possible values are "local" and "remote"
    # "local": Lookup hashes on the local docker daemon using the cli
    # "remote": Lookup hashes on docker hub and gcr
    method = "remote"
  }
}
backend {
  default = "Local"

  providers {
    Local {
      # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend.
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"

      # The backend custom configuration.
      config {

        # Optional limits on the number of concurrent jobs
        #concurrent-job-limit = 5

        # If true submits scripts to the bash background using "&". Only useful for dispatchers that do NOT submit
        # the job and then immediately return a scheduled job id.
        run-in-background = true

        # `temporary-directory` creates the temporary directory for commands.
        #
        # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to:
        #     temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)"
        #
        # The expression is run from the execution directory for the script. The expression must create the directory
        # if it does not exist, and then return the full path to the directory.
        #
        # To create and return a non-random temporary directory, use something like:
        #     temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)"

        # `script-epilogue` configures a shell command to run after the execution of every command block.
        #
        # If this value is not set explicitly, the default value is `sync`, equivalent to:
        #     script-epilogue = "sync"
        #
        # To turn off the default `sync` behavior set this value to an empty string:
        #     script-epilogue = ""

        # `glob-link-command` specifies command used to link glob outputs, by default using hard-links.
        # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows:
        #     glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"

        # The list of possible runtime custom attributes.
        runtime-attributes = """
        String? docker
        String? docker_user
        """

        # Submit string when there is no "docker" runtime attribute.
        submit = "/usr/bin/env bash ${script}"

        # Submit string when there is a "docker" runtime attribute.
        submit-docker = """
        docker run \
          --rm -i \
          ${"--user " + docker_user} \
          --entrypoint ${job_shell} \
          -v ${cwd}:${docker_cwd} \
          ${docker} ${script}
        """

        # Root directory where Cromwell writes job results. This directory must be
        # visible and writeable by the Cromwell process as well as the jobs that Cromwell
        # launches.
        root = "/data/clinico-omics"

        # Root directory where Cromwell writes job results in the container. This value
        # can be used to specify where the execution folder is mounted in the container.
        # it is used for the construction of the docker_cwd string in the submit-docker
        # value above.
        dockerRoot = "/cromwell-executions"

        # File system configuration.
        filesystems {

          # For SFS backends, the "local" configuration specifies how files are handled.
          local {

            # Try to hard link (ln), then soft-link (ln -s), and if both fail, then copy the files.
            localization: [
              "hard-link", "soft-link", "copy"
            ]

            # Call caching strategies
            caching {
              # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below:
              duplication-strategy: [
                "hard-link", "soft-link", "copy"
              ]

              # Possible values: file, path, path+modtime
              # "file" will compute an md5 hash of the file content.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link",
              # in order to allow for the original file path to be hashed.
              # "path+modtime" will compute an md5 hash of the file path and the last modified time. The same conditions as for "path" apply here.
              # Default: file
              hashing-strategy: "file"

              # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
              # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
              check-sibling-md5: false
            }
          }
        }

        # The defaults for runtime attributes if not provided.
        default-runtime-attributes {
          failOnStderr: false
          continueOnReturnCode: 0
        }
      }
    }
  }
}
engine {
  filesystems {
  }
}

database {
  profile = "slick.jdbc.MySQLProfile$"
  db {
    driver = "com.mysql.jdbc.Driver"
    url = "jdbc:mysql://localhost/cromwell?rewriteBatchedStatements=true&useSSL=false&characterEncoding=utf8"
    user = "gxmu-cromwell"
    password = "gxmu-test"
    connectionTimeout = 5000
  }
}

workflow-options {
  workflow-log-dir = "/var/log/cromwell-workflow-logs"
  workflow-log-temporary = false
}

webservice {
  port = 8000
  interface = 0.0.0.0
}
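# The database section above expects a MySQL database and user to exist before the
# service starts. One possible way to create them (names and password taken from the
# config above; adjust to your environment):
mysql -u root -p <<'SQL'
CREATE DATABASE IF NOT EXISTS cromwell;
CREATE USER 'gxmu-cromwell'@'localhost' IDENTIFIED BY 'gxmu-test';
GRANT ALL PRIVILEGES ON cromwell.* TO 'gxmu-cromwell'@'localhost';
FLUSH PRIVILEGES;
SQL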
Install Choppy Pipe
conda create -n choppy-pipe-0.3.9.dev0 python=3.8
conda activate choppy-pipe-0.3.9.dev0
pip3 install choppy-pipe==0.3.9.dev0
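# Optional: confirm the package landed in the active environment.
pip3 show choppy-pipe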
Install ClinicoOmics
# deployer