Skip to content

Commit

Permalink
Merge pull request #63 from EBI-Metagenomics/dev
Browse files Browse the repository at this point in the history
Merge DEV into Master
  • Loading branch information
mberacochea committed Oct 20, 2021
2 parents 53a99b8 + c5a5bdd commit f367002
Show file tree
Hide file tree
Showing 48 changed files with 6,024 additions and 294 deletions.
2 changes: 1 addition & 1 deletion bin/cwl_input.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

import argparse
from ruamel.yaml import YAML
Expand Down
10 changes: 10 additions & 0 deletions cwl/download-databases.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,14 @@ echo "IMG_VR_2018-07-01_4"
rsync -ahrv --progress --partial "${BASE}"/IMG_VR_2018-07-01_4.tar.gz "${OUTPUT}"
tar xvzf "${OUTPUT}"/IMG_VR_2018-07-01_4.tar.gz --directory "${OUTPUT}"

# VirFinder data. The trailing slash on the destination makes rsync treat it
# as a directory (creating it when missing) even if the source glob matches a
# single file; without it a lone file would be written *as*
# "${OUTPUT}/virfinder" instead of *into* it.
echo "VirFinder metadata"
rsync -ahrv --progress --partial "${BASE}"/virfinder/* "${OUTPUT}/virfinder/"

# Version 2 of the additional vpHMMs metadata table.
echo "Additional Metadata version 2"
rsync -ahrv --progress --partial "${BASE}"/additional_data_vpHMMs_v2.tsv "${OUTPUT}/additional_data_vpHMMs_v2.tsv"

# CheckV database: download into and unpack under ${OUTPUT}, like the other
# databases handled by this script, instead of whatever the current working
# directory happens to be.
echo "CheckV database"
wget --directory-prefix="${OUTPUT}" https://portal.nersc.gov/CheckV/checkv-db-v1.0.tar.gz
tar -zxvf "${OUTPUT}"/checkv-db-v1.0.tar.gz --directory "${OUTPUT}"

echo "Completed."
55 changes: 27 additions & 28 deletions cwl/ebi/submit-virify.sh → cwl/ebi/bsub-virify.sh
Original file line number Diff line number Diff line change
@@ -1,36 +1,32 @@
#!/bin/bash
#BSUB -n 1
#BSUB -M 8192
#BSUB -R "rusage[mem=4096]"
#BSUB -J virify
#BSUB -o output.txt
#BSUB -e error.txt

# CONSTANTS
# Wrapper for Virify.sh
WORKDIR="/hps/nobackup/production/metagenomics/toil-workdir"
# Wrapper for virify.sh

# Production
# virify.sh
VIRIFY_SH="/nfs/production/metagenomics/pipelines/virify/scripts/virify.sh"
# ENV
ENV_FILE="/nfs/production/metagenomics/pipelines/virify/scripts/ebi-env.sh"
# Production scripts and env
VIRIFY_SH="/nfs/production/rdf/metagenomics/pipelines/prod/emg-viral-pipeline/cwl/virify.sh"
ENV_FILE="/nfs/production/rdf/metagenomics/pipelines/prod/emg-viral-pipeline/cwl/ebi/codon-virify-env.sh"
WORKDIR="/hps/nobackup/rdf/metagenomics/toil-workdir"

set -e

usage () {
echo ""
echo "Virify pipeline BSUB"
echo ""
echo "-n the name for the job *a timestamp will be added to folder* [mandatory]"
echo "-i contigs input fasta [mandatory]"
echo "-o output folder [mandatory]"
echo ""
echo "Example:"
echo ""
echo "bsub-virify.sh -n test-run -i input_fasta -o /data/results/"
echo "bsub-virify.sh -n test-run -i input_fasta -o /data/results/ [-f 1.0]"
echo ""
echo "NOTE:"
echo "- The results folder will be /data/results/{job_name}."
echo "- The logs will be stored in /data/results/{job_name}/logs"
echo ""
echo "PARAMETERS:"
echo "Settings files and executable scripts:"
echo "- toil work dir: ${WORKDIR} * toil will create a folder in this path"
echo "- virify.sh: ${VIRIFY_SH}"
echo "- virify env: ${ENV_FILE}"
Expand All @@ -41,25 +37,26 @@ usage () {
NAME=""
CONTIGS=""
RESULTS_FOLDER=""
LEN_FILTER="1.0"

while getopts "n:i:o:h" opt; do
while getopts "n:i:o:f:h" opt; do
case $opt in
n)
NAME="$OPTARG"
;;
i)
CONTIGS="$OPTARG"
# if [ ! -f "$NAME_RUN" ];
# then
# echo ""
# echo "ERROR '${OPTARG}' doesn't exist." >&2
# usage;
# exit 1
# fi
;;
o)
RESULTS_FOLDER="$OPTARG"
;;
f)
LEN_FILTER="$OPTARG"
;;
h)
usage;
exit 0
;;
:)
usage;
exit 1
Expand All @@ -81,8 +78,10 @@ fi

${VIRIFY_SH} \
-e ${ENV_FILE} \
-n ${NAME} \
-j ${WORKDIR} \
-o ${RESULTS_FOLDER} \
-c 1 -m 8192 \
-i ${CONTIGS}
-n "${NAME}" \
-j "${WORKDIR}" \
-o "${RESULTS_FOLDER}" \
-f "${LEN_FILTER}" \
-p CODON \
-c 1 -m 12000 \
-i "${CONTIGS}"
26 changes: 26 additions & 0 deletions cwl/ebi/codon-virify-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Abort on the first failing command.
set -e

# EMG-Viral pipeline ENV script
# Loads the shared environment setup script. `mitload` and `_addpath`, used
# below, are not defined in this file (presumably provided by mitrc.sh —
# TODO confirm).
. /hps/software/users/rdf/metagenomics/service-team/envs/mitrc.sh

mitload virify env

# virify scripts
_addpath "/nfs/production/rdf/metagenomics/pipelines/prod/emg-viral-pipeline/bin/"

# Root folder that holds every database referenced by the exports below.
DATABASES="/nfs/production/rdf/metagenomics/pipelines/prod/emg-viral-pipeline/cwl/databases"

# Database / model locations, exported so child processes can read them.
export VIRSORTER_DATA="${DATABASES}/virsorter-data"
export ADDITIONAL_HMMS_DATA="${DATABASES}/additional_data_vpHMMs_v2.tsv"
export HMMSCAN_DATABASE="${DATABASES}/hmmer_databases/vpHMM_database_v3/vpHMM_database_v3.hmm"
export NCBI_TAX_DB_FILE="${DATABASES}/2020-07-01_ete3_ncbi_tax.sqlite"
export IMGVR_BLAST_DB="${DATABASES}/IMG_VR_2018-07-01_4"
export VIRFINDER_MODEL="${DATABASES}/virfinder/VF.modEPV_k8.rda"

# workdir
# required to be shared because
# - https://toil.readthedocs.io/en/latest/running/hpcEnvironments.html#standard-output-error-from-batch-system-jobs
# TODO: this was previously set in virify.sh
# NOTE(review): the comment above asks for a shared location, yet TMPDIR is
# pointed at /tmp — confirm this is intended.
export TMPDIR="/tmp"
114 changes: 114 additions & 0 deletions cwl/ebi/embassy-wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --ntasks=1
#SBATCH --mem 8G
#SBATCH --output="%x_virify.out"
#SBATCH --error="%x_virify.err"

# Wrapper that launches cwl/virify.sh with the Embassy settings. The #SBATCH
# lines above are batch directives (one task, one CPU, 8G of memory, separate
# stdout/stderr files); %x is presumably the scheduler's job-name placeholder
# — TODO confirm against the sbatch documentation.
# NOTE(review): one CPU is requested here while CORES=4 is handed to virify.sh
# further down — presumably they apply to different processes; confirm.

# Stop at the first failing command.
set -e

# Print the command-line help of this wrapper on stdout.
# Takes no arguments; every line below is emitted verbatim, one per row,
# including the leading and trailing blank lines.
usage() {
  printf '%s\n' \
    "" \
    "Wrapper script to run the virify workflow using toil-cwl-runner." \
    "" \
    "Results will be stored in the output folder," \
    "under a subfolder named '<job_name>_<timestamp>/'" \
    "-n Job name [mandatory]." \
    "-i Input fasta file (full path) [mandatory]." \
    "-o Output [mandatory]." \
    "-f Length filter (default 1.0)." \
    ""
}

# Defaults
CORES=4
MEMORY=8000 # 8GB
LEN_FILTER=1.0

#######################################
# Parse the wrapper's command-line options.
#
# The option string starts with ':' (getopts "silent" error reporting). Only
# in that mode does getopts hand a missing argument to the ':' arm and put the
# offending option letter in OPTARG for both the ':' and '?' arms; without it
# the ':' arm can never run and "Invalid option -" is printed without a letter.
#
# Globals:
#   NAME_RUN, INPUT_FASTA, OUTDIR, LEN_FILTER (written)
#   OPTIND (advanced by getopts; intentionally left global for later checks)
# Arguments:
#   The script's own arguments ("$@").
# Side effects:
#   Creates the folder given to -o (mkdir -p).
# Exits:
#   0 after -h; 1 on an empty value, a missing argument or an unknown option.
#######################################
parse_args() {
  local opt
  while getopts ":n:i:o:f:h" opt; do
    case "${opt}" in
      n)
        NAME_RUN="$OPTARG"
        if [ -z "$NAME_RUN" ]; then
          echo ""
          echo "ERROR -n cannot be empty." >&2
          usage
          exit 1
        fi
        ;;
      i)
        INPUT_FASTA="${OPTARG}"
        if [ -z "${INPUT_FASTA}" ]; then
          echo ""
          echo "ERROR -i cannot be empty." >&2
          usage
          exit 1
        fi
        ;;
      o)
        OUTDIR="$OPTARG"
        if [ -z "$OUTDIR" ]; then
          echo ""
          echo "ERROR -o cannot be empty." >&2
          usage
          exit 1
        fi
        mkdir -p "$OUTDIR"
        ;;
      f)
        LEN_FILTER="${OPTARG}"
        ;;
      h)
        usage
        exit 0
        ;;
      :)
        # An option that needs a value was given without one.
        echo ""
        echo "ERROR option -${OPTARG} requires an argument." >&2
        usage
        exit 1
        ;;
      \?)
        echo ""
        echo "Invalid option -${OPTARG}" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# OPTIND is still 1 when getopts consumed no option at all. Report that on
# stderr, like the other ERROR messages of this script, then show the help
# and stop.
if ((OPTIND == 1)); then
  echo ""
  echo "ERROR: No options specified" >&2
  usage
  exit 1
fi

# Drop the parsed options so that only positional arguments (if any) remain.
shift $((OPTIND - 1))

# mandatory params
# -n, -i and -o must all have been provided; the error is written to stderr,
# like the other ERROR messages of this script.
if [ -z "${NAME_RUN}" ] ||
  [ -z "${INPUT_FASTA}" ] ||
  [ -z "${OUTDIR}" ]; then
  echo ""
  echo "ERROR: Missing mandatory parameter." >&2
  usage
  exit 1
fi

# Embassy env specifics
WORKDIR="/home/virify/workdir"

# Summary banner printed before handing over to virify.sh.
printf '%s\n' \
  "Submitting the job." \
  "Workdir: ${WORKDIR}" \
  "Outdir: ${OUTDIR}/${NAME_RUN}" \
  ""

# Options forwarded to virify.sh; an array keeps every value a single word.
virify_args=(
  -e /home/virify/scripts/emg-virify-env.sh
  -n "${NAME_RUN}"
  -j "${WORKDIR}"
  -o "${OUTDIR}"
  -f "${LEN_FILTER}"
  -c "${CORES}"
  -m "${MEMORY}"
  -i "${INPUT_FASTA}"
  -p embassy
)

/home/virify/emg-viral-pipeline/cwl/virify.sh "${virify_args[@]}"
3 changes: 1 addition & 2 deletions cwl/requirements/pip_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ ruamel.yaml==0.16.5

rdflib==4.2.2
html5lib==1.0.1
toil[cwl]==5.3.0
# patch for toil issue: https://github.com/DataBiosphere/toil/issues/3565
toil[cwl]==5.4.0
# boto and boto3 are not used.
boto==2.49.0
boto3==1.17.57
2 changes: 1 addition & 1 deletion cwl/src/Tools/Annotation/viral_annotation.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: "Viral contig annotation"

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InitialWorkDirRequirement:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/Assign/assign.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: "Viral contig assign"

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InitialWorkDirRequirement:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/FastaRename/fasta_rename.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: "Fasta rename utility"

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InlineJavascriptRequirement: {}
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/FastaRename/fasta_restore.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: "Fasta name restore utility"

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InlineJavascriptRequirement: {}
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/HMMScan/hmmscan.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: hmmscan wrapper

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/hmmer:v3.1b2"
dockerPull: "microbiomeinformatics/hmmer:v3.1b2"

requirements:
InlineJavascriptRequirement: {}
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/HMMScan/hmmscan_format_table.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ doc: |

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InlineJavascriptRequirement: {}
Expand Down
3 changes: 2 additions & 1 deletion cwl/src/Tools/IMGvrBlast/imgvr_blast.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ label: blast against IMG/VR

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/blast:v2.9.0"
dockerPull: "microbiomeinformatics/blast:v2.9.0"

requirements:
InlineJavascriptRequirement: {}
ResourceRequirement:
coresMin: $(inputs.number_of_cpus)
ramMin: 9536
InitialWorkDirRequirement:
listing:
- class: File
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/IMGvrBlast/imgvr_merge.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ doc: Combine the filtered blast results with meta information from the IMG/VR da

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InitialWorkDirRequirement:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/Krona/generate_counts_table.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: Convert the assing taxonomy table

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InitialWorkDirRequirement:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/Krona/krona.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: Krona

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/krona:v2.7.1"
dockerPull: "microbiomeinformatics/krona:v2.7.1"

baseCommand: ["ktImportText"]

Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/LengthFiltering/length_filtering.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ label: Length Filter

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/emg-viral-pipeline-python3:v1"
dockerPull: "microbiomeinformatics/emg-viral-pipeline-python3:v1"

requirements:
InitialWorkDirRequirement:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/MashMap/mashmap.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ label: MashMap

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/mashmap:2.0"
dockerPull: "microbiomeinformatics/mashmap:2.0"
SoftwareRequirement:
packages:
mashmap:
Expand Down
2 changes: 1 addition & 1 deletion cwl/src/Tools/PPRMeta/pprmeta.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ baseCommand: [ "bash", "pprmeta.sh"]

hints:
DockerRequirement:
dockerPull: "docker.io/microbiomeinformatics/pprmeta:v1.1"
dockerPull: "microbiomeinformatics/pprmeta:v1.1"

inputs:
fasta_file:
Expand Down
Loading

0 comments on commit f367002

Please sign in to comment.