Skip to content

Commit

Permalink
Merge pull request #438 from vsmalladi/master
Browse files (browse the repository at this point in the history)
Make Pipelines Azure compatible. Closes #143.
  • Loading branch information
dpark01 committed Nov 3, 2022
2 parents 039ac18 + 4b68fe8 commit a223f7e
Show file tree
Hide file tree
Showing 13 changed files with 375 additions and 114 deletions.
43 changes: 34 additions & 9 deletions pipes/WDL/tasks/tasks_assembly.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ task assemble {
String docker = "quay.io/broadinstitute/viral-assemble:2.1.16.1"
}

Int disk_size = 375

command {
set -ex -o pipefail

Expand Down Expand Up @@ -57,7 +59,8 @@ task assemble {
docker: docker
memory: select_first([machine_mem_gb, 15]) + " GB"
cpu: 4
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
maxRetries: 2
}
Expand Down Expand Up @@ -87,6 +90,8 @@ task scaffold {
String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades")
}

Int disk_size = 375

command {
set -ex -o pipefail

Expand Down Expand Up @@ -150,7 +155,8 @@ task scaffold {
docker: docker
memory: select_first([machine_mem_gb, 31]) + " GB"
cpu: 4
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
maxRetries: 2
}
Expand All @@ -174,6 +180,7 @@ task ivar_trim {
}

String bam_basename=basename(aligned_bam, ".bam")
Int disk_size = 375

parameter_meta {
aligned_bam: { description: "aligned reads in BAM format", patterns: ["*.bam"] }
Expand Down Expand Up @@ -215,7 +222,8 @@ task ivar_trim {
docker: docker
memory: select_first([machine_mem_gb, 7]) + " GB"
cpu: 4
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x4"
maxRetries: 2
}
Expand All @@ -231,6 +239,8 @@ task ivar_trim_stats {
String docker = "quay.io/broadinstitute/py3-bio:0.1.2"
}

Int disk_size = 50

command <<<
set -e
python3<<CODE
Expand Down Expand Up @@ -273,7 +283,8 @@ task ivar_trim_stats {
docker: docker
memory: "1 GB"
cpu: 1
disks: "local-disk 50 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB"
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand All @@ -300,6 +311,8 @@ task align_reads {
String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean")
}
Int disk_size = 375
parameter_meta {
aligner: { description: "Short read aligner to use: novoalign, minimap2, or bwa. (Default: novoalign)" }
}
Expand Down Expand Up @@ -390,7 +403,8 @@ task align_reads {
docker: docker
memory: select_first([machine_mem_gb, 15]) + " GB"
cpu: 8
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
preemptible: 1
maxRetries: 2
Expand All @@ -415,6 +429,8 @@ task refine_assembly_with_aligned_reads {
String docker = "quay.io/broadinstitute/viral-assemble:2.1.16.1"
}
Int disk_size = 375
parameter_meta {
major_cutoff: {
description: "If the major allele is present at a frequency higher than this cutoff, we will call an unambiguous base at that position. If it is equal to or below this cutoff, we will call an ambiguous base representing all possible alleles at that position."
Expand Down Expand Up @@ -490,7 +506,8 @@ task refine_assembly_with_aligned_reads {
docker: docker
memory: select_first([machine_mem_gb, 15]) + " GB"
cpu: 8
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
maxRetries: 2
}
Expand Down Expand Up @@ -525,6 +542,8 @@ task refine_2x_and_plot {
String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned")
}
Int disk_size = 375
command {
set -ex -o pipefail
Expand Down Expand Up @@ -635,7 +654,8 @@ task refine_2x_and_plot {
docker: docker
memory: select_first([machine_mem_gb, 7]) + " GB"
cpu: 8
disks: "local-disk 375 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
maxRetries: 2
}
Expand All @@ -655,6 +675,8 @@ task run_discordance {
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
}
Int disk_size = 100
command {
set -ex -o pipefail
Expand Down Expand Up @@ -710,7 +732,8 @@ task run_discordance {
docker: docker
memory: "3 GB"
cpu: 2
disks: "local-disk 100 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 1
maxRetries: 2
Expand All @@ -729,6 +752,7 @@ task filter_bad_ntc_batches {
Int ntc_min_unambig
File? genome_status_json
}
Int disk_size = 50
command <<<
set -e
python3<<CODE
Expand Down Expand Up @@ -807,7 +831,8 @@ task filter_bad_ntc_batches {
docker: "python:slim"
memory: "2 GB"
cpu: 1
disks: "local-disk 50 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand Down
25 changes: 19 additions & 6 deletions pipes/WDL/tasks/tasks_demux.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ task merge_tarballs {
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
}

Int disk_size = 2625

command {
set -ex -o pipefail

Expand All @@ -32,7 +34,8 @@ task merge_tarballs {
docker: docker
memory: select_first([machine_mem_gb, 7]) + " GB"
cpu: 16
disks: "local-disk 2625 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd2_v2_x16"
maxRetries: 2
preemptible: 0
Expand All @@ -47,6 +50,7 @@ task samplesheet_rename_ids {
String new_id_col = 'external_id'
}
String new_base = basename(old_sheet, '.txt')
Int disk_size = 50
command <<<
python3 << CODE
import csv
Expand Down Expand Up @@ -75,7 +79,8 @@ task samplesheet_rename_ids {
docker: "python:slim"
memory: "1 GB"
cpu: 1
disks: "local-disk 50 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand All @@ -88,6 +93,7 @@ task revcomp_i5 {
String docker = "quay.io/broadinstitute/py3-bio:0.1.2"
}
String new_base = basename(basename(old_sheet, '.txt'), '.tsv')
Int disk_size = 50
command <<<
python3 << CODE
import csv
Expand Down Expand Up @@ -118,7 +124,8 @@ task revcomp_i5 {
docker: docker
memory: "1 GB"
cpu: 1
disks: "local-disk 50 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB"
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand Down Expand Up @@ -147,6 +154,7 @@ task illumina_demux {
Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
}
Int disk_size = 2625
parameter_meta {
flowcell_tgz: {
description: "Illumina BCL directory compressed as tarball. Must contain RunInfo.xml (unless overridden by runinfo), SampleSheet.csv (unless overridden by samplesheet), RTAComplete.txt, and Data/Intensities/BaseCalls/*",
Expand Down Expand Up @@ -394,7 +402,8 @@ task illumina_demux {
docker: docker
memory: select_first([machine_mem_gb, 200]) + " GB"
cpu: 32
disks: "local-disk 2625 LOCAL"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem3_ssd2_v2_x32"
dx_timeout: "20H"
maxRetries: 2
Expand All @@ -407,6 +416,7 @@ task map_map_setdefault {
File map_map_json
Array[String] sub_keys
}
Int disk_size = 20
command <<<
python3 << CODE
import json
Expand All @@ -427,7 +437,8 @@ task map_map_setdefault {
docker: "python:slim"
memory: "1 GB"
cpu: 1
disks: "local-disk 20 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB"
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand All @@ -437,6 +448,7 @@ task merge_maps {
input {
Array[File] maps_jsons
}
Int disk_size = 20
command <<<
python3 << CODE
import json
Expand All @@ -457,7 +469,8 @@ task merge_maps {
docker: "python:slim"
memory: "1 GB"
cpu: 1
disks: "local-disk 20 HDD"
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x2"
maxRetries: 2
}
Expand Down
31 changes: 25 additions & 6 deletions pipes/WDL/tasks/tasks_interhost.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ task multi_align_mafft_ref {
}

String fasta_basename = basename(reference_fasta, '.fasta')
Int disk_size = 200

command {
interhost.py --version | tee VERSION
Expand All @@ -39,7 +40,8 @@ task multi_align_mafft_ref {
docker: "${docker}"
memory: select_first([machine_mem_gb, 60]) + " GB"
cpu: 8
disks: "local-disk 200 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem3_ssd1_v2_x8"
maxRetries: 2
}
Expand All @@ -57,6 +59,8 @@ task multi_align_mafft {
String docker = "quay.io/broadinstitute/viral-phylo:2.1.19.1"
}

Int disk_size = 200

command {
interhost.py --version | tee VERSION
interhost.py multichr_mafft \
Expand All @@ -82,7 +86,8 @@ task multi_align_mafft {
docker: "${docker}"
memory: select_first([machine_mem_gb, 30]) + " GB"
cpu: 8
disks: "local-disk 200 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem2_ssd1_v2_x8"
maxRetries: 2
}
Expand All @@ -96,10 +101,15 @@ task beast {
Int? accelerator_count
String? gpu_type
Int? gpu_count
String? vm_size

String docker = "quay.io/broadinstitute/beast-beagle-cuda:1.10.5pre"
}

Int disk_size = 300
Int boot_disk = 50
Int disk_size_az = disk_size + boot_disk

# TODO: parameterize gpuType and gpuCount
command {
Expand All @@ -125,9 +135,11 @@ task beast {
docker: "${docker}"
memory: "7 GB"
cpu: 4
disks: "local-disk 300 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size_az + " GB"
vm_size: select_first([accelerator_type, "Standard_NC6"]) # TES Azure
maxRetries: 1
bootDiskSizeGb: 50
bootDiskSizeGb: boot_disk
gpu: true # dxWDL
dx_timeout: "40H" # dxWDL
dx_instance_type: "mem1_ssd1_gpu2_x8" # dxWDL
Expand All @@ -148,6 +160,8 @@ task index_ref {
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
}

Int disk_size = 100

command {
read_utils.py --version | tee VERSION
read_utils.py novoindex \
Expand All @@ -168,7 +182,9 @@ task index_ref {
docker: "${docker}"
cpu: 2
memory: select_first([machine_mem_gb, 4]) + " GB"
disks: "local-disk 100 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
maxRetries: 2
}
}
Expand All @@ -183,6 +199,8 @@ task trimal_clean_msa {
String input_basename = basename(basename(in_aligned_fasta, ".fasta"), ".fa")
}

Int disk_size = 100

command {
trimal -fasta -automated1 -in "${in_aligned_fasta}" -out "${input_basename}_trimal_cleaned.fasta"
}
Expand All @@ -194,7 +212,8 @@ task trimal_clean_msa {
docker: "${docker}"
memory: select_first([machine_mem_gb, 7]) + " GB"
cpu: 4
disks: "local-disk 100 HDD"
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x8"
maxRetries: 2
}
Expand Down
Loading

0 comments on commit a223f7e

Please sign in to comment.