Skip to content

Commit

Permalink
Merge pull request #112 from broadinstitute/dp-nextstrain
Browse files Browse the repository at this point in the history
nextstrain VM shape tuning
  • Loading branch information
dpark01 authored Jun 11, 2020
2 parents 27e354b + d4f1e5f commit ce3538d
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 43 deletions.
7 changes: 7 additions & 0 deletions pipes/WDL/tasks/tasks_demux.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,10 @@ task illumina_demux {
--out_zip ,,_fastqc.zip \
--threads $num_fastqc_threads" \
::: $(cat $OUT_BASENAMES)

cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}

output {
Expand All @@ -249,6 +253,9 @@ task illumina_demux {
File unmatched_reads_bam = "unmatched/Unmatched.bam"
Array[File] raw_reads_fastqc = glob("*_fastqc.html")
Array[File] raw_reads_fastqc_zip = glob("*_fastqc.zip")
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String viralngs_version = read_string("VERSION")
}
Expand Down
103 changes: 60 additions & 43 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ task filter_subsample_sequences {
String? exclude_where
String? include_where
Int? machine_mem_gb
String docker = "nextstrain/base"
}
parameter_meta {
Expand Down Expand Up @@ -126,6 +125,9 @@ task filter_subsample_sequences {
#cat ~{sequences_fasta} | grep \> | wc -l > IN_COUNT
grep "sequences were dropped during filtering" STDOUT | cut -f 1 -d ' ' > DROP_COUNT
grep "sequences have been written out to" STDOUT | cut -f 1 -d ' ' > OUT_COUNT
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
Expand All @@ -140,6 +142,9 @@ task filter_subsample_sequences {
String augur_version = read_string("VERSION")
Int sequences_dropped = read_int("DROP_COUNT")
Int sequences_out = read_int("OUT_COUNT")
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
}
}
Expand Down Expand Up @@ -170,20 +175,23 @@ task augur_mafft_align {
~{true="--remove-reference" false="" remove_reference} \
--debug \
--nthreads auto
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 104]) + " GB"
cpu : 16
memory: select_first([machine_mem_gb, 208]) + " GB"
cpu : 32
disks: "local-disk ${disk_space_gb} LOCAL"
preemptible: 0
dx_instance_type: "mem3_ssd2_v2_x16"
dx_instance_type: "mem3_ssd2_v2_x32"
}
output {
File aligned_sequences = "~{basename}_aligned.fasta"
File align_troubleshoot = stdout()
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String augur_version = read_string("VERSION")
}
}
Expand Down Expand Up @@ -215,19 +223,23 @@ task augur_mask_sites {
else
cp "~{sequences}" "~{out_fname}"
fi
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
memory: "3 GB"
cpu : 2
memory: "2 GB"
cpu : 1
disks: "local-disk 100 HDD"
preemptible: 2
preemptible: 1
dx_instance_type: "mem1_ssd1_v2_x2"
}
output {
File masked_sequences = out_fname
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String augur_version = read_string("VERSION")
}
}
Expand All @@ -246,8 +258,6 @@ task draft_augur_tree {
File? vcf_reference
String? tree_builder_args
Int? machine_mem_gb
Int? disk_space_gb = 750
String docker = "nextstrain/base"
}
parameter_meta {
Expand All @@ -272,10 +282,10 @@ task draft_augur_tree {
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 30]) + " GB"
cpu : 16
disks: "local-disk ${disk_space_gb} LOCAL"
dx_instance_type: "mem1_ssd1_v2_x16"
memory: "16 GB"
cpu : 32
disks: "local-disk 750 LOCAL"
dx_instance_type: "mem1_ssd1_v2_x36"
preemptible: 0
}
output {
Expand Down Expand Up @@ -313,8 +323,6 @@ task refine_augur_tree {
String? divergence_units
File? vcf_reference
Int? machine_mem_gb
Int? disk_space_gb = 750
String docker = "nextstrain/base"
}
parameter_meta {
Expand Down Expand Up @@ -353,10 +361,10 @@ task refine_augur_tree {
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 30]) + " GB"
cpu : 16
disks: "local-disk ${disk_space_gb} LOCAL"
dx_instance_type: "mem1_ssd1_v2_x16"
memory: "13 GB"
cpu : 2
disks: "local-disk 100 HDD"
dx_instance_type: "mem3_ssd1_v2_x2"
preemptible: 0
}
output {
Expand All @@ -383,7 +391,6 @@ task ancestral_traits {
File? weights
Float? sampling_bias_correction
Int? machine_mem_gb
String docker = "nextstrain/base"
}
command {
Expand All @@ -394,20 +401,25 @@ task ancestral_traits {
--columns ~{sep=" " columns} \
--output-node-data "~{basename}_nodes.json" \
~{"--weights " + weights} \
~{"--sampling-bias-correction " + sampling_bias_correction} \
~{true="--confidence" false="" confidence}
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
cpu : 2
disks: "local-disk 50 HDD"
memory: "2 GB"
cpu : 1
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 2
preemptible: 1
}
output {
File node_data_json = "~{basename}_nodes.json"
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String augur_version = read_string("VERSION")
}
}
Expand All @@ -428,7 +440,6 @@ task ancestral_tree {
File? vcf_reference
File? output_vcf
Int? machine_mem_gb
String docker = "nextstrain/base"
}
parameter_meta {
Expand Down Expand Up @@ -456,11 +467,11 @@ task ancestral_tree {
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 7]) + " GB"
cpu : 4
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x4"
preemptible: 2
memory: "13 GB"
cpu : 2
disks: "local-disk 50 HDD"
dx_instance_type: "mem3_ssd1_v2_x2"
preemptible: 1
}
output {
File nt_muts_json = "~{basename}_nt_muts.json"
Expand All @@ -486,7 +497,6 @@ task translate_augur_tree {
File? vcf_reference_output
File? vcf_reference
Int? machine_mem_gb
String docker = "nextstrain/base"
}
command {
Expand All @@ -502,11 +512,11 @@ task translate_augur_tree {
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
cpu : 2
memory: "2 GB"
cpu : 1
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 2
preemptible: 1
}
output {
File aa_muts_json = "~{basename}_aa_muts.json"
Expand Down Expand Up @@ -541,11 +551,11 @@ task assign_clades_to_nodes {
}
runtime {
docker: docker
memory: "3 GB"
cpu : 2
memory: "2 GB"
cpu : 1
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 2
preemptible: 1
}
output {
File node_clade_data_json = "~{out_basename}_node-clade-assignments.json"
Expand Down Expand Up @@ -580,20 +590,24 @@ task augur_import_beast {
~{"--tip-date-regex " + tip_date_regex} \
~{"--tip-date-format " + tip_date_format} \
~{"--tip-date-delimeter " + tip_date_delimiter}
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
cpu : 2
disks: "local-disk 50 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 2
preemptible: 1
}
output {
File tree_newick = "~{tree_basename}.nwk"
File node_data_json = "~{tree_basename}.json"
Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String augur_version = read_string("VERSION")
}
}
Expand All @@ -616,7 +630,6 @@ task export_auspice_json {
Array[String]? maintainers
String? title
Int? machine_mem_gb
String docker = "nextstrain/base"
}
String out_basename = basename(basename(tree, ".nwk"), "_refined_tree")
Expand Down Expand Up @@ -665,19 +678,23 @@ task export_auspice_json {
~{"--colors " + colors_tsv} \
~{"--description " + description_md} \
--output ~{out_basename}_auspice.json)
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES
}
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
memory: "3 GB"
cpu : 2
disks: "local-disk 100 HDD"
dx_instance_type: "mem1_ssd1_v2_x2"
preemptible: 2
preemptible: 0
}
output {
File virus_json = "~{out_basename}_auspice.json"
Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000)
Int runtime_sec = ceil(read_float("UPTIME_SEC"))
Int cpu_load_15min = ceil(read_float("LOAD_15M"))
String augur_version = read_string("VERSION")
}
}

0 comments on commit ce3538d

Please sign in to comment.