Skip to content

Commit

Permalink
add workflows to add to or create library and sample tables based on …
Browse files Browse the repository at this point in the history
…demultiplexing output (#507)

This PR adds one new workflow:

populate_library_and_sample_tables_from_flowcell: populates the library and sample tables with per-library-lane and per-sample entries (a sample entry is a named reference to one or more libraries), using existing demultiplexing output.
It also adds the same functionality as an optional step of the existing demux_deplete workflow, executed after demultiplexing when the input insert_demux_outputs_into_terra_tables=true. In that case the step uses outputs passed directly from demultiplexing rather than live table data.

These workflows rely on a new task, also added by this PR:

tasks_terra.wdl::create_or_update_sample_tables
  • Loading branch information
tomkinsc authored Jan 29, 2024
1 parent 52b297c commit a019ed8
Show file tree
Hide file tree
Showing 7 changed files with 357 additions and 37 deletions.
10 changes: 10 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ workflows:
primaryDescriptorPath: /pipes/WDL/workflows/demux_deplete.wdl
testParameterFiles:
- empty.json
- name: demux_deplete_and_table_insert
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/demux_deplete_and_table_insert.wdl
testParameterFiles:
- empty.json
- name: demux_plus
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/demux_plus.wdl
Expand Down Expand Up @@ -259,6 +264,11 @@ workflows:
primaryDescriptorPath: /pipes/WDL/workflows/nextclade_single.wdl
testParameterFiles:
- empty.json
- name: populate_library_and_sample_tables_from_flowcell
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/populate_library_and_sample_tables_from_flowcell.wdl
testParameterFiles:
- empty.json
- name: reconstruct_from_alignments
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/reconstruct_from_alignments.wdl
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_demux.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ task illumina_demux {
--threads $num_fastqc_threads" \
::: $(cat $OUT_BASENAMES)
mv metrics.txt "~{out_base}-demux_metrics.txt"
mv metrics.txt "~{out_base}-demux_metrics.txt"
mv runinfo.json "~{out_base}-runinfo.json"
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC
Expand Down
22 changes: 11 additions & 11 deletions pipes/WDL/tasks/tasks_reports.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -530,28 +530,28 @@ task MultiQC {
input {
Array[File] input_files
Boolean force = false
Boolean full_names = false
String? title
String? comment
String? file_name
String out_dir = "./multiqc-output"
String out_dir = "./multiqc-output"
String? template
String? tag
String? ignore_analysis_files
String? ignore_sample_names
File? sample_names
Array[String]? exclude_modules
Array[String]? module_to_use
Boolean data_dir = false
Boolean no_data_dir = false
String? output_data_format
Boolean zip_data_dir = false
Boolean export = false
Boolean flat = false
Boolean interactive = true
Boolean lint = false
Boolean pdf = false
Boolean force = false
Boolean full_names = false
Boolean data_dir = false
Boolean no_data_dir = false
Boolean zip_data_dir = false
Boolean export = false
Boolean flat = false
Boolean interactive = true
Boolean lint = false
Boolean pdf = false
Boolean megaQC_upload = false # Upload generated report to MegaQC if MegaQC options are found
File? config # directory
String? config_yaml
Expand Down
297 changes: 274 additions & 23 deletions pipes/WDL/tasks/tasks_terra.wdl

Large diffs are not rendered by default.

29 changes: 27 additions & 2 deletions pipes/WDL/workflows/demux_deplete.wdl
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
version 1.0

import "../tasks/tasks_demux.wdl" as demux
import "../tasks/tasks_taxon_filter.wdl" as taxon_filter
import "../tasks/tasks_reports.wdl" as reports
import "../tasks/tasks_ncbi.wdl" as ncbi
import "../tasks/tasks_reports.wdl" as reports
import "../tasks/tasks_taxon_filter.wdl" as taxon_filter
import "../tasks/tasks_terra.wdl" as terra

workflow demux_deplete {
meta {
Expand All @@ -19,6 +20,7 @@ workflow demux_deplete {
String? read_structure

Boolean sort_reads=true
Boolean insert_demux_outputs_into_terra_tables=false

File? sample_rename_map
File? biosample_map
Expand Down Expand Up @@ -70,6 +72,10 @@ workflow demux_deplete {
description: "Output bam files will be sorted by read name.",
category: "advanced"
}
insert_demux_outputs_into_terra_tables: {
description: "Terra only: if set to 'true', demux output will be used to insert entries in 'library' (per library-lane) and 'sample tables' (referencing one or more libraries per sample ID)",
category: "advanced"
}
bmtaggerDbs: {
description: "Tool that can discriminate between human and bacterial reads and other reads by using short fragments. Databases must be provided to onset depletion.Sequences in fasta format will be indexed on the fly, pre-bmtagger-indexed databases may be provided as tarballs.",
category: "advanced"
Expand Down Expand Up @@ -167,6 +173,25 @@ workflow demux_deplete {
}
}

#### optional Terra table insertion
# Runs only when the insert_demux_outputs_into_terra_tables input is true:
# hands the demux outputs to terra.create_or_update_sample_tables.
if(insert_demux_outputs_into_terra_tables){
# check_terra_env provides is_running_on_terra plus the workspace name,
# namespace, and bucket path that are passed to the call below
call terra.check_terra_env

# skip the table update entirely unless check_terra_env reports a Terra run
if(check_terra_env.is_running_on_terra) {
call terra.create_or_update_sample_tables {
input:
# run ID is read from the first run_info entry emitted by illumina_demux
flowcell_run_id = illumina_demux.run_info[0]['run_id'],
workspace_name = check_terra_env.workspace_name,
workspace_namespace = check_terra_env.workspace_namespace,
workspace_bucket = check_terra_env.workspace_bucket_path,

# flatten() merges the nested raw-BAM arrays from illumina_demux into one array
raw_reads_unaligned_bams = flatten(illumina_demux.raw_reads_unaligned_bams),
# select_all() drops undefined entries from cleaned_bam_passing
cleaned_reads_unaligned_bams = select_all(cleaned_bam_passing),
# merged JSON outputs of the meta_filename and meta_sample calls
meta_by_filename_json = meta_filename.merged_json,
meta_by_sample_json = meta_sample.merged_json
}
}
}
#### SRA submission prep
if(defined(biosample_map)) {
call ncbi.sra_meta_prep {
Expand Down
4 changes: 4 additions & 0 deletions pipes/WDL/workflows/dump_gcloud_env_info.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ workflow dump_gcloud_env_info {
String workspace_name = check_terra_env.workspace_name
String workspace_namespace = check_terra_env.workspace_namespace
String workspace_bucket_path = check_terra_env.workspace_bucket_path

String method_version = check_terra_env.method_version
String method_source = check_terra_env.method_source
String method_path = check_terra_env.method_path

String input_table_name = check_terra_env.input_table_name
String input_row_id = check_terra_env.input_row_id
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
version 1.0

import "../tasks/tasks_terra.wdl" as terra

workflow populate_library_and_sample_tables_from_flowcell {
    meta {
        description: "Terra only: Populate per-library-lane and per-sample tables from existing demultiplexed flowcell output"
        author: "Broad Viral Genomics"
        email: "viral-ngs@broadinstitute.org"
        allowNestedInputs: true
    }

    input {
        # flowcell run ID; forwarded unchanged to create_or_update_sample_tables
        String flowcell_run_id
    }

    # Look up the workspace this job belongs to (name, namespace, bucket path);
    # these values are needed to read or modify the workspace's Terra tables.
    call terra.check_terra_env

    # The table update is attempted only when check_terra_env reports that the
    # job is running on Terra; otherwise the workflow does nothing further.
    if (check_terra_env.is_running_on_terra) {
        call terra.create_or_update_sample_tables {
            input:
                workspace_namespace = check_terra_env.workspace_namespace,
                workspace_name      = check_terra_env.workspace_name,
                workspace_bucket    = check_terra_env.workspace_bucket_path,
                flowcell_run_id     = flowcell_run_id
        }
    }
}

0 comments on commit a019ed8

Please sign in to comment.