add workflows to add to or create library and sample tables based on …

…demultiplexing output (#507) This PR adds one new workflow, populate_library_and_sample_tables_from_flowcell, which populates the library and sample tables with per-library-lane and per-sample entries (i.e. named references to one or more libraries) using existing demultiplexing output. It also adds the same functionality as an optional step executed after demux by the existing demux_deplete workflow, if the input insert_demux_outputs_into_terra_tables=true, using outputs passed directly from demultiplexing rather than live table data. These workflows rely on a new task, also added by this PR: tasks_terra.wdl::create_or_update_sample_tables.
broadinstitute · Jan 29, 2024 · a019ed8 · a019ed8
1 parent 52b297c
commit a019ed8
Show file tree

Hide file tree

Showing 7 changed files with 357 additions and 37 deletions.
diff --git a/.dockstore.yml b/.dockstore.yml
@@ -115,6 +115,11 @@ workflows:
     primaryDescriptorPath: /pipes/WDL/workflows/demux_deplete.wdl
     testParameterFiles:
       - empty.json
+  - name: demux_deplete_and_table_insert
+    subclass: WDL
+    primaryDescriptorPath: /pipes/WDL/workflows/demux_deplete_and_table_insert.wdl
+    testParameterFiles:
+      - empty.json
   - name: demux_plus
     subclass: WDL
     primaryDescriptorPath: /pipes/WDL/workflows/demux_plus.wdl
@@ -259,6 +264,11 @@ workflows:
     primaryDescriptorPath: /pipes/WDL/workflows/nextclade_single.wdl
     testParameterFiles:
       - empty.json
+  - name: populate_library_and_sample_tables_from_flowcell
+    subclass: WDL
+    primaryDescriptorPath: /pipes/WDL/workflows/populate_library_and_sample_tables_from_flowcell.wdl
+    testParameterFiles:
+      - empty.json
   - name: reconstruct_from_alignments
     subclass: WDL
     primaryDescriptorPath: /pipes/WDL/workflows/reconstruct_from_alignments.wdl

diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl
@@ -383,7 +383,7 @@ task illumina_demux {
         --threads $num_fastqc_threads" \
       ::: $(cat $OUT_BASENAMES)
 
-    mv metrics.txt "~{out_base}-demux_metrics.txt"
+    mv metrics.txt  "~{out_base}-demux_metrics.txt"
     mv runinfo.json "~{out_base}-runinfo.json"
 
     cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC

diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl
@@ -530,28 +530,28 @@ task MultiQC {
   input {
     Array[File]    input_files
 
-    Boolean        force = false
-    Boolean        full_names = false
     String?        title
     String?        comment
     String?        file_name
-    String         out_dir = "./multiqc-output"
+    String         out_dir       = "./multiqc-output"
     String?        template
     String?        tag
     String?        ignore_analysis_files
     String?        ignore_sample_names
     File?          sample_names
     Array[String]? exclude_modules
     Array[String]? module_to_use
-    Boolean        data_dir = false
-    Boolean        no_data_dir = false
     String?        output_data_format
-    Boolean        zip_data_dir = false
-    Boolean        export = false
-    Boolean        flat = false
-    Boolean        interactive = true
-    Boolean        lint = false
-    Boolean        pdf = false
+    Boolean        force         = false
+    Boolean        full_names    = false
+    Boolean        data_dir      = false
+    Boolean        no_data_dir   = false
+    Boolean        zip_data_dir  = false
+    Boolean        export        = false
+    Boolean        flat          = false
+    Boolean        interactive   = true
+    Boolean        lint          = false
+    Boolean        pdf           = false
     Boolean        megaQC_upload = false # Upload generated report to MegaQC if MegaQC options are found
     File?          config  # directory
     String?        config_yaml

diff --git a/pipes/WDL/tasks/tasks_terra.wdl b/pipes/WDL/tasks/tasks_terra.wdl
diff --git a/pipes/WDL/workflows/demux_deplete.wdl b/pipes/WDL/workflows/demux_deplete.wdl
@@ -1,9 +1,10 @@
 version 1.0
 
 import "../tasks/tasks_demux.wdl" as demux
-import "../tasks/tasks_taxon_filter.wdl" as taxon_filter
-import "../tasks/tasks_reports.wdl" as reports
 import "../tasks/tasks_ncbi.wdl" as ncbi
+import "../tasks/tasks_reports.wdl" as reports
+import "../tasks/tasks_taxon_filter.wdl" as taxon_filter
+import "../tasks/tasks_terra.wdl" as terra
 
 workflow demux_deplete {
     meta {
@@ -19,6 +20,7 @@ workflow demux_deplete {
         String?      read_structure
 
         Boolean      sort_reads=true
+        Boolean      insert_demux_outputs_into_terra_tables=false
 
         File?        sample_rename_map
         File?        biosample_map
@@ -70,6 +72,10 @@ workflow demux_deplete {
             description: "Output bam files will be sorted by read name.",
             category: "advanced"
         }
+        insert_demux_outputs_into_terra_tables: {
+            description: "Terra only: if set to 'true', demux output will be used to insert entries in 'library' (per library-lane) and 'sample tables' (referencing one or more libraries per sample ID)",
+            category: "advanced"
+        }
         bmtaggerDbs: {
             description: "Tool that can discriminate between human and bacterial reads and other reads by using short fragments. Databases must be provided to onset depletion.Sequences in fasta format will be indexed on the fly, pre-bmtagger-indexed databases may be provided as tarballs.",
             category: "advanced"
@@ -167,6 +173,25 @@ workflow demux_deplete {
         }
     }
 
+    if(insert_demux_outputs_into_terra_tables){
+        call terra.check_terra_env
+
+        if(check_terra_env.is_running_on_terra) {
+            call terra.create_or_update_sample_tables {
+              input:
+                flowcell_run_id     = illumina_demux.run_info[0]['run_id'],
+                workspace_name      = check_terra_env.workspace_name,
+                workspace_namespace = check_terra_env.workspace_namespace,
+                workspace_bucket    = check_terra_env.workspace_bucket_path,
+
+                raw_reads_unaligned_bams     = flatten(illumina_demux.raw_reads_unaligned_bams),
+                cleaned_reads_unaligned_bams = select_all(cleaned_bam_passing),
+                meta_by_filename_json        = meta_filename.merged_json,
+                meta_by_sample_json          = meta_sample.merged_json
+            }
+        }
+    }
+
     #### SRA submission prep
     if(defined(biosample_map)) {
         call ncbi.sra_meta_prep {

diff --git a/pipes/WDL/workflows/dump_gcloud_env_info.wdl b/pipes/WDL/workflows/dump_gcloud_env_info.wdl
@@ -23,6 +23,10 @@ workflow dump_gcloud_env_info {
         String  workspace_name          = check_terra_env.workspace_name
         String  workspace_namespace     = check_terra_env.workspace_namespace
         String  workspace_bucket_path   = check_terra_env.workspace_bucket_path
+
+        String  method_version          = check_terra_env.method_version
+        String  method_source           = check_terra_env.method_source
+        String  method_path             = check_terra_env.method_path
 
         String  input_table_name        = check_terra_env.input_table_name
         String  input_row_id            = check_terra_env.input_row_id

diff --git a/pipes/WDL/workflows/populate_library_and_sample_tables_from_flowcell.wdl b/pipes/WDL/workflows/populate_library_and_sample_tables_from_flowcell.wdl
@@ -0,0 +1,30 @@
+version 1.0
+
+import "../tasks/tasks_terra.wdl" as terra
+
+workflow populate_library_and_sample_tables_from_flowcell {
+    meta {
+        description: "Terra only: Populate per-library-lane and per-sample tables from existing demultiplexed flowcell output"
+        author: "Broad Viral Genomics"
+        email:  "viral-ngs@broadinstitute.org"
+        allowNestedInputs: true
+    }
+
+    input {
+        String flowcell_run_id
+    }
+
+    # obtain runtime workspace info necessary to read or change data in
+    # Terra tables of the workspace associated with a job
+    call terra.check_terra_env
+
+    if(check_terra_env.is_running_on_terra) {
+        call terra.create_or_update_sample_tables {
+          input:
+            flowcell_run_id     = flowcell_run_id,
+            workspace_name      = check_terra_env.workspace_name,
+            workspace_namespace = check_terra_env.workspace_namespace,
+            workspace_bucket    = check_terra_env.workspace_bucket_path
+        }
+    }
+}