From f333b8d9f04a9451278cabe52bf233ace4b11211 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 1 Mar 2024 15:44:36 -0500 Subject: [PATCH 01/20] parameterize vm --- pipes/WDL/tasks/tasks_nextstrain.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 614d6a93e..806acd782 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -1147,6 +1147,8 @@ task augur_mafft_align { String docker = "nextstrain/base:build-20230905T192825Z" Int disk_size = 750 + Int mem_size = 180 + Int cpus = 64 } command <<< set -e @@ -1165,8 +1167,8 @@ task augur_mafft_align { >>> runtime { docker: docker - memory: "180 GB" - cpu : 64 + memory: mem_size + " GB" + cpu : cpus disks: "local-disk " + disk_size + " LOCAL" disk: disk_size + " GB" # TES preemptible: 0 From 0d8c043f7fd68313ecccf3faf3147f759df5b01e Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 4 Mar 2024 11:15:15 -0500 Subject: [PATCH 02/20] add more outputs (terra-ready TSVs and genbank-ready files/fields) to assembly table for scaffold_and_refine_multitaxa --- pipes/WDL/tasks/tasks_assembly.wdl | 27 +++++----- pipes/WDL/workflows/assemble_refbased.wdl | 4 +- .../scaffold_and_refine_multitaxa.wdl | 52 ++++++++++++++----- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index a0ff7e0fe..b45cb3965 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -578,13 +578,14 @@ task refine_assembly_with_aligned_reads { input { File reference_fasta File reads_aligned_bam - String sample_name + String out_basename = basename(reads_aligned_bam, '.bam') + String sample_name = out_basename Boolean mark_duplicates = false Float major_cutoff = 0.5 Int min_coverage = 3 - Int? machine_mem_gb + Int machine_mem_gb = 15 String docker = "quay.io/broadinstitute/viral-assemble:2.2.4.0" } @@ -614,7 +615,7 @@ task refine_assembly_with_aligned_reads { } } - command { + command <<< set -ex -o pipefail # find 90% memory @@ -639,36 +640,36 @@ task refine_assembly_with_aligned_reads { temp_markdup.bam \ refined.fasta \ --already_realigned_bam=temp_markdup.bam \ - --outVcf ~{sample_name}.sites.vcf.gz \ + --outVcf "~{out_basename}.sites.vcf.gz" \ --min_coverage ~{min_coverage} \ --major_cutoff ~{major_cutoff} \ --JVMmemory "$mem_in_mb"m \ --loglevel=DEBUG file_utils.py rename_fasta_sequences \ - refined.fasta "${sample_name}.fasta" "${sample_name}" + refined.fasta "~{out_basename}.fasta" "~{sample_name}" # collect variant counts if (( $(cat refined.fasta | wc -l) > 1 )); then - bcftools filter -e "FMT/DP<${min_coverage}" -S . "${sample_name}.sites.vcf.gz" -Ou | bcftools filter -i "AC>1" -Ou > "${sample_name}.diffs.vcf" - bcftools filter -i 'TYPE="snp"' "${sample_name}.diffs.vcf" | bcftools query -f '%POS\n' | wc -l | tee num_snps - bcftools filter -i 'TYPE!="snp"' "${sample_name}.diffs.vcf" | bcftools query -f '%POS\n' | wc -l | tee num_indels + bcftools filter -e "FMT/DP<~{min_coverage}" -S . "~{out_basename}.sites.vcf.gz" -Ou | bcftools filter -i "AC>1" -Ou > "~{out_basename}.diffs.vcf" + bcftools filter -i 'TYPE="snp"' "~{out_basename}.diffs.vcf" | bcftools query -f '%POS\n' | wc -l | tee num_snps + bcftools filter -i 'TYPE!="snp"' "~{out_basename}.diffs.vcf" | bcftools query -f '%POS\n' | wc -l | tee num_indels else # empty output echo "0" > num_snps echo "0" > num_indels - cp "${sample_name}.sites.vcf.gz" "${sample_name}.diffs.vcf" + cp "~{out_basename}.sites.vcf.gz" "~{out_basename}.diffs.vcf" fi # collect figures of merit set +o pipefail # grep will exit 1 if it fails to find the pattern grep -v '^>' refined.fasta | tr -d '\n' | wc -c | tee assembly_length grep -v '^>' refined.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous - } + >>> output { - File refined_assembly_fasta = "${sample_name}.fasta" - File sites_vcf_gz = "${sample_name}.sites.vcf.gz" + File refined_assembly_fasta = "~{out_basename}.fasta" + File sites_vcf_gz = "~{out_basename}.sites.vcf.gz" Int assembly_length = read_int("assembly_length") Int assembly_length_unambiguous = read_int("assembly_length_unambiguous") Int dist_to_ref_snps = read_int("num_snps") @@ -678,7 +679,7 @@ task refine_assembly_with_aligned_reads { runtime { docker: docker - memory: select_first([machine_mem_gb, 15]) + " GB" + memory: machine_mem_gb + " GB" cpu: 8 disks: "local-disk " + disk_size + " LOCAL" disk: disk_size + " GB" # TES diff --git a/pipes/WDL/workflows/assemble_refbased.wdl b/pipes/WDL/workflows/assemble_refbased.wdl index 2b334e26c..4c72fe4f5 100644 --- a/pipes/WDL/workflows/assemble_refbased.wdl +++ b/pipes/WDL/workflows/assemble_refbased.wdl @@ -63,6 +63,7 @@ workflow assemble_refbased { Array[File]+ reads_unmapped_bams File reference_fasta String sample_name = basename(reads_unmapped_bams[0], '.bam') + String? sample_original_name String aligner="minimap2" File? novocraft_license @@ -150,7 +151,8 @@ workflow assemble_refbased { reads_aligned_bam = aligned_trimmed_bam, min_coverage = min_coverage, major_cutoff = major_cutoff, - sample_name = sample_name + out_basename = sample_name, + sample_name = select_first([sample_original_name, sample_name]) } scatter(reads_unmapped_bam in reads_unmapped_bams) { diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index a2dd28723..6edc1b59c 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -3,6 +3,7 @@ version 1.0 import "../tasks/tasks_assembly.wdl" as assembly import "../tasks/tasks_metagenomics.wdl" as metagenomics import "../tasks/tasks_ncbi.wdl" as ncbi +import "../tasks/tasks_reports.wdl" as reports import "../tasks/tasks_utils.wdl" as utils import "assemble_refbased.wdl" as assemble_refbased @@ -16,6 +17,7 @@ workflow scaffold_and_refine_multitaxa { input { String sample_id + Array[String] sample_names File reads_unmapped_bam File taxid_to_ref_accessions_tsv @@ -25,7 +27,8 @@ workflow scaffold_and_refine_multitaxa { # Float min_pct_reference_covered = 0.1 } - Int min_scaffold_unambig = 10 + Int min_scaffold_unambig = 10 + String sample_original_name = flatten([sample_names, [sample_id]])[0] # if kraken reports are available, filter scaffold list to observed hits (output might be empty!) if(defined(focal_report_tsv) && defined(ncbi_taxdump_tgz)) { @@ -37,7 +40,7 @@ workflow scaffold_and_refine_multitaxa { } } - Array[String] assembly_header = ["sample_id", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned"] + Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "sample"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] @@ -70,9 +73,10 @@ workflow scaffold_and_refine_multitaxa { # polish de novo assembly with reads call assemble_refbased.assemble_refbased as refine { input: - reads_unmapped_bams = [reads_unmapped_bam], - reference_fasta = scaffold.scaffold_fasta, - sample_name = sample_id + reads_unmapped_bams = [reads_unmapped_bam], + reference_fasta = scaffold.scaffold_fasta, + sample_name = sample_id, + sample_original_name = sample_original_name } String assembly_method_denovo = "viral-ngs/assemble_denovo" } @@ -87,8 +91,16 @@ workflow scaffold_and_refine_multitaxa { String assembly_method_refbased = "viral-ngs/assemble_refbased" } - # TO DO: if percent_reference_covered > some threshold, run ncbi.rename_fasta_header and ncbi.align_and_annot_transfer_single - # TO DO: if biosample attributes file provided, run ncbi.biosample_to_genbank + call reports.coverage_report as coverage_self { + input: + mapped_bams = select_all([refine.align_to_self_merged_aligned_only_bam, ref_based.align_to_self_merged_aligned_only_bam]), + mapped_bam_idx = [] + } + call utils.tsv_drop_cols as coverage_two_col { + input: + in_tsv = coverage_self.coverage_report, + drop_cols = ["aln2self_cov_median", "aln2self_cov_mean_non0", "aln2self_cov_1X", "aln2self_cov_5X", "aln2self_cov_20X", "aln2self_cov_100X"] + } String taxid = taxon[0] String tax_name = taxon[1] @@ -96,9 +108,12 @@ workflow scaffold_and_refine_multitaxa { Float percent_reference_covered = 1.0 * assembly_length_unambiguous / scaffold.reference_length File assembly_fasta = select_first([refine.assembly_fasta, ref_based.assembly_fasta]) Map[String, String] stats_by_taxon = { - "sample_id" : sample_id, - "taxid" : taxid, - "tax_name" : tax_name, + "entity:assembly_id" : sample_id + ":" + taxid, + "assembly_name" : tax_name + ": " + sample_original_name, + "sample_id" : sample_id, + "sample_name" : sample_original_name, + "taxid" : taxid, + "tax_name" : tax_name, "assembly_fasta" : assembly_fasta, "aligned_only_reads_bam" : select_first([refine.align_to_self_merged_aligned_only_bam, ref_based.align_to_self_merged_aligned_only_bam]), @@ -108,6 +123,10 @@ workflow scaffold_and_refine_multitaxa { "reads_aligned" : select_first([refine.align_to_self_merged_reads_aligned, ref_based.align_to_self_merged_reads_aligned]), "mean_coverage" : select_first([refine.align_to_self_merged_mean_coverage, ref_based.align_to_self_merged_mean_coverage]), "percent_reference_covered" : percent_reference_covered, + "scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, + "reference_num_segments_required" : scaffold.reference_num_segments_required, + "reference_length" : scaffold.reference_length, + "reference_accessions" : string_split.tokens, "intermediate_gapfill_fasta" : scaffold.intermediate_gapfill_fasta, "assembly_preimpute_length_unambiguous" : scaffold.assembly_preimpute_length_unambiguous, @@ -122,19 +141,24 @@ workflow scaffold_and_refine_multitaxa { "coverage_tsv" : select_first([refine.align_to_self_merged_coverage_tsv, ref_based.align_to_self_merged_coverage_tsv]), "read_pairs_aligned" : select_first([refine.align_to_self_merged_read_pairs_aligned, ref_based.align_to_self_merged_read_pairs_aligned]), "bases_aligned" : select_first([refine.align_to_self_merged_bases_aligned, ref_based.align_to_self_merged_bases_aligned]), + "coverage_genbank" : coverage_two_col.out_tsv, - "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]) + "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]), + + "sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' } - scatter(h in assembly_header) { - String stat_by_taxon = stats_by_taxon[h] + if (assembly_length_unambiguous > 0) { + scatter(h in assembly_header) { + String stat_by_taxon = stats_by_taxon[h] + } } } ### summary stats call utils.concatenate { input: - infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], + infiles = [write_tsv([assembly_header]), write_tsv(select_all(stat_by_taxon))], output_name = "assembly_metadata-~{sample_id}.tsv" } From 6149b48fabae44dc8a8b11beca6a60566d24a018 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 4 Mar 2024 11:45:35 -0500 Subject: [PATCH 03/20] cromwell fixes --- .../WDL/workflows/scaffold_and_refine_multitaxa.wdl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 6edc1b59c..8006c9d49 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -149,16 +149,20 @@ workflow scaffold_and_refine_multitaxa { } if (assembly_length_unambiguous > 0) { - scatter(h in assembly_header) { - String stat_by_taxon = stats_by_taxon[h] - } + Map[String, String] stats_by_taxon_nonzero = stats_by_taxon + } + } + + scatter(stat in select_all(stats_by_taxon_nonzero)) { + scatter(h in assembly_header) { + String stat_by_taxon = stat[h] } } ### summary stats call utils.concatenate { input: - infiles = [write_tsv([assembly_header]), write_tsv(select_all(stat_by_taxon))], + infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], output_name = "assembly_metadata-~{sample_id}.tsv" } From 08b69766a6df6daf79a9f91374fcf56779ffa79c Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 13:56:40 -0500 Subject: [PATCH 04/20] remove the filtering of zero lines for now --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 8006c9d49..e20350cbe 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -148,22 +148,16 @@ workflow scaffold_and_refine_multitaxa { "sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' } - if (assembly_length_unambiguous > 0) { - Map[String, String] stats_by_taxon_nonzero = stats_by_taxon - } - } - - scatter(stat in select_all(stats_by_taxon_nonzero)) { scatter(h in assembly_header) { String stat_by_taxon = stat[h] } } - + ### summary stats call utils.concatenate { input: infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], - output_name = "assembly_metadata-~{sample_id}.tsv" + output_name = "assembly_metadata-~{sample_id}-~{taxid}.tsv" } output { From 171448f1a97981b1d640985d1e2528bcbe430af2 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 13:57:51 -0500 Subject: [PATCH 05/20] fix var ref --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index e20350cbe..0becf6b85 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -149,7 +149,7 @@ workflow scaffold_and_refine_multitaxa { } scatter(h in assembly_header) { - String stat_by_taxon = stat[h] + String stat_by_taxon = stats_by_taxon[h] } } From baa0e88f860f0c1af06b65cff1d5c7f3abb82ec0 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 15:00:46 -0500 Subject: [PATCH 06/20] fix mistake --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 0becf6b85..e81dca440 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -157,7 +157,7 @@ workflow scaffold_and_refine_multitaxa { call utils.concatenate { input: infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], - output_name = "assembly_metadata-~{sample_id}-~{taxid}.tsv" + output_name = "assembly_metadata-~{sample_id}.tsv" } output { From f45c9260e09ca7614f9132576779201f8f1ea290 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 15:47:30 -0500 Subject: [PATCH 07/20] missing header item --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index e81dca440..d73d9f96f 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -40,7 +40,7 @@ workflow scaffold_and_refine_multitaxa { } } - Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "sample"] + Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] From 8cda1047a08b48d5f2403cddad6d4f9452197bfd Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 16:21:23 -0500 Subject: [PATCH 08/20] rename fasta files for refbased fastas --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index d73d9f96f..588b20b76 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -84,9 +84,10 @@ workflow scaffold_and_refine_multitaxa { # fall back to refbased assembly if de novo fails call assemble_refbased.assemble_refbased as ref_based { input: - reads_unmapped_bams = [reads_unmapped_bam], - reference_fasta = download_annotations.combined_fasta, - sample_name = sample_id + reads_unmapped_bams = [reads_unmapped_bam], + reference_fasta = download_annotations.combined_fasta, + sample_name = sample_id, + sample_original_name = sample_original_name } String assembly_method_refbased = "viral-ngs/assemble_refbased" } From b68ebcaf1eb3ea699d4e558663c4d2403368a11e Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 16:38:22 -0500 Subject: [PATCH 09/20] incremental cromwell debugging --- .../workflows/scaffold_and_refine_multitaxa.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 588b20b76..65f963f3a 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -40,7 +40,8 @@ workflow scaffold_and_refine_multitaxa { } } - Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] + #Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] + Array[String] assembly_header = ["sample_id", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] @@ -124,10 +125,10 @@ workflow scaffold_and_refine_multitaxa { "reads_aligned" : select_first([refine.align_to_self_merged_reads_aligned, ref_based.align_to_self_merged_reads_aligned]), "mean_coverage" : select_first([refine.align_to_self_merged_mean_coverage, ref_based.align_to_self_merged_mean_coverage]), "percent_reference_covered" : percent_reference_covered, - "scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, - "reference_num_segments_required" : scaffold.reference_num_segments_required, - "reference_length" : scaffold.reference_length, - "reference_accessions" : string_split.tokens, + #"scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, + #"reference_num_segments_required" : scaffold.reference_num_segments_required, + #"reference_length" : scaffold.reference_length, + #"reference_accessions" : string_split.tokens, "intermediate_gapfill_fasta" : scaffold.intermediate_gapfill_fasta, "assembly_preimpute_length_unambiguous" : scaffold.assembly_preimpute_length_unambiguous, @@ -144,9 +145,9 @@ workflow scaffold_and_refine_multitaxa { "bases_aligned" : select_first([refine.align_to_self_merged_bases_aligned, ref_based.align_to_self_merged_bases_aligned]), "coverage_genbank" : coverage_two_col.out_tsv, - "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]), + "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]) - "sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' + #"sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' } scatter(h in assembly_header) { From c8101b00c71caa1442260b946a00b454a183b611 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 16:47:26 -0500 Subject: [PATCH 10/20] increment more towards functional --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 65f963f3a..89ed69296 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -41,7 +41,7 @@ workflow scaffold_and_refine_multitaxa { } #Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] - Array[String] assembly_header = ["sample_id", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned"] + Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] @@ -125,10 +125,10 @@ workflow scaffold_and_refine_multitaxa { "reads_aligned" : select_first([refine.align_to_self_merged_reads_aligned, ref_based.align_to_self_merged_reads_aligned]), "mean_coverage" : select_first([refine.align_to_self_merged_mean_coverage, ref_based.align_to_self_merged_mean_coverage]), "percent_reference_covered" : percent_reference_covered, - #"scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, - #"reference_num_segments_required" : scaffold.reference_num_segments_required, - #"reference_length" : scaffold.reference_length, - #"reference_accessions" : string_split.tokens, + "scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, + "reference_num_segments_required" : scaffold.reference_num_segments_required, + "reference_length" : scaffold.reference_length, + "reference_accessions" : string_split.tokens, "intermediate_gapfill_fasta" : scaffold.intermediate_gapfill_fasta, "assembly_preimpute_length_unambiguous" : scaffold.assembly_preimpute_length_unambiguous, From b39d82c149072322d8631a9fdb4325d67b101157 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 17:30:53 -0500 Subject: [PATCH 11/20] experimenting more --- .../scaffold_and_refine_multitaxa.wdl | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 89ed69296..3391f4278 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -41,7 +41,7 @@ workflow scaffold_and_refine_multitaxa { } #Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] - Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method"] + Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] @@ -93,22 +93,25 @@ workflow scaffold_and_refine_multitaxa { String assembly_method_refbased = "viral-ngs/assemble_refbased" } - call reports.coverage_report as coverage_self { - input: - mapped_bams = select_all([refine.align_to_self_merged_aligned_only_bam, ref_based.align_to_self_merged_aligned_only_bam]), - mapped_bam_idx = [] - } - call utils.tsv_drop_cols as coverage_two_col { - input: - in_tsv = coverage_self.coverage_report, - drop_cols = ["aln2self_cov_median", "aln2self_cov_mean_non0", "aln2self_cov_1X", "aln2self_cov_5X", "aln2self_cov_20X", "aln2self_cov_100X"] + Int assembly_length_unambiguous = select_first([refine.assembly_length_unambiguous, ref_based.assembly_length_unambiguous]) + Float percent_reference_covered = 1.0 * assembly_length_unambiguous / scaffold.reference_length + File assembly_fasta = select_first([refine.assembly_fasta, ref_based.assembly_fasta]) + + if(assembly_length_unambiguous > 0) { + call reports.coverage_report as coverage_self { + input: + mapped_bams = select_all([refine.align_to_self_merged_aligned_only_bam, ref_based.align_to_self_merged_aligned_only_bam]), + mapped_bam_idx = [] + } + call utils.tsv_drop_cols as coverage_two_col { + input: + in_tsv = coverage_self.coverage_report, + drop_cols = ["aln2self_cov_median", "aln2self_cov_mean_non0", "aln2self_cov_1X", "aln2self_cov_5X", "aln2self_cov_20X", "aln2self_cov_100X"] + } } String taxid = taxon[0] String tax_name = taxon[1] - Int assembly_length_unambiguous = select_first([refine.assembly_length_unambiguous, ref_based.assembly_length_unambiguous]) - Float percent_reference_covered = 1.0 * assembly_length_unambiguous / scaffold.reference_length - File assembly_fasta = select_first([refine.assembly_fasta, ref_based.assembly_fasta]) Map[String, String] stats_by_taxon = { "entity:assembly_id" : sample_id + ":" + taxid, "assembly_name" : tax_name + ": " + sample_original_name, @@ -128,7 +131,7 @@ workflow scaffold_and_refine_multitaxa { "scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, "reference_num_segments_required" : scaffold.reference_num_segments_required, "reference_length" : scaffold.reference_length, - "reference_accessions" : string_split.tokens, + #"reference_accessions" : string_split.tokens, "intermediate_gapfill_fasta" : scaffold.intermediate_gapfill_fasta, "assembly_preimpute_length_unambiguous" : scaffold.assembly_preimpute_length_unambiguous, @@ -143,7 +146,7 @@ workflow scaffold_and_refine_multitaxa { "coverage_tsv" : select_first([refine.align_to_self_merged_coverage_tsv, ref_based.align_to_self_merged_coverage_tsv]), "read_pairs_aligned" : select_first([refine.align_to_self_merged_read_pairs_aligned, ref_based.align_to_self_merged_read_pairs_aligned]), "bases_aligned" : select_first([refine.align_to_self_merged_bases_aligned, ref_based.align_to_self_merged_bases_aligned]), - "coverage_genbank" : coverage_two_col.out_tsv, + "coverage_genbank" : select_first([coverage_two_col.out_tsv, ""]), "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]) From 4b21bf09c93f66df29caaec5ffdf54e302141b66 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 17:43:46 -0500 Subject: [PATCH 12/20] fix and restore full table --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 3391f4278..4d4ee6a36 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -40,8 +40,7 @@ workflow scaffold_and_refine_multitaxa { } } - #Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] - Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method"] + Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "coverage_genbank", "assembly_method", "sample"] Array[Array[String]] taxid_to_ref_accessions = read_tsv(select_first([filter_refs_to_found_taxa.filtered_taxid_to_ref_accessions_tsv, taxid_to_ref_accessions_tsv])) scatter(taxon in taxid_to_ref_accessions) { # cromwell's read_tsv emits [[""]] on empty (0-byte) file input, turn it into [] @@ -131,7 +130,7 @@ workflow scaffold_and_refine_multitaxa { "scaffolding_num_segments_recovered" : scaffold.assembly_num_segments_recovered, "reference_num_segments_required" : scaffold.reference_num_segments_required, "reference_length" : scaffold.reference_length, - #"reference_accessions" : string_split.tokens, + "reference_accessions" : taxon[2], "intermediate_gapfill_fasta" : scaffold.intermediate_gapfill_fasta, "assembly_preimpute_length_unambiguous" : scaffold.assembly_preimpute_length_unambiguous, @@ -148,9 +147,9 @@ workflow scaffold_and_refine_multitaxa { "bases_aligned" : select_first([refine.align_to_self_merged_bases_aligned, ref_based.align_to_self_merged_bases_aligned]), "coverage_genbank" : select_first([coverage_two_col.out_tsv, ""]), - "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]) + "assembly_method" : select_first([assembly_method_denovo, assembly_method_refbased]), - #"sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' + "sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' } scatter(h in assembly_header) { From 40a3a277323da143d97847abaa4694592317f44c Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 17:51:28 -0500 Subject: [PATCH 13/20] filter out tsv to nonzeros --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 4d4ee6a36..332632d40 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -155,12 +155,16 @@ workflow scaffold_and_refine_multitaxa { scatter(h in assembly_header) { String stat_by_taxon = stats_by_taxon[h] } + + if(assembly_length_unambiguous > 0) { + File stat_by_taxon_tsv = write_tsv([stat_by_taxon]) + } } ### summary stats call utils.concatenate { input: - infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], + infiles = flatten([[write_tsv([assembly_header])], select_all(stat_by_taxon_tsv)]), output_name = "assembly_metadata-~{sample_id}.tsv" } From 698f7a1b8d90831b415d9b010474223ec3718147 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 4 Mar 2024 19:12:07 -0500 Subject: [PATCH 14/20] revert --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 332632d40..4d4ee6a36 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -155,16 +155,12 @@ workflow scaffold_and_refine_multitaxa { scatter(h in assembly_header) { String stat_by_taxon = stats_by_taxon[h] } - - if(assembly_length_unambiguous > 0) { - File stat_by_taxon_tsv = write_tsv([stat_by_taxon]) - } } ### summary stats call utils.concatenate { input: - infiles = flatten([[write_tsv([assembly_header])], select_all(stat_by_taxon_tsv)]), + infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], output_name = "assembly_metadata-~{sample_id}.tsv" } From 1f6e64607d1b57771c833ceae1c10c994a98979d Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Tue, 5 Mar 2024 16:08:01 -0500 Subject: [PATCH 15/20] test --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 4d4ee6a36..12acce5de 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -155,6 +155,10 @@ workflow scaffold_and_refine_multitaxa { scatter(h in assembly_header) { String stat_by_taxon = stats_by_taxon[h] } + + if(assembly_length_unambiguous > 0) { + File tsv = write_tsv([stat_by_taxon]) + } } ### summary stats From 975335f1d304da283c03f8fb1654bdab55ca0956 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Tue, 5 Mar 2024 16:27:07 -0500 Subject: [PATCH 16/20] undo --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 12acce5de..4d4ee6a36 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -155,10 +155,6 @@ workflow scaffold_and_refine_multitaxa { scatter(h in assembly_header) { String stat_by_taxon = stats_by_taxon[h] } - - if(assembly_length_unambiguous > 0) { - File tsv = write_tsv([stat_by_taxon]) - } } ### summary stats From ea672e21c6ae0d54a1db9e053ab76ba08258048f Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 6 Mar 2024 10:07:07 -0500 Subject: [PATCH 17/20] terra doesnt like colons --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 8006c9d49..4004d7479 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -108,7 +108,7 @@ workflow scaffold_and_refine_multitaxa { Float percent_reference_covered = 1.0 * assembly_length_unambiguous / scaffold.reference_length File assembly_fasta = select_first([refine.assembly_fasta, ref_based.assembly_fasta]) Map[String, String] stats_by_taxon = { - "entity:assembly_id" : sample_id + ":" + taxid, + "entity:assembly_id" : sample_id + "-" + taxid, "assembly_name" : tax_name + ": " + sample_original_name, "sample_id" : sample_id, "sample_name" : sample_original_name, From 1e3dd563dd9b862ab3835adb1b8621decb2d7f01 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 6 Mar 2024 10:20:24 -0500 Subject: [PATCH 18/20] add new workflow terra_tsv_to_table --- .dockstore.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.dockstore.yml b/.dockstore.yml index c359fd887..b693e6a43 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -374,6 +374,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/terra_table_to_tsv.wdl testParameterFiles: - /empty.json + - name: terra_tsv_to_table + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/terra_tsv_to_table.wdl + testParameterFiles: + - /empty.json - name: terra_update_assemblies subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/terra_update_assemblies.wdl @@ -394,13 +399,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/bam_to_qiime.wdl testParameterFiles: - /empty.json - - name: create_enterics_qc_viz subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/create_enterics_qc_viz.wdl testParameterFiles: - /empty.json - - name: create_enterics_qc_viz_general subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/create_enterics_qc_viz_general.wdl From a4078f25c86e0a07bddb8878248b2d79889142c8 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 6 Mar 2024 10:28:11 -0500 Subject: [PATCH 19/20] forgot to add actual wdl --- pipes/WDL/workflows/terra_tsv_to_table.wdl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 pipes/WDL/workflows/terra_tsv_to_table.wdl diff --git a/pipes/WDL/workflows/terra_tsv_to_table.wdl b/pipes/WDL/workflows/terra_tsv_to_table.wdl new file mode 100644 index 000000000..17d76586b --- /dev/null +++ b/pipes/WDL/workflows/terra_tsv_to_table.wdl @@ -0,0 +1,15 @@ +version 1.0 + +#DX_SKIP_WORKFLOW + +import "../tasks/tasks_terra.wdl" as terra + +workflow terra_tsv_to_table { + meta { + description: "Upload tsv file to Terra data table: insert-or-update on existing rows/columns" + author: "Broad Viral Genomics" + email: "viral-ngs@broadinstitute.org" + } + + call terra.upload_entities_tsv +} \ No newline at end of file From 7898134a49151caf998488295ba702e941bfae33 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Fri, 8 Mar 2024 19:49:21 -0500 Subject: [PATCH 20/20] introspect terra workspace and project names --- pipes/WDL/workflows/terra_tsv_to_table.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/terra_tsv_to_table.wdl b/pipes/WDL/workflows/terra_tsv_to_table.wdl index 17d76586b..44da1bee0 100644 --- a/pipes/WDL/workflows/terra_tsv_to_table.wdl +++ b/pipes/WDL/workflows/terra_tsv_to_table.wdl @@ -11,5 +11,11 @@ workflow terra_tsv_to_table { email: "viral-ngs@broadinstitute.org" } - call terra.upload_entities_tsv + call terra.check_terra_env + + call terra.upload_entities_tsv { + input: + workspace_name = check_terra_env.workspace_name, + terra_project = check_terra_env.workspace_namespace + } } \ No newline at end of file