From fce5c2343335b7d47e85b631070f027f3ba195cc Mon Sep 17 00:00:00 2001 From: William Rowell Date: Tue, 10 Dec 2024 15:38:25 -0800 Subject: [PATCH] Added sawfish. --- wdl-ci.config.json | 293 ++++++++++++++++------------ workflows/downstream/downstream.wdl | 11 +- workflows/family.inputs.json | 1 - workflows/family.wdl | 27 ++- workflows/joint/inputs.json | 5 +- workflows/joint/joint.wdl | 71 +++---- workflows/singleton.wdl | 14 +- workflows/upstream/upstream.wdl | 73 +++---- workflows/wdl-common | 2 +- 9 files changed, 256 insertions(+), 241 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 590ad855..895107b0 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -535,11 +535,11 @@ }, "sv_stats": { "key": "sv_stats", - "digest": "35gn2cqouobao6vacrqbi67zhkh6nmyn", + "digest": "foqixa2ryrx7e64ymqylurlfad7gpdpi", "tests": [ { "inputs": { - "vcf": "${resources_file_path}/sv_stats/HG002.GRCh38.pbsv.phased.vcf.gz", + "vcf": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", "runtime_attributes": "${default_runtime_attributes}" }, "output_tests": { @@ -550,13 +550,13 @@ ] }, "stat_sv_DEL_count": { - "value": "19", + "value": "17", "test_tasks": [ "compare_string" ] }, "stat_sv_INS_count": { - "value": "37", + "value": "46", "test_tasks": [ "compare_string" ] @@ -568,7 +568,13 @@ ] }, "stat_sv_BND_count": { - "value": "0", + "value": "2", + "test_tasks": [ + "compare_string" + ] + }, + "stat_sv_INVBND_count": { + "value": "4", "test_tasks": [ "compare_string" ] @@ -964,7 +970,7 @@ "tasks": { "merge_bam_stats": { "key": "merge_bam_stats", - "digest": "fgbffrsqd32kov6i43cidmt2u5lgvn7g", + "digest": "mjd6zpbxtabbulmq3kwhcx4cnubxaf74", "tests": [ { "inputs": { @@ -1515,120 +1521,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/pbsv.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pbsv.wdl", - "name": "", - "description": "", - "tasks": { - "pbsv_discover": { - "key": "pbsv_discover", - "digest": "mcdiubcggjbwaweaciocgrnypvqn5tnb", - "tests": [ - { - "inputs": { - "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", - "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", - "trf_bed": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.trf.bed", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "svsig": { - "value": "${resources_file_path}/pbsv_discover/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "test_tasks": [ - "compare_file_basename", - "check_gzip", - "check_empty_lines" - ] - } - } - } - ] - }, - "pbsv_call": { - "key": "pbsv_call", - "digest": "2rhytrhz52jofq7lv42akixzl55wa3lk", - "tests": [ - { - "inputs": { - "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton/HG002.GRCh38.5.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/singleton_no_shard/HG002.GRCh38.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - }, - { - "inputs": { - "sample_id": "HG002-trio", - "svsigs": [ - "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.svsig.gz", - "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.svsig.gz" - ], - "sample_count": 3, - "ref_fasta": "${ref_fasta}", - "ref_index": "${ref_index}", - "ref_name": "${ref_name}", - "shard_index": 5, - "regions": [ - "chr6" - ], - "runtime_attributes": "${default_runtime_attributes}" - }, - "output_tests": { - "vcf": { - "value": "${resources_file_path}/pbsv_call/trio/HG002-trio.GRCh38.5.pbsv.vcf.gz", - "test_tasks": [ - "compare_file_basename", - "vcftools_validator", - "check_gzip" - ] - } - } - } - ] - } - } - }, "workflows/wdl-common/wdl/tasks/samtools.wdl": { "key": "workflows/wdl-common/wdl/tasks/samtools.wdl", "name": "", @@ -1678,7 +1570,7 @@ "tasks": { "trgt": { "key": "trgt", - "digest": "rpcfzfr5reqxv7jbtfncu6etivzxvgs2", + "digest": "6i7troewkutvnmy6oft2vkgeltp2jh3z", "tests": [ { "inputs": { @@ -1767,7 +1659,7 @@ }, "trgt_merge": { "key": "trgt_merge", - "digest": "cmdcqkqrcfpn2eoczxfwhs6zqhalzdtt", + "digest": "ljvpgmkt7sfdwpm64dqllrphd23ols56", "tests": [ { "inputs": { @@ -2461,6 +2353,163 @@ ] } } + }, + "workflows/wdl-common/wdl/tasks/sawfish.wdl": { + "key": "workflows/wdl-common/wdl/tasks/sawfish.wdl", + "name": "", + "description": "", + "tasks": { + "sawfish_discover": { + "key": "sawfish_discover", + "digest": "eh67skuq3swjgkbrinqhzfxf2wfea2hp", + "tests": [ + { + "inputs": { + "sex": "MALE", + "aligned_bam": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG002.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG002/HG002.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + }, + { + "inputs": { + "sex": "MALE", + "aligned_bam": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG003.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG003/HG003.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + }, + { + "inputs": { + "aligned_bam": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam", + "aligned_bam_index": "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai", + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "expected_male_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XY.bed", + "expected_female_bed": "${resources_file_path}/hifi-wdl-resources-v2.0.0/GRCh38/hificnv/expected_cn.hg38.XX.bed", + "out_prefix": "HG004.GRCh38", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "discover_tar": { + "value": "${resources_file_path}/sawfish_discover/output/HG004/HG004.GRCh38.tar", + "test_tasks": [ + "compare_file_basename" + ] + } + } + } + ] + }, + "sawfish_call": { + "key": "sawfish_call", + "digest": "gcc2gfurgryq2ziqgyfgxc5a2k3dtkyo", + "tests": [ + { + "inputs": { + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar" + ], + "aligned_bams": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "out_prefix": "HG002.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002/HG002.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + } + } + }, + { + "inputs": { + "discover_tars": [ + "${resources_file_path}/sawfish_call/input/HG002.GRCh38.tar", + "${resources_file_path}/sawfish_call/input/HG003.GRCh38.tar", + "${resources_file_path}/sawfish_call/input/HG004.GRCh38.tar" + ], + "aligned_bams": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam", + "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam", + "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam" + ], + "aligned_bam_indices": [ + "${resources_file_path}/inputs/HG002.GRCh38.chr6_10000000_20000000.bam.bai", + "${resources_file_path}/inputs/HG003.GRCh38.chr6_10000000_20000000.bam.bai", + "${resources_file_path}/inputs/HG004.GRCh38.chr6_10000000_20000000.bam.bai" + ], + "ref_fasta": "${ref_fasta}", + "ref_index": "${ref_index}", + "ref_name": "${ref_name}", + "out_prefix": "HG002-trio.joint.GRCh38.structural_variants", + "runtime_attributes": "${default_runtime_attributes}" + }, + "output_tests": { + "vcf": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.vcf.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip", + "vcftools_validator" + ] + }, + "supporting_reads": { + "value": "${resources_file_path}/sawfish_call/output/HG002-trio/HG002-trio.joint.GRCh38.structural_variants.supporting_reads.json.gz", + "test_tasks": [ + "compare_file_basename", + "check_gzip" + ] + } + } + } + ] + } + } } }, "engines": { diff --git a/workflows/downstream/downstream.wdl b/workflows/downstream/downstream.wdl index 70b6aeb4..18ff4271 100644 --- a/workflows/downstream/downstream.wdl +++ b/workflows/downstream/downstream.wdl @@ -191,11 +191,12 @@ workflow downstream { File indel_distribution_plot = bcftools_stats_roh_small_variants.indel_distribution_plot # sv stats - String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count - String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count - String stat_sv_INS_count = sv_stats.stat_sv_INS_count - String stat_sv_INV_count = sv_stats.stat_sv_INV_count - String stat_sv_BND_count = sv_stats.stat_sv_BND_count + String stat_sv_DUP_count = sv_stats.stat_sv_DUP_count + String stat_sv_DEL_count = sv_stats.stat_sv_DEL_count + String stat_sv_INS_count = sv_stats.stat_sv_INS_count + String stat_sv_INV_count = sv_stats.stat_sv_INV_count + String stat_sv_INVBND_count = sv_stats.stat_sv_INVBND_count + String stat_sv_BND_count = sv_stats.stat_sv_BND_count # cpg_pileup outputs File? cpg_combined_bed = cpg_pileup.combined_bed diff --git a/workflows/family.inputs.json b/workflows/family.inputs.json index ec555a3c..90bcb20f 100644 --- a/workflows/family.inputs.json +++ b/workflows/family.inputs.json @@ -22,7 +22,6 @@ "humanwgs_family.pharmcat_min_coverage": "Int (optional, default = 10)", "humanwgs_family.tertiary_map_file": "File? (optional)", "humanwgs_family.glnexus_mem_gb": "Int? (optional)", - "humanwgs_family.pbsv_call_mem_gb": "Int? (optional)", "humanwgs_family.gpu": "Boolean (optional, default = false)", "humanwgs_family.backend": "String", "humanwgs_family.zones": "String? (optional)", diff --git a/workflows/family.wdl b/workflows/family.wdl index bfe9cec5..067c3f91 100644 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -45,9 +45,6 @@ workflow humanwgs_family { glnexus_mem_gb: { name: "Override GLnexus memory request (GB)" } - pbsv_call_mem_gb: { - name: "Override PBSV call memory request (GB)" - } gpu: { name: "Use GPU when possible" } @@ -90,7 +87,6 @@ workflow humanwgs_family { File? tertiary_map_file Int? glnexus_mem_gb - Int? pbsv_call_mem_gb Boolean gpu = false @@ -140,12 +136,13 @@ workflow humanwgs_family { input: family_id = family.family_id, sample_ids = sample_id, - gvcfs = upstream.small_variant_gvcf, - gvcf_indices = upstream.small_variant_gvcf_index, - svsigs = flatten(upstream.svsigs), + gvcfs = upstream.small_variant_vcf, + gvcf_indices = upstream.small_variant_vcf_index, + discover_tars = upstream.discover_tar, + aligned_bams = upstream.out_bam, + aligned_bam_indices = upstream.out_bam_index, ref_map_file = ref_map_file, glnexus_mem_gb = glnexus_mem_gb, - pbsv_call_mem_gb = pbsv_call_mem_gb, default_runtime_attributes = default_runtime_attributes } } @@ -193,6 +190,7 @@ workflow humanwgs_family { 'sv_DEL_count': downstream.stat_sv_DEL_count, 'sv_INS_count': downstream.stat_sv_INS_count, 'sv_INV_count': downstream.stat_sv_INV_count, + 'sv_INVBND_count': downstream.stat_sv_INVBND_count, 'sv_BND_count': downstream.stat_sv_BND_count, 'cnv_DUP_count': upstream.stat_cnv_DUP_count, 'cnv_DEL_count': upstream.stat_cnv_DEL_count, @@ -322,11 +320,12 @@ workflow humanwgs_family { Array[File] phased_sv_vcf_index = downstream.phased_sv_vcf_index # sv stats - Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count - Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count - Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count - Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count - Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count + Array[String] stat_sv_DUP_count = downstream.stat_sv_DUP_count + Array[String] stat_sv_DEL_count = downstream.stat_sv_DEL_count + Array[String] stat_sv_INS_count = downstream.stat_sv_INS_count + Array[String] stat_sv_INV_count = downstream.stat_sv_INV_count + Array[String] stat_sv_INVBND_count = downstream.stat_sv_INVBND_count + Array[String] stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs Array[File] phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -400,6 +399,6 @@ workflow humanwgs_family { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v2.0.7" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } \ No newline at end of file diff --git a/workflows/joint/inputs.json b/workflows/joint/inputs.json index 9a0ab408..90e3de13 100644 --- a/workflows/joint/inputs.json +++ b/workflows/joint/inputs.json @@ -3,10 +3,11 @@ "joint.sample_ids": "Array[String]", "joint.gvcfs": "Array[File]", "joint.gvcf_indices": "Array[File]", - "joint.svsigs": "Array[File]", + "joint.discover_tars": "Array[File]", + "joint.aligned_bams": "Array[File]", + "joint.aligned_bam_indices": "Array[File]", "joint.ref_map_file": "File", "joint.glnexus_mem_gb": "Int? (optional)", - "joint.pbsv_call_mem_gb": "Int? (optional)", "joint.default_runtime_attributes": { "max_retries": "Int", "container_registry": "String", diff --git a/workflows/joint/joint.wdl b/workflows/joint/joint.wdl index 6f1f32b3..dac45709 100644 --- a/workflows/joint/joint.wdl +++ b/workflows/joint/joint.wdl @@ -2,9 +2,8 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/glnexus.wdl" as Glnexus -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits workflow joint { meta { @@ -24,8 +23,14 @@ workflow joint { gvcf_indices: { name: "GVCF Indices" } - svsigs: { - name: "SV Signatures" + discover_tars: { + name: "Sawfish discover output tarballs" + } + aligned_bams: { + name: "Aligned BAMs" + } + aligned_bam_indices: { + name: "Aligned BAM Indices" } ref_map_file: { name: "Reference Map File" @@ -33,9 +38,6 @@ workflow joint { glnexus_mem_gb: { name: "GLnexus Memory (GB)" } - pbsv_call_mem_gb: { - name: "PBSV Call Memory (GB)" - } default_runtime_attributes: { name: "Default Runtime Attribute Struct" } @@ -60,63 +62,42 @@ workflow joint { Array[File] gvcfs Array[File] gvcf_indices - Array[File] svsigs + Array[File] discover_tars + Array[File] aligned_bams + Array[File] aligned_bam_indices File ref_map_file Int? glnexus_mem_gb - Int? pbsv_call_mem_gb RuntimeAttributes default_runtime_attributes } Map[String, String] ref_map = read_map(ref_map_file) - call Pbsv_splits.get_pbsv_splits { - input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = family_id + ".joint", - svsigs = svsigs, - sample_count = length(sample_ids), - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - mem_gb = pbsv_call_mem_gb, - runtime_attributes = default_runtime_attributes - } - } - - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { + call Sawfish.sawfish_call { input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes + discover_tars = discover_tars, + aligned_bams = aligned_bams, + aligned_bam_indices = aligned_bam_indices, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{family_id}.joint.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } - String sv_vcf_basename = basename(concat_pbsv_vcf.concatenated_vcf, ".vcf.gz") + String sv_vcf_basename = basename(sawfish_call.vcf, ".vcf.gz") scatter (sample_id in sample_ids) { String split_sv_vcf_name = "~{sample_id}.~{sv_vcf_basename}.vcf.gz" String split_sv_vcf_index_name = "~{sample_id}.~{sv_vcf_basename}.vcf.gz.tbi" } - call Bcftools.split_vcf_by_sample as split_pbsv { + call Bcftools.split_vcf_by_sample as split_sawfish { input: sample_ids = sample_ids, - vcf = concat_pbsv_vcf.concatenated_vcf, - vcf_index = concat_pbsv_vcf.concatenated_vcf_index, + vcf = sawfish_call.vcf, + vcf_index = sawfish_call.vcf_index, split_vcf_names = split_sv_vcf_name, split_vcf_index_names = split_sv_vcf_index_name, runtime_attributes = default_runtime_attributes @@ -150,8 +131,8 @@ workflow joint { } output { - Array[File] split_joint_structural_variant_vcfs = split_pbsv.split_vcfs - Array[File] split_joint_structural_variant_vcf_indices = split_pbsv.split_vcf_indices + Array[File] split_joint_structural_variant_vcfs = split_sawfish.split_vcfs + Array[File] split_joint_structural_variant_vcf_indices = split_sawfish.split_vcf_indices Array[File] split_joint_small_variant_vcfs = split_glnexus.split_vcfs Array[File] split_joint_small_variant_vcf_indices = split_glnexus.split_vcf_indices } diff --git a/workflows/singleton.wdl b/workflows/singleton.wdl index ef7008df..67de00c8 100644 --- a/workflows/singleton.wdl +++ b/workflows/singleton.wdl @@ -167,6 +167,7 @@ workflow humanwgs_singleton { 'sv_DEL_count': [downstream.stat_sv_DEL_count], 'sv_INS_count': [downstream.stat_sv_INS_count], 'sv_INV_count': [downstream.stat_sv_INV_count], + 'sv_INVBND_count': [downstream.stat_sv_INVBND_count], 'sv_BND_count': [downstream.stat_sv_BND_count], 'cnv_DUP_count': [upstream.stat_cnv_DUP_count], 'cnv_DEL_count': [upstream.stat_cnv_DEL_count], @@ -262,11 +263,12 @@ workflow humanwgs_singleton { File phased_sv_vcf_index = downstream.phased_sv_vcf_index # sv stats - String stat_sv_DUP_count = downstream.stat_sv_DUP_count - String stat_sv_DEL_count = downstream.stat_sv_DEL_count - String stat_sv_INS_count = downstream.stat_sv_INS_count - String stat_sv_INV_count = downstream.stat_sv_INV_count - String stat_sv_BND_count = downstream.stat_sv_BND_count + String stat_sv_DUP_count = downstream.stat_sv_DUP_count + String stat_sv_DEL_count = downstream.stat_sv_DEL_count + String stat_sv_INS_count = downstream.stat_sv_INS_count + String stat_sv_INV_count = downstream.stat_sv_INV_count + String stat_sv_INVBND_count = downstream.stat_sv_INVBND_count + String stat_sv_BND_count = downstream.stat_sv_BND_count # small variant outputs File phased_small_variant_vcf = downstream.phased_small_variant_vcf @@ -332,6 +334,6 @@ workflow humanwgs_singleton { # workflow metadata String workflow_name = "humanwgs_family" - String workflow_version = "v2.0.7" + if defined(debug_version) then "~{"-" + debug_version}" else "" + String workflow_version = "v3.0.0-alpha1" + if defined(debug_version) then "~{"-" + debug_version}" else "" } } diff --git a/workflows/upstream/upstream.wdl b/workflows/upstream/upstream.wdl index 05639dd8..5f987b44 100644 --- a/workflows/upstream/upstream.wdl +++ b/workflows/upstream/upstream.wdl @@ -3,15 +3,13 @@ version 1.0 import "../wdl-common/wdl/structs.wdl" import "../wdl-common/wdl/tasks/pbmm2.wdl" as Pbmm2 import "../wdl-common/wdl/tasks/merge_bam_stats.wdl" as MergeBamStats -import "../wdl-common/wdl/tasks/pbsv.wdl" as Pbsv -import "../wdl-common/wdl/tasks/bcftools.wdl" as Bcftools +import "../wdl-common/wdl/tasks/sawfish.wdl" as Sawfish import "../wdl-common/wdl/workflows/deepvariant/deepvariant.wdl" as DeepVariant import "../wdl-common/wdl/tasks/samtools.wdl" as Samtools import "../wdl-common/wdl/tasks/mosdepth.wdl" as Mosdepth import "../wdl-common/wdl/tasks/trgt.wdl" as Trgt import "../wdl-common/wdl/tasks/paraphase.wdl" as Paraphase import "../wdl-common/wdl/tasks/hificnv.wdl" as Hificnv -import "../wdl-common/wdl/workflows/get_pbsv_splits/get_pbsv_splits.wdl" as Pbsv_splits workflow upstream { meta { @@ -78,13 +76,6 @@ workflow upstream { ref_name = ref_map["name"], runtime_attributes = default_runtime_attributes } - call Pbsv.pbsv_discover { - input: - aligned_bam = pbmm2_align.aligned_bam, - aligned_bam_index = pbmm2_align.aligned_bam_index, - trf_bed = ref_map["pbsv_tandem_repeat_bed"], # !FileCoercion - runtime_attributes = default_runtime_attributes - } } call MergeBamStats.merge_bam_stats { @@ -132,6 +123,19 @@ workflow upstream { default_runtime_attributes = default_runtime_attributes } + call Sawfish.sawfish_discover { + input: + sex = select_first([sex, mosdepth.inferred_sex]), + aligned_bam = aligned_bam_data, + aligned_bam_index = aligned_bam_index, + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}", + expected_male_bed = ref_map["hificnv_expected_bed_male"], # !FileCoercion + expected_female_bed = ref_map["hificnv_expected_bed_female"], # !FileCoercion + runtime_attributes = default_runtime_attributes + } + call Trgt.trgt { input: sample_id = sample_id, @@ -183,35 +187,15 @@ workflow upstream { } if (single_sample) { - call Pbsv_splits.get_pbsv_splits { - input: - pbsv_splits_file = ref_map["pbsv_splits"], # !FileCoercion - default_runtime_attributes = default_runtime_attributes - } - - scatter (shard_index in range(length(get_pbsv_splits.pbsv_splits))) { - Array[String] region_set = get_pbsv_splits.pbsv_splits[shard_index] - - call Pbsv.pbsv_call { - input: - sample_id = sample_id, - svsigs = pbsv_discover.svsig, - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_index = ref_map["fasta_index"], # !FileCoercion - ref_name = ref_map["name"], - shard_index = shard_index, - regions = region_set, - runtime_attributes = default_runtime_attributes - } - } - - # concatenate pbsv vcfs - call Bcftools.concat_pbsv_vcf { - input: - vcfs = pbsv_call.vcf, - vcf_indices = pbsv_call.vcf_index, - out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", - runtime_attributes = default_runtime_attributes + call Sawfish.sawfish_call { + input: + discover_tars = [sawfish_discover.discover_tar], + aligned_bams = [aligned_bam_data], + aligned_bam_indices = [aligned_bam_index], + ref_fasta = ref_map["fasta"], # !FileCoercion + ref_index = ref_map["fasta_index"], # !FileCoercion + out_prefix = "~{sample_id}.~{ref_map['name']}.structural_variants", + runtime_attributes = default_runtime_attributes } } @@ -238,13 +222,12 @@ workflow upstream { String inferred_sex = mosdepth.inferred_sex String stat_mean_depth = mosdepth.stat_mean_depth - # per movie sv signatures - # if we've already called variants, no need to keep these - Array[File] svsigs = if single_sample then [] else pbsv_discover.svsig + # per sample sv signatures + File discover_tar = sawfish_discover.discover_tar - # pbsv outputs for single sample - File? sv_vcf = concat_pbsv_vcf.concatenated_vcf - File? sv_vcf_index = concat_pbsv_vcf.concatenated_vcf_index + # sawfish outputs for single sample + File? sv_vcf = sawfish_call.vcf + File? sv_vcf_index = sawfish_call.vcf_index # small variant outputs File small_variant_vcf = deepvariant.vcf diff --git a/workflows/wdl-common b/workflows/wdl-common index 4ca54791..b7248f38 160000 --- a/workflows/wdl-common +++ b/workflows/wdl-common @@ -1 +1 @@ -Subproject commit 4ca54791d3f8fabd88872df3cd8ec81ed8a516c6 +Subproject commit b7248f385099378d15add78a3560f389bd0e9e91