Skip to content

Commit

Permalink
Generate pedigree directly from inputs json structure.
Browse files Browse the repository at this point in the history
Fixes "yaml2ped.py generating ped file with wrong format" #129.
  • Loading branch information
williamrowell committed Jun 10, 2024
1 parent b54b27b commit 797115d
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 43 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ The Docker image used by a particular step of the workflow can be identified by
| pb-cpg-tools | <ul><li>[pb-CpG-tools v2.3.2](https://github.com/PacificBiosciences/pb-CpG-tools/releases/tag/v2.3.2)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/7481837d3b0f539adf4f64209a65cf28eebf3dba/docker/pb-cpg-tools) |
| pbmm2 | <ul><li>[pbmm2 1.10.0](https://github.com/PacificBiosciences/pbmm2/releases/tag/v1.10.0)</li><li>[datamash 1.1.0](https://ftp.gnu.org/gnu/datamash/)</li><li>[pysam 0.16.0.1](https://github.com/pysam-developers/pysam/releases/tag/v0.16.0.1)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/pbmm2) |
| pbsv | <ul><li>[pbsv 2.9.0](https://github.com/PacificBiosciences/pbsv/releases/tag/v2.9.0)</li><li>[htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f9e33a757e6d8cb15696ac930a2efd0fd7a885d8/docker/pbsv) |
| pyyaml | <ul><li>[pyyaml 5.3.1](https://github.com/yaml/pyyaml/releases/tag/5.3.1)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f72e862bca2f209b9909e6043ef0197975762f27/docker/pyyaml) |
| wgs_tertiary image | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/6b13cc246dd44e41903d17a660bb5432cdd18dbe/docker/wgs_tertiary) |
| samtools | <ul><li>[samtools 1.14](https://github.com/samtools/samtools/releases/tag/1.14)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/samtools) |
| slivar | <ul><li>[slivar 0.2.2](https://github.com/brentp/slivar/releases/tag/v0.2.2)</li><li>[bcftools 1.14](https://github.com/samtools/bcftools/releases/tag/1.14)</li><li>[vcfpy 0.13.3](https://github.com/bihealth/vcfpy/releases/tag/v0.13.3)</li><li>[pysam 0.19.1](https://github.com/pysam-developers/pysam/releases/tag/v0.19.1)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/slivar) |
| svpack | <ul><li>[svpack 36180ae6](https://github.com/PacificBiosciences/svpack/tree/a82598ebc4013bf32e70295b83b380ada6302c4a)</li><li>[htslib 1.18](https://github.com/samtools/htslib/releases/tag/1.18)</li><li>[pysam 0.21.0](https://github.com/pysam-developers/pysam/releases/tag/v0.21.0)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/8edbc516abc0ff43ac279b48018003923721b054/docker/svpack) |
Expand Down
18 changes: 5 additions & 13 deletions wdl-ci.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -707,29 +707,21 @@
"name": "",
"description": "",
"tasks": {
"write_yaml_ped_phrank": {
"write_ped_phrank": {
"key": "write_yaml_ped_phrank",
"digest": "e4yxyjj6vw35pxz434pgfalxpa4xh72n",
"tests": [
{
"inputs": {
"cohort_id": "hg005-small-cohort",
"cohort_json": "${resources_file_path}/cohort.json",
"hpo_terms": "${datasets_file_path}/hpo/hpoTerms.txt",
"hpo_dag": "${datasets_file_path}/hpo/hpoDag.txt",
"hpo_annotations": "${datasets_file_path}/hpo/ensembl.hpoPhenotype.tsv",
"ensembl_to_hgnc": "${datasets_file_path}/hpo/ensembl.hgncSymbol.tsv",
"phenotypes": [
"HP:0001250",
"HP:0001263"
],
"runtime_attributes": "${default_runtime_attributes}"
},
"output_tests": {
"cohort_yaml": {
"value": "${resources_file_path}/hg005-small-cohort.yml",
"test_tasks": [
"calculate_md5sum",
"compare_file_basename",
"check_yaml"
]
},
"pedigree": {
"value": "${resources_file_path}/hg005-small-cohort.ped",
"test_tasks": [
Expand Down
53 changes: 24 additions & 29 deletions workflows/tertiary_analysis/tertiary_analysis.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,19 @@ workflow tertiary_analysis {
RuntimeAttributes default_runtime_attributes
}

call write_yaml_ped_phrank {
call write_ped_phrank {
input:
cohort_id = cohort.cohort_id,
cohort_json = write_json(cohort),
hpo_terms = slivar_data.hpo_terms,
hpo_dag = slivar_data.hpo_dag,
hpo_annotations = slivar_data.hpo_annotations,
ensembl_to_hgnc = slivar_data.ensembl_to_hgnc,
phenotypes = cohort.phenotypes,
runtime_attributes = default_runtime_attributes
}
call slivar_small_variant {
input:
vcf = small_variant_vcf.data,
vcf_index = small_variant_vcf.data_index,
pedigree = write_yaml_ped_phrank.pedigree,
pedigree = write_ped_phrank.pedigree,
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
slivar_js = slivar_data.slivar_js,
Expand All @@ -42,7 +39,7 @@ workflow tertiary_analysis {
gff = select_first([reference.gff]),
lof_lookup = slivar_data.lof_lookup,
clinvar_lookup = slivar_data.clinvar_lookup,
phrank_lookup = write_yaml_ped_phrank.phrank_lookup,
phrank_lookup = write_ped_phrank.phrank_lookup,
runtime_attributes = default_runtime_attributes
}
Expand All @@ -63,10 +60,10 @@ workflow tertiary_analysis {
call slivar_svpack_tsv {
input:
filtered_vcf = svpack_filter_annotated.svpack_vcf,
pedigree = write_yaml_ped_phrank.pedigree,
pedigree = write_ped_phrank.pedigree,
lof_lookup = slivar_data.lof_lookup,
clinvar_lookup = slivar_data.clinvar_lookup,
phrank_lookup = write_yaml_ped_phrank.phrank_lookup,
phrank_lookup = write_ped_phrank.phrank_lookup,
runtime_attributes = default_runtime_attributes
}
Expand All @@ -89,20 +86,17 @@ workflow tertiary_analysis {
}
}

task write_yaml_ped_phrank {
task write_ped_phrank {
input {
String cohort_id
File cohort_json

File hpo_terms
File hpo_dag
File hpo_annotations
File ensembl_to_hgnc
Array[String] phenotypes

RuntimeAttributes runtime_attributes
}
Int disk_size = ceil((size(hpo_terms, "GB") + size(hpo_dag, "GB") + size(hpo_annotations, "GB") + size(ensembl_to_hgnc, "GB")) * 2 + 20)
Int disk_size = 20
command <<<
set -euo pipefail
Expand Down Expand Up @@ -146,7 +140,7 @@ task write_yaml_ped_phrank {
def parse_family(family):
"""For a family struct, return a list of lists of PED fields for each sample."""
family_id = family["family_id"]
family_id = family["cohort_id"]
samples = []
for sample in family["samples"]:
samples.append(parse_sample(family_id, sample))
Expand All @@ -172,31 +166,32 @@ task write_yaml_ped_phrank {
sys.exit(0)
main()
EOF
chmod +x json2ped.py
json2ped.py \
~{cohort_json} \
> ~{cohort_id}.ped
python3 ./json2ped.py ~{cohort_json} > ~{cohort_id}.ped
cat ~{cohort_id}.ped
# ENV HPO_TERMS_TSV "/opt/data/hpo/hpoTerms.txt"
# ENV HPO_DAG_TSV "/opt/data/hpo/hpoDag.txt"
# ENV ENSEMBL_TO_HPO_TSV "/opt/data/hpo/ensembl.hpoPhenotype.tsv"
# ENV ENSEMBL_TO_HGNC "/opt/data/genes/ensembl.hgncSymbol.tsv"
calculate_phrank.py \
~{hpo_terms} \
~{hpo_dag} \
~{hpo_annotations} \
~{ensembl_to_hgnc} \
~{cohort_id}.yml \
~{cohort_id} \
"${HPO_TERMS_TSV}" \
"${HPO_DAG_TSV}" \
"${ENSEMBL_TO_HPO_TSV}" \
"${ENSEMBL_TO_HGNC}" \
~{sep="," phenotypes} \
~{cohort_id}_phrank.tsv
>>>
output {
File cohort_yaml = "~{cohort_id}.yml"
File pedigree = "~{cohort_id}.ped"
File phrank_lookup = "~{cohort_id}_phrank.tsv"
}

runtime {
docker: "~{runtime_attributes.container_registry}/pyyaml@sha256:af6f0689a7412b1edf76bd4bf6434e7fa6a86192eebf19573e8618880d9c1dbb"
docker: "~{runtime_attributes.container_registry}/wgs_tertiary@sha256:46f14de75798b54a38055a364a23ca1c9497bf810fee860431b78abc553434f2"
cpu: 2
memory: "4 GB"
disk: disk_size + " GB"
Expand Down

0 comments on commit 797115d

Please sign in to comment.