Skip to content

Commit

Permalink
317 remove excess header values in VCF extract (#7786)
Browse files Browse the repository at this point in the history
* remove unwanted header

* missed the extra NAY

* update dockstore

* use new headers jar

* remove header lines from test too
  • Loading branch information
RoriCremer authored Apr 15, 2022
1 parent 7a15427 commit 1387d47
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 10 deletions.
1 change: 1 addition & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ workflows:
branches:
- master
- ah_var_store
+ - rc-vs-317-remove-excess-headers
- name: GvsImportGenomes
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsImportGenomes.wdl
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ workflow GvsExtractCallset {

File interval_list = "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list"
File interval_weights_bed = "gs://broad-public-datasets/gvs/weights/gvs_vet_weights_1kb.bed"
- File gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/rc-add-AD-04112022/gatk-package-4.2.0.0-498-g1f53709-SNAPSHOT-local.jar"
+ File gatk_override = "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/rc-update-headers-04142022/gatk-package-4.2.0.0-497-gda8a97a-SNAPSHOT-local.jar"

String output_file_base_name = filter_set_name

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,14 @@ protected static VCFHeader generateVcfHeader(Set<String> sampleNames,

// Filter fields
headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));
- headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.NAY_FROM_YNG));
- headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.EXCESS_HET_KEY));
headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.EXCESS_ALLELES));
headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.NO_HQ_GENOTYPES));

// Info fields
VCFStandardHeaderLines.addStandardInfoLines( headerLines, true,
VCFConstants.ALLELE_COUNT_KEY,
VCFConstants.ALLELE_FREQUENCY_KEY,
- VCFConstants.ALLELE_NUMBER_KEY,
- VCFConstants.END_KEY
+ VCFConstants.ALLELE_NUMBER_KEY
);
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.RAW_QUAL_APPROX_KEY));
Expand All @@ -231,7 +228,6 @@ protected static VCFHeader generateVcfHeader(Set<String> sampleNames,
VCFConstants.GENOTYPE_QUALITY_KEY
);
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
- headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.EXCESS_HET_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_LOD_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_YNG_STATUS_KEY));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
##fileformat=VCFv4.2
##FILTER=<ID=EXCESS_ALLELES,Description="Site has an excess of alternate alleles based on the input threshold (default is 6)">
- ##FILTER=<ID=ExcessHet,Description="Site has excess het value larger than the threshold">
##FILTER=<ID=LowQual,Description="Low quality">
- ##FILTER=<ID=NAY,Description="Considered a NAY in the Yay, Nay, Grey table">
##FILTER=<ID=NO_HQ_GENOTYPES,Description="Site has no high quality variant genotypes">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
Expand All @@ -13,8 +11,6 @@
##INFO=<ID=AS_QUALapprox,Number=1,Type=String,Description="Allele-specific QUAL approximations">
##INFO=<ID=AS_VQSLOD,Number=A,Type=String,Description="For each alt allele, the log odds of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=AS_YNG,Number=A,Type=String,Description="For each alt allele, the yay/nay/grey status (yay are known good alleles, nay are known false positives, grey are unknown)">
- ##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
- ##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=QUALapprox,Number=1,Type=Integer,Description="Sum of PL[0] values; used to approximate the QUAL score">
##contig=<ID=chr1,length=248956422,assembly=38>
##contig=<ID=chr2,length=242193529,assembly=38>
Expand Down

0 comments on commit 1387d47

Please sign in to comment.