From f62b704a4867e2413db7da0b9430ee000785e4c0 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Thu, 3 Oct 2019 17:14:39 -0400 Subject: [PATCH] Adding an integration test that specifically tests this condition --- .../funcotator/FuncotatorIntegrationTest.java | 45 ++++++++++++++++++- .../dbSnp.regressionTestSet.hg19.vcf.gz.tbi | 3 -- .../dbSnp/hg19/dbSnp.config | 3 ++ .../dbSnp/hg19/dbSnp.repro.vcf | 3 ++ .../dbSnp/hg19/dbSnp.repro.vcf.idx | 3 ++ .../gencode_pik3ca/hg19/gencode.config | 3 ++ .../hg19/gencode.v19.PIK3CA.gtf | 3 ++ .../hg19/gencode.v19.PIK3CA.gtf.idx | 3 ++ .../hg19/gencode.v19.PIK3CA_transcript.dict | 3 ++ .../hg19/gencode.v19.PIK3CA_transcript.fasta | 3 ++ .../gencode.v19.PIK3CA_transcript.fasta.fai | 3 ++ .../tools/funcotator/vcfBugRepro.vcf | 3 ++ 12 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.config create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf.idx create mode 100755 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.config create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf.idx create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.dict create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta create mode 100644 src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta.fai create mode 
100644 src/test/resources/org/broadinstitute/hellbender/tools/funcotator/vcfBugRepro.vcf diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index 416a461a4ec..04fc5ebebc3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -59,9 +59,11 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest { private static final String XSV_CLINVAR_MULTIHIT_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "clinvar_hg19_multihit_test.vcf"; private static final String FILTER_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "FILTER_test.vcf"; + private static final String VCF_FIELD_ORDER_SWAP_TEST_VCF =toolsTestDir + "funcotator" + File.separator + "vcfBugRepro.vcf"; private static final String DS_XSV_CLINVAR_TESTS = largeFileTestDir + "funcotator" + File.separator + "small_ds_clinvar_hg19" + File.separator; private static final String DS_FILTER_PARSE_TESTS = largeFileTestDir + "funcotator" + File.separator + "small_ds_FILTER_test" + File.separator; - + public static final String VCF_FIELD_ORDER_TEST_DATA_SOURCES = largeFileTestDir + "funcotator" + File.separator + "vcfFuncotationOrderingBugRepro" + File.separator; + private static final String NOT_M2_TEST_HG19 = toolsTestDir + "funcotator/NotM2_test_custom_maf_fields.vcf"; private static final String M2_TEST_HG19 = toolsTestDir + "funcotator/M2_test_custom_maf_fields.vcf"; private static final String NOT_M2_TEST_HG19_TUMOR_ONLY = toolsTestDir + "funcotator/NotM2_test_custom_maf_fields_tumor_only.vcf"; @@ -301,6 +303,7 @@ private Object[][] provideForNonTrivialLargeDataValidationTest() { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_DATA_SET_2_EXPECTED_OUTPUT }, { + //This tests 
https://github.com/broadinstitute/gatk/issues/6173 FuncotatorTestConstants.SINGLE_LINE, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, @@ -932,6 +935,46 @@ public void testCanAnnotateHg38ClinvarAndGencodeV28() { .count(), NUM_CLINVAR_HITS); } + //Regression test for https://github.com/broadinstitute/gatk/issues/6173: runs Funcotator on a one-variant VCF and checks that every dbSnp_F# annotation in the output carries its own value + @Test + public void testVCFColumnsArentShuffled() { + final File outputFile = createTempFile("tmpTestVCFColumnsArentShuffled", "vcf"); + + final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator( + VCF_FIELD_ORDER_SWAP_TEST_VCF, + outputFile, + b37Reference, + VCF_FIELD_ORDER_TEST_DATA_SOURCES, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorArgumentDefinitions.OutputFormatType.VCF, + false); + + arguments.addBooleanArgument(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true); + + runCommandLine(arguments); + + final Pair<VCFHeader, List<VariantContext>> tempVcf = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath()); + Assert.assertEquals( tempVcf.getRight().size(), 1 ); + + final String[] funcotatorKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(tempVcf.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME).getDescription()); + + final VariantContext variantContext = tempVcf.getRight().get(0); + final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( + funcotatorKeys, variantContext, "Gencode_19_annotationTranscript", "FAKE_SOURCE"); + + final Allele allele = variantContext.getAlternateAllele(0); + final String txId = funcs.get(allele).getTranscriptList().get(0); + Assert.assertEquals( funcs.get(allele).get(txId).size(), 1 ); + + final Funcotation funcotation = funcs.get(allele).get(txId).get(0); + + //Assert that the value of each field dbSnp_F# is F#_|_F#, with the '|' written as %7C by Funcotator's percent encoding scheme + for(int i = 1; i <= 9; i++){ + Assert.assertEquals(funcotation.getField("dbSnp_F"+i), "F"+i+"_%7C_"+"F"+i); + } + } + + + @Test public void
testFilterParsing() { diff --git a/src/test/resources/large/funcotator/funcotator_dataSources/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi b/src/test/resources/large/funcotator/funcotator_dataSources/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi index 64c179a311e..e69de29bb2d 100644 --- a/src/test/resources/large/funcotator/funcotator_dataSources/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi +++ b/src/test/resources/large/funcotator/funcotator_dataSources/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa8e18795653074cf54b163cd27f11004b372c3ad7bb5456a7b6048f1bb746aa -size 18769 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.config b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.config new file mode 100644 index 00000000000..048cf7a2ebd --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54bfd2fbe2afa38aabbe8d0ea04e2da03d99d84786c283371f367791fc1e9b42 +size 1600 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf new file mode 100644 index 00000000000..6fffe16e0b0 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e00b7c376ece6d964f7bbea28c9413ebc3befe1c1ce4a42016b574dcb3cbed9 +size 10308 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf.idx b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf.idx new file mode 100644 index 00000000000..b3ee16bf9bb --- /dev/null +++ 
b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/dbSnp/hg19/dbSnp.repro.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dcdb552992a915189f5af26766c99153ea88fa7c863d0af3338af7eca768681 +size 1697 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.config b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.config new file mode 100755 index 00000000000..71fe5e48a94 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1cc1ae8d60a9879e63cc6876c5defe3ffc3dd833fbfc0f4485f59cdf42f259c +size 1786 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf new file mode 100644 index 00000000000..ce87c954a55 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c09d309121a8664c3f465ca169fadbfe539a188ab541052c4e6719aa209a15 +size 23214 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf.idx b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf.idx new file mode 100644 index 00000000000..10817a1e3a6 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA.gtf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2aa8475b77a6c8e2c782a2de47c7ec8070bfd541303e548e96f787a8920c1d +size 312 diff --git 
a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.dict b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.dict new file mode 100644 index 00000000000..a22de84a368 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.dict @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74883af09a5772034d53f2df884a82e34df415951081e5754bedf1517ee9faa9 +size 968 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta new file mode 100644 index 00000000000..1ca5bf20690 --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3f582d1ca1ed259c77e4a82f4bfed8e9253b4b5c3aba9a63eb08f75f5cc68d +size 10353 diff --git a/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta.fai b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta.fai new file mode 100644 index 00000000000..47e29d36fdd --- /dev/null +++ b/src/test/resources/large/funcotator/vcfFuncotationOrderingBugRepro/gencode_pik3ca/hg19/gencode.v19.PIK3CA_transcript.fasta.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e49f4d57d32943e1663cf9902460ff7a2e2c03dfaf0bd3c5fc7ad9f01c2d47 +size 441 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/vcfBugRepro.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/vcfBugRepro.vcf new file 
mode 100644 index 00000000000..dc04f3ae5f1 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/vcfBugRepro.vcf @@ -0,0 +1,3 @@ +##fileformat=VCFv4.2 +#CHROM POS ID REF ALT QUAL FILTER INFO +chr3 178865903 r1 CCC C . PASS . \ No newline at end of file