diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java index 63be9f3a8c6..de29280e5eb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducer.java @@ -119,9 +119,9 @@ static VariantContext produceAnnotatedVcFromInferredTypeAndRefLocations(final Si public static VariantContext produceAnnotatedVcFromEvidenceTargetLink(final EvidenceTargetLink e, final SvType svType, - final SAMSequenceDictionary sequenceDictionary, + final ReadMetadata metadata, final ReferenceMultiSource reference) { - final String sequenceName = sequenceDictionary.getSequence(e.getPairedStrandedIntervals().getLeft().getInterval().getContig()).getSequenceName(); + final String sequenceName = metadata.getContigName(e.getPairedStrandedIntervals().getLeft().getInterval().getContig()); final int start = e.getPairedStrandedIntervals().getLeft().getInterval().midpoint(); final int end = e.getPairedStrandedIntervals().getRight().getInterval().midpoint(); try { @@ -212,7 +212,7 @@ static VariantContext annotateWithImpreciseEvidenceLinks(final VariantContext va if (variant.getStructuralVariantType() == StructuralVariantType.DEL) { SVContext svc = SVContext.of(variant); final int padding = (metadata == null) ? defaultUncertainty : (metadata.getMaxMedianFragmentSize() / 2); - PairedStrandedIntervals svcIntervals = svc.getPairedStrandedIntervals(referenceSequenceDictionary, padding); + PairedStrandedIntervals svcIntervals = svc.getPairedStrandedIntervals(metadata, referenceSequenceDictionary, padding); final Iterator> overlappers = evidenceTargetLinks.overlappers(svcIntervals); int readPairs = 0; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java index 5ac2a99fb43..5305459cc0d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSpark.java @@ -191,6 +191,9 @@ public static void discoverVariantsAndWriteVCF(final JavaRDD alig final ReadMetadata metadata, final SAMSequenceDictionary sequenceDictionary) { + Utils.validate(! (evidenceTargetLinks != null && metadata == null), + "Must supply read metadata when incorporating evidence target links"); + JavaRDD annotatedVariants = alignedContigs.filter(alignedContig -> alignedContig.alignmentIntervals.size()>1) // filter out any contigs that has less than two alignment records .mapToPair(alignedContig -> new Tuple2<>(alignedContig.contigSequence, // filter a contig's alignment and massage into ordered collection of chimeric alignments @@ -216,8 +219,7 @@ public static void discoverVariantsAndWriteVCF(final JavaRDD alig collectedAnnotatedVariants, broadcastReference.getValue()); } - SVVCFWriter.writeVCF(collectedAnnotatedVariants, vcfOutputFileName, sequenceDictionary, localLogger - ); + SVVCFWriter.writeVCF(collectedAnnotatedVariants, vcfOutputFileName, sequenceDictionary, localLogger); } /** @@ -248,7 +250,7 @@ static List processEvidenceTargetLinks(final StructuralVariation .map(p -> p._2) .filter(EvidenceTargetLink::isImpreciseDeletion) .filter(e -> e.getReadPairs() + e.getSplitReads() > parameters.impreciseEvidenceVariantCallingThreshold) - .map(e -> createImpreciseDeletionVariant(e, reference.getReferenceSequenceDictionary(null), reference)) + .map(e -> createImpreciseDeletionVariant(e, metadata, reference)) .collect(Collectors.toList()); localLogger.info("Called " + impreciseVariants.size() + " imprecise deletion variants"); @@ -257,11 +259,11 @@ static List processEvidenceTargetLinks(final StructuralVariation } private static VariantContext createImpreciseDeletionVariant(final EvidenceTargetLink e, - final SAMSequenceDictionary sequenceDictionary, + final ReadMetadata metadata, final ReferenceMultiSource reference) { - final SvType svType = new SimpleSVType.ImpreciseDeletion(e, sequenceDictionary); + final SvType svType = new SimpleSVType.ImpreciseDeletion(e, metadata); return AnnotatedVariantProducer - .produceAnnotatedVcFromEvidenceTargetLink(e, svType, sequenceDictionary, reference); + .produceAnnotatedVcFromEvidenceTargetLink(e, svType, metadata, reference); } // TODO: 7/6/17 interface to be changed in the new implementation, where one contig produces a set of NARL's. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java index b2aaf54e28b..acef0935d4e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVType.java @@ -3,6 +3,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink; +import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; @@ -128,21 +129,22 @@ public String toString() { } @SuppressWarnings("unchecked") - ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final SAMSequenceDictionary sequenceDictionary) { + ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata) { - super(getIDString(evidenceTargetLink, sequenceDictionary), + super(getIDString(evidenceTargetLink, metadata), Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_ALLELE_DEL)), (evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().midpoint() - evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval().midpoint()), Collections.EMPTY_MAP); } - private static String getIDString(final EvidenceTargetLink evidenceTargetLink, final SAMSequenceDictionary sequenceDictionary) { + private static String getIDString(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata) { return TYPES.DEL.name() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR + GATKSVVCFConstants.IMPRECISE + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR - + sequenceDictionary.getSequence(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getContig()).getSequenceName() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR + + metadata.getContigName(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getContig()) + + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR + evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR + evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getEnd() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR + evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval().getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java index d9921d519fa..88e974ee6d8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVContext.java @@ -306,18 +306,18 @@ private static SimpleInterval composePaddedInterval(final String contig, final i } - public PairedStrandedIntervals getPairedStrandedIntervals(final SAMSequenceDictionary samSequenceDictionary, final int padding) { + public PairedStrandedIntervals getPairedStrandedIntervals(final ReadMetadata metadata, final SAMSequenceDictionary referenceSequenceDictionary, final int padding) { final StructuralVariantType type = getStructuralVariantType(); if (type == StructuralVariantType.DEL) { - final List breakPointIntervals = getBreakPointIntervals(padding, samSequenceDictionary, true); + final List breakPointIntervals = getBreakPointIntervals(padding, referenceSequenceDictionary, true); final SimpleInterval leftBreakpointSimpleInterval = breakPointIntervals.get(0); final SVInterval leftBreakpointInterval = new SVInterval( - samSequenceDictionary.getSequenceIndex(leftBreakpointSimpleInterval.getContig()), + metadata.getContigID(leftBreakpointSimpleInterval.getContig()), leftBreakpointSimpleInterval.getStart(), leftBreakpointSimpleInterval.getEnd() + 1); final SimpleInterval rightBreakpointSimpleInterval = breakPointIntervals.get(1); final SVInterval rightBreakpointInterval = new SVInterval( - samSequenceDictionary.getSequenceIndex(rightBreakpointSimpleInterval.getContig()), + metadata.getContigID(rightBreakpointSimpleInterval.getContig()), rightBreakpointSimpleInterval.getStart(), rightBreakpointSimpleInterval.getEnd() + 1); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java index d346b9262ce..9524c7d9ae0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java @@ -126,7 +126,7 @@ public void testProcessEvidenceTargetLinks(final List etls, ReadMetadata metadata = Mockito.mock(ReadMetadata.class); when(metadata.getMaxMedianFragmentSize()).thenReturn(300); - + when(metadata.getContigName(0)).thenReturn("20"); PairedStrandedIntervalTree evidenceTree = new PairedStrandedIntervalTree<>(); etls.forEach(e -> evidenceTree.put(e.getPairedStrandedIntervals(), e));