Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bug causing error finding ref base for imprecise deletions #3671

Merged
merged 1 commit into from
Oct 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ static VariantContext produceAnnotatedVcFromInferredTypeAndRefLocations(final Si

public static VariantContext produceAnnotatedVcFromEvidenceTargetLink(final EvidenceTargetLink e,
final SvType svType,
final SAMSequenceDictionary sequenceDictionary,
final ReadMetadata metadata,
final ReferenceMultiSource reference) {
final String sequenceName = sequenceDictionary.getSequence(e.getPairedStrandedIntervals().getLeft().getInterval().getContig()).getSequenceName();
final String sequenceName = metadata.getContigName(e.getPairedStrandedIntervals().getLeft().getInterval().getContig());
final int start = e.getPairedStrandedIntervals().getLeft().getInterval().midpoint();
final int end = e.getPairedStrandedIntervals().getRight().getInterval().midpoint();
try {
Expand Down Expand Up @@ -212,7 +212,7 @@ static VariantContext annotateWithImpreciseEvidenceLinks(final VariantContext va
if (variant.getStructuralVariantType() == StructuralVariantType.DEL) {
SVContext svc = SVContext.of(variant);
final int padding = (metadata == null) ? defaultUncertainty : (metadata.getMaxMedianFragmentSize() / 2);
PairedStrandedIntervals svcIntervals = svc.getPairedStrandedIntervals(referenceSequenceDictionary, padding);
PairedStrandedIntervals svcIntervals = svc.getPairedStrandedIntervals(metadata, referenceSequenceDictionary, padding);

final Iterator<Tuple2<PairedStrandedIntervals, EvidenceTargetLink>> overlappers = evidenceTargetLinks.overlappers(svcIntervals);
int readPairs = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ public static void discoverVariantsAndWriteVCF(final JavaRDD<AlignedContig> alig
final ReadMetadata metadata,
final SAMSequenceDictionary sequenceDictionary) {

Utils.validate(! (evidenceTargetLinks != null && metadata == null),
"Must supply read metadata when incorporating evidence target links");

JavaRDD<VariantContext> annotatedVariants =
alignedContigs.filter(alignedContig -> alignedContig.alignmentIntervals.size()>1) // filter out any contigs that has less than two alignment records
.mapToPair(alignedContig -> new Tuple2<>(alignedContig.contigSequence, // filter a contig's alignment and massage into ordered collection of chimeric alignments
Expand All @@ -216,8 +219,7 @@ public static void discoverVariantsAndWriteVCF(final JavaRDD<AlignedContig> alig
collectedAnnotatedVariants, broadcastReference.getValue());
}

SVVCFWriter.writeVCF(collectedAnnotatedVariants, vcfOutputFileName, sequenceDictionary, localLogger
);
SVVCFWriter.writeVCF(collectedAnnotatedVariants, vcfOutputFileName, sequenceDictionary, localLogger);
}

/**
Expand Down Expand Up @@ -248,7 +250,7 @@ static List<VariantContext> processEvidenceTargetLinks(final StructuralVariation
.map(p -> p._2)
.filter(EvidenceTargetLink::isImpreciseDeletion)
.filter(e -> e.getReadPairs() + e.getSplitReads() > parameters.impreciseEvidenceVariantCallingThreshold)
.map(e -> createImpreciseDeletionVariant(e, reference.getReferenceSequenceDictionary(null), reference))
.map(e -> createImpreciseDeletionVariant(e, metadata, reference))
.collect(Collectors.toList());

localLogger.info("Called " + impreciseVariants.size() + " imprecise deletion variants");
Expand All @@ -257,11 +259,11 @@ static List<VariantContext> processEvidenceTargetLinks(final StructuralVariation
}

private static VariantContext createImpreciseDeletionVariant(final EvidenceTargetLink e,
final SAMSequenceDictionary sequenceDictionary,
final ReadMetadata metadata,
final ReferenceMultiSource reference) {
final SvType svType = new SimpleSVType.ImpreciseDeletion(e, sequenceDictionary);
final SvType svType = new SimpleSVType.ImpreciseDeletion(e, metadata);
return AnnotatedVariantProducer
.produceAnnotatedVcFromEvidenceTargetLink(e, svType, sequenceDictionary, reference);
.produceAnnotatedVcFromEvidenceTargetLink(e, svType, metadata, reference);
}

// TODO: 7/6/17 interface to be changed in the new implementation, where one contig produces a set of NARL's.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval;

Expand Down Expand Up @@ -128,21 +129,22 @@ public String toString() {
}

@SuppressWarnings("unchecked")
ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final SAMSequenceDictionary sequenceDictionary) {
ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata) {

super(getIDString(evidenceTargetLink, sequenceDictionary),
super(getIDString(evidenceTargetLink, metadata),
Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_ALLELE_DEL)),
(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().midpoint() -
evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval().midpoint()),
Collections.EMPTY_MAP);
}

private static String getIDString(final EvidenceTargetLink evidenceTargetLink, final SAMSequenceDictionary sequenceDictionary) {
private static String getIDString(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata) {

return TYPES.DEL.name()
+ GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ GATKSVVCFConstants.IMPRECISE + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ sequenceDictionary.getSequence(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getContig()).getSequenceName() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ metadata.getContigName(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getContig())
+ GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getEnd() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
+ evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval().getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,18 @@ private static SimpleInterval composePaddedInterval(final String contig, final i

}

public PairedStrandedIntervals getPairedStrandedIntervals(final SAMSequenceDictionary samSequenceDictionary, final int padding) {
public PairedStrandedIntervals getPairedStrandedIntervals(final ReadMetadata metadata, final SAMSequenceDictionary referenceSequenceDictionary, final int padding) {
final StructuralVariantType type = getStructuralVariantType();
if (type == StructuralVariantType.DEL) {
final List<SimpleInterval> breakPointIntervals = getBreakPointIntervals(padding, samSequenceDictionary, true);
final List<SimpleInterval> breakPointIntervals = getBreakPointIntervals(padding, referenceSequenceDictionary, true);
final SimpleInterval leftBreakpointSimpleInterval = breakPointIntervals.get(0);
final SVInterval leftBreakpointInterval = new SVInterval(
samSequenceDictionary.getSequenceIndex(leftBreakpointSimpleInterval.getContig()),
metadata.getContigID(leftBreakpointSimpleInterval.getContig()),
leftBreakpointSimpleInterval.getStart(),
leftBreakpointSimpleInterval.getEnd() + 1);
final SimpleInterval rightBreakpointSimpleInterval = breakPointIntervals.get(1);
final SVInterval rightBreakpointInterval = new SVInterval(
samSequenceDictionary.getSequenceIndex(rightBreakpointSimpleInterval.getContig()),
metadata.getContigID(rightBreakpointSimpleInterval.getContig()),
rightBreakpointSimpleInterval.getStart(),
rightBreakpointSimpleInterval.getEnd() + 1);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public void testProcessEvidenceTargetLinks(final List<EvidenceTargetLink> etls,

ReadMetadata metadata = Mockito.mock(ReadMetadata.class);
when(metadata.getMaxMedianFragmentSize()).thenReturn(300);

when(metadata.getContigName(0)).thenReturn("20");

PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTree = new PairedStrandedIntervalTree<>();
etls.forEach(e -> evidenceTree.put(e.getPairedStrandedIntervals(), e));
Expand Down