From c0fea1de55673c6c4b649d2bb86a1e7d22413be1 Mon Sep 17 00:00:00 2001 From: James Date: Fri, 30 Nov 2018 14:23:08 -0500 Subject: [PATCH 1/8] Reduced some of the repeated steps in ReferenceConfidenceModel.calcNIndelinformativeReads --- .../ReferenceConfidenceModel.java | 14 +++--- .../hellbender/utils/read/AlignmentUtils.java | 43 ++++++++++++------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index 797fc8da5ea..316cf64597a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -6,6 +6,7 @@ import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.Tuple; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFSimpleHeaderLine; @@ -485,21 +486,20 @@ boolean isReadInformativeAboutIndelsOfSize(final GATKRead read, // We are safe to use the faster no-copy versions of getBases and getBaseQualities here, // since we're not modifying the returned arrays in any way. This makes a small difference // in the HaplotypeCaller profile, since this method is a major hotspot. - final byte[] readBases = AlignmentUtils.getBasesAlignedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match - final byte[] readQuals = AlignmentUtils.getBaseQualsAlignedOneToOne(read); + final Tuple readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlginedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match - final int baselineMMSum = sumMismatchingQualities(readBases, readQuals, readStart, refBases, refStart, Integer.MAX_VALUE); + final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart, refBases, refStart, Integer.MAX_VALUE); // consider each indel size up to max in term, checking if an indel that deletes either the ref bases (deletion // or read bases (insertion) would fit as well as the origin baseline sum of mismatching quality scores for ( int indelSize = 1; indelSize <= maxIndelSize; indelSize++ ) { // check insertions: - if (sumMismatchingQualities(readBases, readQuals, readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { return false; } // check deletions: - if (sumMismatchingQualities(readBases, readQuals, readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { return false; } } @@ -548,8 +548,8 @@ int calcNIndelInformativeReads(final ReadPileup pileup, final int pileupOffsetIn protected int getCigarModifiedOffset (final PileupElement p){ final GATKRead read = p.getRead(); int offset = (p.getCurrentCigarElement().getOperator().consumesReferenceBases() || p.getCurrentCigarElement().getOperator() == CigarOperator.S)? p.getOffsetInCurrentCigar() : 0; - for (int i = 0; i < p.getCurrentCigarOffset(); i++ ) { - CigarElement elem = read.getCigarElement(i); + for (int i = 0; i < p.getCurrentCigarOffset(); i++) { + final CigarElement elem = read.getCigarElement(i); if (elem.getOperator().consumesReferenceBases() || elem.getOperator() == CigarOperator.S) { offset += elem.getLength(); } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index 4347941cf10..d6589af2dc3 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -3,6 +3,7 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.util.Tuple; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.BaseUtils; @@ -200,25 +201,33 @@ public static byte[] getBasesCoveringRefInterval(final int refStart, final int r return Arrays.copyOfRange(bases, basesStart, basesStop + 1); } - public static byte[] getBasesAlignedOneToOne(final GATKRead read) { - return getSequenceAlignedOneToOne(read, r -> r.getBasesNoCopy(), GAP_CHARACTER); + public static Tuple getBasesAndBaseQualitiesAlginedOneToOne(final GATKRead read) { + return getBasesAndBaseQualitiesAlginedOneToOne(read, GAP_CHARACTER, (byte)0); } - public static byte[] getBaseQualsAlignedOneToOne(final GATKRead read) { - return getSequenceAlignedOneToOne(read, r -> r.getBaseQualitiesNoCopy(), (byte)0); - } - - public static byte[] getSequenceAlignedOneToOne(final GATKRead read, final Function bytesProvider, final byte padWith) { + public static Tuple getBasesAndBaseQualitiesAlginedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); - Utils.nonNull(bytesProvider); final Cigar cigar = read.getCigar(); - final byte[] sequence = bytesProvider.apply(read); - - if (!cigar.containsOperator(CigarOperator.DELETION) && !cigar.containsOperator(CigarOperator.INSERTION)) { - return sequence; + final byte[] bases = read.getBasesNoCopy(); + final byte[] baseQualities = read.getBaseQualitiesNoCopy(); + final int numCigarElements = cigar.numCigarElements(); + boolean sawIndel = false; + + // Check if the cigar contains indels + for (int i = 0; i < numCigarElements; i++) { + final CigarOperator e = cigar.getCigarElement(i).getOperator(); + if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) { + sawIndel = true; + break; + } + } + if (!sawIndel) { + return new Tuple<>(bases, baseQualities); } else { - final byte[] paddedBases = new byte[CigarUtils.countRefBasesIncludingSoftClips(read, 0, cigar.numCigarElements())]; + int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); + final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips]; + final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips]; int literalPos = 0; int paddedPos = 0; for ( int i = 0; i < cigar.numCigarElements(); i++ ) { @@ -229,19 +238,21 @@ public static byte[] getSequenceAlignedOneToOne(final GATKRead read, final Funct literalPos += ce.getLength(); //skip inserted bases } else { - System.arraycopy(sequence, literalPos, paddedBases, paddedPos, ce.getLength()); + System.arraycopy(bases, literalPos, paddedBases, paddedPos, ce.getLength()); + System.arraycopy(baseQualities, literalPos, paddedBaseQualities, paddedPos, ce.getLength()); literalPos += ce.getLength(); paddedPos += ce.getLength(); } } else if (co.consumesReferenceBases()) { for ( int j = 0; j < ce.getLength(); j++ ) { //pad deleted bases - paddedBases[paddedPos] = padWith; + paddedBases[paddedPos] = gapCharacter; + paddedBaseQualities[paddedPos] = qualityPadCharacter; paddedPos++; } } } - return paddedBases; + return new Tuple<>(paddedBases, paddedBaseQualities); } } From c79f9e5a53b95f9b3c8d555bb7730d541a5125bb Mon Sep 17 00:00:00 2001 From: James Date: Fri, 30 Nov 2018 16:12:54 -0500 Subject: [PATCH 2/8] spellcheck --- .../walkers/haplotypecaller/ReferenceConfidenceModel.java | 2 +- .../hellbender/utils/read/AlignmentUtils.java | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index 316cf64597a..860b3f6901a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -486,7 +486,7 @@ boolean isReadInformativeAboutIndelsOfSize(final GATKRead read, // We are safe to use the faster no-copy versions of getBases and getBaseQualities here, // since we're not modifying the returned arrays in any way. This makes a small difference // in the HaplotypeCaller profile, since this method is a major hotspot. - final Tuple readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlginedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match + final Tuple readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart, refBases, refStart, Integer.MAX_VALUE); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index d6589af2dc3..d95d04785c5 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -7,7 +7,6 @@ import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.BaseUtils; -import org.broadinstitute.hellbender.utils.Nucleotide; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.pileup.PileupElement; @@ -15,7 +14,6 @@ import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment; import java.util.*; -import java.util.function.Function; public final class AlignmentUtils { @@ -201,11 +199,11 @@ public static byte[] getBasesCoveringRefInterval(final int refStart, final int r return Arrays.copyOfRange(bases, basesStart, basesStop + 1); } - public static Tuple getBasesAndBaseQualitiesAlginedOneToOne(final GATKRead read) { - return getBasesAndBaseQualitiesAlginedOneToOne(read, GAP_CHARACTER, (byte)0); + public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read) { + return getBasesAndBaseQualitiesAlignedOneToOne(read, GAP_CHARACTER, (byte)0); } - public static Tuple getBasesAndBaseQualitiesAlginedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { + public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); final Cigar cigar = read.getCigar(); final byte[] bases = read.getBasesNoCopy(); From 5da3a4a798e870545f8c38b97955e43420f802cd Mon Sep 17 00:00:00 2001 From: James Date: Wed, 5 Dec 2018 13:11:58 -0500 Subject: [PATCH 3/8] Removed the call to getCigar() for further optimization --- .../hellbender/utils/read/AlignmentUtils.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index d95d04785c5..ae3462671a4 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -205,15 +205,14 @@ public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(fina public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); - final Cigar cigar = read.getCigar(); final byte[] bases = read.getBasesNoCopy(); final byte[] baseQualities = read.getBaseQualitiesNoCopy(); - final int numCigarElements = cigar.numCigarElements(); + final int numCigarElements = read.numCigarElements(); boolean sawIndel = false; // Check if the cigar contains indels for (int i = 0; i < numCigarElements; i++) { - final CigarOperator e = cigar.getCigarElement(i).getOperator(); + final CigarOperator e = read.getCigarElement(i).getOperator(); if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) { sawIndel = true; break; @@ -228,8 +227,8 @@ public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(fina final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips]; int literalPos = 0; int paddedPos = 0; - for ( int i = 0; i < cigar.numCigarElements(); i++ ) { - final CigarElement ce = cigar.getCigarElement(i); + for ( int i = 0; i < read.numCigarElements(); i++ ) { + final CigarElement ce = read.getCigarElement(i); final CigarOperator co = ce.getOperator(); if (co.consumesReadBases()) { if (!co.consumesReferenceBases()) { From 286c0360bc351ddf5cc5d28b9aa280fff18dc33c Mon Sep 17 00:00:00 2001 From: James Date: Tue, 29 Jan 2019 16:09:46 -0500 Subject: [PATCH 4/8] responded to comments --- .../ReferenceConfidenceModel.java | 11 +++-- .../hellbender/utils/read/AlignmentUtils.java | 21 +++++++-- .../utils/read/AlignmentUtilsUnitTest.java | 47 +++++++++++++++++++ 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index 860b3f6901a..efe9ee517cd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -10,6 +10,8 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFSimpleHeaderLine; +import javafx.util.Pair; +import org.broadinstitute.hellbender.engine.AlignmentContext; import org.broadinstitute.hellbender.engine.AssemblyRegion; import org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel; import org.broadinstitute.hellbender.tools.walkers.variantutils.PosteriorProbabilitiesUtils; @@ -486,20 +488,19 @@ boolean isReadInformativeAboutIndelsOfSize(final GATKRead read, // We are safe to use the faster no-copy versions of getBases and getBaseQualities here, // since we're not modifying the returned arrays in any way. This makes a small difference // in the HaplotypeCaller profile, since this method is a major hotspot. - final Tuple readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match + final Pair readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match - - final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart, refBases, refStart, Integer.MAX_VALUE); + final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart, refBases, refStart, Integer.MAX_VALUE); // consider each indel size up to max in term, checking if an indel that deletes either the ref bases (deletion // or read bases (insertion) would fit as well as the origin baseline sum of mismatching quality scores for ( int indelSize = 1; indelSize <= maxIndelSize; indelSize++ ) { // check insertions: - if (sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { return false; } // check deletions: - if (sumMismatchingQualities(readBasesAndBaseQualities.a, readBasesAndBaseQualities.b, readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { return false; } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index ae3462671a4..6cf25b65dd2 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -4,6 +4,7 @@ import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; import htsjdk.samtools.util.Tuple; +import javafx.util.Pair; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.BaseUtils; @@ -199,11 +200,23 @@ public static byte[] getBasesCoveringRefInterval(final int refStart, final int r return Arrays.copyOfRange(bases, basesStart, basesStop + 1); } - public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read) { + /** + * Returns the "IGV View" of all the bases and base qualities in a read aligned to the reference according to the cigar, dropping any bases + * that might be in the read but don't aren't in the reference. Any bases that appear in the reference but not the read + * will be filled in with GAP_CHARACTER values for the read bases and 0's for base qualities to indicate that they don't exist. + * + * If the cigar for input read is all matches to the reference then this method will return references to the original + * read base/base quality byte arrays in the underlying SamRecord in order to save on array allocation/copying performance effects. + * + * @param read a read to return aligned to the reference + * @return A tuple of byte arrays where the first array corresponds to the bases aligned to the reference and second + * array corresponds to the baseQualities aligned to the reference. + */ + public static Pair getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read) { return getBasesAndBaseQualitiesAlignedOneToOne(read, GAP_CHARACTER, (byte)0); } - public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { + private static Pair getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); final byte[] bases = read.getBasesNoCopy(); final byte[] baseQualities = read.getBaseQualitiesNoCopy(); @@ -219,7 +232,7 @@ public static Tuple getBasesAndBaseQualitiesAlignedOneToOne(fina } } if (!sawIndel) { - return new Tuple<>(bases, baseQualities); + return new Pair<>(bases, baseQualities); } else { int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); @@ -249,7 +262,7 @@ else if (co.consumesReferenceBases()) { } } } - return new Tuple<>(paddedBases, paddedBaseQualities); + return new Pair<>(paddedBases, paddedBaseQualities); } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java index 37074f2b1ad..b84483f0fd4 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java @@ -1,11 +1,14 @@ package org.broadinstitute.hellbender.utils.read; import htsjdk.samtools.*; +import javafx.util.Pair; import org.apache.commons.lang3.ArrayUtils; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.pileup.PileupElement; +import org.broadinstitute.hellbender.utils.pileup.ReadPileup; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanJavaAligner; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment; @@ -776,6 +779,50 @@ else if ( middleOp == 'I' && i == startOfIndelBases - 1 ) } } + /////////////////////////////////////////////////////////////////// + // Test AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne() // + /////////////////////////////////////////////////////////////////// + + @DataProvider + public Object[][] makeGetBasesAndBaseQualitiesAlignedOneToOneTest() { + final String readBases = "ATCGATCG"; + List tests = new ArrayList<>(); + + { // very basic testing + final String cigar1 = "8M"; + final String cigar2 = "4M4D4M"; + final String cigar3 = "2M3I3M"; + final String cigar4 = "2I6M"; + final String cigar5 = "6M2I"; + final String cigar6 = "1D8M1D"; + final String cigar7 = "2M1I1D2M2I1M2D"; + + tests.add(new Object[]{readBases, cigar1, "ATCGATCG", new byte[]{10, 10, 10, 10, 10, 10, 10, 10}}); + tests.add(new Object[]{readBases, cigar2, "ATCG----ATCG", new byte[]{10, 10, 10, 10,0,0,0,0, 10, 10, 10, 10}}); + tests.add(new Object[]{readBases, cigar3, "ATTCG", new byte[]{10, 10, 10, 10, 10}}); + tests.add(new Object[]{readBases, cigar4, "CGATCG", new byte[]{10, 10, 10, 10, 10, 10}}); + tests.add(new Object[]{readBases, cigar5, "ATCGAT", new byte[]{10, 10, 10, 10, 10, 10}}); + tests.add(new Object[]{readBases, cigar6, "-ATCGATCG-", new byte[]{0, 10, 10, 10, 10, 10, 10, 10, 10, 0}}); + tests.add(new Object[]{readBases, cigar7, "AT-GAG--", new byte[]{10, 10, 0, 10, 10, 10, 0, 0}}); + } + + return tests.toArray(new Object[][]{}); + } + + + @Test(dataProvider = "makeGetBasesAndBaseQualitiesAlignedOneToOneTest") + public void testCalcNIndelInformativeReads(final String readBases, final String cigar, final String expectedBases, final byte[] expectedQuals ) { + final byte qual = (byte)10; + final byte[] quals = Utils.dupBytes(qual, readBases.length()); + + final GATKRead read = ArtificialReadUtils.createArtificialRead(readBases.getBytes(), quals, cigar); + + Pair actual = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); + + Assert.assertEquals(new String(actual.getKey()), expectedBases); + Assert.assertEquals(actual.getValue(), expectedQuals); + } + ////////////////////////////////////////// // Test AlignmentUtils.leftAlignIndel() // ////////////////////////////////////////// From 8688051ac7ee9485880745b133245debf4bb85f1 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 5 Feb 2019 10:52:07 -0500 Subject: [PATCH 5/8] switched to a Pair implementaiton that does exist on the J9 compiler that travis uses --- .../walkers/haplotypecaller/ReferenceConfidenceModel.java | 2 +- .../broadinstitute/hellbender/utils/read/AlignmentUtils.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index efe9ee517cd..f7cfff98a84 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -10,7 +10,7 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFSimpleHeaderLine; -import javafx.util.Pair; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.engine.AlignmentContext; import org.broadinstitute.hellbender.engine.AssemblyRegion; import org.broadinstitute.hellbender.tools.walkers.genotyper.PloidyModel; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index 6cf25b65dd2..c7c00b0ba31 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -4,7 +4,8 @@ import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; import htsjdk.samtools.util.Tuple; -import javafx.util.Pair; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.BaseUtils; @@ -232,7 +233,7 @@ private static Pair getBasesAndBaseQualitiesAlignedOneToOne(fina } } if (!sawIndel) { - return new Pair<>(bases, baseQualities); + return new ImmutablePair<>(bases, baseQualities); } else { int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); From a8c685efd03cdc42f650b1ed99b686e0d3852b0a Mon Sep 17 00:00:00 2001 From: James Date: Tue, 5 Feb 2019 12:06:01 -0500 Subject: [PATCH 6/8] again --- .../broadinstitute/hellbender/utils/read/AlignmentUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index c7c00b0ba31..79657aecccb 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -263,7 +263,7 @@ else if (co.consumesReferenceBases()) { } } } - return new Pair<>(paddedBases, paddedBaseQualities); + return new ImmutablePair<>(paddedBases, paddedBaseQualities); } } From 0de19387b9e01ea6257238b5b4d9ee8a38073a55 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 7 Feb 2019 15:13:00 -0500 Subject: [PATCH 7/8] responding to second round of review comments --- .../haplotypecaller/ReferenceConfidenceModel.java | 8 ++++---- .../hellbender/utils/read/AlignmentUtils.java | 15 +++++++++------ .../utils/read/AlignmentUtilsUnitTest.java | 8 ++++---- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index f7cfff98a84..6e46499c6bd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -490,22 +490,22 @@ boolean isReadInformativeAboutIndelsOfSize(final GATKRead read, // in the HaplotypeCaller profile, since this method is a major hotspot. final Pair readBasesAndBaseQualities = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); //calls getBasesNoCopy if CIGAR is all match - final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart, refBases, refStart, Integer.MAX_VALUE); + final int baselineMMSum = sumMismatchingQualities(readBasesAndBaseQualities.getLeft(), readBasesAndBaseQualities.getRight(), readStart, refBases, refStart, Integer.MAX_VALUE); // consider each indel size up to max in term, checking if an indel that deletes either the ref bases (deletion // or read bases (insertion) would fit as well as the origin baseline sum of mismatching quality scores for ( int indelSize = 1; indelSize <= maxIndelSize; indelSize++ ) { // check insertions: - if (sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.getLeft(), readBasesAndBaseQualities.getRight(), readStart + indelSize, refBases, refStart, baselineMMSum) <= baselineMMSum) { return false; } // check deletions: - if (sumMismatchingQualities(readBasesAndBaseQualities.getKey(), readBasesAndBaseQualities.getValue(), readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { + if (sumMismatchingQualities(readBasesAndBaseQualities.getLeft(), readBasesAndBaseQualities.getRight(), readStart, refBases, refStart + indelSize, baselineMMSum) <= baselineMMSum) { return false; } } - return true; + return true;x } /** diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java index 79657aecccb..ef6068c05ea 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/AlignmentUtils.java @@ -203,28 +203,31 @@ public static byte[] getBasesCoveringRefInterval(final int refStart, final int r /** * Returns the "IGV View" of all the bases and base qualities in a read aligned to the reference according to the cigar, dropping any bases - * that might be in the read but don't aren't in the reference. Any bases that appear in the reference but not the read + * that might be in the read but aren't in the reference. Any bases that appear in the reference but not the read * will be filled in with GAP_CHARACTER values for the read bases and 0's for base qualities to indicate that they don't exist. * * If the cigar for input read is all matches to the reference then this method will return references to the original * read base/base quality byte arrays in the underlying SamRecord in order to save on array allocation/copying performance effects. * * @param read a read to return aligned to the reference - * @return A tuple of byte arrays where the first array corresponds to the bases aligned to the reference and second + * @return A Pair of byte arrays where the left array corresponds to the bases aligned to the reference and right * array corresponds to the baseQualities aligned to the reference. */ public static Pair getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read) { return getBasesAndBaseQualitiesAlignedOneToOne(read, GAP_CHARACTER, (byte)0); } - private static Pair getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte gapCharacter, final byte qualityPadCharacter) { + private static Pair getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte basePadCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); + // As this code is performance sensitive in the HaplotypeCaller, we elect to use the noCopy versions of these getters. + // We can do this because we don't mutate base or quality arrays in this method or in its accessors final byte[] bases = read.getBasesNoCopy(); final byte[] baseQualities = read.getBaseQualitiesNoCopy(); final int numCigarElements = read.numCigarElements(); boolean sawIndel = false; // Check if the cigar contains indels + // Note that we don't call ContainsOperator() here twice to avoid the performance hit of building stream iterators twice for (int i = 0; i < numCigarElements; i++) { final CigarOperator e = read.getCigarElement(i).getOperator(); if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) { @@ -236,12 +239,12 @@ private static Pair getBasesAndBaseQualitiesAlignedOneToOne(fina return new ImmutablePair<>(bases, baseQualities); } else { - int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); + final int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips]; final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips]; int literalPos = 0; int paddedPos = 0; - for ( int i = 0; i < read.numCigarElements(); i++ ) { + for ( int i = 0; i < numCigarElements; i++ ) { final CigarElement ce = read.getCigarElement(i); final CigarOperator co = ce.getOperator(); if (co.consumesReadBases()) { @@ -257,7 +260,7 @@ private static Pair getBasesAndBaseQualitiesAlignedOneToOne(fina } else if (co.consumesReferenceBases()) { for ( int j = 0; j < ce.getLength(); j++ ) { //pad deleted bases - paddedBases[paddedPos] = gapCharacter; + paddedBases[paddedPos] = basePadCharacter; paddedBaseQualities[paddedPos] = qualityPadCharacter; paddedPos++; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java index b84483f0fd4..04926b2b71a 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/AlignmentUtilsUnitTest.java @@ -1,8 +1,8 @@ package org.broadinstitute.hellbender.utils.read; import htsjdk.samtools.*; -import javafx.util.Pair; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -811,7 +811,7 @@ public Object[][] makeGetBasesAndBaseQualitiesAlignedOneToOneTest() { @Test(dataProvider = "makeGetBasesAndBaseQualitiesAlignedOneToOneTest") - public void testCalcNIndelInformativeReads(final String readBases, final String cigar, final String expectedBases, final byte[] expectedQuals ) { + public void testGetBasesAndBaseQualitiesAlignedOneToOne(final String readBases, final String cigar, final String expectedBases, final byte[] expectedQuals ) { final byte qual = (byte)10; final byte[] quals = Utils.dupBytes(qual, readBases.length()); @@ -819,8 +819,8 @@ public void testCalcNIndelInformativeReads(final String readBases, final String Pair actual = AlignmentUtils.getBasesAndBaseQualitiesAlignedOneToOne(read); - Assert.assertEquals(new String(actual.getKey()), expectedBases); - Assert.assertEquals(actual.getValue(), expectedQuals); + Assert.assertEquals(new String(actual.getLeft()), expectedBases); + Assert.assertEquals(actual.getRight(), expectedQuals); } ////////////////////////////////////////// From 4beeb47b42ab985562e1688344becd145d4585c6 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 7 Feb 2019 15:17:45 -0500 Subject: [PATCH 8/8] removing x --- .../tools/walkers/haplotypecaller/ReferenceConfidenceModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index 6e46499c6bd..81ae9f01cb2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -505,7 +505,7 @@ boolean isReadInformativeAboutIndelsOfSize(final GATKRead read, } } - return true;x + return true; } /**