-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduced some of the repeated steps in ReferenceConfidenceModel.calcNIndelinformativeReads #5469
Changes from all commits
c0fea1d
c79f9e5
5da3a4a
286c036
8688051
a8c685e
0de1938
4beeb47
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,18 +3,19 @@ | |
import htsjdk.samtools.Cigar; | ||
import htsjdk.samtools.CigarElement; | ||
import htsjdk.samtools.CigarOperator; | ||
import htsjdk.samtools.util.Tuple; | ||
import org.apache.commons.lang3.tuple.ImmutablePair; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; | ||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
import org.broadinstitute.hellbender.utils.BaseUtils; | ||
import org.broadinstitute.hellbender.utils.Nucleotide; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.haplotype.Haplotype; | ||
import org.broadinstitute.hellbender.utils.pileup.PileupElement; | ||
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; | ||
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment; | ||
|
||
import java.util.*; | ||
import java.util.function.Function; | ||
|
||
|
||
public final class AlignmentUtils { | ||
|
@@ -200,48 +201,72 @@ public static byte[] getBasesCoveringRefInterval(final int refStart, final int r | |
return Arrays.copyOfRange(bases, basesStart, basesStop + 1); | ||
} | ||
|
||
public static byte[] getBasesAlignedOneToOne(final GATKRead read) { | ||
return getSequenceAlignedOneToOne(read, r -> r.getBasesNoCopy(), GAP_CHARACTER); | ||
} | ||
|
||
public static byte[] getBaseQualsAlignedOneToOne(final GATKRead read) { | ||
return getSequenceAlignedOneToOne(read, r -> r.getBaseQualitiesNoCopy(), (byte)0); | ||
/** | ||
* Returns the "IGV View" of all the bases and base qualities in a read aligned to the reference according to the cigar, dropping any bases | ||
* that might be in the read but aren't in the reference. Any bases that appear in the reference but not the read | ||
* will be filled in with GAP_CHARACTER values for the read bases and 0's for base qualities to indicate that they don't exist. | ||
* | ||
* If the cigar for input read is all matches to the reference then this method will return references to the original | ||
* read base/base quality byte arrays in the underlying SamRecord in order to save on array allocation/copying performance effects. | ||
* | ||
* @param read a read to return aligned to the reference | ||
* @return A Pair of byte arrays where the left array corresponds to the bases aligned to the reference and right | ||
* array corresponds to the baseQualities aligned to the reference. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Description of return value is out of date -- it returns a Pair, not a Tuple. |
||
*/ | ||
public static Pair<byte[], byte[]> getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read) { | ||
return getBasesAndBaseQualitiesAlignedOneToOne(read, GAP_CHARACTER, (byte)0); | ||
} | ||
|
||
public static byte[] getSequenceAlignedOneToOne(final GATKRead read, final Function<GATKRead, byte[]> bytesProvider, final byte padWith) { | ||
private static Pair<byte[], byte[]> getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte basePadCharacter, final byte qualityPadCharacter) { | ||
Utils.nonNull(read); | ||
Utils.nonNull(bytesProvider); | ||
final Cigar cigar = read.getCigar(); | ||
final byte[] sequence = bytesProvider.apply(read); | ||
|
||
if (!cigar.containsOperator(CigarOperator.DELETION) && !cigar.containsOperator(CigarOperator.INSERTION)) { | ||
return sequence; | ||
// As this code is performance sensitive in the HaplotypeCaller, we elect to use the noCopy versions of these getters. | ||
// We can do this because we don't mutate base or quality arrays in this method or in its accessors | ||
final byte[] bases = read.getBasesNoCopy(); | ||
final byte[] baseQualities = read.getBaseQualitiesNoCopy(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a brief comment explaining why it's safe to use the no-copy version of these accessors. |
||
final int numCigarElements = read.numCigarElements(); | ||
boolean sawIndel = false; | ||
|
||
// Check if the cigar contains indels | ||
// Note that we don't call containsOperator() twice here, to avoid the performance hit of building stream iterators twice | ||
for (int i = 0; i < numCigarElements; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make a note explaining why you're not just calling `containsOperator()`. |
||
final CigarOperator e = read.getCigarElement(i).getOperator(); | ||
if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) { | ||
sawIndel = true; | ||
break; | ||
} | ||
} | ||
if (!sawIndel) { | ||
return new ImmutablePair<>(bases, baseQualities); | ||
} | ||
else { | ||
final byte[] paddedBases = new byte[CigarUtils.countRefBasesIncludingSoftClips(read, 0, cigar.numCigarElements())]; | ||
final int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesIncludingSoftClips(read, 0, numCigarElements); | ||
final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips]; | ||
int literalPos = 0; | ||
int paddedPos = 0; | ||
for ( int i = 0; i < cigar.numCigarElements(); i++ ) { | ||
final CigarElement ce = cigar.getCigarElement(i); | ||
for ( int i = 0; i < numCigarElements; i++ ) { | ||
final CigarElement ce = read.getCigarElement(i); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use the already-initialized |
||
final CigarOperator co = ce.getOperator(); | ||
if (co.consumesReadBases()) { | ||
if (!co.consumesReferenceBases()) { | ||
literalPos += ce.getLength(); //skip inserted bases | ||
} | ||
else { | ||
System.arraycopy(sequence, literalPos, paddedBases, paddedPos, ce.getLength()); | ||
System.arraycopy(bases, literalPos, paddedBases, paddedPos, ce.getLength()); | ||
System.arraycopy(baseQualities, literalPos, paddedBaseQualities, paddedPos, ce.getLength()); | ||
literalPos += ce.getLength(); | ||
paddedPos += ce.getLength(); | ||
} | ||
} | ||
else if (co.consumesReferenceBases()) { | ||
for ( int j = 0; j < ce.getLength(); j++ ) { //pad deleted bases | ||
paddedBases[paddedPos] = padWith; | ||
paddedBases[paddedPos] = basePadCharacter; | ||
paddedBaseQualities[paddedPos] = qualityPadCharacter; | ||
paddedPos++; | ||
} | ||
} | ||
} | ||
return paddedBases; | ||
return new ImmutablePair<>(paddedBases, paddedBaseQualities); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any advantage to using `Tuple` over `Pair`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh, I'm not sure what Pair implementation you are talking about. We have one in gatk that is specific to MarkDuplicates, which should probably be renamed to be less confusing anyway...
I think the different tuple implementations are mostly interchangeable, but this one happens to live in htsjdk, so that is generally a dependency plus.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was talking about `org.apache.commons.lang3.tuple.Pair`. I've been using it a lot in Funcotator.