From f82fe6589306b007dc3c829d2c38a22556ba4755 Mon Sep 17 00:00:00 2001 From: Frank Austin Nothaft Date: Tue, 28 Mar 2017 23:23:43 -0700 Subject: [PATCH] End clip passes. --- .../rdd/read/realignment/RealignIndels.scala | 8 +++-- .../read/realignment/RealignIndelsSuite.scala | 29 +++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala index c773cf549d..a65af507d1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala @@ -481,19 +481,23 @@ private[read] class RealignIndels( val cigarElements = if (bestConsensus.consensus.length > 0) { List[CigarElement]( + new CigarElement(r.getBasesTrimmedFromStart, CigarOperator.H), new CigarElement(startClipped, CigarOperator.S), new CigarElement((bestConsensus.index.start - (refStart + finalRemapping) + 1 - startClipped).toInt, CigarOperator.M), adjustedIdElement, new CigarElement(endLength.toInt - endClipped, CigarOperator.M), - new CigarElement(endClipped, CigarOperator.S) + new CigarElement(endClipped, CigarOperator.S), + new CigarElement(r.getBasesTrimmedFromEnd, CigarOperator.H) ).filter(_.getLength > 0) } else { List[CigarElement]( + new CigarElement(r.getBasesTrimmedFromStart, CigarOperator.H), new CigarElement(startClipped, CigarOperator.S), new CigarElement((bestConsensus.index.start - (refStart + finalRemapping)).toInt - startClipped, CigarOperator.M), adjustedIdElement, new CigarElement(endLength.toInt - endClipped, CigarOperator.M), - new CigarElement(endClipped, CigarOperator.S) + new CigarElement(endClipped, CigarOperator.S), + new CigarElement(r.getBasesTrimmedFromEnd, CigarOperator.H) ).filter(_.getLength > 0) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala index 0e1d0074c8..30376f952d 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala @@ -459,6 +459,7 @@ class RealignIndelsSuite extends ADAMFunSuite { // ovs: AGTT CCAC // st: TT CCACA // sc: agA GGTC + // ec: A GGTCt val insRead = AlignmentRecord.newBuilder .setContigName("1") .setStart(10L) @@ -525,14 +526,32 @@ class RealignIndelsSuite extends ADAMFunSuite { .setReadMapped(true) .setMapq(44) .build + val ecRead = AlignmentRecord.newBuilder + .setContigName("1") + .setStart(13L) + .setEnd(18L) + .setSequence("AGGTCA") + .setQual("......") + .setCigar("5M1S1H") + .setMismatchingPositions("1C0C0A1") + .setBasesTrimmedFromEnd(1) + .setReadMapped(true) + .setMapq(45) + .build - val rdd = AlignmentRecordRDD(sc.parallelize(Seq(insRead, extRead, ovlRead, ovsRead, stRead, scRead)), + val rdd = AlignmentRecordRDD(sc.parallelize(Seq(insRead, + extRead, + ovlRead, + ovsRead, + stRead, + scRead, + ecRead)), new SequenceDictionary(Vector(SequenceRecord("1", 20L))), RecordGroupDictionary.empty) val realignedReads = rdd.realignIndels(lodThreshold = 0.0) .rdd .collect - assert(realignedReads.count(_.getMapq >= 50) === 5) + assert(realignedReads.count(_.getMapq >= 50) === 6) val realignedExtRead = realignedReads.filter(_.getMapq == 50).head assert(realignedExtRead.getStart === 8L) assert(realignedExtRead.getEnd === 14L) @@ -558,6 +577,12 @@ class RealignIndelsSuite extends ADAMFunSuite { assert(realignedScRead.getEnd === 15L) assert(realignedScRead.getCigar === "2S1M3I1M") assert(realignedScRead.getMismatchingPositions === "2") + val realignedEcRead = realignedReads.filter(_.getMapq == 55).head + assert(realignedEcRead.getStart === 13L) + assert(realignedEcRead.getEnd === 15L) + assert(realignedEcRead.getCigar === "1M3I1M1S1H") + assert(realignedEcRead.getMismatchingPositions === "2") + assert(realignedEcRead.getBasesTrimmedFromEnd === 1) } sparkTest("if realigning a target doesn't improve the LOD, don't drop reads") {