diff --git a/.gitignore b/.gitignore
index 910e7c97a8..c08b87429c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ build
*.log
.*.swp
.DS_Store
+
+*#*
\ No newline at end of file
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
index 9ac1b6fd3f..f72826c5cd 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala
@@ -17,194 +17,44 @@
*/
package org.bdgenomics.adam.models
-import org.bdgenomics.formats.avro._
-import org.bdgenomics.adam.rdd.ADAMContext._
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import com.esotericsoftware.kryo.io.{ Input, Output }
-import scala.annotation.tailrec
-
-object ReferencePositionWithOrientation {
-
- def apply(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
- if (record.getReadMapped) {
- Some(new ReferencePositionWithOrientation(ReferencePosition(record).get, record.getReadNegativeStrand))
- } else {
- None
- }
- }
-
- def fivePrime(record: AlignmentRecord): Option[ReferencePositionWithOrientation] = {
- if (record.getReadMapped) {
- Some(new ReferencePositionWithOrientation(ReferencePosition.fivePrime(record).get, record.getReadNegativeStrand))
- } else {
- None
- }
- }
- /**
- * Given an index into a sequence (e.g., a transcript), and a sequence
- * of regions that map this sequence to a reference genome, returns a
- * reference position.
- *
- * @param idx Index into a sequence.
- * @param mapping Sequence of reference regions that maps the sequence to a
- * reference genome.
- * @return Returns a lifted over reference position.
- *
- * @throws AssertionError Throws an assertion error if the strandedness does
- * not match between the reference position and the mapping regions.
- * @throws IllegalArgumentException Throws an exception if the index given
- * is out of the range of the mappings given.
- */
- def liftOverToReference(idx: Long,
- mapping: Seq[ReferenceRegionWithOrientation]): ReferencePositionWithOrientation = {
- val negativeStrand = mapping.head.negativeStrand
-
- @tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
- idx: Long): ReferencePositionWithOrientation = {
- if (!regions.hasNext) {
- throw new IllegalArgumentException("Liftover is out of range")
- } else {
- val reg = regions.next()
- assert(reg.negativeStrand == negativeStrand,
- "Strand is inconsistent across set of regions.")
-
- // is our position in this region?
- if (reg.width > idx) {
- // get contig
- val chr = reg.referenceName
-
- // get our pos
- val pos = if (negativeStrand) {
- reg.end - idx - 1 // need -1 because of open end coordinate
- } else {
- reg.start + idx
- }
-
- // return
- ReferencePositionWithOrientation(ReferencePosition(chr, pos),
- negativeStrand)
- } else {
- // recurse
- liftOverHelper(regions, idx - reg.width)
- }
- }
- }
+import org.bdgenomics.formats.avro._
- // call out to helper
- if (negativeStrand) {
- liftOverHelper(mapping.reverseIterator, idx)
- } else {
- liftOverHelper(mapping.toIterator, idx)
- }
- }
+object PositionOrdering extends ReferenceOrdering[ReferencePosition] {
}
-
-case class ReferencePositionWithOrientation(refPos: ReferencePosition,
- negativeStrand: Boolean)
- extends Ordered[ReferencePositionWithOrientation] {
-
- /**
- * Given a sequence of regions that map from a reference genome to a sequence,
- * returns an index into a sequence.
- *
- * @param mapping Sequence of reference regions that maps the sequence to a
- * reference genome.
- * @return Returns an index into a sequence.
- *
- * @throws AssertionError Throws an assertion error if the strandedness does
- * not match between the reference position and the mapping regions.
- * @throws IllegalArgumentException Throws an exception if the index given
- * is out of the range of the mappings given.
- */
- def liftOverFromReference(mapping: Seq[ReferenceRegionWithOrientation]): Long = {
- val pos = refPos.pos
-
- @tailrec def liftOverHelper(regions: Iterator[ReferenceRegionWithOrientation],
- idx: Long = 0L): Long = {
- if (!regions.hasNext) {
- throw new IllegalArgumentException("Position was not contained in mapping.")
- } else {
- // get region
- val reg = regions.next()
- assert(reg.negativeStrand == negativeStrand,
- "Strand is inconsistent across set of regions.")
-
- // are we in this region?
- if (reg.contains(this)) {
- // how far are we from the end of the region
- val into = if (negativeStrand) {
- reg.end - pos
- } else {
- pos - reg.start
- }
-
- // add to index
- idx + into
- } else {
- liftOverHelper(regions, idx + reg.width)
- }
- }
- }
-
- // call out to helper
- liftOverHelper(mapping.toIterator)
- }
-
- override def compare(that: ReferencePositionWithOrientation): Int = {
- val posCompare = refPos.compare(that.refPos)
- if (posCompare != 0) {
- posCompare
- } else {
- negativeStrand.compare(that.negativeStrand)
- }
- }
+object OptionalPositionOrdering extends OptionalReferenceOrdering[ReferencePosition] {
+ val baseOrdering = PositionOrdering
}
-object ReferencePosition {
+object ReferencePosition extends Serializable {
+
+ implicit def orderingForPositions = PositionOrdering
+ implicit def orderingForOptionalPositions = OptionalPositionOrdering
/**
* The UNMAPPED value is a convenience value, which can be used to indicate a position
* which is not located anywhere along the reference sequences (see, e.g. its use in
* GenomicPositionPartitioner).
*/
- val UNMAPPED = new ReferencePosition("", -1)
-
- /**
- * Checks to see if a read is mapped with a valid position.
- *
- * @param record Read to check for mapping.
- * @return True if read is mapped and has a valid position, else false.
- */
- def mappedPositionCheck(record: AlignmentRecord): Boolean = {
- val contig = Option(record.getContig)
- val start = Option(record.getStart)
- record.getReadMapped && (contig.isDefined && Option(contig.get.getContigName).isDefined) && start.isDefined
- }
+ val UNMAPPED = new ReferencePosition("", 0)
/**
* Generates a reference position from a read. This function generates the
* position from the start mapping position of the read.
*
* @param record Read from which to generate a reference position.
- * @return Reference position wrapped inside of an option. If the read is
- * mapped, the option will be stuffed, else it will be empty (None).
+ * @return Reference position of the start position.
*
* @see fivePrime
*/
- def apply(record: AlignmentRecord): Option[ReferencePosition] = {
- if (mappedPositionCheck(record)) {
- Some(new ReferencePosition(record.getContig.getContigName.toString, record.getStart))
- } else {
- None
- }
+ def apply(record: AlignmentRecord): ReferencePosition = {
+ new ReferencePosition(record.getContig.getContigName.toString, record.getStart)
}
/**
* Generates a reference position from a called variant.
*
- * @note An invariant of variants is that they have a defined position,
- * therefore we do not wrap them in an option.
- *
* @param variant Called variant from which to generate a
* reference position.
* @return The reference position of this variant.
@@ -216,9 +66,6 @@ object ReferencePosition {
/**
* Generates a reference position from a genotype.
*
- * @note An invariant of genotypes is that they have a defined position,
- * therefore we do not wrap them in an option.
- *
* @param genotype Genotype from which to generate a reference position.
* @return The reference position of this genotype.
*/
@@ -227,63 +74,33 @@ object ReferencePosition {
new ReferencePosition(variant.getContig.getContigName, variant.getStart)
}
- /**
- * Generates a reference position from the five prime end of a read. This
- * will be different from the start mapping position of a read if this
- * read is a reverse strand read.
- *
- * @param record Read from which to generate a reference position.
- * @return Reference position wrapped inside of an option. If the read is
- * mapped, the option will be stuffed, else it will be empty (None).
- *
- * @see apply(record: Read)
- */
- def fivePrime(record: AlignmentRecord): Option[ReferencePosition] = {
- if (mappedPositionCheck(record)) {
- Some(new ReferencePosition(record.getContig.getContigName, record.fivePrimePosition.get))
- } else {
- None
- }
+ def apply(referenceName: String, pos: Long): ReferencePosition = {
+ new ReferencePosition(referenceName, pos)
}
-}
-case class ReferencePosition(referenceName: String, pos: Long) extends Ordered[ReferencePosition] {
-
- override def compare(that: ReferencePosition): Int = {
- // Note: important to compare by reference first for coordinate ordering
- val refCompare = referenceName.compare(that.referenceName)
- if (refCompare != 0) {
- refCompare
- } else {
- pos.compare(that.pos)
- }
+ def apply(referenceName: String, pos: Long, orientation: Strand): ReferencePosition = {
+ new ReferencePosition(referenceName, pos, orientation)
}
}
-class ReferencePositionWithOrientationSerializer extends Serializer[ReferencePositionWithOrientation] {
- val referencePositionSerializer = new ReferencePositionSerializer()
-
- def write(kryo: Kryo, output: Output, obj: ReferencePositionWithOrientation) = {
- output.writeBoolean(obj.negativeStrand)
- referencePositionSerializer.write(kryo, output, obj.refPos)
- }
-
- def read(kryo: Kryo, input: Input, klazz: Class[ReferencePositionWithOrientation]): ReferencePositionWithOrientation = {
- val negativeStrand = input.readBoolean()
- val referencePosition = referencePositionSerializer.read(kryo, input, classOf[ReferencePosition])
- new ReferencePositionWithOrientation(referencePosition, negativeStrand)
- }
+class ReferencePosition(override val referenceName: String,
+ val pos: Long,
+ override val orientation: Strand = Strand.Independent) extends ReferenceRegion(referenceName, pos, pos + 1, orientation) {
}
class ReferencePositionSerializer extends Serializer[ReferencePosition] {
+ private val enumValues = Strand.values()
+
def write(kryo: Kryo, output: Output, obj: ReferencePosition) = {
output.writeString(obj.referenceName)
output.writeLong(obj.pos)
+ output.writeInt(obj.orientation.ordinal)
}
def read(kryo: Kryo, input: Input, klazz: Class[ReferencePosition]): ReferencePosition = {
val refName = input.readString()
val pos = input.readLong()
- new ReferencePosition(refName, pos)
+ val orientation = input.readInt()
+ new ReferencePosition(refName, pos, enumValues(orientation))
}
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala
index e86bb0d99e..5cd7983578 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePositionPair.scala
@@ -19,18 +19,25 @@ package org.bdgenomics.adam.models
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import com.esotericsoftware.kryo.io.{ Input, Output }
-import org.apache.spark.Logging
import Ordering.Option
+import org.apache.spark.Logging
import org.bdgenomics.adam.instrumentation.Timers.CreateReferencePositionPair
+import org.bdgenomics.adam.models.ReferenceRegion._
+import org.bdgenomics.adam.rich.RichAlignmentRecord
+import org.bdgenomics.formats.avro.AlignmentRecord
object ReferencePositionPair extends Logging {
+ private def posForRead(read: AlignmentRecord): Option[ReferencePosition] = {
+ RichAlignmentRecord(read).fivePrimeReferencePosition
+ }
+
def apply(singleReadBucket: SingleReadBucket): ReferencePositionPair = CreateReferencePositionPair.time {
singleReadBucket.primaryMapped.toSeq.lift(0) match {
case None =>
// No mapped reads
new ReferencePositionPair(None, None)
case Some(read1) =>
- val read1pos = ReferencePositionWithOrientation.fivePrime(read1)
+ val read1pos = posForRead(read1)
if (read1.getReadPaired && read1.getMateMapped) {
singleReadBucket.primaryMapped.toSeq.lift(1) match {
case None =>
@@ -39,8 +46,8 @@ object ReferencePositionPair extends Logging {
new ReferencePositionPair(read1pos, None)
case Some(read2) =>
// Both reads are mapped
- val read2pos = ReferencePositionWithOrientation.fivePrime(read2)
- if (read1pos < read2pos) {
+ val read2pos = posForRead(read2)
+ if (read1pos.get.disorient.compareTo(read2pos.get.disorient) < 0) {
new ReferencePositionPair(read1pos, read2pos)
} else {
new ReferencePositionPair(read2pos, read1pos)
@@ -52,9 +59,9 @@ object ReferencePositionPair extends Logging {
// Mate is not mapped...
new ReferencePositionPair(read1pos, None)
case Some(read2) =>
- val read2pos = ReferencePositionWithOrientation.fivePrime(read2)
+ val read2pos = posForRead(read2)
log.warn("%s claimed to not have mate but mate found".format(read1.getReadName))
- if (read1pos < read2pos) {
+ if (read1pos.get.disorient.compareTo(read2pos.get.disorient) < 0) {
new ReferencePositionPair(read1pos, read2pos)
} else {
new ReferencePositionPair(read2pos, read1pos)
@@ -65,13 +72,13 @@ object ReferencePositionPair extends Logging {
}
}
-case class ReferencePositionPair(read1refPos: Option[ReferencePositionWithOrientation],
- read2refPos: Option[ReferencePositionWithOrientation])
+case class ReferencePositionPair(read1refPos: Option[ReferencePosition],
+ read2refPos: Option[ReferencePosition])
class ReferencePositionPairSerializer extends Serializer[ReferencePositionPair] {
- val rps = new ReferencePositionWithOrientationSerializer()
+ val rps = new ReferencePositionSerializer()
- def writeOptionalReferencePos(kryo: Kryo, output: Output, optRefPos: Option[ReferencePositionWithOrientation]) = {
+ def writeOptionalReferencePos(kryo: Kryo, output: Output, optRefPos: Option[ReferencePosition]) = {
optRefPos match {
case None =>
output.writeBoolean(false)
@@ -81,10 +88,10 @@ class ReferencePositionPairSerializer extends Serializer[ReferencePositionPair]
}
}
- def readOptionalReferencePos(kryo: Kryo, input: Input): Option[ReferencePositionWithOrientation] = {
+ def readOptionalReferencePos(kryo: Kryo, input: Input): Option[ReferencePosition] = {
val exists = input.readBoolean()
if (exists) {
- Some(rps.read(kryo, input, classOf[ReferencePositionWithOrientation]))
+ Some(rps.read(kryo, input, classOf[ReferencePosition]))
} else {
None
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala
index 86bd7d036e..e6ffa2ffb4 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala
@@ -20,73 +20,55 @@ package org.bdgenomics.adam.models
import com.esotericsoftware.kryo.io.{ Input, Output }
import com.esotericsoftware.kryo.{ Kryo, Serializer }
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.formats.avro.{ Feature, AlignmentRecord, NucleotideContigFragment }
+import org.bdgenomics.formats.avro._
import scala.math.{ max, min }
-object ReferenceRegionWithOrientation {
-
- /**
- * Builds an oriented reference region from the individual parameters
- *
- * @param referenceName The name of the sequence (chromosome) in the reference genome
- * @param start The 0-based residue-coordinate for the start of the region
- * @param end The 0-based residue-coordinate for the first residue after the start
- * which is not in the region -- i.e. [start, end) define a 0-based
- * half-open interval.
- * @param negativeStrand Boolean flag as to whether the region is on the forward or
- * reverse strand of the reference region.
- */
- def apply(referenceName: String,
- start: Long,
- end: Long,
- negativeStrand: Boolean): ReferenceRegionWithOrientation = {
- ReferenceRegionWithOrientation(ReferenceRegion(referenceName, start, end), negativeStrand)
- }
-}
-
-/**
- * Represents a contiguous region of the reference genome with strand information.
- *
- * @param region The genomic locus as a ReferenceRegion
- * @param negativeStrand Boolean flag as to whether the region is on the forward or
- * reverse strand of the reference region.
- */
-case class ReferenceRegionWithOrientation(region: ReferenceRegion,
- negativeStrand: Boolean) extends Ordered[ReferenceRegionWithOrientation] {
- def width: Long = region.width
-
- def contains(other: ReferencePositionWithOrientation): Boolean = {
- negativeStrand == other.negativeStrand && region.contains(other.refPos)
- }
-
- def contains(other: ReferenceRegionWithOrientation): Boolean = {
- region.contains(other.region) && negativeStrand == other.negativeStrand
- }
-
- def overlaps(other: ReferenceRegionWithOrientation): Boolean = {
- region.overlaps(other.region) && negativeStrand == other.negativeStrand
+trait ReferenceOrdering[T <: ReferenceRegion] extends Ordering[T] {
+ private def regionCompare(a: T,
+ b: T): Int = {
+ if (a.referenceName != b.referenceName) {
+ a.referenceName.compareTo(b.referenceName)
+ } else if (a.start != b.start) {
+ a.start.compareTo(b.start)
+ } else {
+ a.end.compareTo(b.end)
+ }
}
- def compare(that: ReferenceRegionWithOrientation): Int = {
- val regionCompare = region.compare(that.region)
- if (regionCompare != 0) {
- regionCompare
+ def compare(a: T,
+ b: T): Int = {
+ val rc = regionCompare(a, b)
+ if (rc == 0) {
+ a.orientation.ordinal compare b.orientation.ordinal
} else {
- negativeStrand.compare(that.negativeStrand)
+ rc
}
}
+}
- def toReferenceRegion: ReferenceRegion = region
-
- def referenceName: String = region.referenceName
+trait OptionalReferenceOrdering[T <: ReferenceRegion] extends Ordering[Option[T]] {
+ val baseOrdering: ReferenceOrdering[T]
- def start: Long = region.start
+ def compare(a: Option[T],
+ b: Option[T]): Int = (a, b) match {
+ case (None, None) => 0
+ case (Some(pa), Some(pb)) => baseOrdering.compare(pa, pb)
+ case (Some(pa), None) => -1
+ case (None, Some(pb)) => -1
+ }
+}
- def end: Long = region.end
+object RegionOrdering extends ReferenceOrdering[ReferenceRegion] {
+}
+object OptionalRegionOrdering extends OptionalReferenceOrdering[ReferenceRegion] {
+ val baseOrdering = RegionOrdering
}
object ReferenceRegion {
+ implicit def orderingForPositions = RegionOrdering
+ implicit def orderingForOptionalPositions = OptionalRegionOrdering
+
/**
* Generates a reference region from read data. Returns None if the read is not mapped;
* else, returns the inclusive region from the start to the end of the read alignment.
@@ -144,13 +126,15 @@ object ReferenceRegion {
* which is not in the region -- i.e. [start, end) define a 0-based
* half-open interval.
*/
-case class ReferenceRegion(referenceName: String, start: Long, end: Long) extends Ordered[ReferenceRegion] with Interval {
+case class ReferenceRegion(referenceName: String, start: Long, end: Long, orientation: Strand = Strand.Independent) extends Comparable[ReferenceRegion] with Interval {
assert(start >= 0)
assert(end >= start)
def width: Long = end - start
+ def disorient: ReferenceRegion = new ReferenceRegion(referenceName, start, end)
+
/**
* Merges two reference regions that are contiguous.
*
@@ -189,6 +173,7 @@ case class ReferenceRegion(referenceName: String, start: Long, end: Long) extend
* @see merge
*/
def hull(region: ReferenceRegion): ReferenceRegion = {
+ assert(orientation == region.orientation, "Cannot compute convex hull of differently oriented regions.")
assert(referenceName == region.referenceName, "Cannot compute convex hull of regions on different references.")
ReferenceRegion(referenceName, min(start, region.start), max(end, region.end))
}
@@ -202,28 +187,6 @@ case class ReferenceRegion(referenceName: String, start: Long, end: Long) extend
def isAdjacent(region: ReferenceRegion): Boolean =
distance(region).map(_ == 1).getOrElse(false)
- /**
- * Returns the distance between this reference region and a point in the reference space.
- *
- * @note Distance here is defined as the minimum distance between any point within this region, and
- * the point we are measuring against. If the point is within this region, its distance will be 0.
- * Else, the distance will be greater than or equal to 1.
- *
- * @param other Point to compare against.
- * @return Returns an option containing the distance between two points. If the point is not in
- * our reference space, we return an empty option (None).
- */
- def distance(other: ReferencePosition): Option[Long] =
- if (referenceName == other.referenceName)
- if (other.pos < start)
- Some(start - other.pos)
- else if (other.pos >= end)
- Some(other.pos - end + 1)
- else
- Some(0)
- else
- None
-
/**
* Returns the distance between this reference region and another region in the reference space.
*
@@ -236,7 +199,7 @@ case class ReferenceRegion(referenceName: String, start: Long, end: Long) extend
* our reference space, we return an empty option (None).
*/
def distance(other: ReferenceRegion): Option[Long] =
- if (referenceName == other.referenceName)
+ if (referenceName == other.referenceName && orientation == other.orientation)
if (overlaps(other))
Some(0)
else if (other.start >= end)
@@ -246,22 +209,27 @@ case class ReferenceRegion(referenceName: String, start: Long, end: Long) extend
else
None
- def contains(other: ReferencePosition): Boolean =
- referenceName == other.referenceName && start <= other.pos && end > other.pos
+ def contains(other: ReferencePosition): Boolean = {
+ orientation == other.orientation &&
+ referenceName == other.referenceName &&
+ start <= other.pos && end > other.pos
+ }
- def contains(other: ReferenceRegion): Boolean =
- referenceName == other.referenceName && start <= other.start && end >= other.end
+ def contains(other: ReferenceRegion): Boolean = {
+ orientation == other.orientation &&
+ referenceName == other.referenceName &&
+ start <= other.start && end >= other.end
+ }
- def overlaps(other: ReferenceRegion): Boolean =
- referenceName == other.referenceName && end > other.start && start < other.end
+ def overlaps(other: ReferenceRegion): Boolean = {
+ orientation == other.orientation &&
+ referenceName == other.referenceName &&
+ end > other.start && start < other.end
+ }
- def compare(that: ReferenceRegion): Int =
- if (referenceName != that.referenceName)
- referenceName.compareTo(that.referenceName)
- else if (start != that.start)
- start.compareTo(that.start)
- else
- end.compareTo(that.end)
+ def compareTo(that: ReferenceRegion): Int = {
+ RegionOrdering.compare(this, that)
+ }
def length(): Long = {
end - start
@@ -269,16 +237,20 @@ case class ReferenceRegion(referenceName: String, start: Long, end: Long) extend
}
class ReferenceRegionSerializer extends Serializer[ReferenceRegion] {
+ private val enumValues = Strand.values()
+
def write(kryo: Kryo, output: Output, obj: ReferenceRegion) = {
output.writeString(obj.referenceName)
output.writeLong(obj.start)
output.writeLong(obj.end)
+ output.writeInt(obj.orientation.ordinal())
}
def read(kryo: Kryo, input: Input, klazz: Class[ReferenceRegion]): ReferenceRegion = {
val referenceName = input.readString()
val start = input.readLong()
val end = input.readLong()
- new ReferenceRegion(referenceName, start, end)
+ val orientation = input.readInt()
+ new ReferenceRegion(referenceName, start, end, enumValues(orientation))
}
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala
index f7ca6a3688..6d39956dc1 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala
@@ -15,12 +15,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.bdgenomics.adam.rdd
import org.apache.spark.{ Logging, Partitioner, SparkContext }
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
+import org.bdgenomics.adam.models.ReferenceRegion._
import org.bdgenomics.adam.models.{ SequenceDictionary, SequenceRecord, ReferenceRegion }
import org.bdgenomics.adam.rdd.ADAMContext._
import scala.collection.mutable.ListBuffer
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
index 86976d9421..198c129196 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
@@ -78,7 +78,7 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e
assert(kv1._1.isAdjacent(kv2._1), "Regions being joined must be adjacent. For: " +
kv1 + ", " + kv2)
- (kv1._1.merge(kv2._1), if (kv1._1.compare(kv2._1) <= 0) {
+ (kv1._1.merge(kv2._1), if (kv1._1.compareTo(kv2._1) <= 0) {
kv1._2 + kv2._2
} else {
kv2._2 + kv1._2
@@ -94,7 +94,7 @@ class NucleotideContigFragmentRDDFunctions(rdd: RDD[NucleotideContigFragment]) e
.map(kv => getString(kv))
.reduce(reducePairs)
- assert(pair._1.compare(region) == 0,
+ assert(pair._1.compareTo(region) == 0,
"Merging fragments returned a different region than requested.")
pair._2
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
index e4bbe2ad35..8e0f6334b6 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
@@ -32,6 +32,7 @@ import org.bdgenomics.adam.algorithms.consensus.{
import org.bdgenomics.adam.converters.AlignmentRecordConverter
import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.models._
+import org.bdgenomics.adam.models.ReferenceRegion._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.{ ADAMSaveArgs, ADAMSaveAnyArgs, ADAMSequenceDictionaryRDDAggregator }
import org.bdgenomics.adam.rdd.read.correction.{ ErrorCorrection, TrimReads }
@@ -266,12 +267,10 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord])
}
rdd.map(p => {
- val referencePos = ReferencePosition(p) match {
- case None =>
- // Move unmapped reads to the end of the file
- ReferencePosition(
- unmappedReferenceNames.next(), Long.MaxValue)
- case Some(pos) => pos
+ val referencePos = if (p.getReadMapped) {
+ ReferencePosition(p)
+ } else {
+ ReferencePosition(unmappedReferenceNames.next(), 0)
}
(referencePos, p)
}).sortByKey().map(p => p._2)
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala
index 3af8565448..c4afada9c7 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MarkDuplicates.scala
@@ -19,7 +19,11 @@ package org.bdgenomics.adam.rdd.read
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.instrumentation.Timers._
-import org.bdgenomics.adam.models.{ ReferencePositionPair, ReferencePositionWithOrientation, SingleReadBucket }
+import org.bdgenomics.adam.models.{
+ ReferencePosition,
+ ReferencePositionPair,
+ SingleReadBucket
+}
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
@@ -62,19 +66,21 @@ private[rdd] object MarkDuplicates extends Serializable {
def apply(rdd: RDD[AlignmentRecord]): RDD[AlignmentRecord] = {
// Group by library and left position
- def leftPositionAndLibrary(p: (ReferencePositionPair, SingleReadBucket)): (Option[ReferencePositionWithOrientation], String) = {
+ def leftPositionAndLibrary(p: (ReferencePositionPair, SingleReadBucket)): (Option[ReferencePosition], String) = {
(p._1.read1refPos, p._2.allReads.head.getRecordGroupLibrary)
}
// Group by right position
- def rightPosition(p: (ReferencePositionPair, SingleReadBucket)): Option[ReferencePositionWithOrientation] = {
+ def rightPosition(p: (ReferencePositionPair, SingleReadBucket)): Option[ReferencePosition] = {
p._1.read2refPos
}
- rdd.adamSingleReadBuckets().keyBy(ReferencePositionPair(_)).groupBy(leftPositionAndLibrary)
+ rdd.adamSingleReadBuckets()
+ .keyBy(ReferencePositionPair(_))
+ .groupBy(leftPositionAndLibrary)
.flatMap(kv => PerformDuplicateMarking.time {
- val leftPos: Option[ReferencePositionWithOrientation] = kv._1._1
+ val leftPos: Option[ReferencePosition] = kv._1._1
val readsAtLeftPos: Iterable[(ReferencePositionPair, SingleReadBucket)] = kv._2
leftPos match {
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala
index f916bbe4e3..db94c35f8a 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala
@@ -51,7 +51,7 @@ object TargetOrdering extends Ordering[IndelRealignmentTarget] {
* @param b Indel realignment target to compare.
* @return Comparison done by starting position.
*/
- def compare(a: IndelRealignmentTarget, b: IndelRealignmentTarget): Int = a.readRange compare b.readRange
+ def compare(a: IndelRealignmentTarget, b: IndelRealignmentTarget): Int = a.readRange compareTo b.readRange
/**
* Check to see if an indel realignment target contains the given read.
@@ -76,7 +76,7 @@ object TargetOrdering extends Ordering[IndelRealignmentTarget] {
def lt(target: IndelRealignmentTarget, read: RichAlignmentRecord): Boolean = {
val region = read.readRegion
- region.forall(r => target.readRange.compare(r) < 0)
+ region.forall(r => target.readRange.compareTo(r) < 0)
}
/**
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala
index ea435aede6..e4da3d53ff 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala
@@ -22,6 +22,7 @@ import org.apache.spark.Logging
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.algorithms.consensus.{ ConsensusGenerator, ConsensusGeneratorFromReads }
+import org.bdgenomics.adam.models.ReferenceRegion._
import org.bdgenomics.adam.models.{ Consensus, ReferencePosition, ReferenceRegion }
import org.bdgenomics.adam.rich.RichAlignmentRecord
import org.bdgenomics.adam.rich.RichAlignmentRecord._
@@ -463,7 +464,7 @@ private[rdd] class RealignIndels(val consensusModel: ConsensusGenerator = new Co
rdd.filter(r => r.getReadMapped)
} else {
val sr = rdd.filter(r => r.getReadMapped)
- .keyBy(r => ReferencePosition(r).get)
+ .keyBy(ReferencePosition(_))
.sortByKey()
sr.map(kv => kv._2)
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala
index 001159e27a..0349e00de8 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala
@@ -22,7 +22,7 @@ import htsjdk.samtools.{ Cigar, CigarElement, CigarOperator, TextCigarCodec }
import org.bdgenomics.adam.models.{ Attribute, ReferencePosition, ReferenceRegion }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util._
-import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.formats.avro.{ AlignmentRecord, Strand }
import scala.collection.immutable.NumericRange
object RichAlignmentRecord {
@@ -139,6 +139,17 @@ class RichAlignmentRecord(val record: AlignmentRecord) {
}
}
+ def fivePrimeReferencePosition: Option[ReferencePosition] = {
+ fivePrimePosition.map(rp => {
+ val strand = if (record.getReadNegativeStrand) {
+ Strand.Reverse
+ } else {
+ Strand.Forward
+ }
+ ReferencePosition(record.getContig.getContigName, rp, strand)
+ })
+ }
+
// Does this read overlap with the given reference position?
def overlapsReferencePosition(pos: ReferencePosition): Option[Boolean] = {
readRegion.map(_.contains(pos))
@@ -164,8 +175,8 @@ class RichAlignmentRecord(val record: AlignmentRecord) {
}
def getReferenceContext(readOffset: Int, referencePosition: Long, cigarElem: CigarElement, elemOffset: Int): ReferenceSequenceContext = {
- val position = if (ReferencePosition.mappedPositionCheck(record)) {
- Some(new ReferencePosition(record.getContig.getContigName.toString, referencePosition))
+ val position = if (record.getReadMapped) {
+ Some(ReferencePosition(record.getContig.getContigName, referencePosition))
} else {
None
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
index 641082d052..ff3626eb69 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala
@@ -78,7 +78,6 @@ class ADAMKryoRegistrator extends KryoRegistrator {
kryo.register(classOf[DatabaseVariantAnnotation], new AvroSerializer[DatabaseVariantAnnotation]())
kryo.register(classOf[Dbxref], new AvroSerializer[Dbxref]())
kryo.register(classOf[Feature], new AvroSerializer[Feature]())
- kryo.register(classOf[ReferencePositionWithOrientation], new ReferencePositionWithOrientationSerializer)
kryo.register(classOf[ReferencePosition], new ReferencePositionSerializer)
kryo.register(classOf[ReferencePositionPair], new ReferencePositionPairSerializer)
kryo.register(classOf[SingleReadBucket], new SingleReadBucketSerializer)
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala
index 6721a2e8ae..b2c35c66d2 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala
@@ -33,69 +33,12 @@ class ReferencePositionSuite extends FunSuite {
.setReadMapped(true)
.build()
- val refPosOpt = ReferencePosition(read)
-
- assert(refPosOpt.isDefined)
-
- val refPos = refPosOpt.get
+ val refPos = ReferencePosition(read)
assert(refPos.referenceName === "chr1")
assert(refPos.pos === 1L)
}
- test("create reference position from unmapped read") {
- val read = AlignmentRecord.newBuilder()
- .setReadMapped(false)
- .build()
-
- val refPosOpt = ReferencePosition(read)
-
- assert(refPosOpt.isEmpty)
- }
-
- test("create reference position from mapped read but contig not specified") {
- val read = AlignmentRecord.newBuilder()
- .setReadMapped(true)
- .setStart(1L)
- .build()
-
- val refPosOpt = ReferencePosition(read)
-
- assert(refPosOpt.isEmpty)
- }
-
- test("create reference position from mapped read but contig is underspecified") {
- val contig = Contig.newBuilder
- // contigName is NOT set
- //.setContigName("chr1")
- .build
-
- val read = AlignmentRecord.newBuilder()
- .setReadMapped(true)
- .setStart(1L)
- .setContig(contig)
- .build()
-
- val refPosOpt = ReferencePosition(read)
-
- assert(refPosOpt.isEmpty)
- }
-
- test("create reference position from mapped read but start not specified") {
- val contig = Contig.newBuilder
- .setContigName("chr1")
- .build
-
- val read = AlignmentRecord.newBuilder()
- .setReadMapped(true)
- .setContig(contig)
- .build()
-
- val refPosOpt = ReferencePosition(read)
-
- assert(refPosOpt.isEmpty)
- }
-
test("create reference position from variant") {
val variant = Variant.newBuilder()
.setContig(Contig.newBuilder.setContigName("chr10").build())
@@ -127,81 +70,4 @@ class ReferencePositionSuite extends FunSuite {
assert(refPos.referenceName === "chr10")
assert(refPos.pos === 100L)
}
-
- test("liftOverToReference works with a multi-block alignment on the forward strand") {
- val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = false),
- ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = false),
- ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = false))
-
- val p0 = ReferencePositionWithOrientation.liftOverToReference(0, exons)
- assert(p0.refPos.referenceName === "1")
- assert(p0.refPos.pos === 100)
-
- val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
- assert(p1.refPos.referenceName === "1")
- assert(p1.refPos.pos === 150)
-
- val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
- assert(p2.refPos.referenceName === "1")
- assert(p2.refPos.pos === 350)
-
- val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
- assert(p3.refPos.referenceName === "1")
- assert(p3.refPos.pos === 550)
- }
-
- test("liftOverToReference works with a multi-block alignment on the reverse strand") {
- val exons = Seq(ReferenceRegionWithOrientation("1", 100, 200, negativeStrand = true),
- ReferenceRegionWithOrientation("1", 300, 400, negativeStrand = true),
- ReferenceRegionWithOrientation("1", 500, 600, negativeStrand = true))
-
- val p1 = ReferencePositionWithOrientation.liftOverToReference(50, exons)
- assert(p1.refPos.referenceName === "1")
- assert(p1.refPos.pos === 549)
-
- val p2 = ReferencePositionWithOrientation.liftOverToReference(150, exons)
- assert(p2.refPos.referenceName === "1")
- assert(p2.refPos.pos === 349)
-
- val p3 = ReferencePositionWithOrientation.liftOverToReference(250, exons)
- assert(p3.refPos.referenceName === "1")
- assert(p3.refPos.pos === 149)
- }
-
- test("lift over between two transcripts on the forward strand") {
- // create mappings for transcripts
- val t1 = Seq(ReferenceRegionWithOrientation("chr0", 0L, 201L, negativeStrand = false))
- val t2 = Seq(ReferenceRegionWithOrientation("chr0", 50L, 101L, negativeStrand = false),
- ReferenceRegionWithOrientation("chr0", 175L, 201L, negativeStrand = false))
-
- // check forward strand
- val pos = ReferencePositionWithOrientation.liftOverToReference(60, t1)
-
- assert(pos.refPos.referenceName === "chr0")
- assert(pos.refPos.pos === 60L)
- assert(!pos.negativeStrand)
-
- val idx = pos.liftOverFromReference(t2)
-
- assert(idx === 10L)
- }
-
- test("lift over between two transcripts on the reverse strand") {
- // create mappings for transcripts
- val t1 = Seq(ReferenceRegionWithOrientation("chr0", 0L, 201L, negativeStrand = true))
- val t2 = Seq(ReferenceRegionWithOrientation("chr0", 175L, 201L, negativeStrand = true),
- ReferenceRegionWithOrientation("chr0", 50L, 101L, negativeStrand = true))
-
- // check reverse strand
- val idx = ReferencePositionWithOrientation(ReferencePosition("chr0", 190L), negativeStrand = true)
- .liftOverFromReference(t2)
-
- assert(idx === 11L)
-
- val pos = ReferencePositionWithOrientation.liftOverToReference(idx, t1)
-
- assert(pos.refPos.referenceName === "chr0")
- assert(pos.refPos.pos === 189L)
- assert(pos.negativeStrand)
- }
}
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
index dbb9b5b4c0..f6fbc2fc3d 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.adam.models
import org.scalatest._
-import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig }
+import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig, Strand }
class ReferenceRegionSuite extends FunSuite {
@@ -233,68 +233,21 @@ class ReferenceRegionSuite extends FunSuite {
def point(refName: String, pos: Long): ReferencePosition =
ReferencePosition(refName, pos)
- test("build oriented reference region from non-oriented") {
- val rrf = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = false)
- assert(rrf.referenceName === "chr1")
- assert(rrf.start === 10L)
- assert(rrf.end === 20L)
- assert(!rrf.negativeStrand)
-
- val rrr = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = true)
- assert(rrr.referenceName === "chr1")
- assert(rrr.start === 10L)
- assert(rrr.end === 20L)
- assert(rrr.negativeStrand)
- }
-
- test("comparison tests for oriented reference region") {
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .contains(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)))
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = true)
- .contains(ReferenceRegionWithOrientation("chr1", 15L, 17L, negativeStrand = true)))
-
- val rrf = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = false)
- val rrr = ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = true)
- assert(!rrf.contains(rrr))
-
- assert(!ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .contains(ReferenceRegionWithOrientation("chr2", 10L, 20L, negativeStrand = false)))
- assert(!ReferenceRegionWithOrientation("chr1", 20L, 50L, negativeStrand = true)
- .contains(ReferenceRegionWithOrientation("chr1", 50L, 100L, negativeStrand = true)))
- }
-
- test("comparison tests for oriented reference region vs position") {
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 10L), negativeStrand = false)))
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = true)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 17L), negativeStrand = true)))
-
- assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = false)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 17L), negativeStrand = true)))
- assert(!ReferenceRegionWithOrientation(ReferenceRegion("chr1", 10L, 20L), negativeStrand = true)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 10L), negativeStrand = false)))
-
- assert(!ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr2", 10L), negativeStrand = false)))
- assert(!ReferenceRegionWithOrientation("chr1", 20L, 50L, negativeStrand = true)
- .contains(ReferencePositionWithOrientation(ReferencePosition("chr1", 100L), negativeStrand = true)))
- }
-
test("overlap tests for oriented reference region") {
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .overlaps(ReferenceRegionWithOrientation("chr1", 15L, 25L, negativeStrand = false)))
- assert(ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = true)
- .overlaps(ReferenceRegionWithOrientation("chr1", 5L, 15L, negativeStrand = true)))
-
- assert(!ReferenceRegionWithOrientation("chr1", 10L, 20L, negativeStrand = false)
- .overlaps(ReferenceRegionWithOrientation("chr2", 10L, 20L, negativeStrand = false)))
- assert(!ReferenceRegionWithOrientation("chr1", 20L, 50L, negativeStrand = true)
- .overlaps(ReferenceRegionWithOrientation("chr1", 51L, 100L, negativeStrand = true)))
+ assert(ReferenceRegion("chr1", 10L, 20L, Strand.Forward)
+ .overlaps(ReferenceRegion("chr1", 15L, 25L, Strand.Forward)))
+ assert(ReferenceRegion("chr1", 10L, 20L, Strand.Reverse)
+ .overlaps(ReferenceRegion("chr1", 5L, 15L, Strand.Reverse)))
+
+ assert(!ReferenceRegion("chr1", 10L, 20L, Strand.Forward)
+ .overlaps(ReferenceRegion("chr2", 10L, 20L, Strand.Forward)))
+ assert(!ReferenceRegion("chr1", 20L, 50L, Strand.Reverse)
+ .overlaps(ReferenceRegion("chr1", 51L, 100L, Strand.Reverse)))
}
test("check the width of a reference region") {
assert(ReferenceRegion("chr1", 100, 201).width === 101)
- assert(ReferenceRegionWithOrientation("chr2", 200, 401, negativeStrand = false).width === 201)
- assert(ReferenceRegionWithOrientation("chr3", 399, 1000, negativeStrand = true).width === 601)
+ assert(ReferenceRegion("chr2", 200, 401, Strand.Forward).width === 201)
+ assert(ReferenceRegion("chr3", 399, 1000, Strand.Reverse).width === 601)
}
}
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala
index 29e135a9d5..68313e6548 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala
@@ -28,7 +28,7 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
AlignmentRecord.newBuilder().setReadMapped(false).build()
}
- def createMappedRead(referenceName: String, position: Long,
+ def createMappedRead(referenceName: String, start: Long, end: Long,
readName: String = UUID.randomUUID().toString, avgPhredScore: Int = 20,
numClippedBases: Int = 0, isPrimaryAlignment: Boolean = true,
isNegativeStrand: Boolean = false) = {
@@ -37,15 +37,15 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
val cigar = if (numClippedBases > 0) "%dS%dM".format(numClippedBases, 100 - numClippedBases) else "100M"
val contig = Contig.newBuilder
- .setContigName("reference%s".format(referenceName))
+ .setContigName(referenceName)
.build
AlignmentRecord.newBuilder()
.setContig(contig)
- .setStart(position)
+ .setStart(start)
.setQual(qual)
.setCigar(cigar)
- .setEnd(if (isNegativeStrand) position - 100 + numClippedBases else position + 100 - numClippedBases)
+ .setEnd(end)
.setReadMapped(true)
.setPrimaryAlignment(isPrimaryAlignment)
.setReadName(readName)
@@ -56,8 +56,8 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
.build()
}
- def createPair(firstReferenceName: String, firstPosition: Long,
- secondReferenceName: String, secondPosition: Long,
+ def createPair(firstReferenceName: String, firstStart: Long, firstEnd: Long,
+ secondReferenceName: String, secondStart: Long, secondEnd: Long,
readName: String = UUID.randomUUID().toString,
avgPhredScore: Int = 20): Seq[AlignmentRecord] = {
val firstContig = Contig.newBuilder
@@ -68,19 +68,19 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
.setContigName(secondReferenceName)
.build
- val firstOfPair = createMappedRead(firstReferenceName, firstPosition,
+ val firstOfPair = createMappedRead(firstReferenceName, firstStart, firstEnd,
readName = readName, avgPhredScore = avgPhredScore)
firstOfPair.setFirstOfPair(true)
firstOfPair.setMateMapped(true)
firstOfPair.setMateContig(secondContig)
- firstOfPair.setMateAlignmentStart(secondPosition)
+ firstOfPair.setMateAlignmentStart(secondStart)
firstOfPair.setReadPaired(true)
- val secondOfPair = createMappedRead(secondReferenceName, secondPosition,
+ val secondOfPair = createMappedRead(secondReferenceName, secondStart, secondEnd,
readName = readName, avgPhredScore = avgPhredScore, isNegativeStrand = true)
secondOfPair.setSecondOfPair(true)
secondOfPair.setMateMapped(true)
secondOfPair.setMateContig(firstContig)
- secondOfPair.setMateAlignmentStart(firstPosition)
+ secondOfPair.setMateAlignmentStart(firstStart)
secondOfPair.setReadPaired(true)
Seq(firstOfPair, secondOfPair)
}
@@ -90,15 +90,15 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
}
sparkTest("single read") {
- val read = createMappedRead("0", 100)
+ val read = createMappedRead("0", 100, 200)
val marked = markDuplicates(read)
// Can't have duplicates with a single read, should return the read unchanged.
assert(marked(0) == read)
}
sparkTest("reads at different positions") {
- val read1 = createMappedRead("0", 42)
- val read2 = createMappedRead("0", 43)
+ val read1 = createMappedRead("0", 42, 142)
+ val read2 = createMappedRead("0", 43, 143)
val marked = markDuplicates(read1, read2)
// Reads shouldn't be modified
assert(marked.contains(read1) && marked.contains(read2))
@@ -106,9 +106,9 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("reads at the same position") {
val poorReads = for (i <- 0 until 10) yield {
- createMappedRead("1", 42, avgPhredScore = 20, readName = "poor%d".format(i))
+ createMappedRead("1", 42, 142, avgPhredScore = 20, readName = "poor%d".format(i))
}
- val bestRead = createMappedRead("1", 42, avgPhredScore = 30, readName = "best")
+ val bestRead = createMappedRead("1", 42, 142, avgPhredScore = 30, readName = "best")
val marked = markDuplicates(List(bestRead) ++ poorReads: _*)
val (dups, nonDup) = marked.partition(p => p.getDuplicateRead)
assert(nonDup.size == 1 && nonDup(0) == bestRead)
@@ -117,12 +117,12 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("reads at the same position with clipping") {
val poorClippedReads = for (i <- 0 until 5) yield {
- createMappedRead("1", 44, numClippedBases = 2, avgPhredScore = 20, readName = "poorClipped%d".format(i))
+ createMappedRead("1", 44, 142, numClippedBases = 2, avgPhredScore = 20, readName = "poorClipped%d".format(i))
}
val poorUnclippedReads = for (i <- 0 until 5) yield {
- createMappedRead("1", 42, avgPhredScore = 20, readName = "poorUnclipped%d".format(i))
+ createMappedRead("1", 42, 142, avgPhredScore = 20, readName = "poorUnclipped%d".format(i))
}
- val bestRead = createMappedRead("1", 42, avgPhredScore = 30, readName = "best")
+ val bestRead = createMappedRead("1", 42, 142, avgPhredScore = 30, readName = "best")
val marked = markDuplicates(List(bestRead) ++ poorClippedReads ++ poorUnclippedReads: _*)
val (dups, nonDup) = marked.partition(p => p.getDuplicateRead)
assert(nonDup.size == 1 && nonDup(0) == bestRead)
@@ -131,9 +131,9 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("reads on reverse strand") {
val poorReads = for (i <- 0 until 7) yield {
- createMappedRead("10", 42, isNegativeStrand = true, avgPhredScore = 20, readName = "poor%d".format(i))
+ createMappedRead("10", 42, 142, isNegativeStrand = true, avgPhredScore = 20, readName = "poor%d".format(i))
}
- val bestRead = createMappedRead("10", 42, isNegativeStrand = true, avgPhredScore = 30, readName = "best")
+ val bestRead = createMappedRead("10", 42, 142, isNegativeStrand = true, avgPhredScore = 30, readName = "best")
val marked = markDuplicates(List(bestRead) ++ poorReads: _*)
val (dups, nonDup) = marked.partition(p => p.getDuplicateRead)
assert(nonDup.size == 1 && nonDup(0) == bestRead)
@@ -151,9 +151,9 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("read pairs") {
val poorPairs = for (
i <- 0 until 10;
- read <- createPair("0", 10, "0", 210, avgPhredScore = 20, readName = "poor%d".format(i))
+ read <- createPair("0", 10, 110, "0", 110, 210, avgPhredScore = 20, readName = "poor%d".format(i))
) yield read
- val bestPair = createPair("0", 10, "0", 210, avgPhredScore = 30, readName = "best")
+ val bestPair = createPair("0", 10, 110, "0", 110, 210, avgPhredScore = 30, readName = "best")
val marked = markDuplicates(bestPair ++ poorPairs: _*)
val (dups, nonDups) = marked.partition(_.getDuplicateRead)
assert(nonDups.size == 2 && nonDups.forall(p => p.getReadName.toString == "best"))
@@ -162,10 +162,10 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("read pairs with fragments") {
val fragments = for (i <- 0 until 10) yield {
- createMappedRead("2", 33, avgPhredScore = 40, readName = "fragment%d".format(i))
+ createMappedRead("2", 33, 133, avgPhredScore = 40, readName = "fragment%d".format(i))
}
// Even though the phred score is lower, pairs always score higher than fragments
- val pairs = createPair("2", 33, "2", 200, avgPhredScore = 20, readName = "pair")
+ val pairs = createPair("2", 33, 133, "2", 100, 200, avgPhredScore = 20, readName = "pair")
val marked = markDuplicates(fragments ++ pairs: _*)
val (dups, nonDups) = marked.partition(_.getDuplicateRead)
assert(nonDups.size == 2 && nonDups.forall(p => p.getReadName.toString == "pair"))
@@ -182,9 +182,9 @@ class MarkDuplicatesSuite extends ADAMFunSuite {
sparkTest("read pairs that cross chromosomes") {
val poorPairs = for (
i <- 0 until 10;
- read <- createPair("ref0", 10, "ref1", 210, avgPhredScore = 20, readName = "poor%d".format(i))
+ read <- createPair("ref0", 10, 110, "ref1", 110, 210, avgPhredScore = 20, readName = "poor%d".format(i))
) yield read
- val bestPair = createPair("ref0", 10, "ref1", 210, avgPhredScore = 30, readName = "best")
+ val bestPair = createPair("ref0", 10, 110, "ref1", 110, 210, avgPhredScore = 30, readName = "best")
val marked = markDuplicates(bestPair ++ poorPairs: _*)
val (dups, nonDups) = marked.partition(_.getDuplicateRead)
assert(nonDups.size == 2 && nonDups.forall(p => p.getReadName.toString == "best"))
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibrationSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibrationSuite.scala
index 2c8f7d893a..6adad99132 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibrationSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibrationSuite.scala
@@ -41,11 +41,9 @@ class BaseQualityRecalibrationSuite extends ADAMFunSuite {
assert(bqsr.result.count == reads.count)
// Compare the ObservationTables
- val referenceObs: Set[String] = scala.io.Source.fromFile(new File(obsFilepath)).getLines().filter(_.length > 0).toSet
- val testObs: Set[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSet
- assert(testObs == referenceObs)
-
- // TODO: Compare `result` against the reference output
+ val referenceObs: Seq[String] = scala.io.Source.fromFile(new File(obsFilepath)).getLines().filter(_.length > 0).toSeq.sortWith((kv1, kv2) => kv1.compare(kv2) < 0)
+ val testObs: Seq[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSeq.sortWith((kv1, kv2) => kv1.compare(kv2) < 0)
+ referenceObs.zip(testObs).foreach(p => assert(p._1 === p._2))
}
sparkTest("BQSR Test Input #1 w/ VCF Sites") {