Skip to content

Commit

Permalink
Fix bug in localKeySort, re-enable orderedRDD constructor logic
Browse files Browse the repository at this point in the history
  • Loading branch information
tpoterba committed Aug 22, 2016
1 parent 5f69629 commit efbc826
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 17 deletions.
5 changes: 0 additions & 5 deletions src/main/scala/org/apache/spark/rdd/OrderedRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,6 @@ object OrderedRDD {
new OrderedRDD[T, K, V](new ShuffledRDD[K, V, V](rdd, partitioner).setKeyOrdering(kOrd), partitioner)
}

fromShuffle()

// FIXME testVariantTSVAnnotator breaks here (one line of the annotation is getting dropped)
/*
rdd match {
case _: OrderedRDD[T, K, V] =>
rdd.asInstanceOf[OrderedRDD[T, K, V]]
Expand All @@ -105,7 +101,6 @@ object OrderedRDD {
.getOrElse(fromShuffle())
}
}
*/
}

/**
Expand Down
10 changes: 2 additions & 8 deletions src/main/scala/org/broadinstitute/hail/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -599,15 +599,15 @@ class RichPairIterator[K, V](val it: Iterator[(K, V)]) extends AnyVal {

implicit val kvOrd = new Ordering[KV] {
// ascending
def compare(x: KV, y: KV): Int = - kOrd.compare(x._1, y._1)
def compare(x: KV, y: KV): Int = -kOrd.compare(x._1, y._1)
}

val bit = it.buffered

new Iterator[KV] {
val q = new mutable.PriorityQueue[(K, V)]

def hasNext: Boolean = it.hasNext || q.nonEmpty
def hasNext: Boolean = bit.hasNext || q.nonEmpty

def next(): KV = {
if (q.isEmpty) {
Expand Down Expand Up @@ -639,22 +639,16 @@ class RichPairIterator[K, V](val it: Iterator[(K, V)]) extends AnyVal {
def next(): T = {
val (k, v) = it.next()

println("k", k, bother.hasNext)

while (bother.hasNext && bother.head._1 < k) {
val n = bother.next()
println("early skipped", n)
}

if (bother.hasNext && bother.head._1 == k) {
val (k2, v2) = bother.next()

println("k2", k2)

/* implement distinct on the right */
while (bother.hasNext && bother.head._1 == k) {
val n = bother.next()
println("skipped", n)
}

(k, (v, Some(v2)))
Expand Down
8 changes: 4 additions & 4 deletions src/test/scala/org/broadinstitute/hail/utils/UtilsSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,21 @@ class UtilsSuite extends SparkSuite {
check1 && check2
}

p.check() // important to keep size at ~1000 to get reasonable levels of match and no match
p.check()
}

@Test def testKeySortIterator() {
val g = for (chr <- Gen.oneOf("1", "2");
pos <- Gen.choose(1, 25);
pos <- Gen.choose(1, 50);
ref <- genDNAString;
alt <- genDNAString.filter(_ != ref);
v <- arbitrary[Int]) yield (Variant(chr, pos, ref, alt), v)
val p = Prop.forAll(Gen.buildableOf[IndexedSeq, (Variant, Int)](g)) { is =>
val kSorted = is.sortBy(_._1)
val tSorted = is.sortBy(_._1.locus)
val kSorted2 = tSorted.iterator.localKeySort[Locus](_.locus).toIndexedSeq
val localKeySort = is.sortBy(_._1.locus).iterator.localKeySort[Locus](_.locus).toIndexedSeq

kSorted == kSorted2
kSorted == localKeySort
}

p.check()
Expand Down

0 comments on commit efbc826

Please sign in to comment.