Merge pull request #6 from markhamstra/streamingIterable
SPY-287 updated streaming iterable
Showing 4 changed files with 103 additions and 76 deletions.
core/src/main/scala/org/apache/spark/rdd/RDDiterator.scala
76 changes: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
package org.apache.spark.rdd

import scala.concurrent.{Await, Future}
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.duration.Duration
import scala.annotation.tailrec
import scala.collection.mutable
import org.apache.spark.rdd.RDDiterator._
import org.apache.spark.FutureAction

/**
 * Iterator over all elements of an RDD that fetches partitions to the driver process in
 * batches, rather than materializing the whole RDD at once.
 *
 * @param rdd RDD to iterate
 * @param prefetchPartitions The number of partitions to prefetch. If < 1, nothing is
 *                           prefetched; otherwise prefetchPartitions / partitionBatchSize
 *                           batches (integer division) are fetched eagerly.
 * @param partitionBatchSize How many partitions to fetch per job.
 * @param timeOut How long to wait for each partition fetch before failing.
 */
class RDDiterator[T: ClassManifest](rdd: RDD[T], prefetchPartitions: Int, partitionBatchSize: Int,
    timeOut: Duration)
  extends Iterator[T] {

  val batchSize = math.max(1, partitionBatchSize)
  val partitionsBatches: Iterator[Seq[Int]] = Range(0, rdd.partitions.size).grouped(batchSize)
  val pendingFetchesQueue = mutable.Queue.empty[Future[Seq[Seq[T]]]]
  // Eagerly queue the requested number of prefetch batches.
  (0 until math.max(0, prefetchPartitions / batchSize)).foreach(_ => enqueueDataFetch())

  var currentIterator: Iterator[T] = Iterator.empty

  // Recursive, so the result type must be explicit for the compiler (and for @tailrec).
  @tailrec
  final def hasNext: Boolean = {
    if (currentIterator.hasNext) {
      // Still values left in the current batch of partitions.
      true
    } else {
      // Move on to the next batch, queueing a new prefetch to keep the pipeline full.
      enqueueDataFetch()
      if (pendingFetchesQueue.isEmpty) {
        // No more partitions.
        currentIterator = Iterator.empty
        false
      } else {
        val future = pendingFetchesQueue.dequeue()
        currentIterator = Await.result(future, timeOut).flatten.iterator
        // The next batch might be empty, so check again.
        this.hasNext
      }
    }
  }

  def next(): T = {
    hasNext
    currentIterator.next()
  }

  def enqueueDataFetch(): Unit = {
    if (partitionsBatches.hasNext) {
      pendingFetchesQueue.enqueue(fetchData(partitionsBatches.next(), rdd))
    }
  }
}

object RDDiterator {
  private def fetchData[T: ClassManifest](partitionIndexes: Seq[Int],
      rdd: RDD[T]): FutureAction[Seq[Seq[T]]] = {
    // Pre-size the buffer and assign by index: tasks may complete out of order,
    // and appending on completion would scramble the partition order.
    val results = ArrayBuffer.fill[Seq[T]](partitionIndexes.size)(Nil)
    rdd.context.submitJob[T, Array[T], Seq[Seq[T]]](rdd,
      iter => iter.toArray,
      partitionIndexes,
      (index: Int, res: Array[T]) => results(index) = res,
      results.toSeq)
  }
}
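
For context, a minimal usage sketch, not part of this commit: it assumes Scala 2.10-era Spark, a live SparkContext named sc, and illustrative parameter values.

  import scala.concurrent.duration._
  import org.apache.spark.rdd.RDDiterator

  val rdd = sc.parallelize(1 to 1000000, 100)

  // Stream the RDD through the driver a few partitions at a time:
  // prefetch ~4 partitions, fetch 2 partitions per job, and allow each
  // fetch five minutes before Await.result times out.
  val it = new RDDiterator[Int](rdd, prefetchPartitions = 4, partitionBatchSize = 2,
    timeOut = 5.minutes)
  val total = it.foldLeft(0L)(_ + _)

Because RDDiterator is an ordinary Iterator[T], the usual collection methods (foldLeft, take, grouped, and so on) work without pulling the full RDD into driver memory at once.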
core/src/main/scala/org/apache/spark/util/RDDiterable.scala
59 changes: 0 additions & 59 deletions
This file was deleted.