Minor Scala style fixes

zapletal-martin · Jun 9, 2015 · 7e8e0fe · 7e8e0fe
1 parent 01e8721
commit 7e8e0fe
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 8 deletions.
diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleReader.scala
@@ -53,15 +53,17 @@ private[spark] class HashShuffleReader[K, C](
     val recordIterator = wrappedStreams.flatMap { wrappedStream =>
       val kvIter = serializerInstance.deserializeStream(wrappedStream).asKeyValueIterator
       CompletionIterator[(Any, Any), Iterator[(Any, Any)]](kvIter, {
-        // Close the stream once all the records have been read from it
+        // Close the stream once all the records have been read from it to free underlying
+        // ManagedBuffer as soon as possible. Note that in case of task failure, the task's
+        // TaskCompletionListener will make sure this is released.
         wrappedStream.close()
       })
     }
 
     // Update read metrics for each record materialized
-    val iter = new InterruptibleIterator[Any](context, recordIterator) {
+    val iter = new InterruptibleIterator[(Any, Any)](context, recordIterator) {
      val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency()
-     override def next(): Any = {
+     override def next(): (Any, Any) = {
        readMetrics.incRecordsRead(1)
        delegate.next()
      }
@@ -70,14 +72,14 @@ private[spark] class HashShuffleReader[K, C](
     val aggregatedIter: Iterator[Product2[K, C]] = if (dep.aggregator.isDefined) {
       if (dep.mapSideCombine) {
         // We are reading values that are already combined
-        val combinedKeyValuesIterator = iter.asInstanceOf[Iterator[(K,C)]]
+        val combinedKeyValuesIterator = iter.asInstanceOf[Iterator[(K, C)]]
         new InterruptibleIterator(context,
           dep.aggregator.get.combineCombinersByKey(combinedKeyValuesIterator, context))
       } else {
         // We don't know the value type, but also don't care -- the dependency *should*
         // have made sure its compatible w/ this aggregator, which will convert the value
         // type to the combined type C
-        val keyValuesIterator = iter.asInstanceOf[Iterator[(K,Nothing)]]
+        val keyValuesIterator = iter.asInstanceOf[Iterator[(K, Nothing)]]
         new InterruptibleIterator(context,
           dep.aggregator.get.combineValuesByKey(keyValuesIterator, context))
       }

diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -20,8 +20,7 @@ package org.apache.spark.storage
 import java.io.InputStream
 import java.util.concurrent.LinkedBlockingQueue
 
-import scala.collection.mutable
-import scala.collection.mutable.{ArrayBuffer, HashSet}
+import scala.collection.mutable.{ArrayBuffer, HashSet, Queue}
 import scala.util.{Failure, Try}
 
 import org.apache.spark.network.buffer.ManagedBuffer
@@ -96,7 +95,7 @@ final class ShuffleBlockFetcherIterator(
    * Queue of fetch requests to issue; we'll pull requests off this gradually to make sure that
    * the number of bytes in flight is limited to maxBytesInFlight.
    */
-  private[this] val fetchRequests = new mutable.Queue[FetchRequest]
+  private[this] val fetchRequests = new Queue[FetchRequest]
 
   /** Current bytes in flight from our requests */
   private[this] var bytesInFlight = 0L
@@ -275,6 +274,12 @@ final class ShuffleBlockFetcherIterator(
 
   override def hasNext: Boolean = numBlocksProcessed < numBlocksToFetch
 
+  /**
+   * Fetches the next (BlockId, Try[InputStream]). If a task fails, the ManagedBuffers
+   * underlying each InputStream will be freed by the cleanup() method registered with the
+   * TaskCompletionListener. However, callers should close() these InputStreams
+   * as soon as they are no longer needed, in order to release memory as early as possible.
+   */
   override def next(): (BlockId, Try[InputStream]) = {
     numBlocksProcessed += 1
     val startFetchWait = System.currentTimeMillis()