From c8e0c0dc17ad24870f7f5a03ae1246a652a1c74e Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 6 Nov 2013 09:36:14 -0800 Subject: [PATCH 01/39] Merge pull request #145 from aarondav/sls-fix Attempt to fix SparkListenerSuite breakage Could not reproduce locally, but this test could've been flaky if the build machine was too fast, due to typo. (index 0 is intentionally slowed down to ensure total time is >= 1 ms) This should be merged into branch-0.8 as well. (cherry picked from commit 951024feeadcf73b50c3c80ec9e75c7e2214a7a4) Signed-off-by: Reynold Xin --- .../scala/org/apache/spark/scheduler/SparkListenerSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index f7f599532a96c..1fd76420eaa24 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -83,7 +83,7 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc i } - val d = sc.parallelize(1 to 1e4.toInt, 64).map{i => w(i)} + val d = sc.parallelize(0 to 1e4.toInt, 64).map{i => w(i)} d.count() assert(sc.dagScheduler.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) listener.stageInfos.size should be (1) From 1d9412b6dd4575808ed095c5619fe44a1459e125 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 6 Nov 2013 13:27:47 -0800 Subject: [PATCH 02/39] Merge pull request #144 from liancheng/runjob-clean Removed unused return value in SparkContext.runJob Return type of this `runJob` version is `Unit`: def runJob[T, U: ClassManifest]( rdd: RDD[T], func: (TaskContext, Iterator[T]) => U, partitions: Seq[Int], allowLocal: Boolean, resultHandler: (Int, U) => Unit) { ... } It's obviously unnecessary to "return" `result`. (cherry picked from commit aadeda5e7697a433c82879033e758fbc403680dc) Signed-off-by: Reynold Xin --- core/src/main/scala/org/apache/spark/SparkContext.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 10d3c53f10c9d..1e706286f7e2e 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -799,11 +799,10 @@ class SparkContext( val cleanedFunc = clean(func) logInfo("Starting job: " + callSite) val start = System.nanoTime - val result = dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, allowLocal, + dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, allowLocal, resultHandler, localProperties.get) logInfo("Job finished: " + callSite + ", took " + (System.nanoTime - start) / 1e9 + " s") rdd.doCheckpoint() - result } /** From d5ae953c3045ef8e8e69f0f79eab2a063b8c2868 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 6 Nov 2013 23:22:47 -0800 Subject: [PATCH 03/39] Merge pull request #23 from jerryshao/multi-user Add Spark multi-user support for standalone mode and Mesos This PR add multi-user support for Spark both standalone mode and Mesos (coarse and fine grained ) mode, user can specify the user name who submit app through environment variable `SPARK_USER` or use default one. Executor will communicate with Hadoop using specified user name. Also I fixed one bug in JobLogger when different user wrote job log to specified folder which has no right file permission. 
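For illustration only, the mechanism boils down to the executor wrapping its task loop in the new runAsUser helper; runTask() below is a stand-in for the executor's existing run() body, everything else is taken from this patch:

    import org.apache.spark.SparkContext
    import org.apache.spark.deploy.SparkHadoopUtil

    val sparkUser = Option(System.getenv("SPARK_USER"))
      .getOrElse(SparkContext.SPARK_UNKNOWN_USER)
    SparkHadoopUtil.get.runAsUser(sparkUser) { () =>
      // Hadoop sees `sparkUser` for any HDFS access made inside this closure
      runTask()   // stand-in for the executor's run() body
    }

runAsUser itself just creates a remote UserGroupInformation for the given user and executes the closure inside ugi.doAs.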
I separate previous [PR750](https://github.com/mesos/spark/pull/750) into two PRs, in this PR I only solve multi-user support problem. I will try to solve security auth problem in subsequent PR because security auth is a complicated problem especially for Shark Server like long-run app (both Kerberos TGT and HDFS delegation token should be renewed or re-created through app's run time). (cherry picked from commit be7e8da98ad04d66b61cd7fc8af7ae61a649d71c) Signed-off-by: Reynold Xin --- .../scala/org/apache/spark/SparkContext.scala | 10 + .../apache/spark/deploy/SparkHadoopUtil.scala | 18 +- .../org/apache/spark/executor/Executor.scala | 7 +- .../apache/spark/scheduler/JobLogger.scala | 757 +++++++++--------- .../spark/scheduler/JobLoggerSuite.scala | 4 +- 5 files changed, 417 insertions(+), 379 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 1e706286f7e2e..512daf30cc3c5 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -145,6 +145,14 @@ class SparkContext( executorEnvs ++= environment } + // Set SPARK_USER for user who is running SparkContext. + val sparkUser = Option { + Option(System.getProperty("user.name")).getOrElse(System.getenv("SPARK_USER")) + }.getOrElse { + SparkContext.SPARK_UNKNOWN_USER + } + executorEnvs("SPARK_USER") = sparkUser + // Create and start the scheduler private[spark] var taskScheduler: TaskScheduler = { // Regular expression used for local[N] master format @@ -984,6 +992,8 @@ object SparkContext { private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id" + private[spark] val SPARK_UNKNOWN_USER = "" + implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] { def addInPlace(t1: Double, t2: Double): Double = t1 + t2 def zero(initialValue: Double) = 0.0 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 6bc846aa92eb4..c29a30184af13 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -17,8 +17,11 @@ package org.apache.spark.deploy +import java.security.PrivilegedExceptionAction + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.SparkException @@ -27,6 +30,15 @@ import org.apache.spark.SparkException */ private[spark] class SparkHadoopUtil { + val conf = newConfiguration() + UserGroupInformation.setConfiguration(conf) + + def runAsUser(user: String)(func: () => Unit) { + val ugi = UserGroupInformation.createRemoteUser(user) + ugi.doAs(new PrivilegedExceptionAction[Unit] { + def run: Unit = func() + }) + } /** * Return an appropriate (subclass) of Configuration. 
Creating config can initializes some Hadoop @@ -42,9 +54,9 @@ class SparkHadoopUtil { def isYarnMode(): Boolean = { false } } - + object SparkHadoopUtil { - private val hadoop = { + private val hadoop = { val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) if (yarnMode) { try { @@ -56,7 +68,7 @@ object SparkHadoopUtil { new SparkHadoopUtil } } - + def get: SparkHadoopUtil = { hadoop } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index b773346df305d..5c9bb9db1ce9e 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -25,8 +25,9 @@ import java.util.concurrent._ import scala.collection.JavaConversions._ import scala.collection.mutable.HashMap -import org.apache.spark.scheduler._ import org.apache.spark._ +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.scheduler._ import org.apache.spark.storage.{StorageLevel, TaskResultBlockId} import org.apache.spark.util.Utils @@ -129,6 +130,8 @@ private[spark] class Executor( // Maintains the list of running tasks. private val runningTasks = new ConcurrentHashMap[Long, TaskRunner] + val sparkUser = Option(System.getenv("SPARK_USER")).getOrElse(SparkContext.SPARK_UNKNOWN_USER) + def launchTask(context: ExecutorBackend, taskId: Long, serializedTask: ByteBuffer) { val tr = new TaskRunner(context, taskId, serializedTask) runningTasks.put(taskId, tr) @@ -176,7 +179,7 @@ private[spark] class Executor( } } - override def run() { + override def run(): Unit = SparkHadoopUtil.get.runAsUser(sparkUser) { () => val startTime = System.currentTimeMillis() SparkEnv.set(env) Thread.currentThread.setContextClassLoader(replClassLoader) diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala index 94f8b0128502c..60927831a159a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -1,373 +1,384 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler - -import java.io.PrintWriter -import java.io.File -import java.io.FileNotFoundException -import java.text.SimpleDateFormat -import java.util.{Date, Properties} -import java.util.concurrent.LinkedBlockingQueue - -import scala.collection.mutable.{HashMap, HashSet, ListBuffer} - -import org.apache.spark._ -import org.apache.spark.rdd.RDD -import org.apache.spark.executor.TaskMetrics -import org.apache.spark.storage.StorageLevel - -/** - * A logger class to record runtime information for jobs in Spark. 
This class outputs one log file - * for each Spark job, containing RDD graph, tasks start/stop, shuffle information. - * JobLogger is a subclass of SparkListener, use addSparkListener to add JobLogger to a SparkContext - * after the SparkContext is created. - * Note that each JobLogger only works for one SparkContext - * @param logDirName The base directory for the log files. - */ -class JobLogger(val logDirName: String) extends SparkListener with Logging { - - private val logDir = Option(System.getenv("SPARK_LOG_DIR")).getOrElse("/tmp/spark") - - private val jobIDToPrintWriter = new HashMap[Int, PrintWriter] - private val stageIDToJobID = new HashMap[Int, Int] - private val jobIDToStages = new HashMap[Int, ListBuffer[Stage]] - private val DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") - private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents] - - createLogDir() - def this() = this(String.valueOf(System.currentTimeMillis())) - - // The following 5 functions are used only in testing. - private[scheduler] def getLogDir = logDir - private[scheduler] def getJobIDtoPrintWriter = jobIDToPrintWriter - private[scheduler] def getStageIDToJobID = stageIDToJobID - private[scheduler] def getJobIDToStages = jobIDToStages - private[scheduler] def getEventQueue = eventQueue - - /** Create a folder for log files, the folder's name is the creation time of jobLogger */ - protected def createLogDir() { - val dir = new File(logDir + "/" + logDirName + "/") - if (!dir.exists() && !dir.mkdirs()) { - logError("Error creating log directory: " + logDir + "/" + logDirName + "/") - } - } - - /** - * Create a log file for one job - * @param jobID ID of the job - * @exception FileNotFoundException Fail to create log file - */ - protected def createLogWriter(jobID: Int) { - try { - val fileWriter = new PrintWriter(logDir + "/" + logDirName + "/" + jobID) - jobIDToPrintWriter += (jobID -> fileWriter) - } catch { - case e: FileNotFoundException => e.printStackTrace() - } - } - - /** - * Close log file, and clean the stage relationship in stageIDToJobID - * @param jobID ID of the job - */ - protected def closeLogWriter(jobID: Int) { - jobIDToPrintWriter.get(jobID).foreach { fileWriter => - fileWriter.close() - jobIDToStages.get(jobID).foreach(_.foreach{ stage => - stageIDToJobID -= stage.id - }) - jobIDToPrintWriter -= jobID - jobIDToStages -= jobID - } - } - - /** - * Write info into log file - * @param jobID ID of the job - * @param info Info to be recorded - * @param withTime Controls whether to record time stamp before the info, default is true - */ - protected def jobLogInfo(jobID: Int, info: String, withTime: Boolean = true) { - var writeInfo = info - if (withTime) { - val date = new Date(System.currentTimeMillis()) - writeInfo = DATE_FORMAT.format(date) + ": " +info - } - jobIDToPrintWriter.get(jobID).foreach(_.println(writeInfo)) - } - - /** - * Write info into log file - * @param stageID ID of the stage - * @param info Info to be recorded - * @param withTime Controls whether to record time stamp before the info, default is true - */ - protected def stageLogInfo(stageID: Int, info: String, withTime: Boolean = true) { - stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime)) - } - - /** - * Build stage dependency for a job - * @param jobID ID of the job - * @param stage Root stage of the job - */ - protected def buildJobDep(jobID: Int, stage: Stage) { - if (stage.jobId == jobID) { - jobIDToStages.get(jobID) match { - case Some(stageList) => stageList += stage - case 
None => val stageList = new ListBuffer[Stage] - stageList += stage - jobIDToStages += (jobID -> stageList) - } - stageIDToJobID += (stage.id -> jobID) - stage.parents.foreach(buildJobDep(jobID, _)) - } - } - - /** - * Record stage dependency and RDD dependency for a stage - * @param jobID Job ID of the stage - */ - protected def recordStageDep(jobID: Int) { - def getRddsInStage(rdd: RDD[_]): ListBuffer[RDD[_]] = { - var rddList = new ListBuffer[RDD[_]] - rddList += rdd - rdd.dependencies.foreach { - case shufDep: ShuffleDependency[_, _] => - case dep: Dependency[_] => rddList ++= getRddsInStage(dep.rdd) - } - rddList - } - jobIDToStages.get(jobID).foreach {_.foreach { stage => - var depRddDesc: String = "" - getRddsInStage(stage.rdd).foreach { rdd => - depRddDesc += rdd.id + "," - } - var depStageDesc: String = "" - stage.parents.foreach { stage => - depStageDesc += "(" + stage.id + "," + stage.shuffleDep.get.shuffleId + ")" - } - jobLogInfo(jobID, "STAGE_ID=" + stage.id + " RDD_DEP=(" + - depRddDesc.substring(0, depRddDesc.length - 1) + ")" + - " STAGE_DEP=" + depStageDesc, false) - } - } - } - - /** - * Generate indents and convert to String - * @param indent Number of indents - * @return string of indents - */ - protected def indentString(indent: Int): String = { - val sb = new StringBuilder() - for (i <- 1 to indent) { - sb.append(" ") - } - sb.toString() - } - - /** - * Get RDD's name - * @param rdd Input RDD - * @return String of RDD's name - */ - protected def getRddName(rdd: RDD[_]): String = { - var rddName = rdd.getClass.getSimpleName - if (rdd.name != null) { - rddName = rdd.name - } - rddName - } - - /** - * Record RDD dependency graph in a stage - * @param jobID Job ID of the stage - * @param rdd Root RDD of the stage - * @param indent Indent number before info - */ - protected def recordRddInStageGraph(jobID: Int, rdd: RDD[_], indent: Int) { - val rddInfo = - if (rdd.getStorageLevel != StorageLevel.NONE) { - "RDD_ID=" + rdd.id + " " + getRddName(rdd) + " CACHED" + " " + - rdd.origin + " " + rdd.generator - } else { - "RDD_ID=" + rdd.id + " " + getRddName(rdd) + " NONE" + " " + - rdd.origin + " " + rdd.generator - } - jobLogInfo(jobID, indentString(indent) + rddInfo, false) - rdd.dependencies.foreach { - case shufDep: ShuffleDependency[_, _] => - val depInfo = "SHUFFLE_ID=" + shufDep.shuffleId - jobLogInfo(jobID, indentString(indent + 1) + depInfo, false) - case dep: Dependency[_] => recordRddInStageGraph(jobID, dep.rdd, indent + 1) - } - } - - /** - * Record stage dependency graph of a job - * @param jobID Job ID of the stage - * @param stage Root stage of the job - * @param indent Indent number before info, default is 0 - */ - protected def recordStageDepGraph(jobID: Int, stage: Stage, idSet: HashSet[Int], indent: Int = 0) { - val stageInfo = if (stage.isShuffleMap) { - "STAGE_ID=" + stage.id + " MAP_STAGE SHUFFLE_ID=" + stage.shuffleDep.get.shuffleId - } else { - "STAGE_ID=" + stage.id + " RESULT_STAGE" - } - if (stage.jobId == jobID) { - jobLogInfo(jobID, indentString(indent) + stageInfo, false) - if (!idSet.contains(stage.id)) { - idSet += stage.id - recordRddInStageGraph(jobID, stage.rdd, indent) - stage.parents.foreach(recordStageDepGraph(jobID, _, idSet, indent + 2)) - } - } else { - jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false) - } - } - - /** - * Record task metrics into job log files, including execution info and shuffle metrics - * @param stageID Stage ID of the task - * @param status Status info of the task - * @param 
taskInfo Task description info - * @param taskMetrics Task running metrics - */ - protected def recordTaskMetrics(stageID: Int, status: String, - taskInfo: TaskInfo, taskMetrics: TaskMetrics) { - val info = " TID=" + taskInfo.taskId + " STAGE_ID=" + stageID + - " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime + - " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskMetrics.hostname - val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime - val readMetrics = taskMetrics.shuffleReadMetrics match { - case Some(metrics) => - " SHUFFLE_FINISH_TIME=" + metrics.shuffleFinishTime + - " BLOCK_FETCHED_TOTAL=" + metrics.totalBlocksFetched + - " BLOCK_FETCHED_LOCAL=" + metrics.localBlocksFetched + - " BLOCK_FETCHED_REMOTE=" + metrics.remoteBlocksFetched + - " REMOTE_FETCH_WAIT_TIME=" + metrics.fetchWaitTime + - " REMOTE_FETCH_TIME=" + metrics.remoteFetchTime + - " REMOTE_BYTES_READ=" + metrics.remoteBytesRead - case None => "" - } - val writeMetrics = taskMetrics.shuffleWriteMetrics match { - case Some(metrics) => " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten - case None => "" - } - stageLogInfo(stageID, status + info + executorRunTime + readMetrics + writeMetrics) - } - - /** - * When stage is submitted, record stage submit info - * @param stageSubmitted Stage submitted event - */ - override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { - stageLogInfo(stageSubmitted.stage.stageId,"STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format( - stageSubmitted.stage.stageId, stageSubmitted.stage.numTasks)) - } - - /** - * When stage is completed, record stage completion status - * @param stageCompleted Stage completed event - */ - override def onStageCompleted(stageCompleted: StageCompleted) { - stageLogInfo(stageCompleted.stage.stageId, "STAGE_ID=%d STATUS=COMPLETED".format( - stageCompleted.stage.stageId)) - } - - override def onTaskStart(taskStart: SparkListenerTaskStart) { } - - /** - * When task ends, record task completion status and metrics - * @param taskEnd Task end event - */ - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { - val task = taskEnd.task - val taskInfo = taskEnd.taskInfo - var taskStatus = "" - task match { - case resultTask: ResultTask[_, _] => taskStatus = "TASK_TYPE=RESULT_TASK" - case shuffleMapTask: ShuffleMapTask => taskStatus = "TASK_TYPE=SHUFFLE_MAP_TASK" - } - taskEnd.reason match { - case Success => taskStatus += " STATUS=SUCCESS" - recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskEnd.taskMetrics) - case Resubmitted => - taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId + - " STAGE_ID=" + task.stageId - stageLogInfo(task.stageId, taskStatus) - case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => - taskStatus += " STATUS=FETCHFAILED TID=" + taskInfo.taskId + " STAGE_ID=" + - task.stageId + " SHUFFLE_ID=" + shuffleId + " MAP_ID=" + - mapId + " REDUCE_ID=" + reduceId - stageLogInfo(task.stageId, taskStatus) - case OtherFailure(message) => - taskStatus += " STATUS=FAILURE TID=" + taskInfo.taskId + - " STAGE_ID=" + task.stageId + " INFO=" + message - stageLogInfo(task.stageId, taskStatus) - case _ => - } - } - - /** - * When job ends, recording job completion status and close log file - * @param jobEnd Job end event - */ - override def onJobEnd(jobEnd: SparkListenerJobEnd) { - val job = jobEnd.job - var info = "JOB_ID=" + job.jobId - jobEnd.jobResult match { - case JobSucceeded => info += " STATUS=SUCCESS" - case JobFailed(exception, _) => - info += " STATUS=FAILED REASON=" 
- exception.getMessage.split("\\s+").foreach(info += _ + "_") - case _ => - } - jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase) - closeLogWriter(job.jobId) - } - - /** - * Record job properties into job log file - * @param jobID ID of the job - * @param properties Properties of the job - */ - protected def recordJobProperties(jobID: Int, properties: Properties) { - if(properties != null) { - val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "") - jobLogInfo(jobID, description, false) - } - } - - /** - * When job starts, record job property and stage graph - * @param jobStart Job start event - */ - override def onJobStart(jobStart: SparkListenerJobStart) { - val job = jobStart.job - val properties = jobStart.properties - createLogWriter(job.jobId) - recordJobProperties(job.jobId, properties) - buildJobDep(job.jobId, job.finalStage) - recordStageDep(job.jobId) - recordStageDepGraph(job.jobId, job.finalStage, new HashSet[Int]) - jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED") - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +import java.io.{IOException, File, FileNotFoundException, PrintWriter} +import java.text.SimpleDateFormat +import java.util.{Date, Properties} +import java.util.concurrent.LinkedBlockingQueue + +import scala.collection.mutable.{HashMap, HashSet, ListBuffer} + +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.storage.StorageLevel + +/** + * A logger class to record runtime information for jobs in Spark. This class outputs one log file + * for each Spark job, containing RDD graph, tasks start/stop, shuffle information. + * JobLogger is a subclass of SparkListener, use addSparkListener to add JobLogger to a SparkContext + * after the SparkContext is created. + * Note that each JobLogger only works for one SparkContext + * @param logDirName The base directory for the log files. + */ + +class JobLogger(val user: String, val logDirName: String) + extends SparkListener with Logging { + + def this() = this(System.getProperty("user.name", ""), + String.valueOf(System.currentTimeMillis())) + + private val logDir = + if (System.getenv("SPARK_LOG_DIR") != null) + System.getenv("SPARK_LOG_DIR") + else + "/tmp/spark-%s".format(user) + + private val jobIDToPrintWriter = new HashMap[Int, PrintWriter] + private val stageIDToJobID = new HashMap[Int, Int] + private val jobIDToStages = new HashMap[Int, ListBuffer[Stage]] + private val DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents] + + createLogDir() + + // The following 5 functions are used only in testing. 
+ private[scheduler] def getLogDir = logDir + private[scheduler] def getJobIDtoPrintWriter = jobIDToPrintWriter + private[scheduler] def getStageIDToJobID = stageIDToJobID + private[scheduler] def getJobIDToStages = jobIDToStages + private[scheduler] def getEventQueue = eventQueue + + /** Create a folder for log files, the folder's name is the creation time of jobLogger */ + protected def createLogDir() { + val dir = new File(logDir + "/" + logDirName + "/") + if (dir.exists()) { + return + } + if (dir.mkdirs() == false) { + // JobLogger should throw a exception rather than continue to construct this object. + throw new IOException("create log directory error:" + logDir + "/" + logDirName + "/") + } + } + + /** + * Create a log file for one job + * @param jobID ID of the job + * @exception FileNotFoundException Fail to create log file + */ + protected def createLogWriter(jobID: Int) { + try { + val fileWriter = new PrintWriter(logDir + "/" + logDirName + "/" + jobID) + jobIDToPrintWriter += (jobID -> fileWriter) + } catch { + case e: FileNotFoundException => e.printStackTrace() + } + } + + /** + * Close log file, and clean the stage relationship in stageIDToJobID + * @param jobID ID of the job + */ + protected def closeLogWriter(jobID: Int) { + jobIDToPrintWriter.get(jobID).foreach { fileWriter => + fileWriter.close() + jobIDToStages.get(jobID).foreach(_.foreach{ stage => + stageIDToJobID -= stage.id + }) + jobIDToPrintWriter -= jobID + jobIDToStages -= jobID + } + } + + /** + * Write info into log file + * @param jobID ID of the job + * @param info Info to be recorded + * @param withTime Controls whether to record time stamp before the info, default is true + */ + protected def jobLogInfo(jobID: Int, info: String, withTime: Boolean = true) { + var writeInfo = info + if (withTime) { + val date = new Date(System.currentTimeMillis()) + writeInfo = DATE_FORMAT.format(date) + ": " +info + } + jobIDToPrintWriter.get(jobID).foreach(_.println(writeInfo)) + } + + /** + * Write info into log file + * @param stageID ID of the stage + * @param info Info to be recorded + * @param withTime Controls whether to record time stamp before the info, default is true + */ + protected def stageLogInfo(stageID: Int, info: String, withTime: Boolean = true) { + stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime)) + } + + /** + * Build stage dependency for a job + * @param jobID ID of the job + * @param stage Root stage of the job + */ + protected def buildJobDep(jobID: Int, stage: Stage) { + if (stage.jobId == jobID) { + jobIDToStages.get(jobID) match { + case Some(stageList) => stageList += stage + case None => val stageList = new ListBuffer[Stage] + stageList += stage + jobIDToStages += (jobID -> stageList) + } + stageIDToJobID += (stage.id -> jobID) + stage.parents.foreach(buildJobDep(jobID, _)) + } + } + + /** + * Record stage dependency and RDD dependency for a stage + * @param jobID Job ID of the stage + */ + protected def recordStageDep(jobID: Int) { + def getRddsInStage(rdd: RDD[_]): ListBuffer[RDD[_]] = { + var rddList = new ListBuffer[RDD[_]] + rddList += rdd + rdd.dependencies.foreach { + case shufDep: ShuffleDependency[_, _] => + case dep: Dependency[_] => rddList ++= getRddsInStage(dep.rdd) + } + rddList + } + jobIDToStages.get(jobID).foreach {_.foreach { stage => + var depRddDesc: String = "" + getRddsInStage(stage.rdd).foreach { rdd => + depRddDesc += rdd.id + "," + } + var depStageDesc: String = "" + stage.parents.foreach { stage => + depStageDesc += "(" + stage.id + "," 
+ stage.shuffleDep.get.shuffleId + ")" + } + jobLogInfo(jobID, "STAGE_ID=" + stage.id + " RDD_DEP=(" + + depRddDesc.substring(0, depRddDesc.length - 1) + ")" + + " STAGE_DEP=" + depStageDesc, false) + } + } + } + + /** + * Generate indents and convert to String + * @param indent Number of indents + * @return string of indents + */ + protected def indentString(indent: Int): String = { + val sb = new StringBuilder() + for (i <- 1 to indent) { + sb.append(" ") + } + sb.toString() + } + + /** + * Get RDD's name + * @param rdd Input RDD + * @return String of RDD's name + */ + protected def getRddName(rdd: RDD[_]): String = { + var rddName = rdd.getClass.getSimpleName + if (rdd.name != null) { + rddName = rdd.name + } + rddName + } + + /** + * Record RDD dependency graph in a stage + * @param jobID Job ID of the stage + * @param rdd Root RDD of the stage + * @param indent Indent number before info + */ + protected def recordRddInStageGraph(jobID: Int, rdd: RDD[_], indent: Int) { + val rddInfo = + if (rdd.getStorageLevel != StorageLevel.NONE) { + "RDD_ID=" + rdd.id + " " + getRddName(rdd) + " CACHED" + " " + + rdd.origin + " " + rdd.generator + } else { + "RDD_ID=" + rdd.id + " " + getRddName(rdd) + " NONE" + " " + + rdd.origin + " " + rdd.generator + } + jobLogInfo(jobID, indentString(indent) + rddInfo, false) + rdd.dependencies.foreach { + case shufDep: ShuffleDependency[_, _] => + val depInfo = "SHUFFLE_ID=" + shufDep.shuffleId + jobLogInfo(jobID, indentString(indent + 1) + depInfo, false) + case dep: Dependency[_] => recordRddInStageGraph(jobID, dep.rdd, indent + 1) + } + } + + /** + * Record stage dependency graph of a job + * @param jobID Job ID of the stage + * @param stage Root stage of the job + * @param indent Indent number before info, default is 0 + */ + protected def recordStageDepGraph(jobID: Int, stage: Stage, idSet: HashSet[Int], indent: Int = 0) { + val stageInfo = if (stage.isShuffleMap) { + "STAGE_ID=" + stage.id + " MAP_STAGE SHUFFLE_ID=" + stage.shuffleDep.get.shuffleId + } else { + "STAGE_ID=" + stage.id + " RESULT_STAGE" + } + if (stage.jobId == jobID) { + jobLogInfo(jobID, indentString(indent) + stageInfo, false) + if (!idSet.contains(stage.id)) { + idSet += stage.id + recordRddInStageGraph(jobID, stage.rdd, indent) + stage.parents.foreach(recordStageDepGraph(jobID, _, idSet, indent + 2)) + } + } else { + jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false) + } + } + + /** + * Record task metrics into job log files, including execution info and shuffle metrics + * @param stageID Stage ID of the task + * @param status Status info of the task + * @param taskInfo Task description info + * @param taskMetrics Task running metrics + */ + protected def recordTaskMetrics(stageID: Int, status: String, + taskInfo: TaskInfo, taskMetrics: TaskMetrics) { + val info = " TID=" + taskInfo.taskId + " STAGE_ID=" + stageID + + " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime + + " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskMetrics.hostname + val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime + val readMetrics = taskMetrics.shuffleReadMetrics match { + case Some(metrics) => + " SHUFFLE_FINISH_TIME=" + metrics.shuffleFinishTime + + " BLOCK_FETCHED_TOTAL=" + metrics.totalBlocksFetched + + " BLOCK_FETCHED_LOCAL=" + metrics.localBlocksFetched + + " BLOCK_FETCHED_REMOTE=" + metrics.remoteBlocksFetched + + " REMOTE_FETCH_WAIT_TIME=" + metrics.fetchWaitTime + + " REMOTE_FETCH_TIME=" + 
metrics.remoteFetchTime + + " REMOTE_BYTES_READ=" + metrics.remoteBytesRead + case None => "" + } + val writeMetrics = taskMetrics.shuffleWriteMetrics match { + case Some(metrics) => " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten + case None => "" + } + stageLogInfo(stageID, status + info + executorRunTime + readMetrics + writeMetrics) + } + + /** + * When stage is submitted, record stage submit info + * @param stageSubmitted Stage submitted event + */ + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { + stageLogInfo(stageSubmitted.stage.stageId,"STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format( + stageSubmitted.stage.stageId, stageSubmitted.stage.numTasks)) + } + + /** + * When stage is completed, record stage completion status + * @param stageCompleted Stage completed event + */ + override def onStageCompleted(stageCompleted: StageCompleted) { + stageLogInfo(stageCompleted.stage.stageId, "STAGE_ID=%d STATUS=COMPLETED".format( + stageCompleted.stage.stageId)) + } + + override def onTaskStart(taskStart: SparkListenerTaskStart) { } + + /** + * When task ends, record task completion status and metrics + * @param taskEnd Task end event + */ + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + val task = taskEnd.task + val taskInfo = taskEnd.taskInfo + var taskStatus = "" + task match { + case resultTask: ResultTask[_, _] => taskStatus = "TASK_TYPE=RESULT_TASK" + case shuffleMapTask: ShuffleMapTask => taskStatus = "TASK_TYPE=SHUFFLE_MAP_TASK" + } + taskEnd.reason match { + case Success => taskStatus += " STATUS=SUCCESS" + recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskEnd.taskMetrics) + case Resubmitted => + taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId + + " STAGE_ID=" + task.stageId + stageLogInfo(task.stageId, taskStatus) + case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => + taskStatus += " STATUS=FETCHFAILED TID=" + taskInfo.taskId + " STAGE_ID=" + + task.stageId + " SHUFFLE_ID=" + shuffleId + " MAP_ID=" + + mapId + " REDUCE_ID=" + reduceId + stageLogInfo(task.stageId, taskStatus) + case OtherFailure(message) => + taskStatus += " STATUS=FAILURE TID=" + taskInfo.taskId + + " STAGE_ID=" + task.stageId + " INFO=" + message + stageLogInfo(task.stageId, taskStatus) + case _ => + } + } + + /** + * When job ends, recording job completion status and close log file + * @param jobEnd Job end event + */ + override def onJobEnd(jobEnd: SparkListenerJobEnd) { + val job = jobEnd.job + var info = "JOB_ID=" + job.jobId + jobEnd.jobResult match { + case JobSucceeded => info += " STATUS=SUCCESS" + case JobFailed(exception, _) => + info += " STATUS=FAILED REASON=" + exception.getMessage.split("\\s+").foreach(info += _ + "_") + case _ => + } + jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase) + closeLogWriter(job.jobId) + } + + /** + * Record job properties into job log file + * @param jobID ID of the job + * @param properties Properties of the job + */ + protected def recordJobProperties(jobID: Int, properties: Properties) { + if(properties != null) { + val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "") + jobLogInfo(jobID, description, false) + } + } + + /** + * When job starts, record job property and stage graph + * @param jobStart Job start event + */ + override def onJobStart(jobStart: SparkListenerJobStart) { + val job = jobStart.job + val properties = jobStart.properties + createLogWriter(job.jobId) + recordJobProperties(job.jobId, properties) + buildJobDep(job.jobId, 
job.finalStage) + recordStageDep(job.jobId) + recordStageDepGraph(job.jobId, job.finalStage, new HashSet[Int]) + jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED") + } +} + diff --git a/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala index 7d7ca9ba8cc1f..984881861c9a9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala @@ -91,8 +91,10 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers sc.addSparkListener(joblogger) val rdd = sc.parallelize(1 to 1e2.toInt, 4).map{ i => (i % 12, 2 * i) } rdd.reduceByKey(_+_).collect() + + val user = System.getProperty("user.name", SparkContext.SPARK_UNKNOWN_USER) - joblogger.getLogDir should be ("/tmp/spark") + joblogger.getLogDir should be ("/tmp/spark-%s".format(user)) joblogger.getJobIDtoPrintWriter.size should be (1) joblogger.getStageIDToJobID.size should be (2) joblogger.getStageIDToJobID.get(0) should be (Some(0)) From a5916b98193a6cc45aeb41eb05687ed43793338d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 7 Nov 2013 11:08:27 -0800 Subject: [PATCH 04/39] Merge pull request #148 from squito/include_appId Include appId in executor cmd line args add the appId back into the executor cmd line args. I also made a pretty lame regression test, just to make sure it doesn't get dropped in the future. not sure it will run on the build server, though, b/c `ExecutorRunner.buildCommandSeq()` expects to be abel to run the scripts in `bin`. (cherry picked from commit 3d4ad84b63e440fd3f4b3edb1b120ff7c14a42d1) Signed-off-by: Reynold Xin --- .../spark/deploy/worker/ExecutorRunner.scala | 2 +- .../CoarseGrainedExecutorBackend.scala | 2 +- .../deploy/worker/ExecutorRunnerTest.scala | 20 +++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8fabc95665901..fff9cb60c7849 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -104,7 +104,7 @@ private[spark] class ExecutorRunner( // SPARK-698: do not call the run.cmd script, as process.destroy() // fails to kill a process tree on Windows Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++ - command.arguments.map(substituteVariables) + (command.arguments ++ Seq(appId)).map(substituteVariables) } /** diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 80ff4c59cb484..caee6b01ab1fa 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -111,7 +111,7 @@ private[spark] object CoarseGrainedExecutorBackend { def main(args: Array[String]) { if (args.length < 4) { - //the reason we allow the last frameworkId argument is to make it easy to kill rogue executors + //the reason we allow the last appid argument is to make it easy to kill rogue executors System.err.println( "Usage: CoarseGrainedExecutorBackend " + "[]") diff --git 
a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala new file mode 100644 index 0000000000000..d433806987596 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala @@ -0,0 +1,20 @@ +package org.apache.spark.deploy.worker + +import java.io.File +import org.scalatest.FunSuite +import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription} + +class ExecutorRunnerTest extends FunSuite { + + test("command includes appId") { + def f(s:String) = new File(s) + val sparkHome = sys.props("user.dir") + val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()), + sparkHome, "appUiUrl") + val appId = "12345-worker321-9876" + val er = new ExecutorRunner(appId, 1, appDesc, 8, 500, null, "blah", "worker321", f(sparkHome), + f("ooga"), ExecutorState.RUNNING) + + assert(er.buildCommandSeq().last === appId) + } +} From ab4f44513fcbe180a3c77c7a1c7a559fbbef656d Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Fri, 8 Nov 2013 12:51:05 -0800 Subject: [PATCH 05/39] Use SPARK_HOME instead of user.dir in ExecutorRunnerTest (cherry picked from commit dd63c548c228d7775670e4664be18ebd1c62bed7) Signed-off-by: Reynold Xin --- .../org/apache/spark/deploy/worker/ExecutorRunnerTest.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala index d433806987596..8f0954122b322 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala @@ -5,10 +5,9 @@ import org.scalatest.FunSuite import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription} class ExecutorRunnerTest extends FunSuite { - test("command includes appId") { def f(s:String) = new File(s) - val sparkHome = sys.props("user.dir") + val sparkHome = sys.env("SPARK_HOME") val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()), sparkHome, "appUiUrl") val appId = "12345-worker321-9876" From 07ae5240afd754b1cadf1962717c5505d0a4e5b6 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 9 Nov 2013 11:55:16 -0800 Subject: [PATCH 06/39] Merge pull request #152 from rxin/repl Propagate SparkContext local properties from spark-repl caller thread to the repl execution thread. 
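Roughly, the interpreter now captures the caller's properties before scheduling the line and re-applies them on the thread that actually runs it (sketch only; getLocalProperties/setLocalProperties are the private[spark] accessors added in this change):

    val props = sc.getLocalProperties()   // on the thread calling interpret()
    lineManager.set(originalLine) {
      sc.setLocalProperties(props)        // on the repl execution thread
      // ... execute the interpreted line ...
    }

so anything set via sc.setLocalProperty (e.g. a job group) before interpret() is visible to jobs launched by the interpreted code.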
(cherry picked from commit 72a601ec318d017e5ec2b878abeac19e30ebb554) Signed-off-by: Reynold Xin --- .../scala/org/apache/spark/SparkContext.scala | 6 ++++ .../org/apache/spark/repl/SparkIMain.scala | 11 ++++-- .../org/apache/spark/repl/ReplSuite.scala | 35 +++++++++++++++++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 512daf30cc3c5..d563423ce25fe 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -283,6 +283,12 @@ class SparkContext( override protected def childValue(parent: Properties): Properties = new Properties(parent) } + private[spark] def getLocalProperties(): Properties = localProperties.get() + + private[spark] def setLocalProperties(props: Properties) { + localProperties.set(props) + } + def initLocalProperties() { localProperties.set(new Properties()) } diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala index e6e35c9b5df9e..870e12de341dd 100644 --- a/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala @@ -878,14 +878,21 @@ class SparkIMain(val settings: Settings, protected val out: PrintWriter) extends (message, false) } } + + // Get a copy of the local properties from SparkContext, and set it later in the thread + // that triggers the execution. This is to make sure the caller of this function can pass + // the right thread local (inheritable) properties down into Spark. + val sc = org.apache.spark.repl.Main.interp.sparkContext + val props = if (sc != null) sc.getLocalProperties() else null try { val execution = lineManager.set(originalLine) { // MATEI: set the right SparkEnv for our SparkContext, because // this execution will happen in a separate thread - val sc = org.apache.spark.repl.Main.interp.sparkContext - if (sc != null && sc.env != null) + if (sc != null && sc.env != null) { SparkEnv.set(sc.env) + sc.setLocalProperties(props) + } // Execute the line lineRep call "$export" } diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 8f9b632c0eea6..6e4504d4d5f41 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -21,12 +21,14 @@ import java.io._ import java.net.URLClassLoader import scala.collection.mutable.ArrayBuffer -import scala.collection.JavaConversions._ -import org.scalatest.FunSuite import com.google.common.io.Files +import org.scalatest.FunSuite +import org.apache.spark.SparkContext + class ReplSuite extends FunSuite { + def runInterpreter(master: String, input: String): String = { val in = new BufferedReader(new StringReader(input + "\n")) val out = new StringWriter() @@ -64,6 +66,35 @@ class ReplSuite extends FunSuite { "Interpreter output contained '" + message + "':\n" + output) } + test("propagation of local properties") { + // A mock ILoop that doesn't install the SIGINT handler. 
+ class ILoop(out: PrintWriter) extends SparkILoop(None, out, None) { + settings = new scala.tools.nsc.Settings + settings.usejavacp.value = true + org.apache.spark.repl.Main.interp = this + override def createInterpreter() { + intp = new SparkILoopInterpreter + intp.setContextClassLoader() + } + } + + val out = new StringWriter() + val interp = new ILoop(new PrintWriter(out)) + interp.sparkContext = new SparkContext("local", "repl-test") + interp.createInterpreter() + interp.intp.initialize() + interp.sparkContext.setLocalProperty("someKey", "someValue") + + // Make sure the value we set in the caller to interpret is propagated in the thread that + // interprets the command. + interp.interpret("org.apache.spark.repl.Main.interp.sparkContext.getLocalProperty(\"someKey\")") + assert(out.toString.contains("someValue")) + + interp.sparkContext.stop() + System.clearProperty("spark.driver.port") + System.clearProperty("spark.hostPort") + } + test ("simple foreach with accumulator") { val output = runInterpreter("local", """ val accum = sc.accumulator(0) From 32a0c4f61ae8f6fa30fae274f96e6159417e0a0e Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 9 Nov 2013 15:40:29 -0800 Subject: [PATCH 07/39] Merge pull request #155 from rxin/jobgroup Don't reset job group when a new job description is set. (cherry picked from commit 83bf1920c85c33d8d725831b28bc5d11b53c27bd) Signed-off-by: Reynold Xin --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index d563423ce25fe..ff5b2e064a531 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -310,7 +310,7 @@ class SparkContext( /** Set a human readable description of the current job. */ @deprecated("use setJobGroup", "0.8.1") def setJobDescription(value: String) { - setJobGroup("", value) + setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value) } /** From 1d52b5081171bad15527bc882509a874e865260c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 9 Nov 2013 13:48:00 -0800 Subject: [PATCH 08/39] Merge pull request #149 from tgravescs/fixSecureHdfsAccess Fix secure hdfs access for spark on yarn https://github.com/apache/incubator-spark/pull/23 broke secure hdfs access. Not sure if it works with secure hdfs on standalone. Fixing it at least for spark on yarn. The broadcasting of jobconf change also broke secure hdfs access as it didn't take into account things calling the getPartitions before sparkContext is initialized. The DAGScheduler does this as it tries to getShuffleMapStage. 
(cherry picked from commit 8af99f2356ed19fe43d722ada02f8802cbd46d40) Signed-off-by: Reynold Xin --- .../apache/spark/deploy/SparkHadoopUtil.scala | 21 ++++++++++++++----- .../org/apache/spark/rdd/HadoopRDD.scala | 2 ++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index c29a30184af13..fc1537f7963c4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.security.UserGroupInformation -import org.apache.spark.SparkException +import org.apache.spark.{SparkContext, SparkException} /** * Contains util methods to interact with Hadoop from Spark. @@ -34,10 +34,21 @@ class SparkHadoopUtil { UserGroupInformation.setConfiguration(conf) def runAsUser(user: String)(func: () => Unit) { - val ugi = UserGroupInformation.createRemoteUser(user) - ugi.doAs(new PrivilegedExceptionAction[Unit] { - def run: Unit = func() - }) + // if we are already running as the user intended there is no reason to do the doAs. It + // will actually break secure HDFS access as it doesn't fill in the credentials. Also if + // the user is UNKNOWN then we shouldn't be creating a remote unknown user + // (this is actually the path spark on yarn takes) since SPARK_USER is initialized only + // in SparkContext. + val currentUser = Option(System.getProperty("user.name")). + getOrElse(SparkContext.SPARK_UNKNOWN_USER) + if (user != SparkContext.SPARK_UNKNOWN_USER && currentUser != user) { + val ugi = UserGroupInformation.createRemoteUser(user) + ugi.doAs(new PrivilegedExceptionAction[Unit] { + def run: Unit = func() + }) + } else { + func() + } } /** diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 32901a508f53b..47e958b5e6f4b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -132,6 +132,8 @@ class HadoopRDD[K, V]( override def getPartitions: Array[Partition] = { val jobConf = getJobConf() + // add the credentials here as this can be called before SparkContext initialized + SparkHadoopUtil.get.addCredentials(jobConf) val inputFormat = getInputFormat(jobConf) if (inputFormat.isInstanceOf[Configurable]) { inputFormat.asInstanceOf[Configurable].setConf(jobConf) From 5ce6c756793d0da421e45fadd50c5b0511406f1c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sat, 9 Nov 2013 17:53:49 -0800 Subject: [PATCH 09/39] Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker Add spark-tools assembly to spark-class'ss classpath This commit adds an assembly for `spark-tools` and adds it to `spark-class`'s classpath, allowing the JavaAPICompletenessChecker to be run against Spark 0.8+ with ./spark-class org.apache.spark.tools.JavaAPICompletenessChecker Previously, this tool was run through the `run` script. I chose to add this to `run-example` because I didn't want to duplicate code in a `run-tool` script. 
(cherry picked from commit 3efc0195625977335914f0a18cf32bd4e9b1d6d4) Signed-off-by: Reynold Xin --- project/SparkBuild.scala | 2 +- spark-class | 13 +++++++++++++ spark-class2.cmd | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f6cbc1d296bb4..ae5506a57d9e1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -257,7 +257,7 @@ object SparkBuild extends Build { def toolsSettings = sharedSettings ++ Seq( name := "spark-tools" - ) + ) ++ assemblySettings ++ extraAssemblySettings def bagelSettings = sharedSettings ++ Seq( name := "spark-bagel" diff --git a/spark-class b/spark-class index fb9d1a4f8eaaf..bbeca7f245692 100755 --- a/spark-class +++ b/spark-class @@ -110,8 +110,21 @@ if [ ! -f "$FWDIR/RELEASE" ]; then fi fi +TOOLS_DIR="$FWDIR"/tools +SPARK_TOOLS_JAR="" +if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then + # Use the JAR from the SBT build + export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar` +fi +if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then + # Use the JAR from the Maven build + # TODO: this also needs to become an assembly! + export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar` +fi + # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH="$SPARK_TOOLS_JAR:$CLASSPATH" export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then diff --git a/spark-class2.cmd b/spark-class2.cmd index d4d853e8ad930..3869d0761bfaa 100644 --- a/spark-class2.cmd +++ b/spark-class2.cmd @@ -65,10 +65,17 @@ if "%FOUND_JAR%"=="0" ( ) :skip_build_test +set TOOLS_DIR=%FWDIR%tools +set SPARK_TOOLS_JAR= +for %%d in ("%TOOLS_DIR%\target\scala-%SCALA_VERSION%\spark-tools*assembly*.jar") do ( + set SPARK_TOOLS_JAR=%%d +) + rem Compute classpath using external script set DONT_PRINT_CLASSPATH=1 call "%FWDIR%bin\compute-classpath.cmd" set DONT_PRINT_CLASSPATH=0 +set CLASSPATH=%SPARK_TOOLS_JAR%;%CLASSPATH% rem Figure out where java is. set RUNNER=java From 30786c650f5eb9d0f215b0ec9933f967d9bb264c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 10 Nov 2013 09:23:56 -0800 Subject: [PATCH 10/39] Merge pull request #157 from rxin/kryo 3 Kryo related changes. 1. Call Kryo setReferences before calling user specified Kryo registrator. This is done so the user specified registrator can override the default setting. 2. Register more internal classes (MapStatus, BlockManagerId). 3. Slightly refactored the internal class registration to allocate less memory. 
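A consequence of (1), sketched here: a user registrator can now flip reference tracking itself, because it runs after the default has been applied. MyRegistrator is a made-up example class; only setReferences/register, KryoRegistrator, and the two system properties come from the code below.

    import com.esotericsoftware.kryo.Kryo
    import org.apache.spark.serializer.KryoRegistrator

    class MyRegistrator extends KryoRegistrator {
      def registerClasses(kryo: Kryo) {
        kryo.setReferences(false)              // overrides the spark.kryo.referenceTracking default
        kryo.register(classOf[Array[String]])  // plus whatever application classes need registering
      }
    }
    // enabled via System.setProperty("spark.kryo.registrator", "MyRegistrator")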
(cherry picked from commit 58d4f6c8a5d9739dc2a3f26f116528457336f0d3) Signed-off-by: Reynold Xin --- .../spark/serializer/KryoSerializer.scala | 52 +++++++++++-------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 55b25f145ae0d..e748c2275d589 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -27,13 +27,17 @@ import com.twitter.chill.{EmptyScalaKryoInstantiator, AllScalaRegistrar} import org.apache.spark.{SerializableWritable, Logging} import org.apache.spark.broadcast.HttpBroadcast -import org.apache.spark.storage.{GetBlock,GotBlock, PutBlock, StorageLevel, TestBlockId} +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage._ /** - * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]]. + * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]]. */ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging { - private val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 + + private val bufferSize = { + System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 + } def newKryoOutput() = new KryoOutput(bufferSize) @@ -42,21 +46,11 @@ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging val kryo = instantiator.newKryo() val classLoader = Thread.currentThread.getContextClassLoader - val blockId = TestBlockId("1") - // Register some commonly used classes - val toRegister: Seq[AnyRef] = Seq( - ByteBuffer.allocate(1), - StorageLevel.MEMORY_ONLY, - PutBlock(blockId, ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY), - GotBlock(blockId, ByteBuffer.allocate(1)), - GetBlock(blockId), - 1 to 10, - 1 until 10, - 1L to 10L, - 1L until 10L - ) - - for (obj <- toRegister) kryo.register(obj.getClass) + // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops. + // Do this before we invoke the user registrator so the user registrator can override this. + kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) + + for (cls <- KryoSerializer.toRegister) kryo.register(cls) // Allow sending SerializableWritable kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) @@ -78,10 +72,6 @@ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging new AllScalaRegistrar().apply(kryo) kryo.setClassLoader(classLoader) - - // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops - kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) - kryo } @@ -165,3 +155,21 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends Serializ trait KryoRegistrator { def registerClasses(kryo: Kryo) } + +private[serializer] object KryoSerializer { + // Commonly used classes. 
+ private val toRegister: Seq[Class[_]] = Seq( + ByteBuffer.allocate(1).getClass, + classOf[StorageLevel], + classOf[PutBlock], + classOf[GotBlock], + classOf[GetBlock], + classOf[MapStatus], + classOf[BlockManagerId], + classOf[Array[Byte]], + (1 to 10).getClass, + (1 until 10).getClass, + (1L to 10L).getClass, + (1L until 10L).getClass + ) +} From c85665157afa75caeac3a91adf97a0edc0cac3a5 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 12 Nov 2013 09:10:05 -0800 Subject: [PATCH 11/39] Merge pull request #164 from tdas/kafka-fix Made block generator thread safe to fix Kafka bug. This is a very important bug fix. Data can and was being lost in the kafka due to this. (cherry picked from commit dfd1ebc2d1e5c34a5979648e571302ae81a178f5) Signed-off-by: Reynold Xin --- .../dstream/NetworkInputDStream.scala | 4 +- .../spark/streaming/InputStreamsSuite.scala | 83 +++++++++++++++++-- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala index 8d3ac0fc65ad5..a82862c8029b2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/NetworkInputDStream.scala @@ -232,11 +232,11 @@ abstract class NetworkReceiver[T: ClassManifest]() extends Serializable with Log logInfo("Data handler stopped") } - def += (obj: T) { + def += (obj: T): Unit = synchronized { currentBuffer += obj } - private def updateCurrentBuffer(time: Long) { + private def updateCurrentBuffer(time: Long): Unit = synchronized { try { val newBlockBuffer = currentBuffer currentBuffer = new ArrayBuffer[T] diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala index 42e3e51e3fa15..ca2da6816a139 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala @@ -23,15 +23,15 @@ import akka.actor.IOManager import akka.actor.Props import akka.util.ByteString -import dstream.SparkFlumeEvent +import org.apache.spark.streaming.dstream.{NetworkReceiver, SparkFlumeEvent} import java.net.{InetSocketAddress, SocketException, Socket, ServerSocket} import java.io.{File, BufferedWriter, OutputStreamWriter} -import java.util.concurrent.{TimeUnit, ArrayBlockingQueue} +import java.util.concurrent.{Executors, TimeUnit, ArrayBlockingQueue} import collection.mutable.{SynchronizedBuffer, ArrayBuffer} import util.ManualClock import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.receivers.Receiver -import org.apache.spark.Logging +import org.apache.spark.{SparkContext, Logging} import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter @@ -44,6 +44,7 @@ import java.nio.ByteBuffer import collection.JavaConversions._ import java.nio.charset.Charset import com.google.common.io.Files +import java.util.concurrent.atomic.AtomicInteger class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { @@ -61,7 +62,6 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { System.clearProperty("spark.hostPort") } - test("socket input stream") { // Start the server val testServer = new TestServer() @@ -271,10 +271,49 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { 
val kafkaParams = Map("zk.connect"->"localhost:12345","groupid"->"consumer-group") val test3 = ssc.kafkaStream[String, kafka.serializer.StringDecoder](kafkaParams, topics, StorageLevel.MEMORY_AND_DISK) } + + test("multi-thread receiver") { + // set up the test receiver + val numThreads = 10 + val numRecordsPerThread = 1000 + val numTotalRecords = numThreads * numRecordsPerThread + val testReceiver = new MultiThreadTestReceiver(numThreads, numRecordsPerThread) + MultiThreadTestReceiver.haveAllThreadsFinished = false + + // set up the network stream using the test receiver + val ssc = new StreamingContext(master, framework, batchDuration) + val networkStream = ssc.networkStream[Int](testReceiver) + val countStream = networkStream.count + val outputBuffer = new ArrayBuffer[Seq[Long]] with SynchronizedBuffer[Seq[Long]] + val outputStream = new TestOutputStream(countStream, outputBuffer) + def output = outputBuffer.flatMap(x => x) + ssc.registerOutputStream(outputStream) + ssc.start() + + // Let the data from the receiver be received + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + val startTime = System.currentTimeMillis() + while((!MultiThreadTestReceiver.haveAllThreadsFinished || output.sum < numTotalRecords) && + System.currentTimeMillis() - startTime < 5000) { + Thread.sleep(100) + clock.addToTime(batchDuration.milliseconds) + } + Thread.sleep(1000) + logInfo("Stopping context") + ssc.stop() + + // Verify whether data received was as expected + logInfo("--------------------------------") + logInfo("output.size = " + outputBuffer.size) + logInfo("output") + outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + assert(output.sum === numTotalRecords) + } } -/** This is server to test the network input stream */ +/** This is a server to test the network input stream */ class TestServer() extends Logging { val queue = new ArrayBlockingQueue[String](100) @@ -336,6 +375,7 @@ object TestServer { } } +/** This is an actor for testing actor input stream */ class TestActor(port: Int) extends Actor with Receiver { def bytesToString(byteString: ByteString) = byteString.utf8String @@ -347,3 +387,36 @@ class TestActor(port: Int) extends Actor with Receiver { pushBlock(bytesToString(bytes)) } } + +/** This is a receiver to test multiple threads inserting data using block generator */ +class MultiThreadTestReceiver(numThreads: Int, numRecordsPerThread: Int) + extends NetworkReceiver[Int] { + lazy val executorPool = Executors.newFixedThreadPool(numThreads) + lazy val blockGenerator = new BlockGenerator(StorageLevel.MEMORY_ONLY) + lazy val finishCount = new AtomicInteger(0) + + protected def onStart() { + blockGenerator.start() + (1 to numThreads).map(threadId => { + val runnable = new Runnable { + def run() { + (1 to numRecordsPerThread).foreach(i => + blockGenerator += (threadId * numRecordsPerThread + i) ) + if (finishCount.incrementAndGet == numThreads) { + MultiThreadTestReceiver.haveAllThreadsFinished = true + } + logInfo("Finished thread " + threadId) + } + } + executorPool.submit(runnable) + }) + } + + protected def onStop() { + executorPool.shutdown() + } +} + +object MultiThreadTestReceiver { + var haveAllThreadsFinished = false +} From 333859f51a9845bbcbe804ec969902b1fd38dad1 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 14 Nov 2013 10:25:48 -0800 Subject: [PATCH 12/39] Merge pull request #171 from RIA-pierre-borckmans/master Fixed typos in the CDH4 distributions version codes. 
Nothing important, but annoying when doing a copy/paste... (cherry picked from commit d76f5203af1f714efea30106bef9c2a3a80fd56e) Signed-off-by: Reynold Xin --- docs/hadoop-third-party-distributions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md index f706625fe9d5b..b33af2cf24d77 100644 --- a/docs/hadoop-third-party-distributions.md +++ b/docs/hadoop-third-party-distributions.md @@ -25,8 +25,8 @@ the _exact_ Hadoop version you are running to avoid any compatibility errors.

CDH Releases

- - + + From 41dc566feed30448f755f6225fb0d177277bf668 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 14 Nov 2013 10:30:36 -0800 Subject: [PATCH 13/39] Merge pull request #170 from liancheng/hadooprdd-doc-typo Fixed a scaladoc typo in HadoopRDD.scala (cherry picked from commit 5a4f483652c3dc0730d1c908a6a46e2f77d270e1) Signed-off-by: Reynold Xin --- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 47e958b5e6f4b..53f77a38f55f6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -52,7 +52,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp * sources in HBase, or S3). * * @param sc The SparkContext to associate the RDD with. - * @param broadCastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed + * @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed * variabe references an instance of JobConf, then that JobConf will be used for the Hadoop job. * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration. * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD From e7927ad3dba185e16ddf13f48d0b9f83c538de0c Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 14 Nov 2013 10:32:11 -0800 Subject: [PATCH 14/39] Merge pull request #169 from kayousterhout/mesos_fix Don't ignore spark.cores.max when using Mesos Coarse mode totalCoresAcquired is decremented but never incremented, causing Spark to effectively ignore spark.cores.max in coarse grained Mesos mode. (cherry picked from commit 1a4cfbea334c7b0dae287eab4c3131c8f4b8a992) Signed-off-by: Reynold Xin --- .../scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index 300fe693f1c55..cd521e0f2be0a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -181,6 +181,7 @@ private[spark] class CoarseMesosSchedulerBackend( !slaveIdsWithExecutors.contains(slaveId)) { // Launch an executor on the slave val cpusToUse = math.min(cpus, maxCores - totalCoresAcquired) + totalCoresAcquired += cpusToUse val taskId = newMesosTaskId() taskIdToSlaveId(taskId) = slaveId slaveIdsWithExecutors += slaveId From 24e238bfc2fce60ce16adb3b913855a814dc4375 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 17 Nov 2013 18:42:18 -0800 Subject: [PATCH 15/39] Merge pull request #182 from rxin/vector Slightly enhanced PrimitiveVector: 1. Added trim() method 2. Added size method. 3. Renamed getUnderlyingArray to array. 4. Minor documentation update. 
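A rough sketch of how the reworked API behaves, mirroring the class and the new PrimitiveVectorSuite in the diff below (the class is private[spark], so real callers live inside Spark itself):

    // Illustrative usage of the enhanced PrimitiveVector API (see the diff below).
    val vec = new PrimitiveVector[Long](initialSize = 4)
    (0L until 10L).foreach(vec += _)      // capacity doubles on overflow: 4 -> 8 -> 16
    assert(vec.size == 10 && vec.length == 10)
    assert(vec.capacity == 16)
    vec.trim()                            // shrink capacity down to size
    assert(vec.capacity == 10)
    val backing: Array[Long] = vec.array  // formerly getUnderlyingArray
    vec.resize(5)                         // shrinking below size drops trailing elements
    assert(vec.size == 5 && vec.capacity == 5)

trim() matters when the backing array is exposed through array, since otherwise up to half of the slots can be unused padding.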
(cherry picked from commit e2ebc3a9d8bca83bf842b134f2f056c1af0ad2be) Signed-off-by: Reynold Xin --- .../util/collection/PrimitiveVector.scala | 46 ++++--- .../collection/PrimitiveVectorSuite.scala | 117 ++++++++++++++++++ 2 files changed, 148 insertions(+), 15 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala index 369519c5595de..20554f0aaba70 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala @@ -17,35 +17,51 @@ package org.apache.spark.util.collection -/** Provides a simple, non-threadsafe, array-backed vector that can store primitives. */ +/** + * An append-only, non-threadsafe, array-backed vector that is optimized for primitive types. + */ private[spark] class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialSize: Int = 64) { - private var numElements = 0 - private var array: Array[V] = _ + private var _numElements = 0 + private var _array: Array[V] = _ // NB: This must be separate from the declaration, otherwise the specialized parent class - // will get its own array with the same initial size. TODO: Figure out why... - array = new Array[V](initialSize) + // will get its own array with the same initial size. + _array = new Array[V](initialSize) def apply(index: Int): V = { - require(index < numElements) - array(index) + require(index < _numElements) + _array(index) } def +=(value: V) { - if (numElements == array.length) { resize(array.length * 2) } - array(numElements) = value - numElements += 1 + if (_numElements == _array.length) { + resize(_array.length * 2) + } + _array(_numElements) = value + _numElements += 1 } - def length = numElements + def capacity: Int = _array.length + + def length: Int = _numElements + + def size: Int = _numElements + + /** Gets the underlying array backing this vector. */ + def array: Array[V] = _array - def getUnderlyingArray = array + /** Trims this vector so that the capacity is equal to the size. */ + def trim(): PrimitiveVector[V] = resize(size) /** Resizes the array, dropping elements if the total length decreases. */ - def resize(newLength: Int) { + def resize(newLength: Int): PrimitiveVector[V] = { val newArray = new Array[V](newLength) - array.copyToArray(newArray) - array = newArray + _array.copyToArray(newArray) + _array = newArray + if (newLength < _numElements) { + _numElements = newLength + } + this } } diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala new file mode 100644 index 0000000000000..970dade628fe4 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveVectorSuite.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.collection + +import org.scalatest.FunSuite + +import org.apache.spark.util.SizeEstimator + +class PrimitiveVectorSuite extends FunSuite { + + test("primitive value") { + val vector = new PrimitiveVector[Int] + + for (i <- 0 until 1000) { + vector += i + assert(vector(i) === i) + } + + assert(vector.size === 1000) + assert(vector.size == vector.length) + intercept[IllegalArgumentException] { + vector(1000) + } + + for (i <- 0 until 1000) { + assert(vector(i) == i) + } + } + + test("non-primitive value") { + val vector = new PrimitiveVector[String] + + for (i <- 0 until 1000) { + vector += i.toString + assert(vector(i) === i.toString) + } + + assert(vector.size === 1000) + assert(vector.size == vector.length) + intercept[IllegalArgumentException] { + vector(1000) + } + + for (i <- 0 until 1000) { + assert(vector(i) == i.toString) + } + } + + test("ideal growth") { + val vector = new PrimitiveVector[Long](initialSize = 1) + vector += 1 + for (i <- 1 until 1024) { + vector += i + assert(vector.size === i + 1) + assert(vector.capacity === Integer.highestOneBit(i) * 2) + } + assert(vector.capacity === 1024) + vector += 1024 + assert(vector.capacity === 2048) + } + + test("ideal size") { + val vector = new PrimitiveVector[Long](8192) + for (i <- 0 until 8192) { + vector += i + } + assert(vector.size === 8192) + assert(vector.capacity === 8192) + val actualSize = SizeEstimator.estimate(vector) + val expectedSize = 8192 * 8 + // Make sure we are not allocating a significant amount of memory beyond our expected. + // Due to specialization wonkiness, we need to ensure we don't have 2 copies of the array. + assert(actualSize < expectedSize * 1.1) + } + + test("resizing") { + val vector = new PrimitiveVector[Long] + for (i <- 0 until 4097) { + vector += i + } + assert(vector.size === 4097) + assert(vector.capacity === 8192) + vector.trim() + assert(vector.size === 4097) + assert(vector.capacity === 4097) + vector.resize(5000) + assert(vector.size === 4097) + assert(vector.capacity === 5000) + vector.resize(4000) + assert(vector.size === 4000) + assert(vector.capacity === 4000) + vector.resize(5000) + assert(vector.size === 4000) + assert(vector.capacity === 5000) + for (i <- 0 until 4000) { + assert(vector(i) == i) + } + intercept[IllegalArgumentException] { + vector(4000) + } + } +} From 9d563716da91c129535b6843244125b69ee4cbcb Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 14 Nov 2013 22:29:28 -0800 Subject: [PATCH 16/39] Merge pull request #173 from kayousterhout/scheduler_hang Fix bug where scheduler could hang after task failure. When a task fails, we need to call reviveOffers() so that the task can be rescheduled on a different machine. In the current code, the state in ClusterTaskSetManager indicating which tasks are pending may be updated after revive offers is called (there's a race condition here), so when revive offers is called, the task set manager does not yet realize that there are failed tasks that need to be relaunched. 
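A minimal sketch of the ordering the fix enforces, using hypothetical stand-ins rather than the real ClusterScheduler and ClusterTaskSetManager: the failed task has to be recorded as runnable again before reviveOffers() triggers the next round of offers, otherwise that round can come back empty and nothing ever re-launches the task.

    // Hypothetical sketch only; the actual change is in the ClusterScheduler diff below.
    import scala.collection.mutable.Queue

    trait SketchBackend { def reviveOffers(): Unit }

    class SketchTaskSetManager {
      val pendingTasks = new Queue[Long]()
      def handleFailedTask(tid: Long) { pendingTasks.enqueue(tid) }  // task must be re-launched
    }

    class SketchScheduler(manager: SketchTaskSetManager, backend: SketchBackend) {
      def taskFailed(tid: Long) {
        manager.handleFailedTask(tid)  // update the task-set state first...
        backend.reviveOffers()         // ...so the offers that follow see a pending task
      }
    }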
This isn't currently unit tested but will be once my pull request for merging the cluster and local schedulers goes in -- at which point many more of the unit tests will exercise the code paths through the cluster scheduler (currently the failure test suite uses the local scheduler, which is why we didn't see this bug before). (cherry picked from commit 96e0fb46309698b685c811a65bd8e1a691389994) Signed-off-by: Reynold Xin --- .../spark/scheduler/cluster/ClusterScheduler.scala | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala index 85033958ef54f..27145ccd5f102 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala @@ -256,7 +256,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { var failedExecutor: Option[String] = None - var taskFailed = false synchronized { try { if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { @@ -276,9 +275,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } taskIdToExecutorId.remove(tid) } - if (state == TaskState.FAILED) { - taskFailed = true - } activeTaskSets.get(taskSetId).foreach { taskSet => if (state == TaskState.FINISHED) { taskSet.removeRunningTask(tid) @@ -300,10 +296,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) dagScheduler.executorLost(failedExecutor.get) backend.reviveOffers() } - if (taskFailed) { - // Also revive offers if a task had failed for some reason other than host lost - backend.reviveOffers() - } } def handleTaskGettingResult(taskSetManager: ClusterTaskSetManager, tid: Long) { @@ -323,8 +315,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext) taskState: TaskState, reason: Option[TaskEndReason]) = synchronized { taskSetManager.handleFailedTask(tid, taskState, reason) - if (taskState == TaskState.FINISHED) { - // The task finished successfully but the result was lost, so we should revive offers. + if (taskState != TaskState.KILLED) { + // Need to revive offers again now that the task set manager state has been updated to + // reflect failed tasks that need to be re-run. backend.reviveOffers() } } From f0d350a1ed1f87255af5a9a7f6efb784ce4e34b2 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 14 Nov 2013 19:44:50 -0800 Subject: [PATCH 17/39] Merge pull request #175 from kayousterhout/no_retry_not_serializable Don't retry tasks when they fail due to a NotSerializableException As with my previous pull request, this will be unit tested once the Cluster and Local schedulers get merged. 
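A condensed sketch of the check this patch adds (simplified names; the real logic is in ClusterTaskSetManager.handleFailedTask, shown in the diff below): a result that failed with NotSerializableException will fail identically on every attempt, so the task set is aborted instead of rescheduling the task.

    // Sketch: abort rather than retry when rescheduling can never help.
    import java.io.NotSerializableException

    case class SketchExceptionFailure(className: String, description: String)

    def handleFailure(ef: SketchExceptionFailure, abort: String => Unit, retry: () => Unit) {
      if (ef.className == classOf[NotSerializableException].getName) {
        abort("had a not serializable result: " + ef.description)  // fail the whole task set
      } else {
        retry()  // other failures may be transient, so re-schedule as before
      }
    }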
(cherry picked from commit dfd40e9f6f87ff1f205944997cdbbb6bb7f0312c) Signed-off-by: Reynold Xin --- .../spark/scheduler/cluster/ClusterTaskSetManager.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala index ee47aaffcae11..4c5eca8537cd6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -17,6 +17,7 @@ package org.apache.spark.scheduler.cluster +import java.io.NotSerializableException import java.util.Arrays import scala.collection.mutable.ArrayBuffer @@ -484,6 +485,14 @@ private[spark] class ClusterTaskSetManager( case ef: ExceptionFailure => sched.dagScheduler.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) + if (ef.className == classOf[NotSerializableException].getName()) { + // If the task result wasn't serializable, there's no point in trying to re-execute it. + logError("Task %s:%s had a not serializable result: %s; not retrying".format( + taskSet.id, index, ef.description)) + abort("Task %s:%s had a not serializable result: %s".format( + taskSet.id, index, ef.description)) + return + } val key = ef.description val now = clock.getTime() val (printFull, dupCount) = { From 6c607682ca738c0407f1683e15b2e35b7f3ad382 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 12 Nov 2013 16:19:50 -0800 Subject: [PATCH 18/39] Merge pull request #160 from xiajunluan/JIRA-923 Fix bug JIRA-923 Fix column sort issue in UI for JIRA-923. https://spark-project.atlassian.net/browse/SPARK-923 Signed-off-by: Reynold Xin Conflicts: core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala (cherry picked from commit b8bf04a085162478a64ca5d7be15d1af2f6a930e) Signed-off-by: Reynold Xin --- .../org/apache/spark/ui/jobs/StagePage.scala | 31 +++++++++++++++---- .../org/apache/spark/ui/jobs/StageTable.scala | 11 ++++--- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 35b5d5fd59534..c1c7aa70e6c92 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -152,6 +152,22 @@ private[spark] class StagePage(parent: JobProgressUI) { else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("") val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L) + var shuffleReadSortable: String = "" + var shuffleReadReadable: String = "" + if (shuffleRead) { + shuffleReadSortable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}.toString() + shuffleReadReadable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => + Utils.bytesToString(s.remoteBytesRead)}.getOrElse("") + } + + var shuffleWriteSortable: String = "" + var shuffleWriteReadable: String = "" + if (shuffleWrite) { + shuffleWriteSortable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}.toString() + shuffleWriteReadable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => + Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("") + } + @@ -166,14 +182,17 @@ private[spark] class StagePage(parent: JobProgressUI) { {if (gcTime > 0) parent.formatDuration(gcTime) else ""} {if 
(shuffleRead) { - + }} {if (shuffleWrite) { - - + + }} - - + + } } From a64397b1ea3c920a53fdfe7a39c389753694866e Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 12 Nov 2013 16:26:09 -0800 Subject: [PATCH 19/39] Merge pull request #153 from ankurdave/stop-spot-cluster Enable stopping and starting a spot cluster Clusters launched using `--spot-price` contain an on-demand master and spot slaves. Because EC2 does not support stopping spot instances, the spark-ec2 script previously could only destroy such clusters. This pull request makes it possible to stop and restart a spot cluster. * The `stop` command works as expected for a spot cluster: the master is stopped and the slaves are terminated. * To start a stopped spot cluster, the user must invoke `launch --use-existing-master`. This launches fresh spot slaves but resumes the existing master. (cherry picked from commit 87f2f4e5c2812351cdd1b2e35e2b12f62eeb3fdc) Signed-off-by: Reynold Xin --- ec2/spark_ec2.py | 51 ++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index d652c902a3430..267c8ba849821 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -97,6 +97,8 @@ def parse_args(): help="The SSH user you want to connect as (default: root)") parser.add_option("--delete-groups", action="store_true", default=False, help="When destroying a cluster, delete the security groups that were created") + parser.add_option("--use-existing-master", action="store_true", default=False, + help="Launch fresh slaves, but use an existing stopped master if possible") (opts, args) = parser.parse_args() if len(args) != 2: @@ -232,9 +234,9 @@ def launch_cluster(conn, opts, cluster_name): slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0') # Check if instances are already running in our groups - active_nodes = get_existing_cluster(conn, opts, cluster_name, - die_on_error=False) - if any(active_nodes): + existing_masters, existing_slaves = get_existing_cluster(conn, opts, cluster_name, + die_on_error=False) + if existing_slaves or (existing_masters and not opts.use_existing_master): print >> stderr, ("ERROR: There are already instances running in " + "group %s or %s" % (master_group.name, slave_group.name)) sys.exit(1) @@ -335,21 +337,28 @@ def launch_cluster(conn, opts, cluster_name): zone, slave_res.id) i += 1 - # Launch masters - master_type = opts.master_instance_type - if master_type == "": - master_type = opts.instance_type - if opts.zone == 'all': - opts.zone = random.choice(conn.get_all_zones()).name - master_res = image.run(key_name = opts.key_pair, - security_groups = [master_group], - instance_type = master_type, - placement = opts.zone, - min_count = 1, - max_count = 1, - block_device_map = block_map) - master_nodes = master_res.instances - print "Launched master in %s, regid = %s" % (zone, master_res.id) + # Launch or resume masters + if existing_masters: + print "Starting master..." 
+ for inst in existing_masters: + if inst.state not in ["shutting-down", "terminated"]: + inst.start() + master_nodes = existing_masters + else: + master_type = opts.master_instance_type + if master_type == "": + master_type = opts.instance_type + if opts.zone == 'all': + opts.zone = random.choice(conn.get_all_zones()).name + master_res = image.run(key_name = opts.key_pair, + security_groups = [master_group], + instance_type = master_type, + placement = opts.zone, + min_count = 1, + max_count = 1, + block_device_map = block_map) + master_nodes = master_res.instances + print "Launched master in %s, regid = %s" % (zone, master_res.id) # Return all the instances return (master_nodes, slave_nodes) @@ -684,6 +693,7 @@ def main(): cluster_name + "?\nDATA ON EPHEMERAL DISKS WILL BE LOST, " + "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" + "AMAZON EBS IF IT IS EBS-BACKED!!\n" + + "All data on spot-instance slaves will be lost.\n" + "Stop cluster " + cluster_name + " (y/N): ") if response == "y": (master_nodes, slave_nodes) = get_existing_cluster( @@ -695,7 +705,10 @@ def main(): print "Stopping slaves..." for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: - inst.stop() + if inst.spot_instance_request_id: + inst.terminate() + else: + inst.stop() elif action == "start": (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) From af98fbc9302f5653bd315b0f34e064d61ddcb94f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 13 Nov 2013 16:48:44 -0800 Subject: [PATCH 20/39] Merge pull request #165 from NathanHowell/kerberos-master spark-assembly.jar fails to authenticate with YARN ResourceManager The META-INF/services/ sbt MergeStrategy was discarding support for Kerberos, among others. This pull request changes to a merge strategy similar to sbt-assembly's default. I've also included an update to sbt-assembly 0.9.2, a minor fix to it's zip file handling. 
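For context, META-INF/services/* files list provider class names one per line, and in the old rules only the Hadoop FileSystem service file was concatenated while every other service file fell through to the catch-all MergeStrategy.first, so the assembly kept a single jar's copy and lost providers, including the Kerberos-related ones mentioned above. A condensed sketch of the new rule (the full pattern match is in the SparkBuild.scala diff below; the sbt wiring shown assumes the usual sbt-assembly keys are in scope):

    // Hypothetical condensed fragment of an assembly merge-strategy setting.
    mergeStrategy in assembly <<= (mergeStrategy in assembly) { old =>
      {
        // Service registration files are merged line-wise with duplicates removed, so
        // providers contributed by every jar survive in the fat jar.
        case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
        case "reference.conf" => MergeStrategy.concat
        case x => old(x)
      }
    }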
(cherry picked from commit 9290e5bcd2c8e4d8bbf1d0ce1ac09bbf62ece4e0) Signed-off-by: Reynold Xin --- project/SparkBuild.scala | 2 +- project/plugins.sbt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index ae5506a57d9e1..4f07851e86ebc 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -312,7 +312,7 @@ object SparkBuild extends Build { case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard case "log4j.properties" => MergeStrategy.discard - case "META-INF/services/org.apache.hadoop.fs.FileSystem" => MergeStrategy.concat + case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines case "reference.conf" => MergeStrategy.concat case _ => MergeStrategy.first } diff --git a/project/plugins.sbt b/project/plugins.sbt index cfcd85082a8cc..4ba0e4280a9de 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,7 +4,7 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release resolvers += "Spray Repository" at "http://repo.spray.cc/" -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.2") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") From e134ed5d670b82a183c060ad148676d2d5dd31b9 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 12 Nov 2013 19:13:39 -0800 Subject: [PATCH 21/39] Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase Allow spark on yarn to be run from HDFS. Allows the spark.jar, app.jar, and log4j.properties to be put into hdfs. Allows you to specify the files on a different hdfs cluster and it will copy them over. It makes sure permissions are correct and makes sure to put things into public distributed cache so they can be reused amongst users if their permissions are appropriate. Also add a bit of error handling for missing arguments. (cherry picked from commit f49ea28d25728e19e56b140a2f374631c94153bc) Signed-off-by: Reynold Xin --- docs/running-on-yarn.md | 1 + pom.xml | 6 + project/SparkBuild.scala | 3 +- yarn/pom.xml | 50 ++++ .../spark/deploy/yarn/ApplicationMaster.scala | 2 +- .../org/apache/spark/deploy/yarn/Client.scala | 276 +++++++++--------- .../yarn/ClientDistributedCacheManager.scala | 228 +++++++++++++++ .../spark/deploy/yarn/WorkerRunnable.scala | 42 +-- .../ClientDistributedCacheManagerSuite.scala | 220 ++++++++++++++ 9 files changed, 655 insertions(+), 173 deletions(-) create mode 100644 yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala create mode 100644 yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 2898af0bed8c0..6fd1d0d150306 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -21,6 +21,7 @@ The assembled JAR will be something like this: # Preparations - Building a YARN-enabled assembly (see above). +- The assembled jar can be installed into HDFS or used locally. - Your application code must be packaged into a separate JAR file. If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. 
NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. diff --git a/pom.xml b/pom.xml index 0ebcd2c94d398..07213f9d3015f 100644 --- a/pom.xml +++ b/pom.xml @@ -356,6 +356,12 @@ 3.1 test + + org.mockito + mockito-all + 1.8.5 + test + org.scalacheck scalacheck_2.9.3 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4f07851e86ebc..95a9ca9d5c1c8 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -162,7 +162,8 @@ object SparkBuild extends Build { "org.scalatest" %% "scalatest" % "1.9.1" % "test", "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", "com.novocode" % "junit-interface" % "0.9" % "test", - "org.easymock" % "easymock" % "3.1" % "test" + "org.easymock" % "easymock" % "3.1" % "test", + "org.mockito" % "mockito-all" % "1.8.5" % "test" ), /* Workaround for issue #206 (fixed after SBT 0.11.0) */ watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task, diff --git a/yarn/pom.xml b/yarn/pom.xml index 5ad6422ae9cbb..d9168e33f6084 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -61,6 +61,16 @@ org.apache.avro avro-ipc + + org.scalatest + scalatest_2.9.3 + test + + + org.mockito + mockito-all + test + @@ -106,6 +116,46 @@ + + org.apache.maven.plugins + maven-antrun-plugin + + + test + + run + + + true + + + + + + + + + + + + + + + + + + + + org.scalatest + scalatest-maven-plugin + + + ${basedir}/.. + 1 + ${spark.classpath} + + + diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index c1a87d33738be..4302ef4cda261 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -349,7 +349,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e try { val preserveFiles = System.getProperty("spark.yarn.preserve.staging.files", "false").toBoolean if (!preserveFiles) { - stagingDirPath = new Path(System.getenv("SPARK_YARN_JAR_PATH")).getParent() + stagingDirPath = new Path(System.getenv("SPARK_YARN_STAGING_DIR")) if (stagingDirPath == null) { logError("Staging directory is null") return diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 1a380ae714534..4e0e060ddc29b 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -17,26 +17,31 @@ package org.apache.spark.deploy.yarn -import java.net.{InetSocketAddress, URI} +import java.net.{InetAddress, InetSocketAddress, UnknownHostException, URI} import java.nio.ByteBuffer + import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.fs.{FileContext, FileStatus, FileSystem, Path, FileUtil} +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.mapred.Master import org.apache.hadoop.net.NetUtils import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.api._ +import 
org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.client.YarnClientImpl import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC +import org.apache.hadoop.yarn.util.{Apps, Records} + import scala.collection.mutable.HashMap +import scala.collection.mutable.Map import scala.collection.JavaConversions._ + import org.apache.spark.Logging import org.apache.spark.util.Utils -import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils} -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.spark.deploy.SparkHadoopUtil class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl with Logging { @@ -46,13 +51,14 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl var rpc: YarnRPC = YarnRPC.create(conf) val yarnConf: YarnConfiguration = new YarnConfiguration(conf) val credentials = UserGroupInformation.getCurrentUser().getCredentials() - private var distFiles = None: Option[String] - private var distFilesTimeStamps = None: Option[String] - private var distFilesFileSizes = None: Option[String] - private var distArchives = None: Option[String] - private var distArchivesTimeStamps = None: Option[String] - private var distArchivesFileSizes = None: Option[String] - + private val SPARK_STAGING: String = ".sparkStaging" + private val distCacheMgr = new ClientDistributedCacheManager() + + // staging directory is private! -> rwx-------- + val STAGING_DIR_PERMISSION: FsPermission = FsPermission.createImmutable(0700:Short) + // app files are world-wide readable and owner writable -> rw-r--r-- + val APP_FILE_PERMISSION: FsPermission = FsPermission.createImmutable(0644:Short) + def run() { init(yarnConf) start() @@ -63,8 +69,9 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl verifyClusterResources(newApp) val appContext = createApplicationSubmissionContext(appId) - val localResources = prepareLocalResources(appId, ".sparkStaging") - val env = setupLaunchEnv(localResources) + val appStagingDir = getAppStagingDir(appId) + val localResources = prepareLocalResources(appStagingDir) + val env = setupLaunchEnv(localResources, appStagingDir) val amContainer = createContainerLaunchContext(newApp, localResources, env) appContext.setQueue(args.amQueue) @@ -76,7 +83,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl monitorApplication(appId) System.exit(0) } - + + def getAppStagingDir(appId: ApplicationId): String = { + SPARK_STAGING + Path.SEPARATOR + appId.toString() + Path.SEPARATOR + } def logClusterResourceDetails() { val clusterMetrics: YarnClusterMetrics = super.getYarnClusterMetrics @@ -116,73 +126,73 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl return appContext } + /* + * see if two file systems are the same or not. 
+ */ + private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { + val srcUri = srcFs.getUri() + val dstUri = destFs.getUri() + if (srcUri.getScheme() == null) { + return false + } + if (!srcUri.getScheme().equals(dstUri.getScheme())) { + return false + } + var srcHost = srcUri.getHost() + var dstHost = dstUri.getHost() + if ((srcHost != null) && (dstHost != null)) { + try { + srcHost = InetAddress.getByName(srcHost).getCanonicalHostName(); + dstHost = InetAddress.getByName(dstHost).getCanonicalHostName(); + } catch { + case e: UnknownHostException => + return false + } + if (!srcHost.equals(dstHost)) { + return false + } + } else if (srcHost == null && dstHost != null) { + return false + } else if (srcHost != null && dstHost == null) { + return false + } + //check for ports + if (srcUri.getPort() != dstUri.getPort()) { + return false + } + return true; + } + /** - * Copy the local file into HDFS and configure to be distributed with the - * job via the distributed cache. - * If a fragment is specified the file will be referenced as that fragment. + * Copy the file into HDFS if needed. */ - private def copyLocalFile( + private def copyRemoteFile( dstDir: Path, - resourceType: LocalResourceType, originalPath: Path, replication: Short, - localResources: HashMap[String,LocalResource], - fragment: String, - appMasterOnly: Boolean = false): Unit = { + setPerms: Boolean = false): Path = { val fs = FileSystem.get(conf) - val newPath = new Path(dstDir, originalPath.getName()) - logInfo("Uploading " + originalPath + " to " + newPath) - fs.copyFromLocalFile(false, true, originalPath, newPath) - fs.setReplication(newPath, replication); - val destStatus = fs.getFileStatus(newPath) - - val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - amJarRsrc.setType(resourceType) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) - amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(newPath)) - amJarRsrc.setTimestamp(destStatus.getModificationTime()) - amJarRsrc.setSize(destStatus.getLen()) - var pathURI: URI = new URI(newPath.toString() + "#" + originalPath.getName()); - if ((fragment == null) || (fragment.isEmpty())){ - localResources(originalPath.getName()) = amJarRsrc - } else { - localResources(fragment) = amJarRsrc - pathURI = new URI(newPath.toString() + "#" + fragment); - } - val distPath = pathURI.toString() - if (appMasterOnly == true) return - if (resourceType == LocalResourceType.FILE) { - distFiles match { - case Some(path) => - distFilesFileSizes = Some(distFilesFileSizes.get + "," + - destStatus.getLen().toString()) - distFilesTimeStamps = Some(distFilesTimeStamps.get + "," + - destStatus.getModificationTime().toString()) - distFiles = Some(path + "," + distPath) - case _ => - distFilesFileSizes = Some(destStatus.getLen().toString()) - distFilesTimeStamps = Some(destStatus.getModificationTime().toString()) - distFiles = Some(distPath) - } - } else { - distArchives match { - case Some(path) => - distArchivesTimeStamps = Some(distArchivesTimeStamps.get + "," + - destStatus.getModificationTime().toString()) - distArchivesFileSizes = Some(distArchivesFileSizes.get + "," + - destStatus.getLen().toString()) - distArchives = Some(path + "," + distPath) - case _ => - distArchivesTimeStamps = Some(destStatus.getModificationTime().toString()) - distArchivesFileSizes = Some(destStatus.getLen().toString()) - distArchives = Some(distPath) - } - } + val remoteFs = originalPath.getFileSystem(conf); + var newPath = originalPath + if (! 
compareFs(remoteFs, fs)) { + newPath = new Path(dstDir, originalPath.getName()) + logInfo("Uploading " + originalPath + " to " + newPath) + FileUtil.copy(remoteFs, originalPath, fs, newPath, false, conf); + fs.setReplication(newPath, replication); + if (setPerms) fs.setPermission(newPath, new FsPermission(APP_FILE_PERMISSION)) + } + // resolve any symlinks in the URI path so using a "current" symlink + // to point to a specific version shows the specific version + // in the distributed cache configuration + val qualPath = fs.makeQualified(newPath) + val fc = FileContext.getFileContext(qualPath.toUri(), conf) + val destPath = fc.resolvePath(qualPath) + destPath } - def prepareLocalResources(appId: ApplicationId, sparkStagingDir: String): HashMap[String, LocalResource] = { + def prepareLocalResources(appStagingDir: String): HashMap[String, LocalResource] = { logInfo("Preparing Local resources") - // Upload Spark and the application JAR to the remote file system + // Upload Spark and the application JAR to the remote file system if necessary // Add them as local resources to the AM val fs = FileSystem.get(conf) @@ -193,9 +203,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl System.exit(1) } } - - val pathSuffix = sparkStagingDir + "/" + appId.toString() + "/" - val dst = new Path(fs.getHomeDirectory(), pathSuffix) + val dst = new Path(fs.getHomeDirectory(), appStagingDir) val replication = System.getProperty("spark.yarn.submit.file.replication", "3").toShort if (UserGroupInformation.isSecurityEnabled()) { @@ -203,55 +211,65 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl dstFs.addDelegationTokens(delegTokenRenewer, credentials); } val localResources = HashMap[String, LocalResource]() + FileSystem.mkdirs(fs, dst, new FsPermission(STAGING_DIR_PERMISSION)) + + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + + if (System.getenv("SPARK_JAR") == null || args.userJar == null) { + logError("Error: You must set SPARK_JAR environment variable and specify a user jar!") + System.exit(1) + } - Map("spark.jar" -> System.getenv("SPARK_JAR"), "app.jar" -> args.userJar, "log4j.properties" -> System.getenv("SPARK_LOG4J_CONF")) + Map(Client.SPARK_JAR -> System.getenv("SPARK_JAR"), Client.APP_JAR -> args.userJar, + Client.LOG4J_PROP -> System.getenv("SPARK_LOG4J_CONF")) .foreach { case(destName, _localPath) => val localPath: String = if (_localPath != null) _localPath.trim() else "" if (! 
localPath.isEmpty()) { - val src = new Path(localPath) - val newPath = new Path(dst, destName) - logInfo("Uploading " + src + " to " + newPath) - fs.copyFromLocalFile(false, true, src, newPath) - fs.setReplication(newPath, replication); - val destStatus = fs.getFileStatus(newPath) - - val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - amJarRsrc.setType(LocalResourceType.FILE) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) - amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(newPath)) - amJarRsrc.setTimestamp(destStatus.getModificationTime()) - amJarRsrc.setSize(destStatus.getLen()) - localResources(destName) = amJarRsrc + var localURI = new URI(localPath) + // if not specified assume these are in the local filesystem to keep behavior like Hadoop + if (localURI.getScheme() == null) { + localURI = new URI(FileSystem.getLocal(conf).makeQualified(new Path(localPath)).toString()) + } + val setPermissions = if (destName.equals(Client.APP_JAR)) true else false + val destPath = copyRemoteFile(dst, new Path(localURI), replication, setPermissions) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + destName, statCache) } } // handle any add jars if ((args.addJars != null) && (!args.addJars.isEmpty())){ args.addJars.split(',').foreach { case file: String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.FILE, tmp, replication, localResources, - tmpURI.getFragment(), true) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + linkname, statCache, true) } } // handle any distributed cache files if ((args.files != null) && (!args.files.isEmpty())){ args.files.split(',').foreach { case file: String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.FILE, tmp, replication, localResources, - tmpURI.getFragment()) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + linkname, statCache) } } // handle any distributed cache archives if ((args.archives != null) && (!args.archives.isEmpty())) { args.archives.split(',').foreach { case file:String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.ARCHIVE, tmp, replication, - localResources, tmpURI.getFragment()) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, + linkname, statCache) } } @@ -259,44 +277,21 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl return localResources } - def setupLaunchEnv(localResources: HashMap[String, LocalResource]): HashMap[String, String] = { + def setupLaunchEnv( + localResources: HashMap[String, LocalResource], + stagingDir: String): HashMap[String, String] = { logInfo("Setting up the launch environment") - val 
log4jConfLocalRes = localResources.getOrElse("log4j.properties", null) + val log4jConfLocalRes = localResources.getOrElse(Client.LOG4J_PROP, null) val env = new HashMap[String, String]() Client.populateClasspath(yarnConf, log4jConfLocalRes != null, env) env("SPARK_YARN_MODE") = "true" - env("SPARK_YARN_JAR_PATH") = - localResources("spark.jar").getResource().getScheme.toString() + "://" + - localResources("spark.jar").getResource().getFile().toString() - env("SPARK_YARN_JAR_TIMESTAMP") = localResources("spark.jar").getTimestamp().toString() - env("SPARK_YARN_JAR_SIZE") = localResources("spark.jar").getSize().toString() - - env("SPARK_YARN_USERJAR_PATH") = - localResources("app.jar").getResource().getScheme.toString() + "://" + - localResources("app.jar").getResource().getFile().toString() - env("SPARK_YARN_USERJAR_TIMESTAMP") = localResources("app.jar").getTimestamp().toString() - env("SPARK_YARN_USERJAR_SIZE") = localResources("app.jar").getSize().toString() - - if (log4jConfLocalRes != null) { - env("SPARK_YARN_LOG4J_PATH") = - log4jConfLocalRes.getResource().getScheme.toString() + "://" + log4jConfLocalRes.getResource().getFile().toString() - env("SPARK_YARN_LOG4J_TIMESTAMP") = log4jConfLocalRes.getTimestamp().toString() - env("SPARK_YARN_LOG4J_SIZE") = log4jConfLocalRes.getSize().toString() - } + env("SPARK_YARN_STAGING_DIR") = stagingDir // set the environment variables to be passed on to the Workers - if (distFiles != None) { - env("SPARK_YARN_CACHE_FILES") = distFiles.get - env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = distFilesTimeStamps.get - env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = distFilesFileSizes.get - } - if (distArchives != None) { - env("SPARK_YARN_CACHE_ARCHIVES") = distArchives.get - env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = distArchivesTimeStamps.get - env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") = distArchivesFileSizes.get - } + distCacheMgr.setDistFilesEnv(env) + distCacheMgr.setDistArchivesEnv(env) // allow users to specify some environment variables Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV")) @@ -365,6 +360,11 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl javaCommand = Environment.JAVA_HOME.$() + "/bin/java" } + if (args.userClass == null) { + logError("Error: You must specify a user class!") + System.exit(1) + } + val commands = List[String](javaCommand + " -server " + JAVA_OPTS + @@ -432,6 +432,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl } object Client { + val SPARK_JAR: String = "spark.jar" + val APP_JAR: String = "app.jar" + val LOG4J_PROP: String = "log4j.properties" + def main(argStrings: Array[String]) { // Set an env variable indicating we are running in YARN mode. 
// Note that anything with SPARK prefix gets propagated to all (remote) processes @@ -453,22 +457,22 @@ object Client { // If log4j present, ensure ours overrides all others if (addLog4j) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "log4j.properties") + Path.SEPARATOR + LOG4J_PROP) } // normally the users app.jar is last in case conflicts with spark jars val userClasspathFirst = System.getProperty("spark.yarn.user.classpath.first", "false") .toBoolean if (userClasspathFirst) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "app.jar") + Path.SEPARATOR + APP_JAR) } Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "spark.jar") + Path.SEPARATOR + SPARK_JAR) Client.populateHadoopClasspath(conf, env) if (!userClasspathFirst) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "app.jar") + Path.SEPARATOR + APP_JAR) } Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + Path.SEPARATOR + "*") diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala new file mode 100644 index 0000000000000..07686fefd7c06 --- /dev/null +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn + +import java.net.URI; + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.permission.FsAction +import org.apache.hadoop.yarn.api.records.LocalResource +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility +import org.apache.hadoop.yarn.api.records.LocalResourceType +import org.apache.hadoop.yarn.util.{Records, ConverterUtils} + +import org.apache.spark.Logging + +import scala.collection.mutable.HashMap +import scala.collection.mutable.LinkedHashMap +import scala.collection.mutable.Map + + +/** Client side methods to setup the Hadoop distributed cache */ +class ClientDistributedCacheManager() extends Logging { + private val distCacheFiles: Map[String, Tuple3[String, String, String]] = + LinkedHashMap[String, Tuple3[String, String, String]]() + private val distCacheArchives: Map[String, Tuple3[String, String, String]] = + LinkedHashMap[String, Tuple3[String, String, String]]() + + + /** + * Add a resource to the list of distributed cache resources. 
This list can + * be sent to the ApplicationMaster and possibly the workers so that it can + * be downloaded into the Hadoop distributed cache for use by this application. + * Adds the LocalResource to the localResources HashMap passed in and saves + * the stats of the resources to they can be sent to the workers and verified. + * + * @param fs FileSystem + * @param conf Configuration + * @param destPath path to the resource + * @param localResources localResource hashMap to insert the resource into + * @param resourceType LocalResourceType + * @param link link presented in the distributed cache to the destination + * @param statCache cache to store the file/directory stats + * @param appMasterOnly Whether to only add the resource to the app master + */ + def addResource( + fs: FileSystem, + conf: Configuration, + destPath: Path, + localResources: HashMap[String, LocalResource], + resourceType: LocalResourceType, + link: String, + statCache: Map[URI, FileStatus], + appMasterOnly: Boolean = false) = { + val destStatus = fs.getFileStatus(destPath) + val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] + amJarRsrc.setType(resourceType) + val visibility = getVisibility(conf, destPath.toUri(), statCache) + amJarRsrc.setVisibility(visibility) + amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(destPath)) + amJarRsrc.setTimestamp(destStatus.getModificationTime()) + amJarRsrc.setSize(destStatus.getLen()) + if (link == null || link.isEmpty()) throw new Exception("You must specify a valid link name") + localResources(link) = amJarRsrc + + if (appMasterOnly == false) { + val uri = destPath.toUri() + val pathURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, link) + if (resourceType == LocalResourceType.FILE) { + distCacheFiles(pathURI.toString()) = (destStatus.getLen().toString(), + destStatus.getModificationTime().toString(), visibility.name()) + } else { + distCacheArchives(pathURI.toString()) = (destStatus.getLen().toString(), + destStatus.getModificationTime().toString(), visibility.name()) + } + } + } + + /** + * Adds the necessary cache file env variables to the env passed in + * @param env + */ + def setDistFilesEnv(env: Map[String, String]) = { + val (keys, tupleValues) = distCacheFiles.unzip + val (sizes, timeStamps, visibilities) = tupleValues.unzip3 + + if (keys.size > 0) { + env("SPARK_YARN_CACHE_FILES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = + sizes.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + } + } + + /** + * Adds the necessary cache archive env variables to the env passed in + * @param env + */ + def setDistArchivesEnv(env: Map[String, String]) = { + val (keys, tupleValues) = distCacheArchives.unzip + val (sizes, timeStamps, visibilities) = tupleValues.unzip3 + + if (keys.size > 0) { + env("SPARK_YARN_CACHE_ARCHIVES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") = + sizes.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + } + } + + /** + * Returns the local resource 
visibility depending on the cache file permissions + * @param conf + * @param uri + * @param statCache + * @return LocalResourceVisibility + */ + def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): + LocalResourceVisibility = { + if (isPublic(conf, uri, statCache)) { + return LocalResourceVisibility.PUBLIC + } + return LocalResourceVisibility.PRIVATE + } + + /** + * Returns a boolean to denote whether a cache file is visible to all(public) + * or not + * @param conf + * @param uri + * @param statCache + * @return true if the path in the uri is visible to all, false otherwise + */ + def isPublic(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): Boolean = { + val fs = FileSystem.get(uri, conf) + val current = new Path(uri.getPath()) + //the leaf level file should be readable by others + if (!checkPermissionOfOther(fs, current, FsAction.READ, statCache)) { + return false + } + return ancestorsHaveExecutePermissions(fs, current.getParent(), statCache) + } + + /** + * Returns true if all ancestors of the specified path have the 'execute' + * permission set for all users (i.e. that other users can traverse + * the directory heirarchy to the given path) + * @param fs + * @param path + * @param statCache + * @return true if all ancestors have the 'execute' permission set for all users + */ + def ancestorsHaveExecutePermissions(fs: FileSystem, path: Path, + statCache: Map[URI, FileStatus]): Boolean = { + var current = path + while (current != null) { + //the subdirs in the path should have execute permissions for others + if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE, statCache)) { + return false + } + current = current.getParent() + } + return true + } + + /** + * Checks for a given path whether the Other permissions on it + * imply the permission in the passed FsAction + * @param fs + * @param path + * @param action + * @param statCache + * @return true if the path in the uri is visible to all, false otherwise + */ + def checkPermissionOfOther(fs: FileSystem, path: Path, + action: FsAction, statCache: Map[URI, FileStatus]): Boolean = { + val status = getFileStatus(fs, path.toUri(), statCache); + val perms = status.getPermission() + val otherAction = perms.getOtherAction() + if (otherAction.implies(action)) { + return true; + } + return false + } + + /** + * Checks to see if the given uri exists in the cache, if it does it + * returns the existing FileStatus, otherwise it stats the uri, stores + * it in the cache, and returns the FileStatus. 
+ * @param fs + * @param uri + * @param statCache + * @return FileStatus + */ + def getFileStatus(fs: FileSystem, uri: URI, statCache: Map[URI, FileStatus]): FileStatus = { + val stat = statCache.get(uri) match { + case Some(existstat) => existstat + case None => + val newStat = fs.getFileStatus(new Path(uri)) + statCache.put(uri, newStat) + newStat + } + return stat + } +} diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala index ba352daac485d..7a66532254c74 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala @@ -142,11 +142,12 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S rtype: LocalResourceType, localResources: HashMap[String, LocalResource], timestamp: String, - size: String) = { + size: String, + vis: String) = { val uri = new URI(file) val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] amJarRsrc.setType(rtype) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) + amJarRsrc.setVisibility(LocalResourceVisibility.valueOf(vis)) amJarRsrc.setResource(ConverterUtils.getYarnUrlFromURI(uri)) amJarRsrc.setTimestamp(timestamp.toLong) amJarRsrc.setSize(size.toLong) @@ -158,44 +159,14 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S logInfo("Preparing Local resources") val localResources = HashMap[String, LocalResource]() - // Spark JAR - val sparkJarResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - sparkJarResource.setType(LocalResourceType.FILE) - sparkJarResource.setVisibility(LocalResourceVisibility.APPLICATION) - sparkJarResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_JAR_PATH")))) - sparkJarResource.setTimestamp(System.getenv("SPARK_YARN_JAR_TIMESTAMP").toLong) - sparkJarResource.setSize(System.getenv("SPARK_YARN_JAR_SIZE").toLong) - localResources("spark.jar") = sparkJarResource - // User JAR - val userJarResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - userJarResource.setType(LocalResourceType.FILE) - userJarResource.setVisibility(LocalResourceVisibility.APPLICATION) - userJarResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_USERJAR_PATH")))) - userJarResource.setTimestamp(System.getenv("SPARK_YARN_USERJAR_TIMESTAMP").toLong) - userJarResource.setSize(System.getenv("SPARK_YARN_USERJAR_SIZE").toLong) - localResources("app.jar") = userJarResource - - // Log4j conf - if available - if (System.getenv("SPARK_YARN_LOG4J_PATH") != null) { - val log4jConfResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - log4jConfResource.setType(LocalResourceType.FILE) - log4jConfResource.setVisibility(LocalResourceVisibility.APPLICATION) - log4jConfResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_LOG4J_PATH")))) - log4jConfResource.setTimestamp(System.getenv("SPARK_YARN_LOG4J_TIMESTAMP").toLong) - log4jConfResource.setSize(System.getenv("SPARK_YARN_LOG4J_SIZE").toLong) - localResources("log4j.properties") = log4jConfResource - } - if (System.getenv("SPARK_YARN_CACHE_FILES") != null) { val timeStamps = System.getenv("SPARK_YARN_CACHE_FILES_TIME_STAMPS").split(',') val fileSizes = System.getenv("SPARK_YARN_CACHE_FILES_FILE_SIZES").split(',') val distFiles = 
System.getenv("SPARK_YARN_CACHE_FILES").split(',') + val visibilities = System.getenv("SPARK_YARN_CACHE_FILES_VISIBILITIES").split(',') for( i <- 0 to distFiles.length - 1) { setupDistributedCache(distFiles(i), LocalResourceType.FILE, localResources, timeStamps(i), - fileSizes(i)) + fileSizes(i), visibilities(i)) } } @@ -203,9 +174,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S val timeStamps = System.getenv("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS").split(',') val fileSizes = System.getenv("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES").split(',') val distArchives = System.getenv("SPARK_YARN_CACHE_ARCHIVES").split(',') + val visibilities = System.getenv("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES").split(',') for( i <- 0 to distArchives.length - 1) { setupDistributedCache(distArchives(i), LocalResourceType.ARCHIVE, localResources, - timeStamps(i), fileSizes(i)) + timeStamps(i), fileSizes(i), visibilities(i)) } } diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala new file mode 100644 index 0000000000000..c0a2af0c6faf3 --- /dev/null +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.yarn + +import java.net.URI; + +import org.scalatest.FunSuite +import org.scalatest.mock.MockitoSugar +import org.mockito.Mockito.when + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.permission.FsAction +import org.apache.hadoop.yarn.api.records.LocalResource +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility +import org.apache.hadoop.yarn.api.records.LocalResourceType +import org.apache.hadoop.yarn.util.{Records, ConverterUtils} + +import scala.collection.mutable.HashMap +import scala.collection.mutable.Map + + +class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { + + class MockClientDistributedCacheManager extends ClientDistributedCacheManager { + override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): + LocalResourceVisibility = { + return LocalResourceVisibility.PRIVATE + } + } + + test("test getFileStatus empty") { + val distMgr = new ClientDistributedCacheManager() + val fs = mock[FileSystem] + val uri = new URI("/tmp/testing") + when(fs.getFileStatus(new Path(uri))).thenReturn(new FileStatus()) + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val stat = distMgr.getFileStatus(fs, uri, statCache) + assert(stat.getPath() === null) + } + + test("test getFileStatus cached") { + val distMgr = new ClientDistributedCacheManager() + val fs = mock[FileSystem] + val uri = new URI("/tmp/testing") + val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(new Path(uri))).thenReturn(new FileStatus()) + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus](uri -> realFileStatus) + val stat = distMgr.getFileStatus(fs, uri, statCache) + assert(stat.getPath().toString() === "/tmp/testing") + } + + test("test addResource") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link", + statCache, false) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 0) + assert(resource.getSize() === 0) + assert(resource.getType() === LocalResourceType.FILE) + + val env = new HashMap[String, String]() + distMgr.setDistFilesEnv(env) + assert(env("SPARK_YARN_CACHE_FILES") === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === "0") + assert(env("SPARK_YARN_CACHE_FILES_FILE_SIZES") === "0") + assert(env("SPARK_YARN_CACHE_FILES_VISIBILITIES") === LocalResourceVisibility.PRIVATE.name()) + + distMgr.setDistArchivesEnv(env) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === None) + + //add another one and verify both there and order 
correct + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing2")) + val destPath2 = new Path("file:///foo.invalid.com:8080/tmp/testing2") + when(fs.getFileStatus(destPath2)).thenReturn(realFileStatus) + distMgr.addResource(fs, conf, destPath2, localResources, LocalResourceType.FILE, "link2", + statCache, false) + val resource2 = localResources("link2") + assert(resource2.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource2.getResource()) === destPath2) + assert(resource2.getTimestamp() === 10) + assert(resource2.getSize() === 20) + assert(resource2.getType() === LocalResourceType.FILE) + + val env2 = new HashMap[String, String]() + distMgr.setDistFilesEnv(env2) + val timestamps = env2("SPARK_YARN_CACHE_FILES_TIME_STAMPS").split(',') + val files = env2("SPARK_YARN_CACHE_FILES").split(',') + val sizes = env2("SPARK_YARN_CACHE_FILES_FILE_SIZES").split(',') + val visibilities = env2("SPARK_YARN_CACHE_FILES_VISIBILITIES") .split(',') + assert(files(0) === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(timestamps(0) === "0") + assert(sizes(0) === "0") + assert(visibilities(0) === LocalResourceVisibility.PRIVATE.name()) + + assert(files(1) === "file:/foo.invalid.com:8080/tmp/testing2#link2") + assert(timestamps(1) === "10") + assert(sizes(1) === "20") + assert(visibilities(1) === LocalResourceVisibility.PRIVATE.name()) + } + + test("test addResource link null") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) + + intercept[Exception] { + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, null, + statCache, false) + } + assert(localResources.get("link") === None) + assert(localResources.size === 0) + } + + test("test addResource appmaster only") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + statCache, true) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 10) + assert(resource.getSize() === 20) + assert(resource.getType() === LocalResourceType.ARCHIVE) + + val env = new HashMap[String, String]() + distMgr.setDistFilesEnv(env) + assert(env.get("SPARK_YARN_CACHE_FILES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_VISIBILITIES") === None) + + distMgr.setDistArchivesEnv(env) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") 
=== None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === None) + } + + test("test addResource archive") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + statCache, false) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 10) + assert(resource.getSize() === 20) + assert(resource.getType() === LocalResourceType.ARCHIVE) + + val env = new HashMap[String, String]() + + distMgr.setDistArchivesEnv(env) + assert(env("SPARK_YARN_CACHE_ARCHIVES") === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") === "10") + assert(env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === "20") + assert(env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === LocalResourceVisibility.PRIVATE.name()) + + distMgr.setDistFilesEnv(env) + assert(env.get("SPARK_YARN_CACHE_FILES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_VISIBILITIES") === None) + } + + +} From 8823057992b5bbad760ee594da6f44457b63991f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 13 Nov 2013 08:39:05 -0800 Subject: [PATCH 22/39] Merge pull request #166 from ahirreddy/simr-spark-ui SIMR Backend Scheduler will now write Spark UI URL to HDFS, which is to ... 
...be retrieved by SIMR clients (cherry picked from commit 39af914b273e35ff431844951ee8dfadcbc0c400) Signed-off-by: Reynold Xin --- .../scheduler/cluster/SimrSchedulerBackend.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala index d78bdbaa7a02b..6b9193540085e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala @@ -31,6 +31,10 @@ private[spark] class SimrSchedulerBackend( val tmpPath = new Path(driverFilePath + "_tmp") val filePath = new Path(driverFilePath) + val uiFilePath = driverFilePath + "_ui" + val tmpUiPath = new Path(uiFilePath + "_tmp") + val uiPath = new Path(uiFilePath) + val maxCores = System.getProperty("spark.simr.executor.cores", "1").toInt override def start() { @@ -45,6 +49,8 @@ private[spark] class SimrSchedulerBackend( logInfo("Writing to HDFS file: " + driverFilePath) logInfo("Writing Akka address: " + driverUrl) + logInfo("Writing to HDFS file: " + uiFilePath) + logInfo("Writing Spark UI Address: " + sc.ui.appUIAddress) // Create temporary file to prevent race condition where executors get empty driverUrl file val temp = fs.create(tmpPath, true) @@ -54,6 +60,12 @@ private[spark] class SimrSchedulerBackend( // "Atomic" rename fs.rename(tmpPath, filePath) + + // Write Spark UI Address to file + val uiTemp = fs.create(tmpUiPath, true) + uiTemp.writeUTF(sc.ui.appUIAddress) + uiTemp.close() + fs.rename(tmpUiPath, uiPath) } override def stop() { From 37126e8c9013fc0f060a31bc8ffcaa1b2cbc9999 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 14 Nov 2013 19:43:55 -0800 Subject: [PATCH 23/39] Merge pull request #174 from ahirreddy/master Write Spark UI url to driver file on HDFS This makes the SIMR code path simpler (cherry picked from commit ed25105fd9733acd631dab0993560ac66ffeae16) Signed-off-by: Reynold Xin --- .../scheduler/cluster/SimrSchedulerBackend.scala | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala index 6b9193540085e..0ea35e2b7a311 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala @@ -31,10 +31,6 @@ private[spark] class SimrSchedulerBackend( val tmpPath = new Path(driverFilePath + "_tmp") val filePath = new Path(driverFilePath) - val uiFilePath = driverFilePath + "_ui" - val tmpUiPath = new Path(uiFilePath + "_tmp") - val uiPath = new Path(uiFilePath) - val maxCores = System.getProperty("spark.simr.executor.cores", "1").toInt override def start() { @@ -49,23 +45,17 @@ private[spark] class SimrSchedulerBackend( logInfo("Writing to HDFS file: " + driverFilePath) logInfo("Writing Akka address: " + driverUrl) - logInfo("Writing to HDFS file: " + uiFilePath) logInfo("Writing Spark UI Address: " + sc.ui.appUIAddress) // Create temporary file to prevent race condition where executors get empty driverUrl file val temp = fs.create(tmpPath, true) temp.writeUTF(driverUrl) temp.writeInt(maxCores) + temp.writeUTF(sc.ui.appUIAddress) temp.close() // "Atomic" rename fs.rename(tmpPath, filePath) - - // Write Spark UI Address to file - val 
uiTemp = fs.create(tmpUiPath, true) - uiTemp.writeUTF(sc.ui.appUIAddress) - uiTemp.close() - fs.rename(tmpUiPath, uiPath) } override def stop() { From 4b2fd3f78903005ad9e8033f6647b2b6f1a4a957 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 21 Nov 2013 07:15:55 +0800 Subject: [PATCH 24/39] Merge branch 'master' of github.com:tbfenet/incubator-spark PartitionPruningRDD is using index from parent I was getting a ArrayIndexOutOfBoundsException exception after doing union on pruned RDD. The index it was using on the partition was the index in the original RDD not the new pruned RDD. (cherry picked from commit 2fead510f74b962b293de4d724136c24a9825271) Signed-off-by: Reynold Xin --- .../spark/rdd/PartitionPruningRDD.scala | 8 +- .../spark/PartitionPruningRDDSuite.scala | 45 ---------- .../spark/rdd/PartitionPruningRDDSuite.scala | 86 +++++++++++++++++++ 3 files changed, 91 insertions(+), 48 deletions(-) delete mode 100644 core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala create mode 100644 core/src/test/scala/org/apache/spark/rdd/PartitionPruningRDDSuite.scala diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala index 165cd412fcfb8..574dd4233fb27 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala @@ -33,11 +33,13 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo extends NarrowDependency[T](rdd) { @transient - val partitions: Array[Partition] = rdd.partitions.zipWithIndex - .filter(s => partitionFilterFunc(s._2)) + val partitions: Array[Partition] = rdd.partitions + .filter(s => partitionFilterFunc(s.index)).zipWithIndex .map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition } - override def getParents(partitionId: Int) = List(partitions(partitionId).index) + override def getParents(partitionId: Int) = { + List(partitions(partitionId).asInstanceOf[PartitionPruningRDDPartition].parentSplit.index) + } } diff --git a/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala b/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala deleted file mode 100644 index 21f16ef2c6ece..0000000000000 --- a/core/src/test/scala/org/apache/spark/PartitionPruningRDDSuite.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark - -import org.scalatest.FunSuite -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd.{RDD, PartitionPruningRDD} - - -class PartitionPruningRDDSuite extends FunSuite with SharedSparkContext { - - test("Pruned Partitions inherit locality prefs correctly") { - class TestPartition(i: Int) extends Partition { - def index = i - } - val rdd = new RDD[Int](sc, Nil) { - override protected def getPartitions = { - Array[Partition]( - new TestPartition(1), - new TestPartition(2), - new TestPartition(3)) - } - def compute(split: Partition, context: TaskContext) = {Iterator()} - } - val prunedRDD = PartitionPruningRDD.create(rdd, {x => if (x==2) true else false}) - val p = prunedRDD.partitions(0) - assert(p.index == 2) - assert(prunedRDD.partitions.length == 1) - } -} diff --git a/core/src/test/scala/org/apache/spark/rdd/PartitionPruningRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PartitionPruningRDDSuite.scala new file mode 100644 index 0000000000000..53a7b7c44df1c --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rdd/PartitionPruningRDDSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.rdd + +import org.scalatest.FunSuite +import org.apache.spark.{TaskContext, Partition, SharedSparkContext} + + +class PartitionPruningRDDSuite extends FunSuite with SharedSparkContext { + + + test("Pruned Partitions inherit locality prefs correctly") { + + val rdd = new RDD[Int](sc, Nil) { + override protected def getPartitions = { + Array[Partition]( + new TestPartition(0, 1), + new TestPartition(1, 1), + new TestPartition(2, 1)) + } + + def compute(split: Partition, context: TaskContext) = { + Iterator() + } + } + val prunedRDD = PartitionPruningRDD.create(rdd, { + x => if (x == 2) true else false + }) + assert(prunedRDD.partitions.length == 1) + val p = prunedRDD.partitions(0) + assert(p.index == 0) + assert(p.asInstanceOf[PartitionPruningRDDPartition].parentSplit.index == 2) + } + + + test("Pruned Partitions can be unioned ") { + + val rdd = new RDD[Int](sc, Nil) { + override protected def getPartitions = { + Array[Partition]( + new TestPartition(0, 4), + new TestPartition(1, 5), + new TestPartition(2, 6)) + } + + def compute(split: Partition, context: TaskContext) = { + List(split.asInstanceOf[TestPartition].testValue).iterator + } + } + val prunedRDD1 = PartitionPruningRDD.create(rdd, { + x => if (x == 0) true else false + }) + + val prunedRDD2 = PartitionPruningRDD.create(rdd, { + x => if (x == 2) true else false + }) + + val merged = prunedRDD1 ++ prunedRDD2 + assert(merged.count() == 2) + val take = merged.take(2) + assert(take.apply(0) == 4) + assert(take.apply(1) == 6) + } +} + +class TestPartition(i: Int, value: Int) extends Partition with Serializable { + def index = i + + def testValue = this.value + +} From f475e94207896a3acf7d7688e0fb372367366983 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 22 Nov 2013 10:12:13 +0800 Subject: [PATCH 25/39] Merge pull request #196 from pwendell/master TimeTrackingOutputStream should pass on calls to close() and flush(). Without this fix you get a huge number of open files when running shuffles. (cherry picked from commit f20093c3afa68439b1c9010de189d497df787c2a) Signed-off-by: Reynold Xin --- .../main/scala/org/apache/spark/storage/BlockObjectWriter.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 469e68fed74bb..b4451fc7b8e56 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -93,6 +93,8 @@ class DiskBlockObjectWriter( def write(i: Int): Unit = callWithTiming(out.write(i)) override def write(b: Array[Byte]) = callWithTiming(out.write(b)) override def write(b: Array[Byte], off: Int, len: Int) = callWithTiming(out.write(b, off, len)) + override def close() = out.close() + override def flush() = out.flush() } private val syncWrites = System.getProperty("spark.shuffle.sync", "false").toBoolean From 5d79d76ec1f01fc2b867905cfea5e8614ec4b220 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 22 Nov 2013 10:26:39 +0800 Subject: [PATCH 26/39] Merge pull request #193 from aoiwelle/patch-1 Fix Kryo Serializer buffer documentation inconsistency The documentation here is inconsistent with the coded default and other documentation. 
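As a rough, illustrative sketch only (not part of this patch; the app name and the value 64 are placeholders), the property described in docs/tuning.md can be set programmatically before the SparkContext is created:

    import org.apache.spark.SparkContext

    // Hypothetical setup: enable Kryo and raise the serializer buffer above
    // the 2 MB default so the largest serialized object still fits.
    System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    System.setProperty("spark.kryoserializer.buffer.mb", "64")
    val sc = new SparkContext("local", "KryoBufferExample")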
(cherry picked from commit 086b097e33a2ce622ec6352819bccc92106f43b7) Signed-off-by: Reynold Xin --- docs/tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tuning.md b/docs/tuning.md index f491ae9b95c08..f33fda37ebaba 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -67,7 +67,7 @@ The [Kryo documentation](http://code.google.com/p/kryo/) describes more advanced registration options, such as adding custom serialization code. If your objects are large, you may also need to increase the `spark.kryoserializer.buffer.mb` -system property. The default is 32, but this value needs to be large enough to hold the *largest* +system property. The default is 2, but this value needs to be large enough to hold the *largest* object you will serialize. Finally, if you don't register your classes, Kryo will still work, but it will have to store the From fb83780e239c3ea446521ea2e925155059c0eea1 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 24 Nov 2013 11:02:02 +0800 Subject: [PATCH 27/39] Merge pull request #200 from mateiz/hash-fix AppendOnlyMap fixes - Chose a more random reshuffling step for values returned by Object.hashCode to avoid some long chaining that was happening for consecutive integers (e.g. `sc.makeRDD(1 to 100000000, 100).map(t => (t, t)).reduceByKey(_ + _).count`) - Some other small optimizations throughout (see commit comments) (cherry picked from commit 718cc803f7e0600c9ab265022eb6027926a38010) Signed-off-by: Reynold Xin --- .../org/apache/spark/util/AppendOnlyMap.scala | 93 ++++++++++--------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala index f60deafc6f323..8bb4ee3bfa22e 100644 --- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala @@ -35,6 +35,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi private var capacity = nextPowerOf2(initialCapacity) private var mask = capacity - 1 private var curSize = 0 + private var growThreshold = LOAD_FACTOR * capacity // Holds keys and values in the same array for memory locality; specifically, the order of // elements is key0, value0, key1, value1, key2, value2, etc. 
@@ -56,7 +57,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { return data(2 * pos + 1).asInstanceOf[V] } else if (curKey.eq(null)) { return null.asInstanceOf[V] @@ -80,9 +81,23 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi haveNullValue = true return } - val isNewEntry = putInto(data, k, value.asInstanceOf[AnyRef]) - if (isNewEntry) { - incrementSize() + var pos = rehash(key.hashCode) & mask + var i = 1 + while (true) { + val curKey = data(2 * pos) + if (curKey.eq(null)) { + data(2 * pos) = k + data(2 * pos + 1) = value.asInstanceOf[AnyRef] + incrementSize() // Since we added a new key + return + } else if (k.eq(curKey) || k.equals(curKey)) { + data(2 * pos + 1) = value.asInstanceOf[AnyRef] + return + } else { + val delta = i + pos = (pos + delta) & mask + i += 1 + } } } @@ -104,7 +119,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] return newValue @@ -161,45 +176,17 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi /** Increase table size by 1, rehashing if necessary */ private def incrementSize() { curSize += 1 - if (curSize > LOAD_FACTOR * capacity) { + if (curSize > growThreshold) { growTable() } } /** - * Re-hash a value to deal better with hash functions that don't differ - * in the lower bits, similar to java.util.HashMap + * Re-hash a value to deal better with hash functions that don't differ in the lower bits. + * We use the Murmur Hash 3 finalization step that's also used in fastutil. */ private def rehash(h: Int): Int = { - val r = h ^ (h >>> 20) ^ (h >>> 12) - r ^ (r >>> 7) ^ (r >>> 4) - } - - /** - * Put an entry into a table represented by data, returning true if - * this increases the size of the table or false otherwise. Assumes - * that "data" has at least one empty slot. - */ - private def putInto(data: Array[AnyRef], key: AnyRef, value: AnyRef): Boolean = { - val mask = (data.length / 2) - 1 - var pos = rehash(key.hashCode) & mask - var i = 1 - while (true) { - val curKey = data(2 * pos) - if (curKey.eq(null)) { - data(2 * pos) = key - data(2 * pos + 1) = value.asInstanceOf[AnyRef] - return true - } else if (curKey.eq(key) || curKey == key) { - data(2 * pos + 1) = value.asInstanceOf[AnyRef] - return false - } else { - val delta = i - pos = (pos + delta) & mask - i += 1 - } - } - return false // Never reached but needed to keep compiler happy + it.unimi.dsi.fastutil.HashCommon.murmurHash3(h) } /** Double the table's size and re-hash everything */ @@ -211,16 +198,36 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi throw new Exception("Can't make capacity bigger than 2^29 elements") } val newData = new Array[AnyRef](2 * newCapacity) - var pos = 0 - while (pos < capacity) { - if (!data(2 * pos).eq(null)) { - putInto(newData, data(2 * pos), data(2 * pos + 1)) + val newMask = newCapacity - 1 + // Insert all our old values into the new array. Note that because our old keys are + // unique, there's no need to check for equality here when we insert. 
+ var oldPos = 0 + while (oldPos < capacity) { + if (!data(2 * oldPos).eq(null)) { + val key = data(2 * oldPos) + val value = data(2 * oldPos + 1) + var newPos = rehash(key.hashCode) & newMask + var i = 1 + var keepGoing = true + while (keepGoing) { + val curKey = newData(2 * newPos) + if (curKey.eq(null)) { + newData(2 * newPos) = key + newData(2 * newPos + 1) = value + keepGoing = false + } else { + val delta = i + newPos = (newPos + delta) & newMask + i += 1 + } + } } - pos += 1 + oldPos += 1 } data = newData capacity = newCapacity - mask = newCapacity - 1 + mask = newMask + growThreshold = LOAD_FACTOR * newCapacity } private def nextPowerOf2(n: Int): Int = { From 75819ea73d15fa8e644b2455f1f9db31035b9089 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 25 Nov 2013 07:50:46 +0800 Subject: [PATCH 28/39] Merge pull request #197 from aarondav/patrick-fix Fix 'timeWriting' stat for shuffle files Due to concurrent git branches, changes from shuffle file consolidation patch caused the shuffle write timing patch to no longer actually measure the time, since it requires time be measured after the stream has been closed. (cherry picked from commit 972171b9d93b07e8511a2da3a33f897ba033484b) Signed-off-by: Reynold Xin --- .../org/apache/spark/scheduler/ShuffleMapTask.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 1dc71a04282e5..0f2deb4bcbbb2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -167,6 +167,7 @@ private[spark] class ShuffleMapTask( var totalTime = 0L val compressedSizes: Array[Byte] = shuffle.writers.map { writer: BlockObjectWriter => writer.commit() + writer.close() val size = writer.fileSegment().length totalBytes += size totalTime += writer.timeWriting() @@ -184,14 +185,16 @@ private[spark] class ShuffleMapTask( } catch { case e: Exception => // If there is an exception from running the task, revert the partial writes // and throw the exception upstream to Spark. - if (shuffle != null) { - shuffle.writers.foreach(_.revertPartialWrites()) + if (shuffle != null && shuffle.writers != null) { + for (writer <- shuffle.writers) { + writer.revertPartialWrites() + writer.close() + } } throw e } finally { // Release the writers back to the shuffle block manager. if (shuffle != null && shuffle.writers != null) { - shuffle.writers.foreach(_.close()) shuffle.releaseWriters(success) } // Execute the callbacks on task completion. From 3483f716d16a737801106eea50e05833507e8ef7 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 25 Nov 2013 18:50:18 -0800 Subject: [PATCH 29/39] Merge pull request #201 from rxin/mappartitions Use the proper partition index in mapPartitionsWIthIndex mapPartitionsWithIndex uses TaskContext.partitionId as the partition index. TaskContext.partitionId used to be identical to the partition index in a RDD. However, pull request #186 introduced a scenario (with partition pruning) that the two can be different. This pull request uses the right partition index in all mapPartitionsWithIndex related calls. Also removed the extra MapPartitionsWIthContextRDD and put all the mapPartitions related functionality in MapPartitionsRDD. 
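A minimal sketch (not part of the patch; names are made up, and an existing SparkContext `sc` is assumed) of the user-facing call whose index semantics this change corrects — the closure's first argument is the partition's index within this RDD, which after partition pruning is not necessarily equal to TaskContext.partitionId:

    // Tag each element with the index of the partition it came from.
    val rdd = sc.parallelize(1 to 100, 4)
    val tagged = rdd.mapPartitionsWithIndex { (index, iter) =>
      iter.map(value => (index, value))
    }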
(cherry picked from commit 14bb465bb3d65f5b1034ada85cfcad7460034073) Signed-off-by: Reynold Xin --- .../apache/spark/rdd/MapPartitionsRDD.scala | 10 ++--- .../rdd/MapPartitionsWithContextRDD.scala | 41 ------------------- .../main/scala/org/apache/spark/rdd/RDD.scala | 39 ++++++++---------- .../org/apache/spark/CheckpointSuite.scala | 2 - 4 files changed, 22 insertions(+), 70 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithContextRDD.scala diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index 203179c4ea823..ae70d559511c9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -20,18 +20,16 @@ package org.apache.spark.rdd import org.apache.spark.{Partition, TaskContext} -private[spark] -class MapPartitionsRDD[U: ClassManifest, T: ClassManifest]( +private[spark] class MapPartitionsRDD[U: ClassManifest, T: ClassManifest]( prev: RDD[T], - f: Iterator[T] => Iterator[U], + f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator) preservesPartitioning: Boolean = false) extends RDD[U](prev) { - override val partitioner = - if (preservesPartitioning) firstParent[T].partitioner else None + override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None override def getPartitions: Array[Partition] = firstParent[T].partitions override def compute(split: Partition, context: TaskContext) = - f(firstParent[T].iterator(split, context)) + f(context, split.index, firstParent[T].iterator(split, context)) } diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithContextRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithContextRDD.scala deleted file mode 100644 index aea08ff81bfdb..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithContextRDD.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.rdd - -import org.apache.spark.{Partition, TaskContext} - - -/** - * A variant of the MapPartitionsRDD that passes the TaskContext into the closure. From the - * TaskContext, the closure can either get access to the interruptible flag or get the index - * of the partition in the RDD. 
- */ -private[spark] -class MapPartitionsWithContextRDD[U: ClassManifest, T: ClassManifest]( - prev: RDD[T], - f: (TaskContext, Iterator[T]) => Iterator[U], - preservesPartitioning: Boolean - ) extends RDD[U](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override val partitioner = if (preservesPartitioning) prev.partitioner else None - - override def compute(split: Partition, context: TaskContext) = - f(context, firstParent[T].iterator(split, context)) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 6e88be6f6ac64..852c131776b83 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -408,7 +408,6 @@ abstract class RDD[T: ClassManifest]( def pipe(command: String, env: Map[String, String]): RDD[String] = new PipedRDD(this, command, env) - /** * Return an RDD created by piping elements to a forked external process. * The print behavior can be customized by providing two functions. @@ -442,7 +441,8 @@ abstract class RDD[T: ClassManifest]( */ def mapPartitions[U: ClassManifest]( f: Iterator[T] => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { - new MapPartitionsRDD(this, sc.clean(f), preservesPartitioning) + val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(iter) + new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) } /** @@ -451,8 +451,8 @@ abstract class RDD[T: ClassManifest]( */ def mapPartitionsWithIndex[U: ClassManifest]( f: (Int, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { - val func = (context: TaskContext, iter: Iterator[T]) => f(context.partitionId, iter) - new MapPartitionsWithContextRDD(this, sc.clean(func), preservesPartitioning) + val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter) + new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) } /** @@ -462,7 +462,8 @@ abstract class RDD[T: ClassManifest]( def mapPartitionsWithContext[U: ClassManifest]( f: (TaskContext, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { - new MapPartitionsWithContextRDD(this, sc.clean(f), preservesPartitioning) + val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(context, iter) + new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) } /** @@ -483,11 +484,10 @@ abstract class RDD[T: ClassManifest]( def mapWith[A: ClassManifest, U: ClassManifest] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => U): RDD[U] = { - def iterF(context: TaskContext, iter: Iterator[T]): Iterator[U] = { - val a = constructA(context.partitionId) + mapPartitionsWithIndex((index, iter) => { + val a = constructA(index) iter.map(t => f(t, a)) - } - new MapPartitionsWithContextRDD(this, sc.clean(iterF _), preservesPartitioning) + }, preservesPartitioning) } /** @@ -498,11 +498,10 @@ abstract class RDD[T: ClassManifest]( def flatMapWith[A: ClassManifest, U: ClassManifest] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => Seq[U]): RDD[U] = { - def iterF(context: TaskContext, iter: Iterator[T]): Iterator[U] = { - val a = constructA(context.partitionId) + mapPartitionsWithIndex((index, iter) => { + val a = constructA(index) iter.flatMap(t => f(t, a)) - } - new MapPartitionsWithContextRDD(this, sc.clean(iterF _), preservesPartitioning) + }, preservesPartitioning) } /** @@ -511,11 +510,10 @@ abstract class RDD[T: 
ClassManifest]( * partition with the index of that partition. */ def foreachWith[A: ClassManifest](constructA: Int => A)(f: (T, A) => Unit) { - def iterF(context: TaskContext, iter: Iterator[T]): Iterator[T] = { - val a = constructA(context.partitionId) + mapPartitionsWithIndex { (index, iter) => + val a = constructA(index) iter.map(t => {f(t, a); t}) - } - new MapPartitionsWithContextRDD(this, sc.clean(iterF _), true).foreach(_ => {}) + }.foreach(_ => {}) } /** @@ -524,11 +522,10 @@ abstract class RDD[T: ClassManifest]( * partition with the index of that partition. */ def filterWith[A: ClassManifest](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = { - def iterF(context: TaskContext, iter: Iterator[T]): Iterator[T] = { - val a = constructA(context.partitionId) + mapPartitionsWithIndex((index, iter) => { + val a = constructA(index) iter.filter(t => p(t, a)) - } - new MapPartitionsWithContextRDD(this, sc.clean(iterF _), true) + }, preservesPartitioning = true) } /** diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index f26c44d3e76ff..d2226aa5a5663 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -62,8 +62,6 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { testCheckpointing(_.sample(false, 0.5, 0)) testCheckpointing(_.glom()) testCheckpointing(_.mapPartitions(_.map(_.toString))) - testCheckpointing(r => new MapPartitionsWithContextRDD(r, - (context: TaskContext, iter: Iterator[Int]) => iter.map(_.toString), false )) testCheckpointing(_.map(x => (x % 2, 1)).reduceByKey(_ + _).mapValues(_.toString)) testCheckpointing(_.map(x => (x % 2, 1)).reduceByKey(_ + _).flatMapValues(x => 1 to x)) testCheckpointing(_.pipe(Seq("cat"))) From 83c7f265ea3f58845ca5821239e480f7802829fa Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 1 Dec 2013 12:46:58 -0800 Subject: [PATCH 30/39] Merge pull request #219 from sundeepn/schedulerexception Scheduler quits when newStage fails The current scheduler thread does not handle exceptions from newStage stage while launching new jobs. The thread fails on any exception that gets triggered at that level, leaving the cluster hanging with no schduler. (cherry picked from commit 740922f25d5f81617fbe02c7bcd1610d6426bbef) Signed-off-by: Reynold Xin --- .../org/apache/spark/scheduler/DAGScheduler.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 7d386ad29a5a1..ec19daf89bf1c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -372,7 +372,17 @@ class DAGScheduler( private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { event match { case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) => - val finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite)) + var finalStage: Stage = null + try { + // New stage creation at times and if its not protected, the scheduler thread is killed. + // e.g. 
it can fail when jobs are run on HadoopRDD whose underlying hdfs files have been deleted + finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite)) + } catch { + case e: Exception => + logWarning("Creating new stage failed due to exception - job: " + jobId, e) + listener.jobFailed(e) + return false + } val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties) clearCacheLocs() logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length + From 4701f489269cf6e020a327ff5e88d0beef4985ed Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 19 Nov 2013 16:11:31 -0800 Subject: [PATCH 31/39] Merge pull request #181 from BlackNiuza/fix_tasks_number correct number of tasks in ExecutorsUI Index `a` is not `execId` here (cherry picked from commit f568912f85f58ae152db90f199c1f3a002f270c1) Signed-off-by: Reynold Xin --- .../apache/spark/ui/exec/ExecutorsUI.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala index 42e9be6e19254..e596690bc3df8 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala @@ -76,7 +76,7 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { } - val execInfo = for (b <- 0 until storageStatusList.size) yield getExecInfo(b) + val execInfo = for (statusId <- 0 until storageStatusList.size) yield getExecInfo(statusId) val execTable = UIUtils.listingTable(execHead, execRow, execInfo) val content = @@ -99,16 +99,17 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { UIUtils.headerSparkPage(content, sc, "Executors (" + execInfo.size + ")", Executors) } - def getExecInfo(a: Int): Seq[String] = { - val execId = sc.getExecutorStorageStatus(a).blockManagerId.executorId - val hostPort = sc.getExecutorStorageStatus(a).blockManagerId.hostPort - val rddBlocks = sc.getExecutorStorageStatus(a).blocks.size.toString - val memUsed = sc.getExecutorStorageStatus(a).memUsed().toString - val maxMem = sc.getExecutorStorageStatus(a).maxMem.toString - val diskUsed = sc.getExecutorStorageStatus(a).diskUsed().toString - val activeTasks = listener.executorToTasksActive.get(a.toString).map(l => l.size).getOrElse(0) - val failedTasks = listener.executorToTasksFailed.getOrElse(a.toString, 0) - val completedTasks = listener.executorToTasksComplete.getOrElse(a.toString, 0) + def getExecInfo(statusId: Int): Seq[String] = { + val status = sc.getExecutorStorageStatus(statusId) + val execId = status.blockManagerId.executorId + val hostPort = status.blockManagerId.hostPort + val rddBlocks = status.blocks.size.toString + val memUsed = status.memUsed().toString + val maxMem = status.maxMem.toString + val diskUsed = status.diskUsed().toString + val activeTasks = listener.executorToTasksActive.getOrElse(execId, HashSet.empty[Long]).size + val failedTasks = listener.executorToTasksFailed.getOrElse(execId, 0) + val completedTasks = listener.executorToTasksComplete.getOrElse(execId, 0) val totalTasks = activeTasks + failedTasks + completedTasks Seq( From 4019ba7cfc759ed08af44997e8919b4f7e174c45 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 3 Dec 2013 14:21:40 -0800 Subject: [PATCH 32/39] Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error Fix UnicodeEncodeError in PySpark saveAsTextFile() (SPARK-970) This fixes 
[SPARK-970](https://spark-project.atlassian.net/browse/SPARK-970), an issue where PySpark's saveAsTextFile() could throw UnicodeEncodeError when called on an RDD of Unicode strings. Please merge this into master and branch-0.8. (cherry picked from commit 8a3475aed66617772f4e98e9f774b109756eb391) Signed-off-by: Reynold Xin --- python/pyspark/rdd.py | 5 ++++- python/pyspark/tests.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 7019fb8beefc8..0c599e0c5f1a1 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -598,7 +598,10 @@ def saveAsTextFile(self, path): '0\\n1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n' """ def func(split, iterator): - return (str(x).encode("utf-8") for x in iterator) + for x in iterator: + if not isinstance(x, basestring): + x = unicode(x) + yield x.encode("utf-8") keyed = PipelinedRDD(self, func) keyed._bypass_serializer = True keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 29d6a128f6a9b..d3f6c2bcfbb34 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -19,6 +19,8 @@ Unit tests for PySpark; additional tests are implemented as doctests in individual modules. """ +from fileinput import input +from glob import glob import os import shutil import sys @@ -137,6 +139,19 @@ def func(): self.assertEqual("Hello World from inside a package!", UserClass().hello()) +class TestRDDFunctions(PySparkTestCase): + + def test_save_as_textfile_with_unicode(self): + # Regression test for SPARK-970 + x = u"\u00A1Hola, mundo!" + data = self.sc.parallelize([x]) + tempFile = NamedTemporaryFile(delete=True) + tempFile.close() + data.saveAsTextFile(tempFile.name) + raw_contents = ''.join(input(glob(tempFile.name + "/part-0000*"))) + self.assertEqual(x, unicode(raw_contents.strip(), "utf-8")) + + class TestIO(PySparkTestCase): def test_stdout_redirection(self): From e29bcd77082c30333b86b852e9a5530eddae5dce Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Mon, 21 Oct 2013 20:33:29 -0700 Subject: [PATCH 33/39] Merge pull request #95 from aarondav/perftest Minor: Put StoragePerfTester in org/apache/ (cherry picked from commit a51359c917a9ebe379b32ebc53fd093c454ea195) Signed-off-by: Reynold Xin --- .../scala/{ => org/apache}/spark/storage/StoragePerfTester.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/main/scala/{ => org/apache}/spark/storage/StoragePerfTester.scala (100%) diff --git a/core/src/main/scala/spark/storage/StoragePerfTester.scala b/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala similarity index 100% rename from core/src/main/scala/spark/storage/StoragePerfTester.scala rename to core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala From f05fd17f9ca37542c9bff54ec2d1cdc9dd1c8c47 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 4 Dec 2013 10:28:50 -0800 Subject: [PATCH 34/39] Merge pull request #223 from rxin/transient Mark partitioner, name, and generator field in RDD as @transient. As part of the effort to reduce serialized task size. 
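For illustration only (this class is hypothetical and not taken from the patch): a field marked @transient is skipped by Java serialization and comes back as null on the receiving side, which is what keeps it out of the serialized task bytes:

    // Hypothetical example: `debugName` is not shipped with the task;
    // only `data` is serialized.
    class TaskPayload(@transient val debugName: String, val data: Array[Int])
      extends Serializable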
(cherry picked from commit d6e5473872f405a6f4e466705e33cf893af915c1) Signed-off-by: Patrick Wendell --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 852c131776b83..717247ba2fc4d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -101,7 +101,7 @@ abstract class RDD[T: ClassManifest]( protected def getPreferredLocations(split: Partition): Seq[String] = Nil /** Optionally overridden by subclasses to specify how they are partitioned. */ - val partitioner: Option[Partitioner] = None + @transient val partitioner: Option[Partitioner] = None // ======================================================================= // Methods and fields available on all RDDs @@ -114,7 +114,7 @@ abstract class RDD[T: ClassManifest]( val id: Int = sc.newRddId() /** A friendly name for this RDD */ - var name: String = null + @transient var name: String = null /** Assign a name to this RDD */ def setName(_name: String) = { @@ -123,7 +123,7 @@ abstract class RDD[T: ClassManifest]( } /** User-defined generator of this RDD*/ - var generator = Utils.getCallSiteInfo.firstUserClass + @transient var generator = Utils.getCallSiteInfo.firstUserClass /** Reset generator*/ def setGenerator(_generator: String) = { @@ -925,7 +925,7 @@ abstract class RDD[T: ClassManifest]( private var storageLevel: StorageLevel = StorageLevel.NONE /** Record user function generating this RDD. */ - private[spark] val origin = Utils.formatSparkCallSite + @transient private[spark] val origin = Utils.formatSparkCallSite private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T] @@ -940,7 +940,7 @@ abstract class RDD[T: ClassManifest]( def context = sc // Avoid handling doCheckpoint multiple times to prevent excessive recursion - private var doCheckpointCalled = false + @transient private var doCheckpointCalled = false /** * Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler From 06c8c46a37536ad3c03ffb4a46870c3304a1802d Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 4 Dec 2013 15:52:07 -0800 Subject: [PATCH 35/39] Merge pull request #227 from pwendell/master Fix small bug in web UI and minor clean-up. There was a bug where sorting order didn't work correctly for write time metrics. I also cleaned up some earlier code that fixed the same issue for read and write bytes. 
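A sketch of the pattern the fix applies (illustrative only; the val names mirror those introduced in the diff below, and the values are placeholders): keep a raw numeric key for the sorttable-based table sorter and a separately formatted string for display:

    // Sort on the raw milliseconds value, display the formatted duration.
    val writeTimeMs = 1234L
    val writeTimeSortable = writeTimeMs.toString
    val writeTimeReadable = if (writeTimeMs == 0) "" else writeTimeMs + " ms"
    val cell = <td sorttable_customkey={writeTimeSortable}>{writeTimeReadable}</td>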
(cherry picked from commit 182f9baeed8e4cc62ca14ae04413394477a7ccfb) Signed-off-by: Patrick Wendell --- .../org/apache/spark/ui/jobs/StagePage.scala | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index c1c7aa70e6c92..c8625716e9d8e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -152,21 +152,18 @@ private[spark] class StagePage(parent: JobProgressUI) { else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("") val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L) - var shuffleReadSortable: String = "" - var shuffleReadReadable: String = "" - if (shuffleRead) { - shuffleReadSortable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}.toString() - shuffleReadReadable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => - Utils.bytesToString(s.remoteBytesRead)}.getOrElse("") - } + val maybeShuffleRead = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead} + val shuffleReadSortable = maybeShuffleRead.map(_.toString).getOrElse("") + val shuffleReadReadable = maybeShuffleRead.map{Utils.bytesToString(_)}.getOrElse("") - var shuffleWriteSortable: String = "" - var shuffleWriteReadable: String = "" - if (shuffleWrite) { - shuffleWriteSortable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}.toString() - shuffleWriteReadable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("") - } + val maybeShuffleWrite = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten} + val shuffleWriteSortable = maybeShuffleWrite.map(_.toString).getOrElse("") + val shuffleWriteReadable = maybeShuffleWrite.map{Utils.bytesToString(_)}.getOrElse("") + + val maybeWriteTime = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleWriteTime} + val writeTimeSortable = maybeWriteTime.map(_.toString).getOrElse("") + val writeTimeReadable = maybeWriteTime.map{ t => t / (1000 * 1000)}.map{ ms => + if (ms == 0) "" else parent.formatDuration(ms)}.getOrElse("") @@ -187,8 +184,8 @@ private[spark] class StagePage(parent: JobProgressUI) { }} {if (shuffleWrite) { - - + + + + + + + + + + + + + + + + + + + + + + + + + +
{info.index} {info.taskId}{metrics.flatMap{m => m.shuffleReadMetrics}.map{s => - Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")} + {shuffleReadReadable} + {metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")}{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")}{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => + parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")} + + {shuffleWriteReadable} + {exception.map(e => diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index d7d0441c388fa..9ad6de3c6d8de 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -79,11 +79,14 @@ private[spark] class StageTable(val stages: Seq[StageInfo], val parent: JobProgr case None => "Unknown" } - val shuffleRead = listener.stageIdToShuffleRead.getOrElse(s.stageId, 0L) match { + val shuffleReadSortable = listener.stageIdToShuffleRead.getOrElse(s.stageId, 0L) + val shuffleRead = shuffleReadSortable match { case 0 => "" case b => Utils.bytesToString(b) } - val shuffleWrite = listener.stageIdToShuffleWrite.getOrElse(s.stageId, 0L) match { + + val shuffleWriteSortable = listener.stageIdToShuffleWrite.getOrElse(s.stageId, 0L) + val shuffleWrite = shuffleWriteSortable match { case 0 => "" case b => Utils.bytesToString(b) } @@ -119,8 +122,8 @@ private[spark] class StageTable(val stages: Seq[StageInfo], val parent: JobProgr {makeProgressBar(startedTasks, completedTasks, failedTasks, totalTasks)} {shuffleRead}{shuffleWrite}{shuffleRead}{shuffleWrite}
{info.index}{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")} + + {writeTimeReadable} {shuffleWriteReadable} From 73206fd1e95987468e2f0beaac97fea5c361be19 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 4 Dec 2013 16:28:06 -0800 Subject: [PATCH 36/39] Change log for release 0.8.1-incubating --- CHANGES.txt | 1373 +++++++++------------------------------------------ 1 file changed, 245 insertions(+), 1128 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 17f9c093b6288..311d90526e28a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,1253 +1,370 @@ Spark Change Log -Release 0.8.0-incubating +Release 0.8.1-incubating - 2aff798 Sun Sep 15 14:05:04 2013 -0700 - Merge pull request #933 from jey/yarn-typo-fix - [Fix typo in Maven build docs] + cc33f9f Wed Dec 4 15:57:47 2013 -0800 + Merge pull request #227 from pwendell/master - dbd2c4f Sun Sep 15 13:20:41 2013 -0700 - Merge pull request #932 from pwendell/mesos-version - [Bumping Mesos version to 0.13.0] + ba44f21 Wed Dec 4 15:56:58 2013 -0800 + Merge pull request #223 from rxin/transient - 9fb0b9d Sun Sep 15 13:02:53 2013 -0700 - Merge pull request #931 from pwendell/yarn-docs - [Explain yarn.version in Maven build docs] + 31da065 Wed Dec 4 14:01:13 2013 -0800 + Merge pull request #95 from aarondav/perftest - c4c1db2 Fri Sep 13 19:52:12 2013 -0700 - Merge pull request #929 from pwendell/master - [Use different Hadoop version for YARN artifacts.] + daaaee1 Tue Dec 3 14:22:05 2013 -0800 + Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error - a310de6 Wed Sep 11 19:36:11 2013 -0700 - Merge pull request #926 from kayousterhout/dynamic - [Changed localProperties to use ThreadLocal (not DynamicVariable).] + 8b091fe Mon Dec 2 21:28:13 2013 -0800 + Merge pull request #181 from BlackNiuza/fix_tasks_number - 58c7d8b Wed Sep 11 17:33:42 2013 -0700 - Merge pull request #927 from benh/mesos-docs - [Updated Spark on Mesos documentation.] + d21266e Sun Dec 1 12:47:30 2013 -0800 + Merge pull request #219 from sundeepn/schedulerexception - 91a59e6 Wed Sep 11 10:21:48 2013 -0700 - Merge pull request #919 from mateiz/jets3t - [Add explicit jets3t dependency, which is excluded in hadoop-client] + be9c176 Tue Nov 26 10:27:41 2013 -0800 + Merge pull request #201 from rxin/mappartitions - b9128d3 Wed Sep 11 10:03:06 2013 -0700 - Merge pull request #922 from pwendell/port-change - [Change default port number from 3030 to 4030.] + 9949561 Mon Nov 25 07:51:23 2013 +0800 + Merge pull request #197 from aarondav/patrick-fix - e07eef8 Wed Sep 11 07:35:39 2013 -0700 - Merge pull request #925 from davidmccauley/master - [SPARK-894 - Not all WebUI fields delivered VIA JSON] + c59ce18 Sun Nov 24 11:04:00 2013 +0800 + Merge pull request #200 from mateiz/hash-fix - 8432f27 Tue Sep 10 23:19:53 2013 -0700 - Merge pull request #923 from haoyuan/master - [fix run-example script] + d7ab87e Fri Nov 22 10:27:16 2013 +0800 + Merge pull request #193 from aoiwelle/patch-1 - d40f140 Tue Sep 10 23:05:29 2013 -0700 - Merge pull request #921 from pwendell/master - [Fix HDFS access bug with assembly build.] + d7c6a00 Fri Nov 22 10:13:37 2013 +0800 + Merge pull request #196 from pwendell/master - 0a6c051 Mon Sep 9 23:37:57 2013 -0700 - Merge pull request #918 from pwendell/branch-0.8 - [Update versions for 0.8.0 release.] 
+ 37126e8 Sun Nov 17 18:53:16 2013 -0800 + Merge pull request #174 from ahirreddy/master - 8c14f4b Mon Sep 9 22:07:58 2013 -0700 - Merge pull request #917 from pwendell/master - [Document libgfortran dependency for MLBase] + 8823057 Sun Nov 17 18:53:04 2013 -0800 + Merge pull request #166 from ahirreddy/simr-spark-ui - c81377b Mon Sep 9 20:16:19 2013 -0700 - Merge pull request #915 from ooyala/master - [Get rid of / improve ugly NPE when Utils.deleteRecursively() fails] + e134ed5 Sun Nov 17 18:51:19 2013 -0800 + Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase - 61d2a01 Mon Sep 9 18:21:01 2013 -0700 - Merge pull request #916 from mateiz/mkdist-fix - [Fix copy issue in https://github.com/mesos/spark/pull/899] + af98fbc Sun Nov 17 18:50:23 2013 -0800 + Merge pull request #165 from NathanHowell/kerberos-master - a85758c Mon Sep 9 13:45:40 2013 -0700 - Merge pull request #907 from stephenh/document_coalesce_shuffle - [Add better docs for coalesce.] + a64397b Sun Nov 17 18:49:40 2013 -0800 + Merge pull request #153 from ankurdave/stop-spot-cluster - 084fc36 Mon Sep 9 12:01:35 2013 -0700 - Merge pull request #912 from tgravescs/ganglia-pom - [Add metrics-ganglia to core pom file] + 6c60768 Sun Nov 17 18:46:40 2013 -0800 + Merge pull request #160 from xiajunluan/JIRA-923 - 0456384 Mon Sep 9 09:57:54 2013 -0700 - Merge pull request #911 from pwendell/ganglia-sink - [Adding Manen dependency for Ganglia] + f0d350a Sun Nov 17 18:44:49 2013 -0800 + Merge pull request #175 from kayousterhout/no_retry_not_serializable - bf984e2 Sun Sep 8 23:50:24 2013 -0700 - Merge pull request #890 from mridulm/master - [Fix hash bug] + 9d56371 Sun Nov 17 18:43:59 2013 -0800 + Merge pull request #173 from kayousterhout/scheduler_hang - e9d4f44 Sun Sep 8 23:36:48 2013 -0700 - Merge pull request #909 from mateiz/exec-id-fix - [Fix an instance where full standalone mode executor IDs were passed to] + 24e238b Sun Nov 17 18:42:41 2013 -0800 + Merge pull request #182 from rxin/vector - 2447b1c Sun Sep 8 22:27:49 2013 -0700 - Merge pull request #910 from mateiz/ml-doc-tweaks - [Small tweaks to MLlib docs] + e7927ad Thu Nov 14 10:32:27 2013 -0800 + Merge pull request #169 from kayousterhout/mesos_fix - 7d3204b Sun Sep 8 21:39:12 2013 -0700 - Merge pull request #905 from mateiz/docs2 - [Job scheduling and cluster mode docs] + 41dc566 Thu Nov 14 10:30:57 2013 -0800 + Merge pull request #170 from liancheng/hadooprdd-doc-typo - f1f8371 Sun Sep 8 21:26:11 2013 -0700 - Merge pull request #896 from atalwalkar/master - [updated content] + 333859f Thu Nov 14 10:26:27 2013 -0800 + Merge pull request #171 from RIA-pierre-borckmans/master - f68848d Sun Sep 8 18:32:16 2013 -0700 - Merge pull request #906 from pwendell/ganglia-sink - [Clean-up of Metrics Code/Docs and Add Ganglia Sink] + c856651 Tue Nov 12 10:22:18 2013 -0800 + Merge pull request #164 from tdas/kafka-fix - 0b95799 Sun Sep 8 15:30:16 2013 -0700 - Merge pull request #908 from pwendell/master - [Fix target JVM version in scala build] + 30786c6 Sun Nov 10 11:58:58 2013 -0800 + Merge pull request #157 from rxin/kryo - 04cfb3a Sun Sep 8 10:33:20 2013 -0700 - Merge pull request #898 from ilikerps/660 - [SPARK-660: Add StorageLevel support in Python] + 5ce6c75 Sat Nov 9 22:31:59 2013 -0800 + Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker - 38488ac Sun Sep 8 00:28:53 2013 -0700 - Merge pull request #900 from pwendell/cdh-docs - [Provide docs to describe running on CDH/HDP cluster.] 
+ 1d52b50 Sat Nov 9 15:47:40 2013 -0800 + Merge pull request #149 from tgravescs/fixSecureHdfsAccess - a8e376e Sat Sep 7 21:16:01 2013 -0700 - Merge pull request #904 from pwendell/master - [Adding Apache license to two files] + 32a0c4f Sat Nov 9 15:40:54 2013 -0800 + Merge pull request #155 from rxin/jobgroup - cfde85e Sat Sep 7 13:53:08 2013 -0700 - Merge pull request #901 from ooyala/2013-09/0.8-doc-changes - [0.8 Doc changes for make-distribution.sh] + 07ae524 Sat Nov 9 11:56:46 2013 -0800 + Merge pull request #152 from rxin/repl - 4a7813a Sat Sep 7 13:52:24 2013 -0700 - Merge pull request #903 from rxin/resulttask - [Fixed the bug that ResultTask was not properly deserializing outputId.] + a5916b9 Thu Nov 7 11:08:44 2013 -0800 + Merge pull request #148 from squito/include_appId - afe46ba Sat Sep 7 07:28:51 2013 -0700 - Merge pull request #892 from jey/fix-yarn-assembly - [YARN build fixes] + d5ae953 Wed Nov 6 23:23:12 2013 -0800 + Merge pull request #23 from jerryshao/multi-user - 2eebeff Fri Sep 6 15:25:22 2013 -0700 - Merge pull request #897 from pwendell/master - [Docs describing Spark monitoring and instrumentation] + 1d9412b Wed Nov 6 13:28:02 2013 -0800 + Merge pull request #144 from liancheng/runjob-clean - ddcb9d3 Thu Sep 5 23:54:09 2013 -0700 - Merge pull request #895 from ilikerps/821 - [SPARK-821: Don't cache results when action run locally on driver] + c8e0c0d Wed Nov 6 09:40:00 2013 -0800 + Merge pull request #145 from aarondav/sls-fix - 699c331 Thu Sep 5 20:21:53 2013 -0700 - Merge pull request #891 from xiajunluan/SPARK-864 - [[SPARK-864]DAGScheduler Exception if we delete Worker and StandaloneExecutorBackend then add Worker] + 0f62786 Tue Nov 5 23:14:28 2013 -0800 + Merge pull request #143 from rxin/scheduler-hang - 5c7494d Wed Sep 4 22:47:03 2013 -0700 - Merge pull request #893 from ilikerps/master - [SPARK-884: Add unit test to validate Spark JSON output] + 96670e7 Tue Nov 5 10:43:10 2013 -0800 + Merge pull request #140 from aarondav/merge-75 - a547866 Wed Sep 4 21:11:56 2013 -0700 - Merge pull request #894 from c0s/master - [Updating assembly README to reflect recent changes in the build.] 
+ 0848167 Tue Nov 5 10:42:36 2013 -0800 + Merge pull request #142 from liancheng/dagscheduler-pattern-matching - 19f7027 Tue Sep 3 14:29:10 2013 -0700 - Merge pull request #878 from tgravescs/yarnUILink - [Link the Spark UI up to the Yarn UI ] + 07b3f01 Mon Nov 4 23:32:56 2013 -0800 + Merge pull request #75 from JoshRosen/block-manager-cleanup - 68df246 Tue Sep 3 13:01:17 2013 -0700 - Merge pull request #889 from alig/master - [Return the port the WebUI is bound to (useful if port 0 was used)] + e80d1cf Mon Nov 4 20:47:44 2013 -0800 + Merge pull request #139 from aarondav/shuffle-next - d3dd48f Mon Sep 2 16:44:54 2013 -0700 - Merge pull request #887 from mateiz/misc-fixes - [Miscellaneous fixes for 0.8] + 518cf22 Mon Nov 4 18:21:27 2013 -0800 + Merge pull request #128 from shimingfei/joblogger-doc - 636fc0c Mon Sep 2 11:20:39 2013 -0700 - Merge pull request #886 from mateiz/codec - [Fix spark.io.compression.codec and change default codec to LZF] + 7e00dee Mon Nov 4 17:54:35 2013 -0800 + Merge pull request #130 from aarondav/shuffle - d9a53b9 Sun Sep 1 22:12:30 2013 -0700 - Merge pull request #885 from mateiz/win-py - [Allow PySpark to run on Windows] + 1d11e43 Mon Nov 4 16:30:30 2013 -0800 + Merge pull request #138 from marmbrus/branch-0.8 - 3c520fe Sun Sep 1 17:26:55 2013 -0700 - Merge pull request #884 from mateiz/win-fixes - [Run script fixes for Windows after package & assembly change] + a3544ee Sun Nov 3 23:49:19 2013 -0800 + Merge pull request #70 from rxin/hash1 - f957c26 Sun Sep 1 14:53:57 2013 -0700 - Merge pull request #882 from mateiz/package-rename - [Rename spark package to org.apache.spark] + e094daf Sun Nov 3 23:48:40 2013 -0800 + Merge pull request #129 from velvia/2013-11/document-local-uris - a30fac1 Sun Sep 1 12:27:50 2013 -0700 - Merge pull request #883 from alig/master - [Don't require the spark home environment variable to be set for standalone mode (change needed by SIMR)] + a9e7787 Sun Nov 3 23:48:26 2013 -0800 + Merge pull request #125 from velvia/2013-10/local-jar-uri - 03cc765 Sun Sep 1 10:20:56 2013 -0700 - Merge pull request #881 from pwendell/master - [Extend QuickStart to include next steps] + 57fdb3f Sun Nov 3 23:46:18 2013 -0800 + Merge pull request #117 from stephenh/avoid_concurrent_modification_exception - 0e9565a Sat Aug 31 18:55:41 2013 -0700 - Merge pull request #880 from mateiz/ui-tweaks - [Various UI tweaks] + ec0e4f0 Sun Nov 3 23:45:23 2013 -0800 + Merge pull request #124 from tgravescs/sparkHadoopUtilFix - 2b29a1d Sat Aug 31 17:49:45 2013 -0700 - Merge pull request #877 from mateiz/docs - [Doc improvements for 0.8] + ba0e858 Sun Nov 3 23:43:48 2013 -0800 + Merge pull request #126 from kayousterhout/local_fix - 6edef9c Sat Aug 31 13:39:24 2013 -0700 - Merge pull request #861 from AndreSchumacher/pyspark_sampling_function - [Pyspark sampling function] + 504fe74 Sun Nov 3 23:43:03 2013 -0800 + Merge pull request #118 from JoshRosen/blockinfo-memory-usage - fd89835 Sat Aug 31 13:18:12 2013 -0700 - Merge pull request #870 from JoshRosen/spark-885 - [Don't send SIGINT / ctrl-c to Py4J gateway subprocess] + 25fa229 Sun Nov 3 23:41:19 2013 -0800 + Merge pull request #112 from kayousterhout/ui_task_attempt_id - 618f0ec Fri Aug 30 18:17:13 2013 -0700 - Merge pull request #869 from AndreSchumacher/subtract - [PySpark: implementing subtractByKey(), subtract() and keyBy()] + 57ea854 Sun Nov 3 23:38:37 2013 -0800 + Merge pull request #102 from tdas/transform - 94bb7fd Fri Aug 30 12:05:13 2013 -0700 - Merge pull request #876 from mbautin/master_hadoop_rdd_conf - 
[Make HadoopRDD's configuration accessible] + 5b45c9b Sat Nov 2 14:42:22 2013 -0700 + Merge pull request #133 from Mistobaan/link_fix - 9e17e45 Fri Aug 30 00:22:53 2013 -0700 - Merge pull request #875 from shivaram/build-fix - [Fix broken build by removing addIntercept] + 054d97b Sat Nov 2 14:37:35 2013 -0700 + Merge pull request #134 from rxin/readme - 016787d Thu Aug 29 22:15:14 2013 -0700 - Merge pull request #863 from shivaram/etrain-ridge - [Adding linear regression and refactoring Ridge regression to use SGD] + 87d4e1c Fri Nov 1 17:58:38 2013 -0700 + Merge pull request #132 from Mistobaan/doc_fix - 852d810 Thu Aug 29 22:13:15 2013 -0700 - Merge pull request #819 from shivaram/sgd-cleanup - [Change SVM to use {0,1} labels] + 3db505c Tue Oct 29 01:42:07 2013 -0400 + Merge pull request #119 from soulmachine/master - ca71620 Thu Aug 29 21:51:14 2013 -0700 - Merge pull request #857 from mateiz/assembly - [Change build and run instructions to use assemblies] + abeca01 Sun Oct 27 22:11:39 2013 -0400 + Merge pull request #115 from aarondav/shuffle-fix - 1528776 Thu Aug 29 21:30:47 2013 -0700 - Merge pull request #874 from jerryshao/fix-report-bug - [Fix removed block zero size log reporting] + 79e5c50 Sat Oct 26 13:05:40 2013 -0700 + Merge pull request #108 from alig/master - abdbacf Wed Aug 28 21:11:31 2013 -0700 - Merge pull request #871 from pwendell/expose-local - [Expose `isLocal` in SparkContext.] + cb24278 Sat Oct 26 11:41:18 2013 -0700 + Merge pull request #113 from pwendell/master - afcade3 Wed Aug 28 20:15:40 2013 -0700 - Merge pull request #873 from pwendell/master - [Hot fix for command runner] + 3ec2f51 Sat Oct 26 11:39:29 2013 -0700 + Merge pull request #111 from kayousterhout/ui_name - baa84e7 Wed Aug 28 12:44:46 2013 -0700 - Merge pull request #865 from tgravescs/fixtmpdir - [Spark on Yarn should use yarn approved directories for spark.local.dir and tmp] + 7d47704 Sat Oct 26 11:36:23 2013 -0700 + Merge pull request #114 from soulmachine/master - cd043cf Tue Aug 27 19:50:32 2013 -0700 - Merge pull request #867 from tgravescs/yarnenvconfigs - [Spark on Yarn allow users to specify environment variables ] + 148509f Thu Oct 24 22:32:23 2013 -0700 + Merge pull request #109 from pwendell/master - 898da7e Mon Aug 26 20:40:49 2013 -0700 - Merge pull request #859 from ianbuss/sbt_opts - [Pass SBT_OPTS environment through to sbt_launcher] + c018c61 Thu Oct 24 17:27:28 2013 -0700 + Merge pull request #97 from ewencp/pyspark-system-properties - 17bafea Mon Aug 26 11:59:32 2013 -0700 - Merge pull request #864 from rxin/json1 - [Revert json library change] + 3500b66 Thu Oct 24 17:27:11 2013 -0700 + Merge pull request #93 from kayousterhout/ui_new_state - f9fc5c1 Sat Aug 24 15:19:56 2013 -0700 - Merge pull request #603 from pwendell/ec2-updates - [Several Improvements to EC2 Scripts] + f5eff85 Thu Oct 24 17:26:44 2013 -0700 + Merge pull request #83 from ewencp/pyspark-accumulator-add-method - d282c1e Fri Aug 23 11:20:20 2013 -0700 - Merge pull request #860 from jey/sbt-ide-fixes - [Fix IDE project generation under SBT] + 59d6f06 Thu Oct 24 17:09:05 2013 -0700 + Merge pull request #106 from pwendell/master - 5a6ac12 Thu Aug 22 22:08:03 2013 -0700 - Merge pull request #701 from ScrapCodes/documentation-suggestions - [Documentation suggestions for spark streaming.] 
+ 76f3c2f Wed Oct 23 18:03:26 2013 -0700 + Merge pull request #103 from JoshRosen/unpersist-fix - 46ea0c1 Thu Aug 22 15:57:28 2013 -0700 - Merge pull request #814 from holdenk/master - [Create less instances of the random class during ALS initialization.] + 534bab2 Tue Oct 22 16:02:45 2013 -0700 + Merge pull request #100 from JoshRosen/spark-902 - 9ac3d62 Thu Aug 22 15:51:10 2013 -0700 - Merge pull request #856 from jey/sbt-fix-hadoop-0.23.9 - [Re-add removed dependency to fix build under Hadoop 0.23.9] + fa9a0e4 Tue Oct 22 12:23:17 2013 -0700 + Merge pull request #90 from pwendell/master - ae8ba83 Thu Aug 22 10:14:54 2013 -0700 - Merge pull request #855 from jey/update-build-docs - [Update build docs] + c449ee1 Tue Oct 22 00:00:35 2013 -0700 + Merge pull request #92 from tgravescs/sparkYarnFixClasspath - 8a36fd0 Thu Aug 22 10:13:35 2013 -0700 - Merge pull request #854 from markhamstra/pomUpdate - [Synced sbt and maven builds to use the same dependencies, etc.] + 498cc6b Mon Oct 21 22:45:31 2013 -0700 + Merge pull request #87 from aarondav/shuffle-base - c2d00f1 Thu Aug 22 10:13:03 2013 -0700 - Merge pull request #832 from alig/coalesce - [Coalesced RDD with locality] + e3ad6a5 Mon Oct 21 20:20:42 2013 -0700 + Revert "Merge pull request #94 from aarondav/mesos-fix" - e6d66c8 Wed Aug 21 17:44:31 2013 -0700 - Merge pull request #853 from AndreSchumacher/double_rdd - [Implementing SPARK-838: Add DoubleRDDFunctions methods to PySpark] + fe974ba Mon Oct 21 20:15:30 2013 -0700 + Merge pull request #94 from aarondav/mesos-fix - 2905611 Tue Aug 20 17:36:14 2013 -0700 - Merge pull request #851 from markhamstra/MutablePairTE - [Removed meaningless types] + 1c3f4bd Mon Oct 21 11:58:22 2013 -0700 + Merge pull request #88 from rxin/clean - d61337f Tue Aug 20 10:06:06 2013 -0700 - Merge pull request #844 from markhamstra/priorityRename - [Renamed 'priority' to 'jobId' and assorted minor changes] + 1a50c79 Sun Oct 20 22:27:29 2013 -0700 + Merge pull request #41 from pwendell/shuffle-benchmark - 8cae72e Mon Aug 19 23:40:04 2013 -0700 - Merge pull request #828 from mateiz/sched-improvements - [Scheduler fixes and improvements] + 37a755c Sun Oct 20 21:04:33 2013 -0700 + Merge pull request #89 from rxin/executor - efeb142 Mon Aug 19 19:23:50 2013 -0700 - Merge pull request #849 from mateiz/web-fixes - [Small fixes to web UI] + ec74428 Sun Oct 20 11:46:14 2013 -0700 + Merge pull request #84 from rxin/kill1 - abdc1f8 Mon Aug 19 18:30:56 2013 -0700 - Merge pull request #847 from rxin/rdd - [Allow subclasses of Product2 in all key-value related classes] + 52d13a6 Sun Oct 20 10:50:54 2013 -0700 + Merge pull request #85 from rxin/clean - 8fa0747 Sun Aug 18 17:02:54 2013 -0700 - Merge pull request #840 from AndreSchumacher/zipegg - [Implementing SPARK-878 for PySpark: adding zip and egg files to context ...] + 919c557 Sat Oct 19 11:29:00 2013 -0700 + Merge pull request #79 from aarondav/scdefaults0.8 - 1e137a5 Sat Aug 17 22:22:32 2013 -0700 - Merge pull request #846 from rxin/rdd - [Two minor RDD refactoring] + f3de2ce Fri Oct 18 23:20:16 2013 -0700 + Merge pull request #76 from pwendell/master - e89ffc7 Fri Aug 16 14:02:34 2013 -0700 - Merge pull request #839 from jegonzal/zip_partitions - [Currying RDD.zipPartitions ] + f181560 Fri Oct 18 23:14:28 2013 -0700 + Merge pull request #68 from mosharaf/master - 1fb1b09 Thu Aug 15 22:15:05 2013 -0700 - Merge pull request #841 from rxin/json - [Use the JSON formatter from Scala library and removed dependency on lift-json.] 
+ eaa2150 Fri Oct 18 23:08:47 2013 -0700 + Merge pull request #74 from rxin/kill - c69c489 Thu Aug 15 20:55:09 2013 -0700 - Merge pull request #843 from Reinvigorate/bug-879 - [fixing typo in conf/slaves] + df21ac8 Thu Oct 17 18:38:46 2013 -0700 + Merge pull request #69 from KarthikTunga/master - 230ab27 Thu Aug 15 17:45:17 2013 -0700 - Merge pull request #834 from Daemoen/master - [Updated json output to allow for display of worker state] + b531552 Thu Oct 17 18:37:22 2013 -0700 + Merge pull request #67 from kayousterhout/remove_tsl - 659553b Thu Aug 15 16:56:31 2013 -0700 - Merge pull request #836 from pwendell/rename - [Rename `memoryBytesToString` and `memoryMegabytesToString`] + 5a73ab7 Thu Oct 17 18:36:36 2013 -0700 + Merge pull request #62 from harveyfeng/master - 28369ff Thu Aug 15 16:44:02 2013 -0700 - Merge pull request #829 from JoshRosen/pyspark-unit-tests-python-2.6 - [Fix PySpark unit tests on Python 2.6] + b6ce111 Thu Oct 17 18:35:33 2013 -0700 + Merge pull request #61 from kayousterhout/daemon_thread - 1a13460 Thu Aug 15 15:50:44 2013 -0700 - Merge pull request #833 from rxin/ui - [Various UI improvements.] + 2760055 Thu Oct 17 18:34:56 2013 -0700 + Merge pull request #59 from rxin/warning - 044a088 Wed Aug 14 20:43:49 2013 -0700 - Merge pull request #831 from rxin/scheduler - [A few small scheduler / job description changes.] + 1e67234 Thu Oct 17 18:33:21 2013 -0700 + Merge pull request #65 from tgravescs/fixYarn - 839f2d4 Wed Aug 14 16:17:23 2013 -0700 - Merge pull request #822 from pwendell/ui-features - [Adding GC Stats to TaskMetrics (and three small fixes)] + d0c9d41 Thu Oct 17 18:32:54 2013 -0700 + Merge pull request #34 from kayousterhout/rename - 63446f9 Wed Aug 14 00:17:07 2013 -0700 - Merge pull request #826 from kayousterhout/ui_fix - [Fixed 2 bugs in executor UI (incl. SPARK-877)] + cee3b43 Wed Oct 16 10:37:55 2013 -0700 + Merge pull request #63 from pwendell/master - 3f14cba Tue Aug 13 20:09:51 2013 -0700 - Merge pull request #825 from shivaram/maven-repl-fix - [Set SPARK_CLASSPATH for maven repl tests] + 1d92983 Tue Oct 15 10:52:15 2013 -0700 + Merge pull request #58 from hsaputra/update-pom-asf - 596adc6 Tue Aug 13 19:41:34 2013 -0700 - Merge pull request #824 from mateiz/mesos-0.12.1 - [Update to Mesos 0.12.1] + c50b016 Mon Oct 14 23:18:37 2013 -0700 + Merge pull request #29 from rxin/kill - d316af9 Tue Aug 13 15:31:01 2013 -0700 - Merge pull request #821 from pwendell/print-launch-command - [Print run command to stderr rather than stdout] + 616ea6f Mon Oct 14 20:59:31 2013 -0700 + Merge pull request #57 from aarondav/bid - 1f79d21 Tue Aug 13 15:23:54 2013 -0700 - Merge pull request #818 from kayousterhout/killed_fix - [Properly account for killed tasks.] 
+ e306de8 Mon Oct 14 20:58:48 2013 -0700 + Merge pull request #53 from witgo/master - 622f83c Tue Aug 13 09:58:52 2013 -0700 - Merge pull request #817 from pwendell/pr_784 - [Minor clean-up in metrics servlet code] + d66c01f Mon Oct 14 20:58:17 2013 -0700 + Merge pull request #19 from aarondav/master-zk - a0133bf Tue Aug 13 09:28:18 2013 -0700 - Merge pull request #784 from jerryshao/dev-metrics-servlet - [Add MetricsServlet for Spark metrics system] + 0fcb234 Mon Oct 14 20:54:34 2013 -0700 + Merge pull request #46 from mateiz/py-sort-update - e2fdac6 Mon Aug 12 21:26:59 2013 -0700 - Merge pull request #802 from stayhf/SPARK-760-Python - [Simple PageRank algorithm implementation in Python for SPARK-760] + f94aa52 Mon Oct 14 20:53:15 2013 -0700 + Merge pull request #44 from mateiz/fast-map - d3525ba Mon Aug 12 21:02:39 2013 -0700 - Merge pull request #813 from AndreSchumacher/add_files_pyspark - [Implementing SPARK-865: Add the equivalent of ADD_JARS to PySpark] + 36f5b08 Mon Oct 14 20:50:28 2013 -0700 + Merge pull request #33 from AndreSchumacher/pyspark_partition_key_change - 9e02da2 Mon Aug 12 20:22:27 2013 -0700 - Merge pull request #812 from shivaram/maven-mllib-tests - [Create SparkContext in beforeAll for MLLib tests] + d615b14 Mon Oct 14 20:48:45 2013 -0700 + Merge pull request #32 from mridulm/master - 65d0d91 Mon Aug 12 19:00:57 2013 -0700 - Merge pull request #807 from JoshRosen/guava-optional - [Change scala.Option to Guava Optional in Java APIs] + 7084217 Mon Oct 14 19:31:22 2013 -0700 + Merge pull request #27 from davidmccauley/master - 4346f0a Mon Aug 12 12:12:12 2013 -0700 - Merge pull request #809 from shivaram/sgd-cleanup - [Clean up scaladoc in ML Lib.] + a35259a Mon Oct 14 19:28:46 2013 -0700 + Merge pull request #26 from Du-Li/master - ea1b4ba Mon Aug 12 08:09:58 2013 -0700 - Merge pull request #806 from apivovarov/yarn-205 - [Changed yarn.version to 2.0.5 in pom.xml] + 62ce4ae Mon Oct 14 19:28:09 2013 -0700 + Merge pull request #25 from CruncherBigData/master - 2a39d2c Sun Aug 11 20:35:09 2013 -0700 - Merge pull request #810 from pwendell/dead_doc_code - [Remove now dead code inside of docs] + 6961744 Mon Oct 14 18:55:41 2013 -0700 + Merge pull request #10 from kayousterhout/results_through-bm - e5b9ed2 Sun Aug 11 17:22:47 2013 -0700 - Merge pull request #808 from pwendell/ui_compressed_bytes - [Report compressed bytes read when calculating TaskMetrics] + ce364c0 Mon Oct 14 18:54:35 2013 -0700 + Merge pull request #4 from MLnick/implicit-als - 3796486 Sun Aug 11 14:51:47 2013 -0700 - Merge pull request #805 from woggle/hadoop-rdd-jobconf - [Use new Configuration() instead of slower new JobConf() in SerializableWritable] + f94bd3f Mon Oct 14 15:10:59 2013 -0700 + Merge pull request #28 from tgravescs/sparYarnAppName - ff9ebfa Sun Aug 11 10:52:55 2013 -0700 - Merge pull request #762 from shivaram/sgd-cleanup - [Refactor SGD options into a new class.] + dc2c90d Mon Oct 14 15:03:34 2013 -0700 + Merge pull request #38 from AndreSchumacher/pyspark_sorting - 95c62ca Sun Aug 11 10:30:52 2013 -0700 - Merge pull request #804 from apivovarov/master - [Fixed path to JavaALS.java and JavaKMeans.java, fixed hadoop2-yarn profi...] 
+ 00a7551 Sat Oct 12 21:26:22 2013 -0700 + Merge pull request #52 from harveyfeng/hadoop-closure - 06e4f2a Sat Aug 10 18:06:23 2013 -0700 - Merge pull request #789 from MLnick/master - [Adding Scala version of PageRank example] + 5383a5a Sat Oct 12 21:26:11 2013 -0700 + Merge pull request #20 from harveyfeng/hadoop-config-cache - 71c63de Sat Aug 10 10:21:20 2013 -0700 - Merge pull request #795 from mridulm/master - [Fix bug reported in PR 791 : a race condition in ConnectionManager and Connection] + 4a2e76a Fri Oct 11 16:09:07 2013 -0700 + Merge pull request #54 from aoiwelle/remove_unused_imports - d17eeb9 Sat Aug 10 09:02:27 2013 -0700 - Merge pull request #785 from anfeng/master - [expose HDFS file system stats via Executor metrics] + 64fae16 Wed Oct 9 21:42:18 2013 -0700 + Merge pull request #47 from xiliu82/branch-0.8 - dce5e47 Fri Aug 9 21:53:45 2013 -0700 - Merge pull request #800 from dlyubimov/HBASE_VERSION - [Pull HBASE_VERSION in the head of sbt build] + dfc62e2 Wed Oct 9 16:55:58 2013 -0700 + Merge pull request #49 from mateiz/kryo-fix-2 - cd247ba Fri Aug 9 20:41:13 2013 -0700 - Merge pull request #786 from shivaram/mllib-java - [Java fixes, tests and examples for ALS, KMeans] + 0b6f047 Wed Oct 9 16:53:31 2013 -0700 + Merge pull request #50 from kayousterhout/SPARK-908 - b09d4b7 Fri Aug 9 13:17:08 2013 -0700 - Merge pull request #799 from woggle/sync-fix - [Remove extra synchronization in ResultTask] + f930dd4 Tue Oct 8 22:58:35 2013 -0700 + Merge pull request #43 from mateiz/kryo-fix - 0bc63bf Fri Aug 9 13:16:25 2013 -0700 - Merge pull request #801 from pwendell/print-launch-command - [Print launch command [Branch 0.8 version]] + 8e9bd93 Mon Oct 7 20:47:09 2013 -0700 + Merge pull request #42 from pwendell/shuffle-read-perf - cc6b92e Fri Aug 9 13:00:33 2013 -0700 - Merge pull request #775 from pwendell/print-launch-command - [Log the launch command for Spark daemons] + f2cdcc4 Mon Oct 7 15:49:32 2013 -0700 + Merge pull request #40 from pwendell/branch-0.8 - f94fc75 Fri Aug 9 10:04:03 2013 -0700 - Merge pull request #788 from shane-huang/sparkjavaopts - [For standalone mode, add worker local env setting of SPARK_JAVA_OPTS as ...] 
+ 023e3fd Mon Oct 7 10:47:45 2013 -0700 + Merge pull request #31 from sundeepn/branch-0.8 - 63b6e02 Thu Aug 8 14:02:02 2013 -0700 - Merge pull request #797 from mateiz/chill-0.3.1 - [Update to Chill 0.3.1] + 3cb9040 Thu Sep 26 14:37:06 2013 -0700 + Merge pull request #17 from rxin/optimize - 9955e5a Thu Aug 8 11:03:38 2013 -0700 - Merge pull request #796 from pwendell/bootstrap-design - [Bootstrap re-design] + 35bcf32 Thu Sep 26 14:17:00 2013 -0700 + Merge pull request #930 from holdenk/master - 5133e4b Wed Aug 7 15:50:45 2013 -0700 - Merge pull request #790 from kayousterhout/fix_throughput - [Fixed issue in UI that decreased scheduler throughput by 5x or more] + 976fe60 Thu Sep 26 14:16:17 2013 -0700 + Merge pull request #14 from kayousterhout/untangle_scheduler - 3c8478e Tue Aug 6 23:25:03 2013 -0700 - Merge pull request #747 from mateiz/improved-lr - [Update the Python logistic regression example] + 8cbc96b Thu Sep 26 13:16:05 2013 -0700 + Merge pull request #7 from wannabeast/memorystore-fixes - 6b043a6 Tue Aug 6 22:31:02 2013 -0700 - Merge pull request #724 from dlyubimov/SPARK-826 - [SPARK-826: fold(), reduce(), collect() always attempt to use java serialization] + 240ca93 Thu Sep 26 13:12:06 2013 -0700 + Merge pull request #9 from rxin/limit - de6c4c9 Tue Aug 6 17:09:50 2013 -0700 - Merge pull request #787 from ash211/master - [Update spark-standalone.md] + a186792 Thu Sep 26 13:10:59 2013 -0700 + Merge pull request #937 from jerryshao/localProperties-fix - df4d10d Tue Aug 6 15:44:05 2013 -0700 - Merge pull request #779 from adatao/adatao-global-SparkEnv - [[HOTFIX] Extend thread safety for SparkEnv.get()] + f3c60c9 Thu Sep 26 13:10:24 2013 -0700 + Merge pull request #941 from ilikerps/master - d2b0f0c Tue Aug 6 14:49:39 2013 -0700 - Merge pull request #770 from stayhf/SPARK-760-Java - [Simple PageRank algorithm implementation in Java for SPARK-760] - - d031f73 Mon Aug 5 22:33:00 2013 -0700 - Merge pull request #782 from WANdisco/master - [SHARK-94 Log the files computed by HadoopRDD and NewHadoopRDD] - - 1b63dea Mon Aug 5 22:21:26 2013 -0700 - Merge pull request #769 from markhamstra/NegativeCores - [SPARK-847 + SPARK-845: Zombie workers and negative cores] - - 828aff7 Mon Aug 5 21:37:33 2013 -0700 - Merge pull request #776 from gingsmith/master - [adding matrix factorization data generator] - - 8b27789 Mon Aug 5 19:14:52 2013 -0700 - Merge pull request #774 from pwendell/job-description - [Show user-defined job name in UI] - - 550b0cf Mon Aug 5 12:10:32 2013 -0700 - Merge pull request #780 from cybermaster/master - [SPARK-850] - - 22abbc1 Fri Aug 2 16:37:59 2013 -0700 - Merge pull request #772 from karenfeng/ui-843 - [Show app duration] - - 9d7dfd2 Thu Aug 1 17:41:58 2013 -0700 - Merge pull request #743 from pwendell/app-metrics - [Add application metrics to standalone master] - - 6d7afd7 Thu Aug 1 17:13:28 2013 -0700 - Merge pull request #768 from pwendell/pr-695 - [Minor clean-up of fair scheduler UI] - - 5e7b38f Thu Aug 1 14:59:33 2013 -0700 - Merge pull request #695 from xiajunluan/pool_ui - [Enhance job ui in spark ui system with adding pool information] - - 0a96493 Thu Aug 1 11:27:17 2013 -0700 - Merge pull request #760 from karenfeng/heading-update - [Clean up web UI page headers] - - cb7dd86 Thu Aug 1 11:06:10 2013 -0700 - Merge pull request #758 from pwendell/master-json - [Add JSON path to master index page] - - 58756b7 Wed Jul 31 23:45:41 2013 -0700 - Merge pull request #761 from mateiz/kmeans-generator - [Add data generator for K-means] - - ecab635 Wed Jul 31 18:16:55 
2013 -0700 - Merge pull request #763 from c0s/assembly - [SPARK-842. Maven assembly is including examples libs and dependencies] - - 39c75f3 Wed Jul 31 15:52:36 2013 -0700 - Merge pull request #757 from BlackNiuza/result_task_generation - [Bug fix: SPARK-837] - - b2b86c2 Wed Jul 31 15:51:39 2013 -0700 - Merge pull request #753 from shivaram/glm-refactor - [Build changes for ML lib] - - 14bf2fe Wed Jul 31 14:18:16 2013 -0700 - Merge pull request #749 from benh/spark-executor-uri - [Added property 'spark.executor.uri' for launching on Mesos.] - - 4ba4c3f Wed Jul 31 13:14:49 2013 -0700 - Merge pull request #759 from mateiz/split-fix - [Use the Char version of split() instead of the String one in MLUtils] - - a386ced Wed Jul 31 11:22:50 2013 -0700 - Merge pull request #754 from rxin/compression - [Compression codec change] - - 0be071a Wed Jul 31 11:11:59 2013 -0700 - Merge pull request #756 from cdshines/patch-1 - [Refactored Vector.apply(length, initializer) replacing excessive code with library method] - - d4556f4 Wed Jul 31 08:48:14 2013 -0700 - Merge pull request #751 from cdshines/master - [Cleaned Partitioner & PythonPartitioner source by taking out non-related logic to Utils] - - 29b8cd3 Tue Jul 30 21:30:33 2013 -0700 - Merge pull request #755 from jerryshao/add-apache-header - [Add Apache license header to metrics system] - - e87de03 Tue Jul 30 15:00:08 2013 -0700 - Merge pull request #744 from karenfeng/bootstrap-update - [Use Bootstrap progress bars in web UI] - - ae57020 Tue Jul 30 14:56:41 2013 -0700 - Merge pull request #752 from rxin/master - [Minor mllib cleanup] - - 8aee118 Tue Jul 30 10:27:54 2013 -0700 - Merge pull request #748 from atalwalkar/master - [made SimpleUpdater consistent with other updaters] - - 468a36c Mon Jul 29 19:44:33 2013 -0700 - Merge pull request #746 from rxin/cleanup - [Internal cleanup] - - 1e1ffb1 Mon Jul 29 19:26:19 2013 -0700 - Merge pull request #745 from shivaram/loss-update-fix - [Remove duplicate loss history in Gradient Descent] - - c99b674 Mon Jul 29 16:32:55 2013 -0700 - Merge pull request #735 from karenfeng/ui-807 - [Totals for shuffle data and CPU time] - - fe7298b Mon Jul 29 14:01:00 2013 -0700 - Merge pull request #741 from pwendell/usability - [Fix two small usability issues] - - c34c0f6 Mon Jul 29 13:18:10 2013 -0700 - Merge pull request #731 from pxinghao/master - [Adding SVM and Lasso] - - f3d72ff Fri Jul 26 17:19:27 2013 -0700 - Merge pull request #739 from markhamstra/toolsPom - [Missing tools/pom.xml scalatest dependency] - - cb36677 Fri Jul 26 16:59:30 2013 -0700 - Merge pull request #738 from harsha2010/pruning - [Fix bug in Partition Pruning.] 
- - f3cf094 Thu Jul 25 14:53:21 2013 -0700 - Merge pull request #734 from woggle/executor-env2 - [Get more env vars from driver rather than worker] - - 51c2427 Thu Jul 25 00:03:11 2013 -0700 - Merge pull request #732 from ryanlecompte/master - [Refactor Kryo serializer support to use chill/chill-java] - - 52723b9 Wed Jul 24 14:33:02 2013 -0700 - Merge pull request #728 from jey/examples-jar-env - [Fix setting of SPARK_EXAMPLES_JAR] - - 20338c2 Wed Jul 24 14:32:24 2013 -0700 - Merge pull request #729 from karenfeng/ui-811 - [Stage Page updates] - - 5584ebc Wed Jul 24 11:46:46 2013 -0700 - Merge pull request #675 from c0s/assembly - [Building spark assembly for further consumption of the Spark project with a deployed cluster] - - a73f3ee Wed Jul 24 08:59:14 2013 -0700 - Merge pull request #671 from jerryshao/master - [Add metrics system for Spark] - - b011329 Tue Jul 23 22:50:09 2013 -0700 - Merge pull request #727 from rxin/scheduler - [Scheduler code style cleanup.] - - 876125b Tue Jul 23 22:28:21 2013 -0700 - Merge pull request #726 from rxin/spark-826 - [SPARK-829: scheduler shouldn't hang if a task contains unserializable objects in its closure] - - 2f1736c Tue Jul 23 15:53:30 2013 -0700 - Merge pull request #725 from karenfeng/task-start - [Creates task start events] - - 5364f64 Tue Jul 23 13:40:34 2013 -0700 - Merge pull request #723 from rxin/mllib - [Made RegressionModel serializable and added unit tests to make sure predict methods would work.] - - f369e0e Tue Jul 23 13:22:27 2013 -0700 - Merge pull request #720 from ooyala/2013-07/persistent-rdds-api - [Add a public method getCachedRdds to SparkContext] - - 401aac8 Mon Jul 22 16:57:16 2013 -0700 - Merge pull request #719 from karenfeng/ui-808 - [Creates Executors tab for Jobs UI] - - 8ae1436 Mon Jul 22 16:03:04 2013 -0700 - Merge pull request #722 from JoshRosen/spark-825 - [Fix bug: DoubleRDDFunctions.sampleStdev() computed non-sample stdev()] - - 15fb394 Sun Jul 21 10:33:38 2013 -0700 - Merge pull request #716 from c0s/webui-port - [Regression: default webui-port can't be set via command line "--webui-port" anymore] - - c40f0f2 Fri Jul 19 13:33:04 2013 -0700 - Merge pull request #711 from shivaram/ml-generators - [Move ML lib data generator files to util/] - - 413b841 Fri Jul 19 13:31:38 2013 -0700 - Merge pull request #717 from viirya/dev1 - [Do not copy local jars given to SparkContext in yarn mode] - - 0d0a47c Thu Jul 18 12:06:37 2013 -0700 - Merge pull request #710 from shivaram/ml-updates - [Updates to LogisticRegression] - - c6235b5 Thu Jul 18 11:43:48 2013 -0700 - Merge pull request #714 from adatao/master - [[BUGFIX] Fix for sbt/sbt script SPARK_HOME setting] - - 009c79e Thu Jul 18 11:41:52 2013 -0700 - Merge pull request #715 from viirya/dev1 - [fix a bug in build process that pulls in two versions of ASM.] - - 985a9e3 Wed Jul 17 22:27:19 2013 -0700 - Merge pull request #712 from stayhf/SPARK-817 - [Consistently invoke bash with /usr/bin/env bash in scripts to make code ...] - - cad48ed Tue Jul 16 21:41:28 2013 -0700 - Merge pull request #708 from ScrapCodes/dependencies-upgrade - [Dependency upgrade Akka 2.0.3 -> 2.0.5] - - 8a8a8f2 Mon Jul 15 23:09:21 2013 -0700 - Merge pull request #705 from rxin/errormessages - [Throw a more meaningful message when runJob is called to launch tasks on non-existent partitions.] 
- - ed8415b Mon Jul 15 16:41:04 2013 -0700 - Merge pull request #703 from karenfeng/ui-802 - [Link to job UI from standalone deploy cluster web UI] - - e3d3e6f Mon Jul 15 14:59:44 2013 -0700 - Merge pull request #702 from karenfeng/ui-fixes - [Adds app name in HTML page titles on job web UI] - - c7877d5 Sun Jul 14 12:58:13 2013 -0700 - Merge pull request #689 from BlackNiuza/application_status - [Bug fix: SPARK-796] - - 10c0593 Sun Jul 14 11:45:18 2013 -0700 - Merge pull request #699 from pwendell/ui-env - [Add `Environment` tab to SparkUI.] - - 89e8549 Sat Jul 13 16:11:08 2013 -0700 - Merge pull request #698 from Reinvigorate/sm-deps-change - [changing com.google.code.findbugs maven coordinates] - - 77c69ae Fri Jul 12 23:05:21 2013 -0700 - Merge pull request #697 from pwendell/block-locations - [Show block locations in Web UI.] - - 5a7835c Fri Jul 12 20:28:21 2013 -0700 - Merge pull request #691 from karenfeng/logpaging - [Create log pages] - - 71ccca0 Fri Jul 12 20:25:06 2013 -0700 - Merge pull request #696 from woggle/executor-env - [Pass executor env vars (e.g. SPARK_CLASSPATH) to compute-classpath.sh] - - 90fc3f3 Fri Jul 12 20:23:36 2013 -0700 - Merge pull request #692 from Reinvigorate/takeOrdered - [adding takeOrdered() to RDD] - - 018d04c Thu Jul 11 12:48:37 2013 -0700 - Merge pull request #684 from woggle/mesos-classloader - [Explicitly set class loader for MesosSchedulerDriver callbacks.] - - bc19477 Wed Jul 10 22:29:41 2013 -0700 - Merge pull request #693 from c0s/readme - [Updating README to reflect Scala 2.9.3 requirements] - - 7dcda9a Mon Jul 8 23:24:23 2013 -0700 - Merge pull request #688 from markhamstra/scalaDependencies - [Fixed SPARK-795 with explicit dependencies] - - 638927b Mon Jul 8 22:58:50 2013 -0700 - Merge pull request #683 from shivaram/sbt-test-fix - [Remove some stack traces from sbt test output] - - 3c13178 Mon Jul 8 14:50:34 2013 -0700 - Merge pull request #687 from atalwalkar/master - [Added "Labeled" to util functions for labeled data] - - 744da8e Sun Jul 7 17:42:25 2013 -0700 - Merge pull request #679 from ryanlecompte/master - [Make binSearch method tail-recursive for RidgeRegression] - - 3cc6818 Sat Jul 6 19:51:20 2013 -0700 - Merge pull request #668 from shimingfei/guava-14.0.1 - [update guava version from 11.0.1 to 14.0.1] - - 2216188 Sat Jul 6 16:18:15 2013 -0700 - Merge pull request #676 from c0s/asf-avro - [Use standard ASF published avro module instead of a proprietory built one] - - 94871e4 Sat Jul 6 15:26:19 2013 -0700 - Merge pull request #655 from tgravescs/master - [Add support for running Spark on Yarn on a secure Hadoop Cluster] - - 3f918b3 Sat Jul 6 12:45:18 2013 -0700 - Merge pull request #672 from holdenk/master - [s/ActorSystemImpl/ExtendedActorSystem/ as ActorSystemImpl results in a warning] - - 2a36e54 Sat Jul 6 12:43:21 2013 -0700 - Merge pull request #673 from xiajunluan/master - [Add config template file for fair scheduler feature] - - 7ba7fa1 Sat Jul 6 11:45:08 2013 -0700 - Merge pull request #674 from liancheng/master - [Bug fix: SPARK-789] - - f4416a1 Sat Jul 6 11:41:58 2013 -0700 - Merge pull request #681 from BlackNiuza/memory_leak - [Remove active job from idToActiveJob when job finished or aborted] - - e063e29 Fri Jul 5 21:54:52 2013 -0700 - Merge pull request #680 from tdas/master - [Fixed major performance bug in Network Receiver] - - bf1311e Fri Jul 5 17:32:44 2013 -0700 - Merge pull request #678 from mateiz/ml-examples - [Start of ML package] - - 6ad85d0 Thu Jul 4 21:32:29 2013 -0700 - Merge pull request #677 from 
jerryshao/fix_stage_clean - [Clean StageToInfos periodically when spark.cleaner.ttl is enabled] - - 2e32fc8 Thu Jul 4 12:18:20 2013 -0700 - Merge pull request #666 from c0s/master - [hbase dependency is missed in hadoop2-yarn profile of examples module -] - - 6d60fe5 Mon Jul 1 18:24:03 2013 -0700 - Merge pull request #666 from c0s/master - [hbase dependency is missed in hadoop2-yarn profile of examples module] - - ccfe953 Sat Jun 29 17:57:53 2013 -0700 - Merge pull request #577 from skumargithub/master - [Example of cumulative counting using updateStateByKey] - - 50ca176 Thu Jun 27 22:24:52 2013 -0700 - Merge pull request #664 from pwendell/test-fix - [Removing incorrect test statement] - - e49bc8c Wed Jun 26 11:13:33 2013 -0700 - Merge pull request #663 from stephenh/option_and_getenv - [Be cute with Option and getenv.] - - f5e32ed Tue Jun 25 09:16:57 2013 -0700 - Merge pull request #661 from mesos/streaming - [Kafka fixes and DStream.count fix for master] - - 1249e91 Mon Jun 24 21:46:33 2013 -0700 - Merge pull request #572 from Reinvigorate/sm-block-interval - [Adding spark.streaming.blockInterval property] - - cfcda95 Mon Jun 24 21:44:50 2013 -0700 - Merge pull request #571 from Reinvigorate/sm-kafka-serializers - [Surfacing decoders on KafkaInputDStream] - - 575aff6 Mon Jun 24 21:35:50 2013 -0700 - Merge pull request #567 from Reinvigorate/sm-count-fix - [Fixing count() in Spark Streaming] - - 3e61bef Sat Jun 22 16:22:47 2013 -0700 - Merge pull request #648 from shivaram/netty-dbg - [Shuffle fixes and cleanup] - - 1ef5d0d Sat Jun 22 09:35:57 2013 -0700 - Merge pull request #644 from shimingfei/joblogger - [add Joblogger to Spark (on new Spark code)] - - 7e4b266 Sat Jun 22 07:53:18 2013 -0700 - Merge pull request #563 from jey/python-optimization - [Optimize PySpark worker invocation] - - 71030ba Wed Jun 19 15:21:03 2013 -0700 - Merge pull request #654 from lyogavin/enhance_pipe - [fix typo and coding style in #638] - - 73f4c7d Tue Jun 18 04:21:17 2013 -0700 - Merge pull request #605 from esjewett/SPARK-699 - [Add hBase example (retry of pull request #596)] - - 9933836 Tue Jun 18 02:41:10 2013 -0700 - Merge pull request #647 from jerryshao/master - [Reduce ZippedPartitionsRDD's getPreferredLocations complexity from O(2^2n) to O(2^n)] - - db42451 Mon Jun 17 15:26:36 2013 -0700 - Merge pull request #643 from adatao/master - [Bug fix: Zero-length partitions result in NaN for overall mean & variance] - - e82a2ff Mon Jun 17 15:13:15 2013 -0700 - Merge pull request #653 from rxin/logging - [SPARK-781: Log the temp directory path when Spark says "Failed to create temp directory."] - - e6d1277 Mon Jun 17 12:56:25 2013 -0700 - Merge pull request #638 from lyogavin/enhance_pipe - [Enhance pipe to support more features we can do in hadoop streaming] - - f961aac Sat Jun 15 00:53:41 2013 -0700 - Merge pull request #649 from ryanlecompte/master - [Add top K method to RDD using a bounded priority queue] - - 6602d94 Fri Jun 14 10:41:31 2013 -0700 - Merge pull request #651 from rxin/groupbykey - [SPARK-772 / SPARK-774: groupByKey and cogroup should disable map side combine] - - d93851a Thu Jun 13 13:38:45 2013 -0700 - Merge pull request #645 from pwendell/compression - [Adding compression to Hadoop save functions] - - f1da591 Wed Jun 12 17:55:08 2013 -0700 - Merge pull request #646 from markhamstra/jvmArgs - [Fixed jvmArgs in maven build.] - - 0e94b73 Mon Jun 10 13:00:31 2013 -0700 - Merge pull request #625 from stephenh/fix-start-slave - [Fix start-slave not passing instance number to spark-daemon.] 
- - 74b91d5 Sat Jun 8 01:19:40 2013 -0700 - Merge pull request #629 from c0s/master - [Sometime Maven build runs out of PermGen space.] - - c8fc423 Fri Jun 7 22:43:18 2013 -0700 - Merge pull request #631 from jerryshao/master - [Fix block manager UI display issue when enable spark.cleaner.ttl] - - 1ae60bc Fri Jun 7 22:39:06 2013 -0700 - Merge pull request #634 from xiajunluan/master - [[Spark-753] Fix ClusterSchedulSuite unit test failed ] - - fff3728 Tue Jun 4 16:09:50 2013 -0700 - Merge pull request #640 from pwendell/timeout-update - [Fixing bug in BlockManager timeout] - - f420d4f Tue Jun 4 15:25:58 2013 -0700 - Merge pull request #639 from pwendell/timeout-update - [Bump akka and blockmanager timeouts to 60 seconds] - - 84530ba Fri May 31 17:06:13 2013 -0700 - Merge pull request #636 from rxin/unpersist - [Unpersist More block manager cleanup.] - - ef77bb7 Thu May 30 14:50:06 2013 -0700 - Merge pull request #627 from shivaram/master - [Netty and shuffle bug fixes] - - 8cb8178 Thu May 30 14:17:44 2013 -0700 - Merge pull request #628 from shivaram/zero-block-size - [Skip fetching zero-sized blocks in NIO.] - - 6ed7139 Wed May 29 10:14:22 2013 -0700 - Merge pull request #626 from stephenh/remove-add-if-no-port - [Remove unused addIfNoPort.] - - 41d230c Tue May 28 23:35:24 2013 -0700 - Merge pull request #611 from squito/classloader - [Use default classloaders for akka & deserializing task results] - - 3db1e17 Mon May 27 21:31:43 2013 -0700 - Merge pull request #620 from jerryshao/master - [Fix CheckpointRDD java.io.FileNotFoundException when calling getPreferredLocations] - - 3d4891d Sat May 25 23:38:05 2013 -0700 - Merge pull request #621 from JoshRosen/spark-613 - [Use ec2-metadata in start-slave.sh to detect if running on EC2] - - e8d4b6c Sat May 25 21:09:03 2013 -0700 - Merge pull request #529 from xiajunluan/master - [[SPARK-663]Implement Fair Scheduler in Spark Cluster Scheduler ] - - 9a3c344 Sat May 25 17:53:43 2013 -0700 - Merge pull request #624 from rxin/master - [NonJavaSerializableClass should not be Java serializable...] - - 24e41aa Fri May 24 16:48:52 2013 -0700 - Merge pull request #623 from rxin/master - [Automatically configure Netty port.] - - 69161f9 Fri May 24 14:42:13 2013 -0700 - Merge pull request #622 from rxin/master - [bug fix: Shuffle block iterator is ignoring the shuffle serializer setting.] 
- - dbbedfc Thu May 23 23:11:06 2013 -0700 - Merge pull request #616 from jey/maven-netty-exclusion - [Exclude old versions of Netty from Maven-based build] - - a2b0a79 Tue May 21 18:16:20 2013 -0700 - Merge pull request #619 from woggling/adjust-sampling - [Use ARRAY_SAMPLE_SIZE constant instead of hard-coded 100.0 in SizeEstimator] - - 66dac44 Tue May 21 11:41:42 2013 -0700 - Merge pull request #618 from woggling/dead-code-disttest - [DistributedSuite: remove dead code] - - 5912cc4 Fri May 17 19:58:40 2013 -0700 - Merge pull request #610 from JoshRosen/spark-747 - [Throw exception if TaskResult exceeds Akka frame size] - - 6c27c38 Thu May 16 17:33:56 2013 -0700 - Merge pull request #615 from rxin/build-fix - [Maven build fix & two other small changes] - - 2f576ab Wed May 15 18:06:24 2013 -0700 - Merge pull request #602 from rxin/shufflemerge - [Manual merge & cleanup of Shane's Shuffle Performance Optimization] - - 48c6f46 Wed May 15 10:47:19 2013 -0700 - Merge pull request #612 from ash211/patch-4 - [Docs: Mention spark shell's default for MASTER] - - 203d7b7 Wed May 15 00:47:20 2013 -0700 - Merge pull request #593 from squito/driver_ui_link - [Master UI has link to Application UI] - - 016ac86 Mon May 13 21:45:36 2013 -0700 - Merge pull request #601 from rxin/emptyrdd-master - [EmptyRDD (master branch 0.8)] - - 4b354e0 Mon May 13 17:39:19 2013 -0700 - Merge pull request #589 from mridulm/master - [Add support for instance local scheduling] - - 5dbc9b2 Sun May 12 11:03:10 2013 -0700 - Merge pull request #608 from pwendell/SPARK-738 - [SPARK-738: Spark should detect and wrap nonserializable exceptions] - - 63e1999 Fri May 10 13:54:03 2013 -0700 - Merge pull request #606 from markhamstra/foreachPartition_fix - [Actually use the cleaned closure in foreachPartition] - - 42bbe89 Wed May 8 22:30:31 2013 -0700 - Merge pull request #599 from JoshRosen/spark-670 - [Fix SPARK-670: EC2 'start' command should require -i option.] - - 0f1b7a0 Wed May 8 13:38:50 2013 -0700 - Merge pull request #596 from esjewett/master - [hBase example] - - 7af92f2 Sat May 4 22:29:17 2013 -0700 - Merge pull request #597 from JoshRosen/webui-fixes - [Two minor bug fixes for Spark Web UI] - - c74ce60 Sat May 4 22:26:35 2013 -0700 - Merge pull request #598 from rxin/blockmanager - [Fixed flaky unpersist test in DistributedSuite.] 
-  3bf2c86 Fri May 3 18:27:30 2013 -0700 - Merge pull request #594 from shivaram/master - [Add zip partitions to Java API]
-  2484ad7 Fri May 3 17:08:55 2013 -0700 - Merge pull request #587 from rxin/blockmanager - [A set of shuffle map output related changes]
-  6fe9d4e Thu May 2 21:33:56 2013 -0700 - Merge pull request #592 from woggling/localdir-fix - [Don't accept generated local directory names that can't be created]
-  538ee75 Thu May 2 09:01:42 2013 -0700 - Merge pull request #581 from jerryshao/master - [fix [SPARK-740] block manage UI throws exception when enabling Spark Streaming]
-  9abcbcc Wed May 1 22:45:10 2013 -0700 - Merge pull request #591 from rxin/removerdd - [RDD.unpersist: probably the most desired feature of Spark]
-  aa8fe1a Tue Apr 30 22:30:18 2013 -0700 - Merge pull request #586 from mridulm/master - [Pull request to address issues Reynold Xin reported]
-  f708dda Tue Apr 30 07:51:40 2013 -0700 - Merge pull request #585 from pwendell/listener-perf - [[Fix SPARK-742] Task Metrics should not employ per-record timing by default]
-  68c07ea Sun Apr 28 20:19:33 2013 -0700 - Merge pull request #582 from shivaram/master - [Add zip partitions interface]
-  f6ee9a8 Sun Apr 28 15:36:04 2013 -0700 - Merge pull request #583 from mridulm/master - [Fix issues with streaming test cases after yarn branch merge]
-  cf54b82 Thu Apr 25 11:45:58 2013 -0700 - Merge pull request #580 from pwendell/quickstart - [SPARK-739 Have quickstart standlone job use README]
-  118a6c7 Wed Apr 24 08:42:30 2013 -0700 - Merge pull request #575 from mridulm/master - [Manual merge of yarn branch to trunk]
-  5d8a71c Tue Apr 16 19:48:02 2013 -0700 - Merge pull request #570 from jey/increase-codecache-size - [Increase ReservedCodeCacheSize for sbt]
-  ec5e553 Sun Apr 14 08:20:13 2013 -0700 - Merge pull request #558 from ash211/patch-jackson-conflict - [Don't pull in old versions of Jackson via hadoop-core]
-  c1c219e Sun Apr 14 08:11:23 2013 -0700 - Merge pull request #564 from maspotts/master - [Allow latest scala in PATH, with SCALA_HOME as override (instead of vice-versa)]
-  7c10b3e Fri Apr 12 20:55:22 2013 -0700 - Merge pull request #565 from andyk/master - [Update wording of section on RDD operations in quick start guide in docs]
-  077ae0a Thu Apr 11 19:34:14 2013 -0700 - Merge pull request #561 from ash211/patch-4 - [Add details when BlockManager heartbeats time out]
-  c91ff8d Wed Apr 10 15:08:23 2013 -0700 - Merge pull request #560 from ash211/patch-3 - [Typos: cluser -> cluster]
-  7cd83bf Tue Apr 9 22:07:35 2013 -0700 - Merge pull request #559 from ash211/patch-example-whitespace - [Uniform whitespace across scala examples]
-  271a4f3 Tue Apr 9 22:04:52 2013 -0700 - Merge pull request #555 from holdenk/master - [Retry failed ssh commands in the ec2 python script.]
-  8ac9efb Tue Apr 9 13:50:50 2013 -0700 - Merge pull request #527 from Reinvigorate/sm-kafka-cleanup - [KafkaInputDStream fixes and improvements]
-  eed54a2 Mon Apr 8 09:44:30 2013 -0700 - Merge pull request #553 from pwendell/akka-standalone - [SPARK-724 - Have Akka logging enabled by default for standalone daemons]
-  b362df3 Sun Apr 7 17:17:52 2013 -0700 - Merge pull request #552 from MLnick/master - [Bumping version for Twitter Algebird to latest]
-  4b30190 Sun Apr 7 17:15:10 2013 -0700 - Merge pull request #554 from andyk/scala2.9.3 - [Fixes SPARK-723 - Update build to Scala 2.9.3]
-  dfe98ca Tue Apr 2 19:24:12 2013 -0700 - Merge pull request #550 from erikvanoosten/master - [corrected Algebird example]
-  b5d7830 Tue Apr 2 19:23:45 2013 -0700 - Merge pull request #551 from jey/python-bugfixes - [Python bugfixes]
-  2be2295 Sun Mar 31 18:09:14 2013 -0700 - Merge pull request #548 from markhamstra/getWritableClass_filter - [Fixed broken filter in getWritableClass[T]]
-  9831bc1 Fri Mar 29 22:16:22 2013 -0700 - Merge pull request #539 from cgrothaus/fix-webui-workdirpath - [Bugfix: WorkerWebUI must respect workDirPath from Worker]
-  3cc8ab6 Fri Mar 29 22:14:07 2013 -0700 - Merge pull request #541 from stephenh/shufflecoalesce - [Add a shuffle parameter to coalesce.]
-  cad507a Fri Mar 29 22:13:12 2013 -0700 - Merge pull request #547 from jey/maven-streaming-tests-initialization-fix - [Move streaming test initialization into 'before' blocks]
-  a98996d Fri Mar 29 22:12:15 2013 -0700 - Merge pull request #545 from ash211/patch-1 - [Don't use deprecated Application in example]
-  104c694 Fri Mar 29 22:11:50 2013 -0700 - Merge pull request #546 from ash211/patch-2 - [Update tuning.md]
-  bc36ee4 Tue Mar 26 15:05:13 2013 -0700 - Merge pull request #543 from holdenk/master - [Re-enable deprecation warnings and fix deprecated warning.]
-  b8949ca Sat Mar 23 07:19:34 2013 -0700 - Merge pull request #505 from stephenh/volatile - [Make Executor fields volatile since they're read from the thread pool.]
-  fd53f2f Sat Mar 23 07:13:21 2013 -0700 - Merge pull request #510 from markhamstra/WithThing - [mapWith, flatMapWith and filterWith]
-  4c5efcf Wed Mar 20 19:29:23 2013 -0700 - Merge pull request #532 from andyk/master - [SPARK-715: Adds instructions for building with Maven to documentation]
-  3558849 Wed Mar 20 19:27:47 2013 -0700 - Merge pull request #538 from rxin/cogroup - [Added mapSideCombine flag to CoGroupedRDD. Added unit test for CoGroupedRDD.]
-  ca4d083 Wed Mar 20 11:22:36 2013 -0700 - Merge pull request #528 from MLnick/java-examples - [[SPARK-707] Adding Java versions of Pi, LogQuery and K-Means examples]
-  b812e6b Wed Mar 20 11:21:02 2013 -0700 - Merge pull request #526 from markhamstra/foldByKey - [Add foldByKey]
-  945d1e7 Tue Mar 19 21:59:06 2013 -0700 - Merge pull request #536 from sasurfer/master - [CoalescedRDD for many partitions]
-  1cbbe94 Tue Mar 19 21:34:34 2013 -0700 - Merge pull request #534 from stephenh/removetrycatch - [Remove try/catch block that can't be hit.]
-  71e53f8 Tue Mar 19 21:31:41 2013 -0700 - Merge pull request #537 from wishbear/configurableInputFormat - [call setConf from input format if it is Configurable]
-  c1e9cdc Sat Mar 16 11:47:45 2013 -0700 - Merge pull request #525 from stephenh/subtractByKey - [Add PairRDDFunctions.subtractByKey.]
-  cdbfd1e Fri Mar 15 15:13:28 2013 -0700 - Merge pull request #516 from squito/fix_local_metrics - [Fix local metrics]
-  f9fa2ad Fri Mar 15 15:12:43 2013 -0700 - Merge pull request #530 from mbautin/master-update-log4j-and-make-compile-in-IntelliJ - [Add a log4j compile dependency to fix build in IntelliJ]
-  4032beb Wed Mar 13 19:29:46 2013 -0700 - Merge pull request #521 from stephenh/earlyclose - [Close the reader in HadoopRDD as soon as iteration end.]
-  3c97276 Wed Mar 13 19:25:08 2013 -0700 - Merge pull request #524 from andyk/master - [Fix broken link to YARN documentation]
-  1c3d981 Wed Mar 13 19:23:48 2013 -0700 - Merge pull request #517 from Reinvigorate/sm-build-fixes - [Build fixes for streaming /w SBT]
-  2d477fd Wed Mar 13 06:49:16 2013 -0700 - Merge pull request #523 from andyk/master - [Fix broken link in Quick Start]
-  00c4d23 Tue Mar 12 22:19:00 2013 -0700 - Merge pull request #518 from woggling/long-bm-sizes - [Send block sizes as longs in BlockManager updates]
-  cbf8f0d Mon Mar 11 00:23:57 2013 -0700 - Merge pull request #513 from MLnick/bagel-caching - [Adds choice of persistence level to Bagel.]
-  91a9d09 Sun Mar 10 15:48:23 2013 -0700 - Merge pull request #512 from patelh/fix-kryo-serializer - [Fix reference bug in Kryo serializer, add test, update version]
-  557cfd0 Sun Mar 10 15:44:57 2013 -0700 - Merge pull request #515 from woggling/deploy-app-death - [Notify standalone deploy client of application death.]
-  04fb81f Sun Mar 3 17:20:07 2013 -0800 - Merge pull request #506 from rxin/spark-706 - [Fixed SPARK-706: Failures in block manager put leads to read task hanging.]
-  6cf4be4 Sun Mar 3 17:16:22 2013 -0800 - Merge pull request #462 from squito/stageInfo - [Track assorted metrics for each task, report summaries to user at stage completion]
-  6bfc7ca Sat Mar 2 22:14:49 2013 -0800 - Merge pull request #504 from mosharaf/master - [Worker address was getting removed when removing an app.]
-  94b3db1 Sat Mar 2 22:13:52 2013 -0800 - Merge pull request #508 from markhamstra/TestServerInUse - [Avoid bind failure in InputStreamsSuite]
-  25c71d3 Fri Mar 1 08:00:18 2013 -0800 - Merge pull request #507 from markhamstra/poms271 - [bump version to 0.7.1-SNAPSHOT in the subproject poms]
+  d5a8dbf Thu Sep 26 13:09:30 2013 -0700
+  Merge pull request #928 from jerryshao/fairscheduler-refactor

From b3959fec5fafc741c4146618d5a0a3b54da493be Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 5 Dec 2013 12:31:24 -0800
Subject: [PATCH 37/39] Merge pull request #228 from pwendell/master

Document missing configs and set shuffle consolidation to false.

(cherry picked from commit 5d460253d6080d871cb71efb112ea17be0873771)
Signed-off-by: Patrick Wendell
---
 .../spark/storage/ShuffleBlockManager.scala   |  2 +-
 .../spark/storage/DiskBlockManagerSuite.scala | 14 ++++++-
 docs/configuration.md                         | 37 ++++++++++++++++++-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
index 2f1b049ce4839..e828e1d1c5e7b 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
@@ -62,7 +62,7 @@ class ShuffleBlockManager(blockManager: BlockManager) {
   // Turning off shuffle file consolidation causes all shuffle Blocks to get their own file.
   // TODO: Remove this once the shuffle file consolidation feature is stable.
   val consolidateShuffleFiles =
-    System.getProperty("spark.shuffle.consolidateFiles", "true").toBoolean
+    System.getProperty("spark.shuffle.consolidateFiles", "false").toBoolean
 
   private val bufferSize = System.getProperty("spark.shuffle.file.buffer.kb", "100").toInt * 1024
 
diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
index 0b9056344c1dd..ef4c4c0f143f9 100644
--- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
@@ -5,9 +5,9 @@ import java.io.{FileWriter, File}
 import scala.collection.mutable
 
 import com.google.common.io.Files
-import org.scalatest.{BeforeAndAfterEach, FunSuite}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
 
-class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
+class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll {
 
   val rootDir0 = Files.createTempDir()
   rootDir0.deleteOnExit()
@@ -16,6 +16,12 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
   val rootDirs = rootDir0.getName + "," + rootDir1.getName
   println("Created root dirs: " + rootDirs)
 
+  // This suite focuses primarily on consolidation features,
+  // so we coerce consolidation if not already enabled.
+  val consolidateProp = "spark.shuffle.consolidateFiles"
+  val oldConsolidate = Option(System.getProperty(consolidateProp))
+  System.setProperty(consolidateProp, "true")
+
   val shuffleBlockManager = new ShuffleBlockManager(null) {
     var idToSegmentMap = mutable.Map[ShuffleBlockId, FileSegment]()
     override def getBlockLocation(id: ShuffleBlockId) = idToSegmentMap(id)
@@ -23,6 +29,10 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
 
   var diskBlockManager: DiskBlockManager = _
 
+  override def afterAll() {
+    oldConsolidate.map(c => System.setProperty(consolidateProp, c))
+  }
+
   override def beforeEach() {
     diskBlockManager = new DiskBlockManager(shuffleBlockManager, rootDirs)
     shuffleBlockManager.idToSegmentMap.clear()
diff --git a/docs/configuration.md b/docs/configuration.md
index 97183bafdb3ea..22abe1c5a9943 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -327,7 +327,42 @@ Apart from these, the following properties are also available, and may be useful
     Too large a value decreases parallelism during broadcast (makes it slower); however, if it is
     too small, BlockManager might take a performance hit.
+  spark.shuffle.consolidateFiles    false
+    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer
+    files can improve filesystem performance if you run shuffles with large numbers of reduce tasks.
+
+  spark.speculation                 false
+    If set to "true", performs speculative execution of tasks. This means if one or more tasks
+    are running slowly in a stage, they will be re-launched.
+
+  spark.speculation.interval        100
+    How often Spark will check for tasks to speculate, in milliseconds.
+
+  spark.speculation.quantile        0.75
+    Percentage of tasks which must be complete before speculation is enabled for a particular stage.
+
+  spark.speculation.multiplier      1.5
+    How many times slower a task is than the median to be considered for speculation.
+
 # Environment Variables

From 457e66359402c77405f53803339bfdaed0b628bf Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 5 Dec 2013 13:10:19 -0800
Subject: [PATCH 38/39] Bumping version numbers for 0.8.1 release

---
 docs/_config.yml                                            | 4 ++--
 ec2/spark_ec2.py                                            | 4 ++--
 project/SparkBuild.scala                                    | 2 +-
 python/pyspark/shell.py                                     | 2 +-
 repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala  | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/_config.yml b/docs/_config.yml
index a260922fd00f3..cca5fd93b34cd 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -3,8 +3,8 @@ markdown: kramdown
 # These allow the documentation to be updated with nerw releases
 # of Spark, Scala, and Mesos.
-SPARK_VERSION: 0.8.1-incubating-SNAPSHOT
-SPARK_VERSION_SHORT: 0.8.1-SNAPSHOT
+SPARK_VERSION: 0.8.1-incubating
+SPARK_VERSION_SHORT: 0.8.1
 SCALA_VERSION: 2.9.3
 MESOS_VERSION: 0.13.0
 SPARK_ISSUE_TRACKER_URL: https://spark-project.atlassian.net
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 267c8ba849821..d508ab379e84d 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -66,7 +66,7 @@ def parse_args():
       "slaves across multiple (an additional $0.01/Gb for bandwidth" +
       "between zones applies)")
   parser.add_option("-a", "--ami", help="Amazon Machine Image ID to use")
-  parser.add_option("-v", "--spark-version", default="0.8.0",
+  parser.add_option("-v", "--spark-version", default="0.8.1",
       help="Version of Spark to use: 'X.Y.Z' or a specific git hash")
   parser.add_option("--spark-git-repo",
       default="https://github.com/apache/incubator-spark",
@@ -157,7 +157,7 @@ def is_active(instance):
 
 # Return correct versions of Spark and Shark, given the supplied Spark version
 def get_spark_shark_version(opts):
-  spark_shark_map = {"0.7.3": "0.7.1", "0.8.0": "0.8.0"}
+  spark_shark_map = {"0.7.3": "0.7.1", "0.8.0": "0.8.0", "0.8.1": "0.8.1"}
   version = opts.spark_version.replace("v", "")
   if version not in spark_shark_map:
     print >> stderr, "Don't know about Spark version: %s" % version
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 95a9ca9d5c1c8..660f0e2eff822 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -79,7 +79,7 @@ object SparkBuild extends Build {
   def sharedSettings = Defaults.defaultSettings ++ Seq(
     organization := "org.apache.spark",
-    version := "0.8.1-incubating-SNAPSHOT",
+    version := "0.8.1-incubating",
     scalaVersion := "2.9.3",
     scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
       "-target:" + SCALAC_JVM_VERSION),
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index 59a7e879541bb..2329497e48d78 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -35,7 +35,7 @@
    ____              __
   / __/__  ___ _____/ /__
  _\ \/ _ \/ _ `/ __/  '_/
-  /__ / .__/\_,_/_/ /_/\_\   version 0.8.1-SNAPSHOT
+  /__ / .__/\_,_/_/ /_/\_\   version 0.8.1
    /_/
 """
 print "Using Python version %s (%s, %s)" % (
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index a5eeacb87b181..6b5be41ce844a 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -200,7 +200,7 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master:
    ____              __
   / __/__  ___ _____/ /__
  _\ \/ _ \/ _ `/ __/  '_/
-  /___/ .__/\_,_/_/ /_/\_\   version 0.8.1-SNAPSHOT
+  /___/ .__/\_,_/_/ /_/\_\   version 0.8.1
    /_/
 """)
 import Properties._
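
The configuration entries documented in PATCH 37 are ordinary JVM system properties in the 0.8.x line, and they have to be in place before the SparkContext is constructed, because the shuffle and scheduling code read them during initialization. Below is a minimal Scala sketch of turning them on from application code; the local master URL, application name, sample job, and the particular values chosen are illustrative assumptions, not something taken from the patches themselves.

    import org.apache.spark.SparkContext

    object TuningSketch {
      def main(args: Array[String]) {
        // Set the properties before creating the SparkContext: ShuffleBlockManager and
        // the task scheduler read them when the context is initialized.
        System.setProperty("spark.shuffle.consolidateFiles", "true") // default is now "false"
        System.setProperty("spark.speculation", "true")              // default "false"
        System.setProperty("spark.speculation.interval", "100")      // ms between speculation checks
        System.setProperty("spark.speculation.quantile", "0.75")     // fraction of tasks that must finish first
        System.setProperty("spark.speculation.multiplier", "1.5")    // "slow" = 1.5x the median task time

        val sc = new SparkContext("local[4]", "TuningSketch")
        try {
          // Any job will do; this one just exercises a shuffle-free count.
          sc.parallelize(1 to 10000, 16).map(_ * 2).count()
        } finally {
          sc.stop()
        }
      }
    }

On a cluster the same -Dspark.* definitions can typically be supplied through SPARK_JAVA_OPTS instead, which is what the Environment Variables section of configuration.md covers.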
From fe1717c9f5df2d6a633ede3fdd9e898cfec0973b Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 5 Dec 2013 23:29:42 -0800
Subject: [PATCH 39/39] Merge pull request #232 from markhamstra/FiniteWait

jobWaiter.synchronized before jobWaiter.wait

...else ``IllegalMonitorStateException`` in ``SimpleFutureAction#ready``.

(cherry picked from commit 078049877e123fe7e4c4553e36055de572cab7c4)
Signed-off-by: Reynold Xin
---
 .../scala/org/apache/spark/FutureAction.scala |  2 +-
 .../apache/spark/scheduler/JobWaiter.scala    |  1 +
 .../spark/rdd/AsyncRDDActionsSuite.scala      | 26 +++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala
index 1ad9240cfa63e..c6b4ac5192d14 100644
--- a/core/src/main/scala/org/apache/spark/FutureAction.scala
+++ b/core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -99,7 +99,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:
   override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = {
     if (!atMost.isFinite()) {
       awaitResult()
-    } else {
+    } else jobWaiter.synchronized {
       val finishTime = System.currentTimeMillis() + atMost.toMillis
       while (!isCompleted) {
         val time = System.currentTimeMillis()
diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
index 58f238d8cfc5d..b026f860a8cd8 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
@@ -31,6 +31,7 @@ private[spark] class JobWaiter[T](
   private var finishedTasks = 0
 
   // Is the job as a whole finished (succeeded or failed)?
+  @volatile
   private var _jobFinished = totalTasks == 0
 
   def jobFinished = _jobFinished
diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
index da032b17d98a6..0d4c10db8ef33 100644
--- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.rdd
 
 import java.util.concurrent.Semaphore
 
+import scala.concurrent.{Await, TimeoutException}
+import scala.concurrent.duration.Duration
 import scala.concurrent.ExecutionContext.Implicits.global
 
 import org.scalatest.{BeforeAndAfterAll, FunSuite}
@@ -173,4 +175,28 @@ class AsyncRDDActionsSuite extends FunSuite with BeforeAndAfterAll with Timeouts
       sem.acquire(2)
     }
   }
+
+  /**
+   * Awaiting FutureAction results
+   */
+  test("FutureAction result, infinite wait") {
+    val f = sc.parallelize(1 to 100, 4)
+              .countAsync()
+    assert(Await.result(f, Duration.Inf) === 100)
+  }
+
+  test("FutureAction result, finite wait") {
+    val f = sc.parallelize(1 to 100, 4)
+              .countAsync()
+    assert(Await.result(f, Duration(30, "seconds")) === 100)
+  }
+
+  test("FutureAction result, timeout") {
+    val f = sc.parallelize(1 to 100, 4)
+              .mapPartitions(itr => { Thread.sleep(20); itr })
+              .countAsync()
+    intercept[TimeoutException] {
+      Await.result(f, Duration(20, "milliseconds"))
+    }
+  }
 }
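
The change in PATCH 39 comes down to a basic JVM monitor rule: a thread may only call wait or notifyAll on an object whose lock it currently holds, so the timed loop in SimpleFutureAction#ready has to run inside jobWaiter.synchronized or the first wait call raises IllegalMonitorStateException. The sketch below restates that pattern outside of Spark; the object and member names (MonitorRuleSketch, lock, finished) are invented for illustration and are not Spark APIs.

    object MonitorRuleSketch {
      private val lock = new Object
      @volatile private var finished = false

      def finish() {
        lock.synchronized {
          finished = true
          lock.notifyAll() // wake any thread blocked in awaitFinished()
        }
      }

      // Returns true if completion was observed before the timeout elapsed.
      def awaitFinished(timeoutMillis: Long): Boolean = {
        val deadline = System.currentTimeMillis() + timeoutMillis
        // Take the monitor before looping on wait(); this mirrors the
        // `} else jobWaiter.synchronized {` change in SimpleFutureAction#ready.
        lock.synchronized {
          while (!finished && System.currentTimeMillis() < deadline) {
            lock.wait(math.max(1L, deadline - System.currentTimeMillis()))
          }
          finished
        }
      }
    }

The accompanying @volatile on JobWaiter._jobFinished plays the same role as the volatile flag above: it keeps reads of the completion flag that happen outside the lock, such as isCompleted checks, from seeing a stale value.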