From 1d9c210dcbc0c63893921fdef591edd9ffe4c48e Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 00:45:27 -0800
Subject: [PATCH 001/133] Version changes for release 0.9.0.
---
docs/_config.yml | 2 +-
ec2/spark_ec2.py | 4 ++--
project/SparkBuild.scala | 2 +-
python/pyspark/shell.py | 2 +-
.../src/main/scala/org/apache/spark/repl/SparkILoopInit.scala | 2 +-
yarn/alpha/pom.xml | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/docs/_config.yml b/docs/_config.yml
index ce0fdf5fb4f03..3e96d2c1ea136 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -3,7 +3,7 @@ markdown: kramdown
# These allow the documentation to be updated with newer releases
# of Spark, Scala, and Mesos.
-SPARK_VERSION: 0.9.0-incubating-SNAPSHOT
+SPARK_VERSION: 0.9.0-incubating
SPARK_VERSION_SHORT: 0.9.0
SCALA_VERSION: "2.10"
MESOS_VERSION: 0.13.0
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index e7cb5ab3ff9b0..e46e1f5e56442 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -70,7 +70,7 @@ def parse_args():
"slaves across multiple (an additional $0.01/Gb for bandwidth" +
"between zones applies)")
parser.add_option("-a", "--ami", help="Amazon Machine Image ID to use")
- parser.add_option("-v", "--spark-version", default="0.8.0",
+ parser.add_option("-v", "--spark-version", default="0.9.0",
help="Version of Spark to use: 'X.Y.Z' or a specific git hash")
parser.add_option("--spark-git-repo",
default="https://github.com/apache/incubator-spark",
@@ -157,7 +157,7 @@ def is_active(instance):
# Return correct versions of Spark and Shark, given the supplied Spark version
def get_spark_shark_version(opts):
- spark_shark_map = {"0.7.3": "0.7.1", "0.8.0": "0.8.0"}
+ spark_shark_map = {"0.7.3": "0.7.1", "0.8.0": "0.8.0", "0.8.1": "0.8.1", "0.9.0": "0.9.0"}
version = opts.spark_version.replace("v", "")
if version not in spark_shark_map:
print >> stderr, "Don't know about Spark version: %s" % version
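
The hunk above only widens the Spark-to-Shark compatibility map so that the new 0.9.0 default resolves; the lookup logic itself is unchanged. A minimal, standalone sketch of that lookup follows (raising an exception in place of the script's print-to-stderr behavior is an illustrative substitution, not what the script does):

    # Sketch of the Spark -> Shark version lookup extended in the hunk above.
    # The map mirrors the patch; the error handling here is an assumption.
    spark_shark_map = {"0.7.3": "0.7.1", "0.8.0": "0.8.0",
                       "0.8.1": "0.8.1", "0.9.0": "0.9.0"}

    def get_spark_shark_version(spark_version):
        # Accept tags like "v0.9.0" as well as plain "0.9.0".
        version = spark_version.replace("v", "")
        if version not in spark_shark_map:
            raise ValueError("Don't know about Spark version: %s" % version)
        return (version, spark_shark_map[version])

    # get_spark_shark_version("v0.9.0") -> ("0.9.0", "0.9.0")
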
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index a9f9937cb168c..bcc286d7ea190 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -120,7 +120,7 @@ object SparkBuild extends Build {
def sharedSettings = Defaults.defaultSettings ++ Seq(
organization := "org.apache.spark",
- version := "0.9.0-incubating-SNAPSHOT",
+ version := "0.9.0-incubating",
scalaVersion := "2.10.3",
scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
"-target:" + SCALAC_JVM_VERSION),
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index 1602227a273e7..920334205c13e 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -35,7 +35,7 @@
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
- /__ / .__/\_,_/_/ /_/\_\ version 0.9.0-SNAPSHOT
+ /__ / .__/\_,_/_/ /_/\_\ version 0.9.0
/_/
"""
print "Using Python version %s (%s, %s)" % (
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
index 21b1ba305d110..ab5e283d65f07 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
@@ -24,7 +24,7 @@ trait SparkILoopInit {
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
- /___/ .__/\_,_/_/ /_/\_\ version 0.9.0-SNAPSHOT
+ /___/ .__/\_,_/_/ /_/\_\ version 0.9.0
/_/
""")
import Properties._
diff --git a/yarn/alpha/pom.xml b/yarn/alpha/pom.xml
index 8291e9e7a36ce..349c8358ecf90 100644
--- a/yarn/alpha/pom.xml
+++ b/yarn/alpha/pom.xml
@@ -20,7 +20,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>yarn-parent_2.10</artifactId>
- <version>0.9.0-incubating-SNAPSHOT</version>
+ <version>0.9.0-incubating</version>
<relativePath>../pom.xml</relativePath>
From 1b4adc21d77f801be795c2814fc0a501f0e6309b Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 01:18:34 -0800
Subject: [PATCH 002/133] Merge pull request #420 from pwendell/header-files
Add missing header files
(cherry picked from commit fa75e5e1c50da7d1e6c6f41c2d6d591c1e8a025f)
Signed-off-by: Patrick Wendell
---
.../main/scala/org/apache/spark/SparkConf.scala | 17 +++++++++++++++++
.../spark/deploy/worker/CommandUtils.scala | 17 +++++++++++++++++
.../spark/deploy/worker/DriverWrapper.scala | 17 +++++++++++++++++
.../spark/deploy/worker/WorkerWatcher.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/SparkConfSuite.scala | 17 +++++++++++++++++
.../spark/deploy/worker/DriverRunnerTest.scala | 17 +++++++++++++++++
.../deploy/worker/WorkerWatcherSuite.scala | 17 +++++++++++++++++
.../collection/ExternalAppendOnlyMapSuite.scala | 17 +++++++++++++++++
.../streaming/examples/StreamingExamples.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/graphx/Edge.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/EdgeDirection.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/graphx/EdgeRDD.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/EdgeTriplet.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/graphx/Graph.scala | 17 +++++++++++++++++
.../spark/graphx/GraphKryoRegistrator.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/GraphLoader.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/GraphOps.scala | 17 +++++++++++++++++
.../apache/spark/graphx/PartitionStrategy.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/graphx/Pregel.scala | 17 +++++++++++++++++
.../spark/graphx/impl/EdgePartition.scala | 17 +++++++++++++++++
.../graphx/impl/EdgePartitionBuilder.scala | 17 +++++++++++++++++
.../spark/graphx/impl/EdgeTripletIterator.scala | 17 +++++++++++++++++
.../apache/spark/graphx/impl/GraphImpl.scala | 17 +++++++++++++++++
.../spark/graphx/impl/MessageToPartition.scala | 17 +++++++++++++++++
.../graphx/impl/ReplicatedVertexView.scala | 17 +++++++++++++++++
.../apache/spark/graphx/impl/RoutingTable.scala | 17 +++++++++++++++++
.../apache/spark/graphx/impl/Serializers.scala | 17 +++++++++++++++++
.../spark/graphx/impl/VertexPartition.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/impl/package.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/lib/Analytics.scala | 17 +++++++++++++++++
.../spark/graphx/lib/ConnectedComponents.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/lib/PageRank.scala | 17 +++++++++++++++++
.../apache/spark/graphx/lib/SVDPlusPlus.scala | 17 +++++++++++++++++
.../lib/StronglyConnectedComponents.scala | 17 +++++++++++++++++
.../apache/spark/graphx/lib/TriangleCount.scala | 17 +++++++++++++++++
.../scala/org/apache/spark/graphx/package.scala | 17 +++++++++++++++++
.../spark/graphx/util/BytecodeUtils.scala | 17 +++++++++++++++++
.../spark/graphx/util/GraphGenerators.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/GraphOpsSuite.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/GraphSuite.scala | 17 +++++++++++++++++
.../apache/spark/graphx/LocalSparkContext.scala | 17 +++++++++++++++++
.../org/apache/spark/graphx/PregelSuite.scala | 17 +++++++++++++++++
.../apache/spark/graphx/SerializerSuite.scala | 17 +++++++++++++++++
.../apache/spark/graphx/VertexRDDSuite.scala | 17 +++++++++++++++++
.../spark/graphx/impl/EdgePartitionSuite.scala | 17 +++++++++++++++++
.../graphx/impl/VertexPartitionSuite.scala | 17 +++++++++++++++++
.../graphx/lib/ConnectedComponentsSuite.scala | 17 +++++++++++++++++
.../apache/spark/graphx/lib/PageRankSuite.scala | 17 +++++++++++++++++
.../spark/graphx/lib/SVDPlusPlusSuite.scala | 17 +++++++++++++++++
.../lib/StronglyConnectedComponentsSuite.scala | 17 +++++++++++++++++
.../spark/graphx/lib/TriangleCountSuite.scala | 17 +++++++++++++++++
.../spark/graphx/util/BytecodeUtilsSuite.scala | 17 +++++++++++++++++
.../classification/JavaNaiveBayesSuite.java | 17 +++++++++++++++++
.../scala/org/apache/spark/repl/ReplSuite.scala | 17 +++++++++++++++++
.../apache/spark/streaming/ContextWaiter.scala | 17 +++++++++++++++++
55 files changed, 935 insertions(+)
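
Every file in the list above receives the same 17-line ASF license header shown in the hunks that follow. A minimal sketch of how such a bulk prepend could be scripted is given below; the license-header.txt file name and the walk over .scala/.java sources are assumptions for illustration, not the process actually used for this commit:

    # Sketch: prepend the ASF header to source files that are missing it.
    # Assumes the 17-line header is stored verbatim in license-header.txt.
    import os

    HEADER = open("license-header.txt").read()

    def add_header(path):
        with open(path) as f:
            src = f.read()
        if "Licensed to the Apache Software Foundation" in src:
            return False  # header already present; leave the file alone
        with open(path, "w") as f:
            f.write(HEADER + "\n" + src)
        return True

    for root, _, files in os.walk("."):
        for name in files:
            if name.endswith((".scala", ".java")):
                add_header(os.path.join(root, name))
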
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 2de32231e8714..93d3d1f6972c3 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark
import scala.collection.JavaConverters._
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
index cf6a23339d961..460883ec7ae24 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.deploy.worker
import java.io.{File, FileOutputStream, IOException, InputStream}
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
index 1640d5fee0f77..6f6c101547c3c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.deploy.worker
import akka.actor._
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
index 0e0d0cd6264cf..1dc39c450ea16 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.deploy.worker
import akka.actor.{Actor, Address, AddressFromURIString}
diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
index ef5936dd2f588..fa49974db445b 100644
--- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark
import org.scalatest.FunSuite
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala
index 45dbcaffae94f..0c502612647a2 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.deploy.worker
import java.io.File
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala
index 94d88d307a163..1f1d8d138005b 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.deploy.worker
diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
index c3391f3e535bc..bb4dc0fcd31a3 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.util.collection
import scala.collection.mutable.ArrayBuffer
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/StreamingExamples.scala
index d41d84a980dc7..99f1502046f53 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/StreamingExamples.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/StreamingExamples.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.streaming.examples
import org.apache.spark.Logging
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
index 738a38b27f0e4..32f1602698134 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
/**
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala
index f265764006234..6f03eb1439773 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
/**
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
index 832b7816fe833..6efef061d7510 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import scala.reflect.{classTag, ClassTag}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
index 4253b24b5ac55..2c659cb070b99 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
/**
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 9dd05ade0aef2..7f65244cd95cd 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
index d79bdf961841b..6db8a34937244 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import com.esotericsoftware.kryo.Kryo
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
index 5904aa3a28c71..18858466db27b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.apache.spark.{Logging, SparkContext}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index f10e63f059aed..9b864c1290bd2 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
index 6d2990a3f6642..8ba87976f1136 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
/**
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
index fc18f7e785a99..0f6d4135934cb 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
index ee95ead3ada9b..6067ee8c7e0fb 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
index 9d072f933503c..960eeaccf1352 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
index bad840f1cdf36..819e3ba93ac9b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index 56d1d9efeafa9..eee2d58c3d8e1 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.{classTag, ClassTag}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
index 05508ff716eb1..cea9d11ebe8cd 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.{classTag, ClassTag}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
index 4ebe0b02671d9..5bdc9339e9fec 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.{classTag, ClassTag}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
index f342fd7437903..b365d4914e95b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import org.apache.spark.SparkContext._
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
index cbd6318f33cdc..bcad1fbc58802 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import java.io.{EOFException, InputStream, OutputStream}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
index f97ff75fb2f93..f13bdded7564d 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
index cfc3281b6407e..f493d2dd01541 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.apache.spark.util.collection.OpenHashSet
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/Analytics.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/Analytics.scala
index e0aff5644e40d..f914e0565ca21 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/Analytics.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/Analytics.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.apache.spark._
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
index 4d1f5e74df59f..2a6c0aa6b554c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 2f4d6d686499a..2bdd8c9f985d7 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index ba6517e012d28..9c7a212c5a3bb 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import scala.util.Random
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
index d3d496e335481..ed84f72156a55 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
index 23c9c40594e8b..a124c892dcba5 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import scala.reflect.ClassTag
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
index 60dfc1dc37a53..e1ff3ea0d1d42 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark
import org.apache.spark.util.collection.OpenHashSet
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
index 1c5b234d74791..d1528e2f07cf2 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.util
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
index 57422ce3f1934..9805eb3285d69 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.util
import scala.annotation.tailrec
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
index 280f50e39aa5f..4a792c0dabeac 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.apache.spark.SparkContext
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
index 9587f04c3e716..b18bc98e6d579 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala b/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala
index aa9ba840840e0..51f02f94e00d5 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.scalatest.Suite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
index bceff11b8e6c4..936e5c9c86fb7 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
index 3ba412c1f84f4..0c756400f4eff 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import java.io.{EOFException, ByteArrayInputStream, ByteArrayOutputStream}
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
index d94a3aa67c925..cc86bafd2d644 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx
import org.apache.spark.SparkContext
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
index eb82436f0964c..1195beba5873c 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import scala.reflect.ClassTag
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala
index d37d64e8c849e..a048d13fd12b8 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.impl
import org.apache.spark.graphx._
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
index 27c8705bca2ff..eba8d7b716284 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
index fe7e4261f8d03..fc491ae327c2a 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
index e173c652a53b6..057d9b3d518e0 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/StronglyConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/StronglyConnectedComponentsSuite.scala
index 0458311661452..df54aa37cad68 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/StronglyConnectedComponentsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/StronglyConnectedComponentsSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala
index 3452ce9764752..293c7f3ba4c21 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala
index 11db339750920..f3b3738db0dad 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.graphx.util
import org.scalatest.FunSuite
diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java
index 23ea3548b95b6..073ded6f36933 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.mllib.classification;
import org.apache.spark.api.java.JavaRDD;
diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 8aad27366524a..8203b8f6122e1 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.repl
import java.io._
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala b/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala
index 1f5dacb543db8..86753360a07e4 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ContextWaiter.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.spark.streaming
private[streaming] class ContextWaiter {
From bf3b1506957bd419b6bee4d8ade9fc3c78761dbf Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 09:44:43 -0800
Subject: [PATCH 003/133] Merge pull request #423 from
jegonzal/GraphXProgrammingGuide
Improving the graphx-programming-guide
This PR will track a few minor improvements to the content and formatting of the graphx-programming-guide.
(cherry picked from commit 3fcc68bfa5e9ef4b7abfd5051b6847a833e1ad2f)
Signed-off-by: Reynold Xin
---
docs/graphx-programming-guide.md | 63 +++++++++++++++++++-------------
1 file changed, 37 insertions(+), 26 deletions(-)
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 9fbde4eb09575..5641f9f137b76 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -18,7 +18,7 @@ title: GraphX Programming Guide
GraphX is the new (alpha) Spark API for graphs and graph-parallel computation. At a high-level,
GraphX extends the Spark [RDD](api/core/index.html#org.apache.spark.rdd.RDD) by introducing the
-[Resilient Distributed property Graph (RDG)](#property_graph): a directed multigraph with properties
+[Resilient Distributed Property Graph](#property_graph): a directed multigraph with properties
attached to each vertex and edge. To support graph computation, GraphX exposes a set of fundamental
operators (e.g., [subgraph](#structural_operators), [joinVertices](#join_operators), and
[mapReduceTriplets](#mrTriplets)) as well as an optimized variant of the [Pregel](#pregel) API. In
@@ -29,7 +29,7 @@ addition, GraphX includes a growing collection of graph [algorithms](#graph_algo
From social networks to language modeling, the growing scale and importance of
graph data has driven the development of numerous new *graph-parallel* systems
-(e.g., [Giraph](http://http://giraph.apache.org) and
+(e.g., [Giraph](http://giraph.apache.org) and
[GraphLab](http://graphlab.org)). By restricting the types of computation that can be
expressed and introducing new techniques to partition and distribute graphs,
these systems can efficiently execute sophisticated graph algorithms orders of
@@ -43,12 +43,25 @@ magnitude faster than more general *data-parallel* systems.
-However, the same restrictions that enable these substantial performance gains
-also make it difficult to express many of the important stages in a typical graph-analytics pipeline:
-constructing the graph, modifying its structure, or expressing computation that
-spans multiple graphs. As a consequence, existing graph analytics pipelines
-compose graph-parallel and data-parallel systems, leading to extensive data
-movement and duplication and a complicated programming model.
+However, the same restrictions that enable these substantial performance gains also make it
+difficult to express many of the important stages in a typical graph-analytics pipeline:
+constructing the graph, modifying its structure, or expressing computation that spans multiple
+graphs. Furthermore, how we look at data depends on our objectives and the same raw data may have
+many different table and graph views.
+
+As a consequence, it is often necessary to be able to move between table and graph views of the same
+physical data and to leverage the properties of each view to easily and efficiently express
+computation. However, existing graph analytics pipelines must compose graph-parallel and data-
+parallel systems, leading to extensive data movement and duplication and a complicated programming
+model.
GraphX optimizes the representation of `VD` and `ED` when they are plain old data-types (e.g.,
-> int, double, etc...) reducing the in memory footprint.
+> GraphX optimizes the representation of vertex and edge types when they are plain old data-types
+> (e.g., int, double, etc.), reducing the in-memory footprint by storing them in specialized
+> arrays.
-In some cases we may wish to have vertices with different property types in the same graph. This can
-be accomplished through inheritance. For example to model users and products as a bipartite graph
-we might do the following:
+In some cases it may be desirable to have vertices with different property types in the same graph.
+This can be accomplished through inheritance. For example, to model users and products as a
+bipartite graph, we might do the following:
{% highlight scala %}
class VertexProperty()
@@ -116,9 +132,11 @@ var graph: Graph[VertexProperty, String] = null
{% endhighlight %}
Like RDDs, property graphs are immutable, distributed, and fault-tolerant. Changes to the values or
-structure of the graph are accomplished by producing a new graph with the desired changes. The graph
-is partitioned across the workers using a range of vertex-partitioning heuristics. As with RDDs,
-each partition of the graph can be recreated on a different machine in the event of a failure.
+structure of the graph are accomplished by producing a new graph with the desired changes. Note
+that substantial parts of the original graph (i.e., unaffected structure, attributes, and indices)
+are reused in the new graph, reducing the cost of this inherently functional data-structure. The
+graph is partitioned across the workers using a range of vertex-partitioning heuristics. As with
+RDDs, each partition of the graph can be recreated on a different machine in the event of a failure.
Logically the property graph corresponds to a pair of typed collections (RDDs) encoding the
properties for each vertex and edge. As a consequence, the graph class contains members to access
@@ -953,13 +971,6 @@ val triCountByUsername = users.join(triCounts).map { case (id, (username, tc)) =
println(triCountByUsername.collect().mkString("\n"))
{% endhighlight %}
# Examples
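The inheritance-based modeling of users and products mentioned in the guide hunk above is easier to follow with the vertex property hierarchy written out. A minimal sketch; the `UserProperty` and `ProductProperty` case classes are illustrative assumptions, since the hunk only shows the base class and the final graph declaration:
{% highlight scala %}
// Distinct vertex kinds share a common base type so they can live in one graph.
class VertexProperty()
case class UserProperty(name: String) extends VertexProperty
case class ProductProperty(name: String, price: Double) extends VertexProperty

// The bipartite graph then carries the common supertype as its vertex property.
var graph: Graph[VertexProperty, String] = null
{% endhighlight %}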
From 119b6c524c659951e6abe791f2559048444b5c22 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 13:28:44 -0800
Subject: [PATCH 004/133] Merge pull request #425 from rxin/scaladoc
API doc update & make Broadcast public
In #413 Broadcast was mistakenly made private[spark]. I changed it to public again. Also exposing id in public given the R frontend requires that.
Copied some of the documentation from the programming guide to API Doc for Broadcast and Accumulator.
This should be cherry picked into branch-0.9 as well for 0.9.0 release.
(cherry picked from commit 2ce23a55a3c4033873bb262919d89e5afabb9134)
Signed-off-by: Reynold Xin
---
.../scala/org/apache/spark/Accumulators.scala | 40 ++++++++++++++-----
.../spark/api/java/JavaSparkContext.scala | 11 ++---
.../org/apache/spark/api/java/package.scala | 23 +++++++++++
.../apache/spark/broadcast/Broadcast.scala | 33 +++++++++++++--
.../org/apache/spark/broadcast/package.scala | 25 ++++++++++++
5 files changed, 115 insertions(+), 17 deletions(-)
create mode 100644 core/src/main/scala/org/apache/spark/api/java/package.scala
create mode 100644 core/src/main/scala/org/apache/spark/broadcast/package.scala
diff --git a/core/src/main/scala/org/apache/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala
index 2ba871a6007d7..df01b2e942180 100644
--- a/core/src/main/scala/org/apache/spark/Accumulators.scala
+++ b/core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -17,17 +17,17 @@
package org.apache.spark
-import java.io._
+import java.io.{ObjectInputStream, Serializable}
import scala.collection.mutable.Map
import scala.collection.generic.Growable
import org.apache.spark.serializer.JavaSerializer
/**
- * A datatype that can be accumulated, ie has an commutative and associative "add" operation,
+ * A data type that can be accumulated, i.e. has a commutative and associative "add" operation,
* but where the result type, `R`, may be different from the element type being added, `T`.
*
- * You must define how to add data, and how to merge two of these together. For some datatypes,
+ * You must define how to add data, and how to merge two of these together. For some data types,
* such as a counter, these might be the same operation. In that case, you can use the simpler
* [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are
* accumulating a set. You will add items to the set, and you will union two sets together.
@@ -45,7 +45,7 @@ class Accumulable[R, T] (
val id = Accumulators.newId
@transient private var value_ = initialValue // Current value on master
val zero = param.zero(initialValue) // Zero value to be passed to workers
- var deserialized = false
+ private var deserialized = false
Accumulators.register(this, true)
@@ -127,7 +127,7 @@ class Accumulable[R, T] (
/**
* Helper object defining how to accumulate values of a particular type. An implicit
- * AccumulableParam needs to be available when you create Accumulables of a specific type.
+ * AccumulableParam needs to be available when you create [[Accumulable]]s of a specific type.
*
* @tparam R the full accumulated data (result type)
* @tparam T partial data that can be added in
@@ -186,7 +186,29 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser
/**
* A simpler value of [[Accumulable]] where the result type being accumulated is the same
- * as the types of elements being merged.
+ * as the types of elements being merged, i.e. variables that are only "added" to through an
+ * associative operation and can therefore be efficiently supported in parallel. They can be used
+ * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of type
+ * `Int` and `Double`, and programmers can add support for new types.
+ *
+ * An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator]].
+ * Tasks running on the cluster can then add to it using the [[Accumulable#+=]] operator.
+ * However, they cannot read its value. Only the driver program can read the accumulator's value,
+ * using its value method.
+ *
+ * The interpreter session below shows an accumulator being used to add up the elements of an array:
+ *
+ * {{{
+ * scala> val accum = sc.accumulator(0)
+ * accum: spark.Accumulator[Int] = 0
+ *
+ * scala> sc.parallelize(Array(1, 2, 3, 4)).foreach(x => accum += x)
+ * ...
+ * 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s
+ *
+ * scala> accum.value
+ * res2: Int = 10
+ * }}}
*
* @param initialValue initial value of accumulator
* @param param helper object defining how to add elements of type `T`
@@ -196,9 +218,9 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T])
extends Accumulable[T,T](initialValue, param)
/**
- * A simpler version of [[org.apache.spark.AccumulableParam]] where the only datatype you can add in is the same type
- * as the accumulated value. An implicit AccumulatorParam object needs to be available when you create
- * Accumulators of a specific type.
+ * A simpler version of [[org.apache.spark.AccumulableParam]] where the only data type you can add
+ * in is the same type as the accumulated value. An implicit AccumulatorParam object needs to be
+ * available when you create Accumulators of a specific type.
*
* @tparam T type of value to accumulate
*/
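The expanded Accumulator scaladoc above notes that Spark natively supports `Int` and `Double` accumulators and that programmers can add support for new types. As a hedged sketch of what that looks like (the `VectorAccumulatorParam` object and the vector element type are assumptions for illustration, not part of this patch), a custom `AccumulatorParam` only has to supply `zero` and `addInPlace`:
{% highlight scala %}
import org.apache.spark.AccumulatorParam

// Element-wise accumulation of fixed-length Double vectors (illustrative only).
object VectorAccumulatorParam extends AccumulatorParam[Vector[Double]] {
  def zero(initialValue: Vector[Double]): Vector[Double] =
    Vector.fill(initialValue.length)(0.0)
  def addInPlace(v1: Vector[Double], v2: Vector[Double]): Vector[Double] =
    v1.zip(v2).map { case (a, b) => a + b }
}

// Usage, assuming an existing SparkContext `sc`:
// val vecAccum = sc.accumulator(Vector(0.0, 0.0, 0.0))(VectorAccumulatorParam)
// sc.parallelize(Seq(Vector(1.0, 2.0, 3.0))).foreach(v => vecAccum += v)
{% endhighlight %}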
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 7a6f044965027..8041163e3d748 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -34,11 +34,11 @@ import org.apache.spark.SparkContext.IntAccumulatorParam
import org.apache.spark.SparkContext.DoubleAccumulatorParam
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
-import scala.Tuple2
+
/**
- * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns [[org.apache.spark.api.java.JavaRDD]]s and
- * works with Java collections instead of Scala ones.
+ * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns
+ * [[org.apache.spark.api.java.JavaRDD]]s and works with Java collections instead of Scala ones.
*/
class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround {
/**
@@ -333,8 +333,9 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
sc.accumulable(initialValue)(param)
/**
- * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.Broadcast]] object for
- * reading it in distributed functions. The variable will be sent to each cluster only once.
+ * Broadcast a read-only variable to the cluster, returning a
+ * [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions.
+ * The variable will be sent to each cluster only once.
*/
def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value)
diff --git a/core/src/main/scala/org/apache/spark/api/java/package.scala b/core/src/main/scala/org/apache/spark/api/java/package.scala
new file mode 100644
index 0000000000000..8ec770046abe9
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/api/java/package.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api
+
+/** Spark Java programming APIs. */
+package object java {
+ // For package docs only
+}
diff --git a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
index 6bfe2cb4a29cf..d113d4040594d 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala
@@ -17,13 +17,40 @@
package org.apache.spark.broadcast
-import java.io._
+import java.io.Serializable
import java.util.concurrent.atomic.AtomicLong
import org.apache.spark._
-private[spark]
-abstract class Broadcast[T](private[spark] val id: Long) extends Serializable {
+/**
+ * A broadcast variable. Broadcast variables allow the programmer to keep a read-only variable
+ * cached on each machine rather than shipping a copy of it with tasks. They can be used, for
+ * example, to give every node a copy of a large input dataset in an efficient manner. Spark also
+ * attempts to distribute broadcast variables using efficient broadcast algorithms to reduce
+ * communication cost.
+ *
+ * Broadcast variables are created from a variable `v` by calling [[SparkContext#broadcast]].
+ * The broadcast variable is a wrapper around `v`, and its value can be accessed by calling the
+ * `value` method. The interpreter session below shows this:
+ *
+ * {{{
+ * scala> val broadcastVar = sc.broadcast(Array(1, 2, 3))
+ * broadcastVar: spark.Broadcast[Array[Int]] = spark.Broadcast(b5c40191-a864-4c7d-b9bf-d87e1a4e787c)
+ *
+ * scala> broadcastVar.value
+ * res0: Array[Int] = Array(1, 2, 3)
+ * }}}
+ *
+ * After the broadcast variable is created, it should be used instead of the value `v` in any
+ * functions run on the cluster so that `v` is not shipped to the nodes more than once.
+ * In addition, the object `v` should not be modified after it is broadcast in order to ensure
+ * that all nodes get the same value of the broadcast variable (e.g. if the variable is shipped
+ * to a new node later).
+ *
+ * @param id A unique identifier for the broadcast variable.
+ * @tparam T Type of the data contained in the broadcast variable.
+ */
+abstract class Broadcast[T](val id: Long) extends Serializable {
def value: T
// We cannot have an abstract readObject here due to some weird issues with
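Beyond the interpreter session in the new Broadcast scaladoc, the usual pattern is to read the broadcast variable through `value` inside a closure, so the wrapped data is shipped to each node once instead of with every task. A small sketch, assuming an existing SparkContext `sc`; the lookup table is made up for illustration:
{% highlight scala %}
// Ship a small lookup table to the cluster once, then read it inside tasks.
val lookup = sc.broadcast(Map(1 -> "one", 2 -> "two", 3 -> "three"))
val ids = sc.parallelize(Seq(1, 2, 3, 4))
val names = ids.map(i => lookup.value.getOrElse(i, "unknown"))
names.collect()  // Array(one, two, three, unknown)
{% endhighlight %}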
diff --git a/core/src/main/scala/org/apache/spark/broadcast/package.scala b/core/src/main/scala/org/apache/spark/broadcast/package.scala
new file mode 100644
index 0000000000000..01bf88629a7dd
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/broadcast/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+/**
+ * Package for broadcast variables. See [[broadcast.Broadcast]] for details.
+ */
+package object broadcast {
+ // For package docs only
+}
From a14933dac1e8b866d49a161854453b56a6e1dfcc Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 14:52:24 -0800
Subject: [PATCH 005/133] Merge pull request #427 from
pwendell/deprecate-aggregator
Deprecate rather than remove old combineValuesByKey function
(cherry picked from commit d601a76d1fdd25b95020b2e32bacde583cf6aa50)
Signed-off-by: Reynold Xin
---
.../scala/org/apache/spark/Aggregator.scala | 22 ++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala
index 6d439fdc684af..edbea6ea5680a 100644
--- a/core/src/main/scala/org/apache/spark/Aggregator.scala
+++ b/core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -17,6 +17,8 @@
package org.apache.spark
+import scala.{Option, deprecated}
+
import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap}
/**
@@ -34,8 +36,12 @@ case class Aggregator[K, V, C] (
private val sparkConf = SparkEnv.get.conf
private val externalSorting = sparkConf.getBoolean("spark.shuffle.spill", true)
+ @deprecated("use combineValuesByKey with TaskContext argument", "0.9.0")
+ def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]): Iterator[(K, C)] =
+ combineValuesByKey(iter, null)
+
def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]],
- context: TaskContext) : Iterator[(K, C)] = {
+ context: TaskContext): Iterator[(K, C)] = {
if (!externalSorting) {
val combiners = new AppendOnlyMap[K,C]
var kv: Product2[K, V] = null
@@ -53,12 +59,17 @@ case class Aggregator[K, V, C] (
val (k, v) = iter.next()
combiners.insert(k, v)
}
- context.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled
- context.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled
+ // TODO: Make this non optional in a future release
+ Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled)
+ Option(context).foreach(c => c.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled)
combiners.iterator
}
}
+ @deprecated("use combineCombinersByKey with TaskContext argument", "0.9.0")
+ def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] =
+ combineCombinersByKey(iter, null)
+
def combineCombinersByKey(iter: Iterator[(K, C)], context: TaskContext) : Iterator[(K, C)] = {
if (!externalSorting) {
val combiners = new AppendOnlyMap[K,C]
@@ -77,8 +88,9 @@ case class Aggregator[K, V, C] (
val (k, c) = iter.next()
combiners.insert(k, c)
}
- context.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled
- context.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled
+ // TODO: Make this non optional in a future release
+ Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled)
+ Option(context).foreach(c => c.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled)
combiners.iterator
}
}
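The deprecated overloads above pass `null` for the `TaskContext`, and the `Option(context).foreach(...)` guard is what keeps the spill-metric updates from throwing on that path: `Option(null)` is `None`, so the update becomes a no-op. A minimal sketch of the same pattern in isolation (the helper name is an assumption, for illustration only):
{% highlight scala %}
import org.apache.spark.TaskContext

// Runs the metric updates only when a TaskContext was actually supplied.
def recordSpill(context: TaskContext, memoryBytes: Long, diskBytes: Long): Unit =
  Option(context).foreach { c =>
    c.taskMetrics.memoryBytesSpilled = memoryBytes
    c.taskMetrics.diskBytesSpilled = diskBytes
  }
{% endhighlight %}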
From 329c9df13670871acccd834eb042c59be12bb8f6 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 14:53:24 -0800
Subject: [PATCH 006/133] Merge pull request #429 from
ankurdave/graphx-examples-pom.xml
Add GraphX dependency to examples/pom.xml
(cherry picked from commit 193a0757c87b717e3b6b4f005ecdbb56b04ad9b4)
Signed-off-by: Reynold Xin
---
examples/pom.xml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/examples/pom.xml b/examples/pom.xml
index cb4f7ee33b4a1..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -71,6 +71,12 @@
${project.version}
provided
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-graphx_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
org.apache.spark
spark-streaming-twitter_${scala.binary.version}
From 2f930d5ae10d603370b49b91719259f17dc25628 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 14:59:13 -0800
Subject: [PATCH 007/133] Merge pull request #428 from
pwendell/writeable-objects
Don't clone records for text files
(cherry picked from commit 74b46acdc57293c103ab5dd5af931d0d0e32c0ed)
Signed-off-by: Reynold Xin
---
core/src/main/scala/org/apache/spark/SparkContext.scala | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 55ac76bf63909..ba3e91effbdb4 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -340,8 +340,8 @@ class SparkContext(
* Hadoop-supported file system URI, and return it as an RDD of Strings.
*/
def textFile(path: String, minSplits: Int = defaultMinSplits): RDD[String] = {
- hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], minSplits)
- .map(pair => pair._2.toString)
+ hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
+ minSplits, cloneRecords = false).map(pair => pair._2.toString)
}
/**
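For context on the change above: `textFile` maps each Hadoop `(LongWritable, Text)` record to an immutable `String` before anything else touches it, so the reused `Writable` objects never escape and cloning them would only add copying overhead; hence `cloneRecords = false`. Usage is unchanged. A sketch, assuming an existing SparkContext `sc` and an illustrative path:
{% highlight scala %}
// textFile still returns an RDD[String]; record cloning is an internal detail.
val lines = sc.textFile("hdfs://namenode:8020/data/input.txt")
val totalChars = lines.map(_.length.toLong).reduce(_ + _)
{% endhighlight %}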
From ce66ca78b7c67b8dfc865d2aa32f7c4a71493ca4 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 15:16:46 -0800
Subject: [PATCH 008/133] Small change to maven build
---
pom.xml | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/pom.xml b/pom.xml
index b25d9d7ef891d..f14d9667ccc7d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -727,6 +727,14 @@
+
+
+ release
+
+ true
+
+
yarn-alpha
From 40c97afce8be3c45590aaf5f789cbff058443892 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 15:57:53 -0800
Subject: [PATCH 009/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl-bin/pom.xml | 2 +-
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
18 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 54a25910ced7d..ca993eea4f23c 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 3e5faf230dbc9..c7897bcc2e4a5 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index f14d9667ccc7d..2075fbe6a41d7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index 869dbdb9b095a..80d0b9f47f34a 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 51131bf820330bd2475a9676d151f5d488c150a7 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 15:57:59 -0800
Subject: [PATCH 010/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl-bin/pom.xml | 2 +-
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
18 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index ca993eea4f23c..1b2d8f531ffb7 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index c7897bcc2e4a5..8080075e9395a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index 2075fbe6a41d7..c14685da5f232 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index 80d0b9f47f34a..37debdf73a6d3 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From 6fa4e02dd19308c9629fb898061334d554def641 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 21:51:06 -0800
Subject: [PATCH 011/133] Merge pull request #431 from
ankurdave/graphx-caching-doc
Describe caching and uncaching in GraphX programming guide
(cherry picked from commit ad294db326f57beb98f9734e2b4c45d9da1a4c89)
Signed-off-by: Reynold Xin
---
docs/graphx-programming-guide.md | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 5641f9f137b76..03940d836b698 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -611,11 +611,20 @@ class GraphOps[VD, ED] {
> substantial communication. If possible try expressing the same computation using the
> `mapReduceTriplets` operator directly.
+## Caching and Uncaching
+
+In Spark, RDDs are not persisted in memory by default. To avoid recomputation, they must be explicitly cached when using them multiple times (see the [Spark Programming Guide][RDD Persistence]). Graphs in GraphX behave the same way. **When using a graph multiple times, make sure to call [`Graph.cache()`][Graph.cache] on it first.**
+
+[RDD Persistence]: scala-programming-guide.html#rdd-persistence
+[Graph.cache]: api/graphx/index.html#org.apache.spark.graphx.Graph@cache():Graph[VD,ED]
+
+In iterative computations, *uncaching* may also be necessary for best performance. By default, cached RDDs and graphs will remain in memory until memory pressure forces them to be evicted in LRU order. For iterative computation, intermediate results from previous iterations will fill up the cache. Though they will eventually be evicted, the unnecessary data stored in memory will slow down garbage collection. It would be more efficient to uncache intermediate results as soon as they are no longer necessary. This involves materializing (caching and forcing) a graph or RDD every iteration, uncaching all other datasets, and only using the materialized dataset in future iterations. However, because graphs are composed of multiple RDDs, it can be difficult to unpersist them correctly. **For iterative computation we recommend using the Pregel API, which correctly unpersists intermediate results.**
+
# Pregel API
Graphs are inherently recursive data-structures as properties of vertices depend on properties of
-their neighbors which intern depend on properties of *their* neighbors. As a
+their neighbors which in turn depend on properties of *their* neighbors. As a
consequence many important graph algorithms iteratively recompute the properties of each vertex
until a fixed-point condition is reached. A range of graph-parallel abstractions have been proposed
to express these iterative algorithms. GraphX exposes a Pregel-like operator which is a fusion of
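A short sketch of the caching advice added in this hunk (illustrative: the edge-list path and value names are assumptions), showing a graph being cached before it is reused:
{% highlight scala %}
import org.apache.spark.graphx.GraphLoader

// Cache the graph up front; the first action materializes it and later
// operations read the cached partitions instead of rebuilding the graph.
val graph = GraphLoader.edgeListFile(sc, "hdfs://data/followers.txt").cache()
val numEdges = graph.edges.count()                    // builds and caches
val maxDegree = graph.degrees.map(_._2).reduce(_ max _) // reuses the cached graph
{% endhighlight %}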
From 2c6c07f428079f390901f662d893cd932c90a70a Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 21:52:50 -0800
Subject: [PATCH 012/133] Merge pull request #424 from
jegonzal/GraphXProgrammingGuide
Additional edits for clarity in the graphx programming guide.
Added an overview of the Graph and GraphOps functions and fixed numerous typos.
(cherry picked from commit 3a386e238984c48a6ac07974b92647beae1199b3)
Signed-off-by: Reynold Xin
---
docs/graphx-programming-guide.md | 173 +++++++++++++++++++++----------
1 file changed, 121 insertions(+), 52 deletions(-)
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 03940d836b698..4bf47434571f5 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -108,7 +108,7 @@ with user defined objects attached to each vertex and edge. A directed multigra
graph with potentially multiple parallel edges sharing the same source and destination vertex. The
ability to support parallel edges simplifies modeling scenarios where there can be multiple
relationships (e.g., co-worker and friend) between the same vertices. Each vertex is keyed by a
-*unique* 64-bit long identifier (`VertexId`). GraphX does not impose any ordering constraints on
+*unique* 64-bit long identifier (`VertexID`). GraphX does not impose any ordering constraints on
the vertex identifiers. Similarly, edges have corresponding source and destination vertex
identifiers.
@@ -149,12 +149,12 @@ class Graph[VD, ED] {
}
{% endhighlight %}
-The classes `VertexRDD[VD]` and `EdgeRDD[ED]` extend and are optimized versions of `RDD[(VertexId,
+The classes `VertexRDD[VD]` and `EdgeRDD[ED]` extend and are optimized versions of `RDD[(VertexID,
VD)]` and `RDD[Edge[ED]]` respectively. Both `VertexRDD[VD]` and `EdgeRDD[ED]` provide additional
functionality built around graph computation and leverage internal optimizations. We discuss the
`VertexRDD` and `EdgeRDD` API in greater detail in the section on [vertex and edge
RDDs](#vertex_and_edge_rdds) but for now they can be thought of as simply RDDs of the form:
-`RDD[(VertexId, VD)]` and `RDD[Edge[ED]]`.
+`RDD[(VertexID, VD)]` and `RDD[Edge[ED]]`.
### Example Property Graph
@@ -201,7 +201,7 @@ val graph = Graph(users, relationships, defaultUser)
In the above example we make use of the [`Edge`][Edge] case class. Edges have a `srcId` and a
`dstId` corresponding to the source and destination vertex identifiers. In addition, the `Edge`
-class contains the `attr` member which contains the edge property.
+class has an `attr` member which stores the edge property.
[Edge]: api/graphx/index.html#org.apache.spark.graphx.Edge
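(Editor's note, not from the patch.) A tiny illustration of the `srcId`, `dstId`, and `attr` members just described; the vertex ids and the string attribute are made-up values:
{% highlight scala %}
import org.apache.spark.graphx.Edge

val e = Edge(srcId = 3L, dstId = 7L, attr = "collab")
e.srcId   // 3L
e.dstId   // 7L
e.attr    // "collab"
{% endhighlight %}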
@@ -217,7 +217,7 @@ graph.edges.filter(e => e.srcId > e.dstId).count
{% endhighlight %}
> Note that `graph.vertices` returns a `VertexRDD[(String, String)]` which extends
-> `RDD[(VertexId, (String, String))]` and so we use the scala `case` expression to deconstruct the
+> `RDD[(VertexID, (String, String))]` and so we use the scala `case` expression to deconstruct the
> tuple. On the other hand, `graph.edges` returns an `EdgeRDD` containing `Edge[String]` objects.
> We could have also used the case class type constructor as in the following:
> {% highlight scala %}
@@ -284,6 +284,75 @@ able to support different graph representations in the future. Each graph repre
provide implementations of the core operations and reuse many of the useful operations defined in
[`GraphOps`][GraphOps].
+### Summary List of Operators
+The following is a quick summary of the functionality defined in both [`Graph`][Graph] and
+[`GraphOps`][GraphOps] but presented as members of Graph for simplicity. Note that some function
+signatures have been simplified (e.g., default arguments and type constraints removed) and some more
+advanced functionality has been removed, so please consult the API docs for the official list of
+operations.
+
+{% highlight scala %}
+/** Summary of the functionality in the property graph */
+class Graph[VD, ED] {
+ // Information about the Graph ===================================================================
+ val numEdges: Long
+ val numVertices: Long
+ val inDegrees: VertexRDD[Int]
+ val outDegrees: VertexRDD[Int]
+ val degrees: VertexRDD[Int]
+ // Views of the graph as collections =============================================================
+ val vertices: VertexRDD[VD]
+ val edges: EdgeRDD[ED]
+ val triplets: RDD[EdgeTriplet[VD, ED]]
+ // Functions for caching graphs ==================================================================
+ def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED]
+ def cache(): Graph[VD, ED]
+ def unpersistVertices(blocking: Boolean = true): Graph[VD, ED]
+ // Change the partitioning heuristic ============================================================
+ def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED]
+ // Transform vertex and edge attributes ==========================================================
+ def mapVertices[VD2](map: (VertexID, VD) => VD2): Graph[VD2, ED]
+ def mapEdges[ED2](map: Edge[ED] => ED2): Graph[VD, ED2]
+ def mapEdges[ED2](map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2]
+ def mapTriplets[ED2](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2]
+ def mapTriplets[ED2](map: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2])
+ : Graph[VD, ED2]
+ // Modify the graph structure ====================================================================
+ def reverse: Graph[VD, ED]
+ def subgraph(
+ epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
+ vpred: (VertexID, VD) => Boolean = ((v, d) => true))
+ : Graph[VD, ED]
+ def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED]
+ def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED]
+ // Join RDDs with the graph ======================================================================
+ def joinVertices[U](table: RDD[(VertexID, U)])(mapFunc: (VertexID, VD, U) => VD): Graph[VD, ED]
+ def outerJoinVertices[U, VD2](other: RDD[(VertexID, U)])
+ (mapFunc: (VertexID, VD, Option[U]) => VD2)
+ : Graph[VD2, ED]
+ // Aggregate information about adjacent triplets =================================================
+ def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexID]]
+ def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexID, VD)]]
+ def mapReduceTriplets[A: ClassTag](
+ mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ reduceFunc: (A, A) => A,
+ activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None)
+ : VertexRDD[A]
+ // Iterative graph-parallel computation ==========================================================
+ def pregel[A](initialMsg: A, maxIterations: Int, activeDirection: EdgeDirection)(
+ vprog: (VertexID, VD, A) => VD,
+ sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID,A)],
+ mergeMsg: (A, A) => A)
+ : Graph[VD, ED]
+ // Basic graph algorithms ========================================================================
+ def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double]
+ def connectedComponents(): Graph[VertexID, ED]
+ def triangleCount(): Graph[Int, ED]
+ def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED]
+}
+{% endhighlight %}
+
+
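(Editorial addition, not part of the patch.) To make the summary above concrete, a short illustrative session; it assumes a `graph: Graph[String, Int]` has already been constructed as in the guide's earlier examples:
{% highlight scala %}
val numV = graph.numVertices                                       // basic information
val degrees: VertexRDD[Int] = graph.degrees                        // view as a collection
val upper = graph.mapVertices((id, name) => name.toUpperCase)      // transform vertex attributes
val smallIds = graph.subgraph(vpred = (id, attr) => id < 100L)     // restrict the structure
{% endhighlight %}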
## Property Operators
In direct analogy to the RDD `map` operator, the property
@@ -443,7 +512,7 @@ original value.
> is therefore recommended that the input RDD be first made unique using the following which will
> also *pre-index* the resulting values to substantially accelerate the subsequent join.
> {% highlight scala %}
-val nonUniqueCosts: RDD[(VertexId, Double)]
+val nonUniqueCosts: RDD[(VertexID, Double)]
val uniqueCosts: VertexRDD[Double] =
graph.vertices.aggregateUsingIndex(nonUnique, (a,b) => a + b)
val joinedGraph = graph.joinVertices(uniqueCosts)(
@@ -475,7 +544,7 @@ val degreeGraph = graph.outerJoinVertices(outDegrees) { (id, oldAttr, outDegOpt)
> provide type annotation for the user defined function:
> {% highlight scala %}
val joinedGraph = graph.joinVertices(uniqueCosts,
- (id: VertexId, oldCost: Double, extraCost: Double) => oldCost + extraCost)
+ (id: VertexID, oldCost: Double, extraCost: Double) => oldCost + extraCost)
{% endhighlight %}
@@ -513,26 +582,26 @@ containing the aggregate message (of type `A`) destined to each vertex. Vertice
receive a message are not included in the returned `VertexRDD`.
-
-Note that `mapReduceTriplets` takes an additional optional `activeSet` (see API docs) which
-restricts the map phase to edges adjacent to the vertices in the provided `VertexRDD`:
-
+
+Note that `mapReduceTriplets` takes an additional optional `activeSet` (not shown above; see the
+API docs for details) which restricts the map phase to edges adjacent to the vertices in the
+provided `VertexRDD`:
+
{% highlight scala %}
activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None
{% endhighlight %}
-
-The `EdgeDirection` specifies which edges adjacent to the vertex set are included in the map
-phase. If the direction is `In`, `mapFunc` will only be run on edges with the destination in the
-active set. If the direction is `Out`, `mapFunc` will only be run on edges originating from
-vertices in the active set. If the direction is `Either`, `mapFunc` will be run only on edges
-with either vertex in the active set. If the direction is `Both`, `mapFunc` will be run only on
-edges with both vertices in the active set. The active set must be derived from the set of
-vertices in the graph. Restricting computation to triplets adjacent to a subset of the vertices
-is often necessary in incremental iterative computation and is a key part of the GraphX
-implementation of Pregel.
-
+
+The `EdgeDirection` specifies which edges adjacent to the vertex set are included in the map
+phase. If the direction is `In`, then the user defined `map` function will only be run on edges
+with the destination vertex in the active set. If the direction is `Out`, then the `map` function
+will only be run on edges originating from vertices in the active set. If the direction is
+`Either`, then the `map` function will be run only on edges with either vertex in the active set.
+If the direction is `Both`, then the `map` function will be run only on edges with both vertices
+in the active set. The active set must be derived from the set of vertices in the graph.
+Restricting computation to triplets adjacent to a subset of the vertices is often necessary in
+incremental iterative computation and is a key part of the GraphX implementation of Pregel.
+
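(Editorial sketch, not from the patch.) Passing the optional active set described above; `activeVertices`, the graph types, and the message computation are all hypothetical:
{% highlight scala %}
// Assume graph: Graph[Double, Double] and activeVertices: VertexRDD[Unit] already exist.
val msgs: VertexRDD[Double] = graph.mapReduceTriplets[Double](
  mapFunc = triplet => Iterator((triplet.dstId, triplet.srcAttr * triplet.attr)),
  reduceFunc = (a, b) => a + b,
  activeSetOpt = Some((activeVertices, EdgeDirection.In)))  // only edges into the active set
{% endhighlight %}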
In the following example we use the `mapReduceTriplets` operator to compute the average age of the
@@ -565,8 +634,8 @@ val avgAgeOfOlderFollowers: VertexRDD[Double] =
avgAgeOfOlderFollowers.collect.foreach(println(_))
{% endhighlight %}
-> Note that the `mapReduceTriplets` operation performs optimally when the messages (and their sums)
-> are constant sized (e.g., floats and addition instead of lists and concatenation). More
+> Note that the `mapReduceTriplets` operation performs optimally when the messages (and the sums of
+> messages) are constant sized (e.g., floats and addition instead of lists and concatenation). More
> precisely, the result of `mapReduceTriplets` should ideally be sub-linear in the degree of each
> vertex.
@@ -788,16 +857,16 @@ respectively. In this section we review some of the additional useful functiona
## VertexRDDs
-The `VertexRDD[A]` extends the more traditional `RDD[(VertexId, A)]` but adds the additional
-constraint that each `VertexId` occurs only *once*. Moreover, `VertexRDD[A]` represents a *set* of
-vertices each with an attribute of type `A`. Internally, this is achieved by storing the vertex
-attributes in a reusable hash-map data-structure. As a consequence if two `VertexRDD`s are derived
-from the same base `VertexRDD` (e.g., by `filter` or `mapValues`) they can be joined in constant
-time without hash evaluations. To leverage this indexed data-structure, the `VertexRDD` exposes the
-following additional functionality:
+The `VertexRDD[A]` extends `RDD[(VertexID, A)]` and adds the additional constraint that each
+`VertexID` occurs only *once*. Moreover, `VertexRDD[A]` represents a *set* of vertices each with an
+attribute of type `A`. Internally, this is achieved by storing the vertex attributes in a reusable
+hash-map data-structure. As a consequence if two `VertexRDD`s are derived from the same base
+`VertexRDD` (e.g., by `filter` or `mapValues`) they can be joined in constant time without hash
+evaluations. To leverage this indexed data-structure, the `VertexRDD` exposes the following
+additional functionality:
{% highlight scala %}
-class VertexRDD[VD] {
+class VertexRDD[VD] extends RDD[(VertexID, VD)] {
// Filter the vertex set but preserves the internal index
def filter(pred: Tuple2[VertexID, VD] => Boolean): VertexRDD[VD]
// Transform the values without changing the ids (preserves the internal index)
@@ -816,15 +885,14 @@ class VertexRDD[VD] {
Notice, for example, how the `filter` operator returns a `VertexRDD`. Filter is actually
implemented using a `BitSet` thereby reusing the index and preserving the ability to do fast joins
with other `VertexRDD`s. Likewise, the `mapValues` operators do not allow the `map` function to
-change the `VertexId` thereby enabling the same `HashMap` data-structures to be reused. Both the
+change the `VertexID` thereby enabling the same `HashMap` data-structures to be reused. Both the
`leftJoin` and `innerJoin` are able to identify when joining two `VertexRDD`s derived from the same
`HashMap` and implement the join by linear scan rather than costly point lookups.
-The `aggregateUsingIndex` operator can be slightly confusing but is also useful for efficient
-construction of a new `VertexRDD` from an `RDD[(VertexId, A)]`. Conceptually, if I have constructed
-a `VertexRDD[B]` over a set of vertices, *which is a super-set* of the vertices in some
-`RDD[(VertexId, A)]` then I can reuse the index to both aggregate and then subsequently index the
-RDD. For example:
+The `aggregateUsingIndex` operator is useful for efficient construction of a new `VertexRDD` from an
+`RDD[(VertexID, A)]`. Conceptually, if I have constructed a `VertexRDD[B]` over a set of vertices,
+*which is a super-set* of the vertices in some `RDD[(VertexID, A)]` then I can reuse the index to
+both aggregate and then subsequently index the `RDD[(VertexID, A)]`. For example:
{% highlight scala %}
val setA: VertexRDD[Int] = VertexRDD(sc.parallelize(0L until 100L).map(id => (id, 1)))
@@ -840,10 +908,10 @@ val setC: VertexRDD[Double] = setA.innerJoin(setB)((id, a, b) => a + b)
## EdgeRDDs
-The `EdgeRDD[ED]`, which extends `RDD[Edge[ED]]` is considerably simpler than the `VertexRDD`.
-GraphX organizes the edges in blocks partitioned using one of the various partitioning strategies
-defined in [`PartitionStrategy`][PartitionStrategy]. Within each partition, edge attributes and
-adjacency structure, are stored separately enabling maximum reuse when changing attribute values.
+The `EdgeRDD[ED]`, which extends `RDD[Edge[ED]]` organizes the edges in blocks partitioned using one
+of the various partitioning strategies defined in [`PartitionStrategy`][PartitionStrategy]. Within
+each partition, edge attributes and adjacency structure are stored separately, enabling maximum
+reuse when changing attribute values.
[PartitionStrategy]: api/graphx/index.html#org.apache.spark.graphx.PartitionStrategy
@@ -858,7 +926,7 @@ def innerJoin[ED2, ED3](other: EdgeRDD[ED2])(f: (VertexID, VertexID, ED, ED2) =>
{% endhighlight %}
In most applications we have found that operations on the `EdgeRDD` are accomplished through the
-graph or rely on operations defined in the base `RDD` class.
+graph operators or rely on operations defined in the base `RDD` class.
# Optimized Representation
@@ -880,7 +948,9 @@ reduce both the communication and storage overhead. Logically, this corresponds
to machines and allowing vertices to span multiple machines. The exact method of assigning edges
depends on the [`PartitionStrategy`][PartitionStrategy] and there are several tradeoffs to the
various heuristics. Users can choose between different strategies by repartitioning the graph with
-the [`Graph.partitionBy`][Graph.partitionBy] operator.
+the [`Graph.partitionBy`][Graph.partitionBy] operator. The default partitioning strategy is to use
+the initial partitioning of the edges as provided on graph construction. However, users can easily
+switch to 2D-partitioning or other heuristics included in GraphX.
[Graph.partitionBy]: api/graphx/index.html#org.apache.spark.graphx.Graph$@partitionBy(partitionStrategy:org.apache.spark.graphx.PartitionStrategy):org.apache.spark.graphx.Graph[VD,ED]
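(Editorial, not from the patch.) A one-liner showing how a user would switch strategies as described above; `EdgePartition2D` is just one of the built-in `PartitionStrategy` choices, and `graph` is assumed to exist:
{% highlight scala %}
import org.apache.spark.graphx.PartitionStrategy
val repartitioned = graph.partitionBy(PartitionStrategy.EdgePartition2D).cache()
{% endhighlight %}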
@@ -894,16 +964,15 @@ the [`Graph.partitionBy`][Graph.partitionBy] operator.
Once the edges have been partitioned, the key challenge to efficient graph-parallel computation is
efficiently joining vertex attributes with the edges. Because real-world graphs typically have more
-edges than vertices, we move vertex attributes to the edges.
-
-
-
-
+edges than vertices, we move vertex attributes to the edges. Because not all partitions will
+contain edges adjacent to all vertices, we internally maintain a routing table which identifies where
+to broadcast vertices when implementing the join required for operations like `triplets` and
+`mapReduceTriplets`.
# Graph Algorithms
-GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used.
+GraphX includes a set of graph algorithms to simplify analytics tasks. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used.
## PageRank
From 863dd722436598a390d7a0c319c6cf49b488b5f2 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 23:05:23 -0800
Subject: [PATCH 013/133] Reverting release plugin changes
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 2 +-
repl-bin/pom.xml | 2 +-
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
18 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 1b2d8f531ffb7..54a25910ced7d 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 8080075e9395a..3e5faf230dbc9 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index c14685da5f232..f14d9667ccc7d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index 37debdf73a6d3..869dbdb9b095a 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From fbfbb331db64dc1c5daef4585c7f85ce0323f5e8 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 22:50:36 -0800
Subject: [PATCH 014/133] Merge pull request #434 from rxin/graphxmaven
Fixed SVDPlusPlusSuite in Maven build.
This should go into 0.9.0 also.
(cherry picked from commit 087487e90e4d6269d7a027f7cb718120f6c10505)
Signed-off-by: Patrick Wendell
---
graphx/src/test/resources/als-test.data | 16 ++++++++++++++++
.../spark/graphx/lib/SVDPlusPlusSuite.scala | 10 +++-------
2 files changed, 19 insertions(+), 7 deletions(-)
create mode 100644 graphx/src/test/resources/als-test.data
diff --git a/graphx/src/test/resources/als-test.data b/graphx/src/test/resources/als-test.data
new file mode 100644
index 0000000000000..e476cc23e047d
--- /dev/null
+++ b/graphx/src/test/resources/als-test.data
@@ -0,0 +1,16 @@
+1,1,5.0
+1,2,1.0
+1,3,5.0
+1,4,1.0
+2,1,5.0
+2,2,1.0
+2,3,5.0
+2,4,1.0
+3,1,1.0
+3,2,5.0
+3,3,1.0
+3,4,5.0
+4,1,1.0
+4,2,5.0
+4,3,1.0
+4,4,5.0
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
index 057d9b3d518e0..e01df56e94de9 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/SVDPlusPlusSuite.scala
@@ -19,11 +19,7 @@ package org.apache.spark.graphx.lib
import org.scalatest.FunSuite
-import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
import org.apache.spark.graphx._
-import org.apache.spark.graphx.util.GraphGenerators
-import org.apache.spark.rdd._
class SVDPlusPlusSuite extends FunSuite with LocalSparkContext {
@@ -31,16 +27,16 @@ class SVDPlusPlusSuite extends FunSuite with LocalSparkContext {
test("Test SVD++ with mean square error on training set") {
withSpark { sc =>
val svdppErr = 8.0
- val edges = sc.textFile("mllib/data/als/test.data").map { line =>
+ val edges = sc.textFile(getClass.getResource("/als-test.data").getFile).map { line =>
val fields = line.split(",")
Edge(fields(0).toLong * 2, fields(1).toLong * 2 + 1, fields(2).toDouble)
}
val conf = new SVDPlusPlus.Conf(10, 2, 0.0, 5.0, 0.007, 0.007, 0.005, 0.015) // 2 iterations
var (graph, u) = SVDPlusPlus.run(edges, conf)
graph.cache()
- val err = graph.vertices.collect.map{ case (vid, vd) =>
+ val err = graph.vertices.collect().map{ case (vid, vd) =>
if (vid % 2 == 1) vd._4 else 0.0
- }.reduce(_ + _) / graph.triplets.collect.size
+ }.reduce(_ + _) / graph.triplets.collect().size
assert(err <= svdppErr)
}
}
From 2859cab2f50099d1a691aecb5f7e5dfa26dccdb1 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 14 Jan 2014 23:07:55 -0800
Subject: [PATCH 015/133] Merge pull request #435 from tdas/filestream-fix
Fixed the flaky tests by making SparkConf not serializable
SparkConf was being serialized with CoGroupedRDD and Aggregator, which somehow caused OptionalJavaException while being deserialized as part of a ShuffleMapTask. SparkConf should not even be serializable (according to conversation with Matei). This change fixes that.
@mateiz @pwendell
(cherry picked from commit 139c24ef08e6ffb090975c9808a2cba304eb79e0)
Signed-off-by: Patrick Wendell
---
.../scala/org/apache/spark/Aggregator.scala | 3 +-
.../scala/org/apache/spark/SparkConf.scala | 3 +-
.../org/apache/spark/rdd/CoGroupedRDD.scala | 3 +-
.../flume/src/test/resources/log4j.properties | 2 +-
.../kafka/src/test/resources/log4j.properties | 2 +-
.../streaming/kafka/KafkaStreamSuite.scala | 1 +
.../mqtt/src/test/resources/log4j.properties | 2 +-
.../streaming/mqtt/MQTTStreamSuite.scala | 1 +
.../src/test/resources/log4j.properties | 2 +-
.../twitter/TwitterStreamSuite.scala | 1 +
.../src/test/resources/log4j.properties | 2 +-
.../streaming/zeromq/ZeroMQStreamSuite.scala | 1 +
.../apache/spark/streaming/Checkpoint.scala | 10 ++++---
.../apache/spark/streaming/DStreamGraph.scala | 2 ++
.../dstream/DStreamCheckpointData.scala | 26 ++++++++++++++++-
.../spark/streaming/CheckpointSuite.scala | 28 +++++++++++++------
16 files changed, 66 insertions(+), 23 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala
index edbea6ea5680a..c4579cf6ad560 100644
--- a/core/src/main/scala/org/apache/spark/Aggregator.scala
+++ b/core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -33,8 +33,7 @@ case class Aggregator[K, V, C] (
mergeValue: (C, V) => C,
mergeCombiners: (C, C) => C) {
- private val sparkConf = SparkEnv.get.conf
- private val externalSorting = sparkConf.getBoolean("spark.shuffle.spill", true)
+ private val externalSorting = SparkEnv.get.conf.getBoolean("spark.shuffle.spill", true)
@deprecated("use combineValuesByKey with TaskContext argument", "0.9.0")
def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]): Iterator[(K, C)] =
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 93d3d1f6972c3..369c6ce78fa71 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -21,6 +21,7 @@ import scala.collection.JavaConverters._
import scala.collection.mutable.HashMap
import com.typesafe.config.ConfigFactory
+import java.io.{ObjectInputStream, ObjectOutputStream, IOException}
/**
* Configuration for a Spark application. Used to set various Spark parameters as key-value pairs.
@@ -41,7 +42,7 @@ import com.typesafe.config.ConfigFactory
*
* @param loadDefaults whether to load values from the system properties and classpath
*/
-class SparkConf(loadDefaults: Boolean) extends Serializable with Cloneable with Logging {
+class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
/** Create a SparkConf that loads defaults from system properties and the classpath */
def this() = this(true)
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index 9c6b308804c77..f2feb406f7783 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -66,7 +66,6 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part:
private type CoGroupValue = (Any, Int) // Int is dependency number
private type CoGroupCombiner = Seq[CoGroup]
- private val sparkConf = SparkEnv.get.conf
private var serializerClass: String = null
def setSerializer(cls: String): CoGroupedRDD[K] = {
@@ -106,7 +105,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part:
override val partitioner = Some(part)
override def compute(s: Partition, context: TaskContext): Iterator[(K, CoGroupCombiner)] = {
-
+ val sparkConf = SparkEnv.get.conf
val externalSorting = sparkConf.getBoolean("spark.shuffle.externalSorting", true)
val split = s.asInstanceOf[CoGroupPartition]
val numRdds = split.deps.size
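(Editor's note, not part of the patch.) The hunks above move the conf lookup out of a field and into `compute`, so the now non-serializable `SparkConf` is never captured with the serialized `Aggregator`/`CoGroupedRDD`. A framework-free sketch of that pattern, with made-up names:
{% highlight scala %}
object ClosureCapturePattern {
  // Stand-ins for SparkConf / SparkEnv (hypothetical, for illustration only).
  final class Conf(val settings: Map[String, String])
  object Env { def get: Conf = new Conf(Map("spark.shuffle.spill" -> "true")) }

  class Task extends Serializable {
    // private val conf = Env.get   // a field would be serialized with the Task -- avoid
    def compute(): Boolean = {
      val conf = Env.get            // fetched where the task actually runs
      conf.settings.getOrElse("spark.shuffle.spill", "true").toBoolean
    }
  }

  def main(args: Array[String]): Unit = println(new Task().compute())
}
{% endhighlight %}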
diff --git a/external/flume/src/test/resources/log4j.properties b/external/flume/src/test/resources/log4j.properties
index 063529a9cbc67..d1bd73a8430e1 100644
--- a/external/flume/src/test/resources/log4j.properties
+++ b/external/flume/src/test/resources/log4j.properties
@@ -20,7 +20,7 @@ log4j.rootCategory=INFO, file
# log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=streaming/target/unit-tests.log
+log4j.appender.file.file=external/flume/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/external/kafka/src/test/resources/log4j.properties b/external/kafka/src/test/resources/log4j.properties
index 063529a9cbc67..38910d113050a 100644
--- a/external/kafka/src/test/resources/log4j.properties
+++ b/external/kafka/src/test/resources/log4j.properties
@@ -20,7 +20,7 @@ log4j.rootCategory=INFO, file
# log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=streaming/target/unit-tests.log
+log4j.appender.file.file=external/kafka/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
index 9c81f23c19118..d9809f6409d44 100644
--- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
+++ b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaStreamSuite.scala
@@ -35,5 +35,6 @@ class KafkaStreamSuite extends TestSuiteBase {
ssc, kafkaParams, topics, StorageLevel.MEMORY_AND_DISK_SER_2)
// TODO: Actually test receiving data
+ ssc.stop()
}
}
diff --git a/external/mqtt/src/test/resources/log4j.properties b/external/mqtt/src/test/resources/log4j.properties
index 063529a9cbc67..d0462c7336df5 100644
--- a/external/mqtt/src/test/resources/log4j.properties
+++ b/external/mqtt/src/test/resources/log4j.properties
@@ -20,7 +20,7 @@ log4j.rootCategory=INFO, file
# log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=streaming/target/unit-tests.log
+log4j.appender.file.file=external/mqtt/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
index 73e7ce6e968c6..89c40ad4619c9 100644
--- a/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
+++ b/external/mqtt/src/test/scala/org/apache/spark/streaming/mqtt/MQTTStreamSuite.scala
@@ -32,5 +32,6 @@ class MQTTStreamSuite extends TestSuiteBase {
val test2 = MQTTUtils.createStream(ssc, brokerUrl, topic, StorageLevel.MEMORY_AND_DISK_SER_2)
// TODO: Actually test receiving data
+ ssc.stop()
}
}
diff --git a/external/twitter/src/test/resources/log4j.properties b/external/twitter/src/test/resources/log4j.properties
index 063529a9cbc67..c918335fcdc70 100644
--- a/external/twitter/src/test/resources/log4j.properties
+++ b/external/twitter/src/test/resources/log4j.properties
@@ -20,7 +20,7 @@ log4j.rootCategory=INFO, file
# log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=streaming/target/unit-tests.log
+log4j.appender.file.file=external/twitter/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala b/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
index ccc38784ef671..06ab0cdaf3b4e 100644
--- a/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
+++ b/external/twitter/src/test/scala/org/apache/spark/streaming/twitter/TwitterStreamSuite.scala
@@ -39,5 +39,6 @@ class TwitterStreamSuite extends TestSuiteBase {
// Note that actually testing the data receiving is hard as authentication keys are
// necessary for accessing Twitter live stream
+ ssc.stop()
}
}
diff --git a/external/zeromq/src/test/resources/log4j.properties b/external/zeromq/src/test/resources/log4j.properties
index 063529a9cbc67..304683dd0bac3 100644
--- a/external/zeromq/src/test/resources/log4j.properties
+++ b/external/zeromq/src/test/resources/log4j.properties
@@ -20,7 +20,7 @@ log4j.rootCategory=INFO, file
# log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=streaming/target/unit-tests.log
+log4j.appender.file.file=external/zeromq/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
diff --git a/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala b/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
index 4193b8a02f14a..92d55a7a7b6e4 100644
--- a/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
+++ b/external/zeromq/src/test/scala/org/apache/spark/streaming/zeromq/ZeroMQStreamSuite.scala
@@ -40,5 +40,6 @@ class ZeroMQStreamSuite extends TestSuiteBase {
StorageLevel.MEMORY_AND_DISK_SER_2, SupervisorStrategy.defaultStrategy)
// TODO: Actually test data receiving
+ ssc.stop()
}
}
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 5046a1d53fa41..4d778dc4d43b4 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -42,11 +42,13 @@ class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time)
val checkpointDuration = ssc.checkpointDuration
val pendingTimes = ssc.scheduler.getPendingTimes().toArray
val delaySeconds = MetadataCleaner.getDelaySeconds(ssc.conf)
- val sparkConf = ssc.conf
+ val sparkConfPairs = ssc.conf.getAll
- // These should be unset when a checkpoint is deserialized,
- // otherwise the SparkContext won't initialize correctly.
- sparkConf.remove("spark.driver.host").remove("spark.driver.port")
+ def sparkConf = {
+ new SparkConf(false).setAll(sparkConfPairs)
+ .remove("spark.driver.host")
+ .remove("spark.driver.port")
+ }
def validate() {
assert(master != null, "Checkpoint.master is null")
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
index 8faa79f8c7e9d..0683113bd0b51 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala
@@ -163,8 +163,10 @@ final private[streaming] class DStreamGraph extends Serializable with Logging {
logDebug("DStreamGraph.writeObject used")
this.synchronized {
checkpointInProgress = true
+ logDebug("Enabled checkpoint mode")
oos.defaultWriteObject()
checkpointInProgress = false
+ logDebug("Disabled checkpoint mode")
}
}
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala
index 38bad5ac8042a..906a16e508cd8 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala
@@ -19,7 +19,7 @@ package org.apache.spark.streaming.dstream
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag
-import java.io.{ObjectInputStream, IOException}
+import java.io.{ObjectOutputStream, ObjectInputStream, IOException}
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.Logging
@@ -117,8 +117,32 @@ class DStreamCheckpointData[T: ClassTag] (dstream: DStream[T])
"[\n" + currentCheckpointFiles.size + " checkpoint files \n" + currentCheckpointFiles.mkString("\n") + "\n]"
}
+ @throws(classOf[IOException])
+ private def writeObject(oos: ObjectOutputStream) {
+ logDebug(this.getClass().getSimpleName + ".writeObject used")
+ if (dstream.context.graph != null) {
+ dstream.context.graph.synchronized {
+ if (dstream.context.graph.checkpointInProgress) {
+ oos.defaultWriteObject()
+ } else {
+ val msg = "Object of " + this.getClass.getName + " is being serialized " +
+ " possibly as a part of closure of an RDD operation. This is because " +
+ " the DStream object is being referred to from within the closure. " +
+ " Please rewrite the RDD operation inside this DStream to avoid this. " +
+ " This has been enforced to avoid bloating of Spark tasks " +
+ " with unnecessary objects."
+ throw new java.io.NotSerializableException(msg)
+ }
+ }
+ } else {
+ throw new java.io.NotSerializableException(
+ "Graph is unexpectedly null when DStream is being serialized.")
+ }
+ }
+
@throws(classOf[IOException])
private def readObject(ois: ObjectInputStream) {
+ logDebug(this.getClass().getSimpleName + ".readObject used")
ois.defaultReadObject()
timeToOldestCheckpointFileTime = new HashMap[Time, Time]
timeToCheckpointFile = new HashMap[Time, String]
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 89daf4758661b..831e7c1471a09 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -151,17 +151,29 @@ class CheckpointSuite extends TestSuiteBase {
val value = "myvalue"
System.setProperty(key, value)
ssc = new StreamingContext(master, framework, batchDuration)
+ val originalConf = ssc.conf
+
val cp = new Checkpoint(ssc, Time(1000))
- assert(!cp.sparkConf.contains("spark.driver.host"))
- assert(!cp.sparkConf.contains("spark.driver.port"))
- assert(!cp.sparkConf.contains("spark.hostPort"))
- assert(cp.sparkConf.get(key) === value)
+ val cpConf = cp.sparkConf
+ assert(cpConf.get("spark.master") === originalConf.get("spark.master"))
+ assert(cpConf.get("spark.app.name") === originalConf.get("spark.app.name"))
+ assert(cpConf.get(key) === value)
ssc.stop()
+
+ // Serialize/deserialize to simulate write to storage and reading it back
val newCp = Utils.deserialize[Checkpoint](Utils.serialize(cp))
- assert(!newCp.sparkConf.contains("spark.driver.host"))
- assert(!newCp.sparkConf.contains("spark.driver.port"))
- assert(!newCp.sparkConf.contains("spark.hostPort"))
- assert(newCp.sparkConf.get(key) === value)
+
+ val newCpConf = newCp.sparkConf
+ assert(newCpConf.get("spark.master") === originalConf.get("spark.master"))
+ assert(newCpConf.get("spark.app.name") === originalConf.get("spark.app.name"))
+ assert(newCpConf.get(key) === value)
+ assert(!newCpConf.contains("spark.driver.host"))
+ assert(!newCpConf.contains("spark.driver.port"))
+
+ // Check if all the parameters have been restored
+ ssc = new StreamingContext(null, newCp, null)
+ val restoredConf = ssc.conf
+ assert(restoredConf.get(key) === value)
}
From 2f015c2e77f991d8d880c73a241898b370708c56 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 14 Jan 2014 23:17:05 -0800
Subject: [PATCH 016/133] Merge pull request #436 from ankurdave/VertexId-case
Rename VertexID -> VertexId in GraphX
(cherry picked from commit 3d9e66d92ada4fa93dd0bd78cb4c80f8169e6393)
Signed-off-by: Reynold Xin
---
docs/graphx-programming-guide.md | 70 +++++++++----------
.../scala/org/apache/spark/graphx/Edge.scala | 8 +--
.../org/apache/spark/graphx/EdgeRDD.scala | 4 +-
.../org/apache/spark/graphx/EdgeTriplet.scala | 4 +-
.../scala/org/apache/spark/graphx/Graph.scala | 18 ++---
.../spark/graphx/GraphKryoRegistrator.scala | 2 +-
.../org/apache/spark/graphx/GraphOps.scala | 32 ++++-----
.../spark/graphx/PartitionStrategy.scala | 14 ++--
.../org/apache/spark/graphx/Pregel.scala | 8 +--
.../org/apache/spark/graphx/VertexRDD.scala | 42 +++++------
.../spark/graphx/impl/EdgePartition.scala | 16 ++---
.../graphx/impl/EdgePartitionBuilder.scala | 10 +--
.../graphx/impl/EdgeTripletIterator.scala | 2 +-
.../apache/spark/graphx/impl/GraphImpl.scala | 32 ++++-----
.../graphx/impl/MessageToPartition.scala | 12 ++--
.../graphx/impl/ReplicatedVertexView.scala | 30 ++++----
.../spark/graphx/impl/RoutingTable.scala | 16 ++---
.../spark/graphx/impl/Serializers.scala | 10 +--
.../spark/graphx/impl/VertexPartition.scala | 44 ++++++------
.../apache/spark/graphx/impl/package.scala | 2 +-
.../graphx/lib/ConnectedComponents.scala | 4 +-
.../apache/spark/graphx/lib/PageRank.scala | 4 +-
.../apache/spark/graphx/lib/SVDPlusPlus.scala | 12 ++--
.../lib/StronglyConnectedComponents.scala | 6 +-
.../spark/graphx/lib/TriangleCount.scala | 2 +-
.../org/apache/spark/graphx/package.scala | 4 +-
.../spark/graphx/util/GraphGenerators.scala | 12 ++--
.../apache/spark/graphx/GraphOpsSuite.scala | 10 +--
.../org/apache/spark/graphx/GraphSuite.scala | 28 ++++----
.../org/apache/spark/graphx/PregelSuite.scala | 8 +--
.../apache/spark/graphx/SerializerSuite.scala | 18 ++---
.../graphx/impl/EdgePartitionSuite.scala | 2 +-
.../graphx/lib/ConnectedComponentsSuite.scala | 2 +-
33 files changed, 244 insertions(+), 244 deletions(-)
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 4bf47434571f5..3dfed7bea9ea8 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -186,7 +186,7 @@ code constructs a graph from a collection of RDDs:
// Assume the SparkContext has already been constructed
val sc: SparkContext
// Create an RDD for the vertices
-val users: RDD[(VertexID, (String, String))] =
+val users: RDD[(VertexId, (String, String))] =
sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
(5L, ("franklin", "prof")), (2L, ("istoica", "prof"))))
// Create an RDD for edges
@@ -360,7 +360,7 @@ graph contains the following:
{% highlight scala %}
class Graph[VD, ED] {
- def mapVertices[VD2](map: (VertexID, VD) => VD2): Graph[VD2, ED]
+ def mapVertices[VD2](map: (VertexId, VD) => VD2): Graph[VD2, ED]
def mapEdges[ED2](map: Edge[ED] => ED2): Graph[VD, ED2]
def mapTriplets[ED2](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2]
}
@@ -382,7 +382,7 @@ val newGraph = Graph(newVertices, graph.edges)
val newGraph = graph.mapVertices((id, attr) => mapUdf(id, attr))
{% endhighlight %}
-[Graph.mapVertices]: api/graphx/index.html#org.apache.spark.graphx.Graph@mapVertices[VD2]((VertexID,VD)⇒VD2)(ClassTag[VD2]):Graph[VD2,ED]
+[Graph.mapVertices]: api/graphx/index.html#org.apache.spark.graphx.Graph@mapVertices[VD2]((VertexId,VD)⇒VD2)(ClassTag[VD2]):Graph[VD2,ED]
These operators are often used to initialize the graph for a particular computation or project away
unnecessary properties. For example, given a graph with the out-degrees as the vertex properties
@@ -408,7 +408,7 @@ add more in the future. The following is a list of the basic structural operato
class Graph[VD, ED] {
def reverse: Graph[VD, ED]
def subgraph(epred: EdgeTriplet[VD,ED] => Boolean,
- vpred: (VertexID, VD) => Boolean): Graph[VD, ED]
+ vpred: (VertexId, VD) => Boolean): Graph[VD, ED]
def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED]
def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED]
}
@@ -427,11 +427,11 @@ satisfy the edge predicate *and connect vertices that satisfy the vertex predica
operator can be used in number of situations to restrict the graph to the vertices and edges of
interest or eliminate broken links. For example in the following code we remove broken links:
-[Graph.subgraph]: api/graphx/index.html#org.apache.spark.graphx.Graph@subgraph((EdgeTriplet[VD,ED])⇒Boolean,(VertexID,VD)⇒Boolean):Graph[VD,ED]
+[Graph.subgraph]: api/graphx/index.html#org.apache.spark.graphx.Graph@subgraph((EdgeTriplet[VD,ED])⇒Boolean,(VertexId,VD)⇒Boolean):Graph[VD,ED]
{% highlight scala %}
// Create an RDD for the vertices
-val users: RDD[(VertexID, (String, String))] =
+val users: RDD[(VertexId, (String, String))] =
sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
(5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
(4L, ("peter", "student"))))
@@ -494,9 +494,9 @@ using the *join* operators. Below we list the key join operators:
{% highlight scala %}
class Graph[VD, ED] {
- def joinVertices[U](table: RDD[(VertexID, U)])(map: (VertexID, VD, U) => VD)
+ def joinVertices[U](table: RDD[(VertexId, U)])(map: (VertexId, VD, U) => VD)
: Graph[VD, ED]
- def outerJoinVertices[U, VD2](table: RDD[(VertexID, U)])(map: (VertexID, VD, Option[U]) => VD2)
+ def outerJoinVertices[U, VD2](table: RDD[(VertexId, U)])(map: (VertexId, VD, Option[U]) => VD2)
: Graph[VD2, ED]
}
{% endhighlight %}
@@ -506,7 +506,7 @@ returns a new graph with the vertex properties obtained by applying the user def
to the result of the joined vertices. Vertices without a matching value in the RDD retain their
original value.
-[GraphOps.joinVertices]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@joinVertices[U](RDD[(VertexID,U)])((VertexID,VD,U)⇒VD)(ClassTag[U]):Graph[VD,ED]
+[GraphOps.joinVertices]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@joinVertices[U](RDD[(VertexId,U)])((VertexId,VD,U)⇒VD)(ClassTag[U]):Graph[VD,ED]
> Note that if the RDD contains more than one value for a given vertex only one will be used. It
> is therefore recommended that the input RDD be first made unique using the following which will
@@ -525,7 +525,7 @@ property type. Because not all vertices may have a matching value in the input
function takes an `Option` type. For example, we can setup a graph for PageRank by initializing
vertex properties with their `outDegree`.
-[Graph.outerJoinVertices]: api/graphx/index.html#org.apache.spark.graphx.Graph@outerJoinVertices[U,VD2](RDD[(VertexID,U)])((VertexID,VD,Option[U])⇒VD2)(ClassTag[U],ClassTag[VD2]):Graph[VD2,ED]
+[Graph.outerJoinVertices]: api/graphx/index.html#org.apache.spark.graphx.Graph@outerJoinVertices[U,VD2](RDD[(VertexId,U)])((VertexId,VD,Option[U])⇒VD2)(ClassTag[U],ClassTag[VD2]):Graph[VD2,ED]
{% highlight scala %}
@@ -559,7 +559,7 @@ PageRank Value, shortest path to the source, and smallest reachable vertex id).
### Map Reduce Triplets (mapReduceTriplets)
-[Graph.mapReduceTriplets]: api/graphx/index.html#org.apache.spark.graphx.Graph@mapReduceTriplets[A](mapFunc:org.apache.spark.graphx.EdgeTriplet[VD,ED]=>Iterator[(org.apache.spark.graphx.VertexID,A)],reduceFunc:(A,A)=>A,activeSetOpt:Option[(org.apache.spark.graphx.VertexRDD[_],org.apache.spark.graphx.EdgeDirection)])(implicitevidence$10:scala.reflect.ClassTag[A]):org.apache.spark.graphx.VertexRDD[A]
+[Graph.mapReduceTriplets]: api/graphx/index.html#org.apache.spark.graphx.Graph@mapReduceTriplets[A](mapFunc:org.apache.spark.graphx.EdgeTriplet[VD,ED]=>Iterator[(org.apache.spark.graphx.VertexId,A)],reduceFunc:(A,A)=>A,activeSetOpt:Option[(org.apache.spark.graphx.VertexRDD[_],org.apache.spark.graphx.EdgeDirection)])(implicitevidence$10:scala.reflect.ClassTag[A]):org.apache.spark.graphx.VertexRDD[A]
The core (heavily optimized) aggregation primitive in GraphX is the
[`mapReduceTriplets`][Graph.mapReduceTriplets] operator:
@@ -567,7 +567,7 @@ The core (heavily optimized) aggregation primitive in GraphX is the
{% highlight scala %}
class Graph[VD, ED] {
def mapReduceTriplets[A](
- map: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ map: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
reduce: (A, A) => A)
: VertexRDD[A]
}
@@ -649,13 +649,13 @@ compute the max in, out, and total degrees:
{% highlight scala %}
// Define a reduce operation to compute the highest degree vertex
-def max(a: (VertexID, Int), b: (VertexID, Int)): (VertexID, Int) = {
+def max(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) = {
if (a._2 > b._2) a else b
}
// Compute the max degrees
-val maxInDegree: (VertexID, Int) = graph.inDegrees.reduce(max)
-val maxOutDegree: (VertexID, Int) = graph.outDegrees.reduce(max)
-val maxDegrees: (VertexID, Int) = graph.degrees.reduce(max)
+val maxInDegree: (VertexId, Int) = graph.inDegrees.reduce(max)
+val maxOutDegree: (VertexId, Int) = graph.outDegrees.reduce(max)
+val maxDegrees: (VertexId, Int) = graph.degrees.reduce(max)
{% endhighlight %}
### Collecting Neighbors
@@ -665,14 +665,14 @@ attributes at each vertex. This can be easily accomplished using the
[`collectNeighborIds`][GraphOps.collectNeighborIds] and the
[`collectNeighbors`][GraphOps.collectNeighbors] operators.
-[GraphOps.collectNeighborIds]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@collectNeighborIds(EdgeDirection):VertexRDD[Array[VertexID]]
-[GraphOps.collectNeighbors]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@collectNeighbors(EdgeDirection):VertexRDD[Array[(VertexID,VD)]]
+[GraphOps.collectNeighborIds]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@collectNeighborIds(EdgeDirection):VertexRDD[Array[VertexId]]
+[GraphOps.collectNeighbors]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@collectNeighbors(EdgeDirection):VertexRDD[Array[(VertexId,VD)]]
{% highlight scala %}
class GraphOps[VD, ED] {
- def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexID]]
- def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[ Array[(VertexID, VD)] ]
+ def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]]
+ def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[ Array[(VertexId, VD)] ]
}
{% endhighlight %}
@@ -716,7 +716,7 @@ messages remaining.
The following is the type signature of the [Pregel operator][GraphOps.pregel] as well as a *sketch*
of its implementation (note calls to graph.cache have been removed):
-[GraphOps.pregel]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@pregel[A](A,Int,EdgeDirection)((VertexID,VD,A)⇒VD,(EdgeTriplet[VD,ED])⇒Iterator[(VertexID,A)],(A,A)⇒A)(ClassTag[A]):Graph[VD,ED]
+[GraphOps.pregel]: api/graphx/index.html#org.apache.spark.graphx.GraphOps@pregel[A](A,Int,EdgeDirection)((VertexId,VD,A)⇒VD,(EdgeTriplet[VD,ED])⇒Iterator[(VertexId,A)],(A,A)⇒A)(ClassTag[A]):Graph[VD,ED]
{% highlight scala %}
class GraphOps[VD, ED] {
@@ -724,8 +724,8 @@ class GraphOps[VD, ED] {
(initialMsg: A,
maxIter: Int = Int.MaxValue,
activeDir: EdgeDirection = EdgeDirection.Out)
- (vprog: (VertexID, VD, A) => VD,
- sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ (vprog: (VertexId, VD, A) => VD,
+ sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
mergeMsg: (A, A) => A)
: Graph[VD, ED] = {
// Receive the initial message at each vertex
@@ -770,7 +770,7 @@ import org.apache.spark.graphx.util.GraphGenerators
// A graph with edge attributes containing distances
val graph: Graph[Int, Double] =
GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
-val sourceId: VertexID = 42 // The ultimate source
+val sourceId: VertexId = 42 // The ultimate source
// Initialize the graph such that all vertices except the root have distance infinity.
val initialGraph = graph.mapVertices((id, _) => if (id == sourceId) 0.0 else Double.PositiveInfinity)
val sssp = initialGraph.pregel(Double.PositiveInfinity)(
@@ -817,7 +817,7 @@ It creates a `Graph` from the specified edges, automatically creating any vertic
{% highlight scala %}
object Graph {
def apply[VD, ED](
- vertices: RDD[(VertexID, VD)],
+ vertices: RDD[(VertexId, VD)],
edges: RDD[Edge[ED]],
defaultVertexAttr: VD = null)
: Graph[VD, ED]
@@ -827,7 +827,7 @@ object Graph {
defaultValue: VD): Graph[VD, ED]
def fromEdgeTuples[VD](
- rawEdges: RDD[(VertexID, VertexID)],
+ rawEdges: RDD[(VertexId, VertexId)],
defaultValue: VD,
uniqueEdges: Option[PartitionStrategy] = None): Graph[VD, Int]
@@ -843,8 +843,8 @@ object Graph {
[PartitionStrategy]: api/graphx/index.html#org.apache.spark.graphx.PartitionStrategy$
[GraphLoader.edgeListFile]: api/graphx/index.html#org.apache.spark.graphx.GraphLoader$@edgeListFile(SparkContext,String,Boolean,Int):Graph[Int,Int]
-[Graph.apply]: api/graphx/index.html#org.apache.spark.graphx.Graph$@apply[VD,ED](RDD[(VertexID,VD)],RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED]
-[Graph.fromEdgeTuples]: api/graphx/index.html#org.apache.spark.graphx.Graph$@fromEdgeTuples[VD](RDD[(VertexID,VertexID)],VD,Option[PartitionStrategy])(ClassTag[VD]):Graph[VD,Int]
+[Graph.apply]: api/graphx/index.html#org.apache.spark.graphx.Graph$@apply[VD,ED](RDD[(VertexId,VD)],RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED]
+[Graph.fromEdgeTuples]: api/graphx/index.html#org.apache.spark.graphx.Graph$@fromEdgeTuples[VD](RDD[(VertexId,VertexId)],VD,Option[PartitionStrategy])(ClassTag[VD]):Graph[VD,Int]
[Graph.fromEdges]: api/graphx/index.html#org.apache.spark.graphx.Graph$@fromEdges[VD,ED](RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED]
# Vertex and Edge RDDs
@@ -868,17 +868,17 @@ additional functionality:
{% highlight scala %}
class VertexRDD[VD] extends RDD[(VertexID, VD)] {
// Filter the vertex set but preserves the internal index
- def filter(pred: Tuple2[VertexID, VD] => Boolean): VertexRDD[VD]
+ def filter(pred: Tuple2[VertexId, VD] => Boolean): VertexRDD[VD]
// Transform the values without changing the ids (preserves the internal index)
def mapValues[VD2](map: VD => VD2): VertexRDD[VD2]
- def mapValues[VD2](map: (VertexID, VD) => VD2): VertexRDD[VD2]
+ def mapValues[VD2](map: (VertexId, VD) => VD2): VertexRDD[VD2]
// Remove vertices from this set that appear in the other set
def diff(other: VertexRDD[VD]): VertexRDD[VD]
// Join operators that take advantage of the internal indexing to accelerate joins (substantially)
- def leftJoin[VD2, VD3](other: RDD[(VertexID, VD2)])(f: (VertexID, VD, Option[VD2]) => VD3): VertexRDD[VD3]
- def innerJoin[U, VD2](other: RDD[(VertexID, U)])(f: (VertexID, VD, U) => VD2): VertexRDD[VD2]
+ def leftJoin[VD2, VD3](other: RDD[(VertexId, VD2)])(f: (VertexId, VD, Option[VD2]) => VD3): VertexRDD[VD3]
+ def innerJoin[U, VD2](other: RDD[(VertexId, U)])(f: (VertexId, VD, U) => VD2): VertexRDD[VD2]
// Use the index on this RDD to accelerate a `reduceByKey` operation on the input RDD.
- def aggregateUsingIndex[VD2](other: RDD[(VertexID, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2]
+ def aggregateUsingIndex[VD2](other: RDD[(VertexId, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2]
}
{% endhighlight %}
@@ -896,7 +896,7 @@ both aggregate and then subsequently index the `RDD[(VertexID, A)]`. For exampl
{% highlight scala %}
val setA: VertexRDD[Int] = VertexRDD(sc.parallelize(0L until 100L).map(id => (id, 1)))
-val rddB: RDD[(VertexID, Double)] = sc.parallelize(0L until 100L).flatMap(id => List((id, 1.0), (id, 2.0)))
+val rddB: RDD[(VertexId, Double)] = sc.parallelize(0L until 100L).flatMap(id => List((id, 1.0), (id, 2.0)))
// There should be 200 entries in rddB
rddB.count
val setB: VertexRDD[Double] = setA.aggregateUsingIndex(rddB, _ + _)
@@ -922,7 +922,7 @@ def mapValues[ED2](f: Edge[ED] => ED2): EdgeRDD[ED2]
// Reverse the edges, reusing both attributes and structure
def reverse: EdgeRDD[ED]
// Join two `EdgeRDD`s partitioned using the same partitioning strategy.
-def innerJoin[ED2, ED3](other: EdgeRDD[ED2])(f: (VertexID, VertexID, ED, ED2) => ED3): EdgeRDD[ED3]
+def innerJoin[ED2, ED3](other: EdgeRDD[ED2])(f: (VertexId, VertexId, ED, ED2) => ED3): EdgeRDD[ED3]
{% endhighlight %}
In most applications we have found that operations on the `EdgeRDD` are accomplished through the
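To make the `innerJoin` signature above concrete, here is a small illustrative sketch; the `graph` value, the attribute choices, and the variable names are assumptions, not part of the guide:
{% highlight scala %}
// Two EdgeRDDs derived from the same graph share the same partitioning,
// so innerJoin can merge their attributes edge by edge.
val weights: EdgeRDD[Double] = graph.edges.mapValues(e => 1.0)
val labels: EdgeRDD[String] = graph.edges.mapValues(e => "follows")
val combined: EdgeRDD[(Double, String)] =
  weights.innerJoin(labels) { (src, dst, w, lbl) => (w, lbl) }
{% endhighlight %}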
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
index 32f1602698134..580faa0866789 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala
@@ -28,8 +28,8 @@ package org.apache.spark.graphx
* @param attr The attribute associated with the edge
*/
case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] (
- var srcId: VertexID = 0,
- var dstId: VertexID = 0,
+ var srcId: VertexId = 0,
+ var dstId: VertexId = 0,
var attr: ED = null.asInstanceOf[ED])
extends Serializable {
@@ -39,7 +39,7 @@ case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED]
  * @param vid the id of one of the two vertices on the edge.
* @return the id of the other vertex on the edge.
*/
- def otherVertexId(vid: VertexID): VertexID =
+ def otherVertexId(vid: VertexId): VertexId =
if (srcId == vid) dstId else { assert(dstId == vid); srcId }
/**
@@ -50,7 +50,7 @@ case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED]
* @return the relative direction of the edge to the corresponding
* vertex.
*/
- def relativeDirection(vid: VertexID): EdgeDirection =
+ def relativeDirection(vid: VertexId): EdgeDirection =
if (vid == srcId) EdgeDirection.Out else { assert(vid == dstId); EdgeDirection.In }
}
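As a quick illustration of the two helpers renamed above, a sketch with made-up ids and an arbitrary attribute:
{% highlight scala %}
import org.apache.spark.graphx._

val e = Edge(srcId = 1L, dstId = 2L, attr = "likes")
e.otherVertexId(1L)      // 2L: the vertex at the other end of the edge
e.relativeDirection(2L)  // EdgeDirection.In: the edge points towards vertex 2
{% endhighlight %}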
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
index 6efef061d7510..fe03ae4a629b9 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
@@ -102,7 +102,7 @@ class EdgeRDD[@specialized ED: ClassTag](
*/
def innerJoin[ED2: ClassTag, ED3: ClassTag]
(other: EdgeRDD[ED2])
- (f: (VertexID, VertexID, ED, ED2) => ED3): EdgeRDD[ED3] = {
+ (f: (VertexId, VertexId, ED, ED2) => ED3): EdgeRDD[ED3] = {
val ed2Tag = classTag[ED2]
val ed3Tag = classTag[ED3]
new EdgeRDD[ED3](partitionsRDD.zipPartitions(other.partitionsRDD, true) {
@@ -113,7 +113,7 @@ class EdgeRDD[@specialized ED: ClassTag](
})
}
- private[graphx] def collectVertexIDs(): RDD[VertexID] = {
+ private[graphx] def collectVertexIds(): RDD[VertexId] = {
partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) }
}
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
index 2c659cb070b99..fea43c3b2bbf1 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
@@ -50,7 +50,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
  * @param vid the id of one of the two vertices on the edge
* @return the attribute for the other vertex on the edge
*/
- def otherVertexAttr(vid: VertexID): VD =
+ def otherVertexAttr(vid: VertexId): VD =
if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr }
/**
@@ -59,7 +59,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
* @param vid the id of one of the two vertices on the edge
* @return the attr for the vertex with that id
*/
- def vertexAttr(vid: VertexID): VD =
+ def vertexAttr(vid: VertexId): VD =
if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr }
override def toString = ((srcId, srcAttr), (dstId, dstAttr), attr).toString()
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 7f65244cd95cd..eea95d38d5016 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -126,7 +126,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
* }}}
*
*/
- def mapVertices[VD2: ClassTag](map: (VertexID, VD) => VD2): Graph[VD2, ED]
+ def mapVertices[VD2: ClassTag](map: (VertexId, VD) => VD2): Graph[VD2, ED]
/**
* Transforms each edge attribute in the graph using the map function. The map function is not
@@ -242,7 +242,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
*/
def subgraph(
epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
- vpred: (VertexID, VD) => Boolean = ((v, d) => true))
+ vpred: (VertexId, VD) => Boolean = ((v, d) => true))
: Graph[VD, ED]
/**
@@ -292,7 +292,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
* vertex
* {{{
* val rawGraph: Graph[(),()] = Graph.textFile("twittergraph")
- * val inDeg: RDD[(VertexID, Int)] =
+ * val inDeg: RDD[(VertexId, Int)] =
 *   mapReduceTriplets[Int](et => Iterator((et.dstId, 1)), _ + _)
* }}}
*
@@ -304,7 +304,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
*
*/
def mapReduceTriplets[A: ClassTag](
- mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
reduceFunc: (A, A) => A,
activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None)
: VertexRDD[A]
@@ -328,14 +328,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
*
* {{{
* val rawGraph: Graph[_, _] = Graph.textFile("webgraph")
- * val outDeg: RDD[(VertexID, Int)] = rawGraph.outDegrees()
+ * val outDeg: RDD[(VertexId, Int)] = rawGraph.outDegrees()
* val graph = rawGraph.outerJoinVertices(outDeg) {
* (vid, data, optDeg) => optDeg.getOrElse(0)
* }
* }}}
*/
- def outerJoinVertices[U: ClassTag, VD2: ClassTag](other: RDD[(VertexID, U)])
- (mapFunc: (VertexID, VD, Option[U]) => VD2)
+ def outerJoinVertices[U: ClassTag, VD2: ClassTag](other: RDD[(VertexId, U)])
+ (mapFunc: (VertexId, VD, Option[U]) => VD2)
: Graph[VD2, ED]
/**
@@ -364,7 +364,7 @@ object Graph {
* (if `uniqueEdges` is `None`) and vertex attributes containing the total degree of each vertex.
*/
def fromEdgeTuples[VD: ClassTag](
- rawEdges: RDD[(VertexID, VertexID)],
+ rawEdges: RDD[(VertexId, VertexId)],
defaultValue: VD,
uniqueEdges: Option[PartitionStrategy] = None): Graph[VD, Int] =
{
@@ -405,7 +405,7 @@ object Graph {
* mentioned in edges but not in vertices
*/
def apply[VD: ClassTag, ED: ClassTag](
- vertices: RDD[(VertexID, VD)],
+ vertices: RDD[(VertexId, VD)],
edges: RDD[Edge[ED]],
defaultVertexAttr: VD = null.asInstanceOf[VD]): Graph[VD, ED] = {
GraphImpl(vertices, edges, defaultVertexAttr)
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
index 6db8a34937244..dd380d8c182c9 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala
@@ -33,7 +33,7 @@ class GraphKryoRegistrator extends KryoRegistrator {
kryo.register(classOf[Edge[Object]])
kryo.register(classOf[MessageToPartition[Object]])
kryo.register(classOf[VertexBroadcastMsg[Object]])
- kryo.register(classOf[(VertexID, Object)])
+ kryo.register(classOf[(VertexId, Object)])
kryo.register(classOf[EdgePartition[Object]])
kryo.register(classOf[BitSet])
kryo.register(classOf[VertexIdToIndexMap])
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 9b864c1290bd2..0fc1e4df6813c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -80,19 +80,19 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
*
* @return the set of neighboring ids for each vertex
*/
- def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexID]] = {
+ def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]] = {
val nbrs =
if (edgeDirection == EdgeDirection.Either) {
- graph.mapReduceTriplets[Array[VertexID]](
+ graph.mapReduceTriplets[Array[VertexId]](
mapFunc = et => Iterator((et.srcId, Array(et.dstId)), (et.dstId, Array(et.srcId))),
reduceFunc = _ ++ _
)
} else if (edgeDirection == EdgeDirection.Out) {
- graph.mapReduceTriplets[Array[VertexID]](
+ graph.mapReduceTriplets[Array[VertexId]](
mapFunc = et => Iterator((et.srcId, Array(et.dstId))),
reduceFunc = _ ++ _)
} else if (edgeDirection == EdgeDirection.In) {
- graph.mapReduceTriplets[Array[VertexID]](
+ graph.mapReduceTriplets[Array[VertexId]](
mapFunc = et => Iterator((et.dstId, Array(et.srcId))),
reduceFunc = _ ++ _)
} else {
@@ -100,7 +100,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
"direction. (EdgeDirection.Both is not supported; use EdgeDirection.Either instead.)")
}
graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) =>
- nbrsOpt.getOrElse(Array.empty[VertexID])
+ nbrsOpt.getOrElse(Array.empty[VertexId])
}
} // end of collectNeighborIds
@@ -116,8 +116,8 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
*
* @return the vertex set of neighboring vertex attributes for each vertex
*/
- def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexID, VD)]] = {
- val nbrs = graph.mapReduceTriplets[Array[(VertexID,VD)]](
+ def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]] = {
+ val nbrs = graph.mapReduceTriplets[Array[(VertexId,VD)]](
edge => {
val msgToSrc = (edge.srcId, Array((edge.dstId, edge.dstAttr)))
val msgToDst = (edge.dstId, Array((edge.srcId, edge.srcAttr)))
@@ -133,7 +133,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
(a, b) => a ++ b)
graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) =>
- nbrsOpt.getOrElse(Array.empty[(VertexID, VD)])
+ nbrsOpt.getOrElse(Array.empty[(VertexId, VD)])
}
  } // end of collectNeighbors
@@ -164,9 +164,9 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
* }}}
*
*/
- def joinVertices[U: ClassTag](table: RDD[(VertexID, U)])(mapFunc: (VertexID, VD, U) => VD)
+ def joinVertices[U: ClassTag](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD)
: Graph[VD, ED] = {
- val uf = (id: VertexID, data: VD, o: Option[U]) => {
+ val uf = (id: VertexId, data: VD, o: Option[U]) => {
o match {
case Some(u) => mapFunc(id, data, u)
case None => data
@@ -197,7 +197,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
* val degrees: VertexRDD[Int] = graph.outDegrees
* graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)}
* },
- * vpred = (vid: VertexID, deg:Int) => deg > 0
+ * vpred = (vid: VertexId, deg:Int) => deg > 0
* )
* }}}
*
@@ -205,7 +205,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
def filter[VD2: ClassTag, ED2: ClassTag](
preprocess: Graph[VD, ED] => Graph[VD2, ED2],
epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true,
- vpred: (VertexID, VD2) => Boolean = (v:VertexID, d:VD2) => true): Graph[VD, ED] = {
+ vpred: (VertexId, VD2) => Boolean = (v:VertexId, d:VD2) => true): Graph[VD, ED] = {
graph.mask(preprocess(graph).subgraph(epred, vpred))
}
@@ -260,8 +260,8 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
initialMsg: A,
maxIterations: Int = Int.MaxValue,
activeDirection: EdgeDirection = EdgeDirection.Either)(
- vprog: (VertexID, VD, A) => VD,
- sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID,A)],
+ vprog: (VertexId, VD, A) => VD,
+ sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId,A)],
mergeMsg: (A, A) => A)
: Graph[VD, ED] = {
Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg)
@@ -293,7 +293,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
*
* @see [[org.apache.spark.graphx.lib.ConnectedComponents$#run]]
*/
- def connectedComponents(): Graph[VertexID, ED] = {
+ def connectedComponents(): Graph[VertexId, ED] = {
ConnectedComponents.run(graph)
}
@@ -312,7 +312,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
*
* @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents$#run]]
*/
- def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = {
+ def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED] = {
StronglyConnectedComponents.run(graph, numIter)
}
} // end of GraphOps
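A short sketch of the renamed result types in use; the `graph` value and the iteration count are assumptions:
{% highlight scala %}
// Both algorithms label every vertex with a VertexId: the smallest id in its component.
val cc  = graph.connectedComponents()            // Graph[VertexId, ED]
val scc = graph.stronglyConnectedComponents(10)  // Graph[VertexId, ED]
{% endhighlight %}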
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
index 8ba87976f1136..929915362c1c9 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
@@ -23,7 +23,7 @@ package org.apache.spark.graphx
*/
trait PartitionStrategy extends Serializable {
/** Returns the partition number for a given edge. */
- def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID
+ def getPartition(src: VertexId, dst: VertexId, numParts: PartitionID): PartitionID
}
/**
@@ -73,9 +73,9 @@ object PartitionStrategy {
* is used.
*/
case object EdgePartition2D extends PartitionStrategy {
- override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = {
+ override def getPartition(src: VertexId, dst: VertexId, numParts: PartitionID): PartitionID = {
val ceilSqrtNumParts: PartitionID = math.ceil(math.sqrt(numParts)).toInt
- val mixingPrime: VertexID = 1125899906842597L
+ val mixingPrime: VertexId = 1125899906842597L
val col: PartitionID = ((math.abs(src) * mixingPrime) % ceilSqrtNumParts).toInt
val row: PartitionID = ((math.abs(dst) * mixingPrime) % ceilSqrtNumParts).toInt
(col * ceilSqrtNumParts + row) % numParts
@@ -87,8 +87,8 @@ object PartitionStrategy {
* source.
*/
case object EdgePartition1D extends PartitionStrategy {
- override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = {
- val mixingPrime: VertexID = 1125899906842597L
+ override def getPartition(src: VertexId, dst: VertexId, numParts: PartitionID): PartitionID = {
+ val mixingPrime: VertexId = 1125899906842597L
(math.abs(src) * mixingPrime).toInt % numParts
}
}
@@ -99,7 +99,7 @@ object PartitionStrategy {
* random vertex cut that colocates all same-direction edges between two vertices.
*/
case object RandomVertexCut extends PartitionStrategy {
- override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = {
+ override def getPartition(src: VertexId, dst: VertexId, numParts: PartitionID): PartitionID = {
math.abs((src, dst).hashCode()) % numParts
}
}
@@ -111,7 +111,7 @@ object PartitionStrategy {
* regardless of direction.
*/
case object CanonicalRandomVertexCut extends PartitionStrategy {
- override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = {
+ override def getPartition(src: VertexId, dst: VertexId, numParts: PartitionID): PartitionID = {
val lower = math.min(src, dst)
val higher = math.max(src, dst)
math.abs((lower, higher).hashCode()) % numParts
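For illustration, a minimal sketch of invoking one of the strategies above directly; the vertex ids and partition count are arbitrary:
{% highlight scala %}
import org.apache.spark.graphx._

// RandomVertexCut hashes the (src, dst) pair, so repeated edges between the
// same pair of vertices in the same direction land in the same partition.
val p: PartitionID = PartitionStrategy.RandomVertexCut.getPartition(3L, 7L, 8)
{% endhighlight %}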
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
index 0f6d4135934cb..ac07a594a12e4 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala
@@ -40,9 +40,9 @@ import scala.reflect.ClassTag
* // Set the vertex attributes to the initial pagerank values
* .mapVertices((id, attr) => 1.0)
*
- * def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double =
+ * def vertexProgram(id: VertexId, attr: Double, msgSum: Double): Double =
* resetProb + (1.0 - resetProb) * msgSum
- * def sendMessage(id: VertexID, edge: EdgeTriplet[Double, Double]): Iterator[(VertexId, Double)] =
+ * def sendMessage(id: VertexId, edge: EdgeTriplet[Double, Double]): Iterator[(VertexId, Double)] =
* Iterator((edge.dstId, edge.srcAttr * edge.attr))
* def messageCombiner(a: Double, b: Double): Double = a + b
* val initialMessage = 0.0
@@ -113,8 +113,8 @@ object Pregel {
initialMsg: A,
maxIterations: Int = Int.MaxValue,
activeDirection: EdgeDirection = EdgeDirection.Either)
- (vprog: (VertexID, VD, A) => VD,
- sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ (vprog: (VertexId, VD, A) => VD,
+ sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
mergeMsg: (A, A) => A)
: Graph[VD, ED] =
{
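A hedged sketch of wiring the scaladoc example above into the renamed `Pregel.apply`; it assumes `pagerankGraph`, `initialMessage`, `vertexProgram`, and `messageCombiner` from that example are in scope, and writes the send function inline so that it matches the expected `EdgeTriplet[VD, ED] => Iterator[(VertexId, A)]` shape:
{% highlight scala %}
// maxIterations and activeDirection fall back to their defaults.
val ranks = Pregel(pagerankGraph, initialMessage)(
  vertexProgram,
  et => Iterator((et.dstId, et.srcAttr * et.attr)),
  messageCombiner)
{% endhighlight %}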
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
index 9a95364cb16dd..edd59bcf32943 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala
@@ -28,7 +28,7 @@ import org.apache.spark.graphx.impl.MsgRDDFunctions
import org.apache.spark.graphx.impl.VertexPartition
/**
- * Extends `RDD[(VertexID, VD)]` by ensuring that there is only one entry for each vertex and by
+ * Extends `RDD[(VertexId, VD)]` by ensuring that there is only one entry for each vertex and by
* pre-indexing the entries for fast, efficient joins. Two VertexRDDs with the same index can be
* joined efficiently. All operations except [[reindex]] preserve the index. To construct a
* `VertexRDD`, use the [[org.apache.spark.graphx.VertexRDD$ VertexRDD object]].
@@ -36,12 +36,12 @@ import org.apache.spark.graphx.impl.VertexPartition
* @example Construct a `VertexRDD` from a plain RDD:
* {{{
* // Construct an initial vertex set
- * val someData: RDD[(VertexID, SomeType)] = loadData(someFile)
+ * val someData: RDD[(VertexId, SomeType)] = loadData(someFile)
* val vset = VertexRDD(someData)
* // If there were redundant values in someData we would use a reduceFunc
* val vset2 = VertexRDD(someData, reduceFunc)
* // Finally we can use the VertexRDD to index another dataset
- * val otherData: RDD[(VertexID, OtherType)] = loadData(otherFile)
+ * val otherData: RDD[(VertexId, OtherType)] = loadData(otherFile)
* val vset3 = vset2.innerJoin(otherData) { (vid, a, b) => b }
* // Now we can construct very fast joins between the two sets
* val vset4: VertexRDD[(SomeType, OtherType)] = vset.leftJoin(vset3)
@@ -51,7 +51,7 @@ import org.apache.spark.graphx.impl.VertexPartition
*/
class VertexRDD[@specialized VD: ClassTag](
val partitionsRDD: RDD[VertexPartition[VD]])
- extends RDD[(VertexID, VD)](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) {
+ extends RDD[(VertexId, VD)](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) {
require(partitionsRDD.partitioner.isDefined)
@@ -92,9 +92,9 @@ class VertexRDD[@specialized VD: ClassTag](
}
/**
- * Provides the `RDD[(VertexID, VD)]` equivalent output.
+ * Provides the `RDD[(VertexId, VD)]` equivalent output.
*/
- override def compute(part: Partition, context: TaskContext): Iterator[(VertexID, VD)] = {
+ override def compute(part: Partition, context: TaskContext): Iterator[(VertexId, VD)] = {
firstParent[VertexPartition[VD]].iterator(part, context).next.iterator
}
@@ -114,9 +114,9 @@ class VertexRDD[@specialized VD: ClassTag](
* rather than allocating new memory.
*
* @param pred the user defined predicate, which takes a tuple to conform to the
- * `RDD[(VertexID, VD)]` interface
+ * `RDD[(VertexId, VD)]` interface
*/
- override def filter(pred: Tuple2[VertexID, VD] => Boolean): VertexRDD[VD] =
+ override def filter(pred: Tuple2[VertexId, VD] => Boolean): VertexRDD[VD] =
this.mapVertexPartitions(_.filter(Function.untupled(pred)))
/**
@@ -140,7 +140,7 @@ class VertexRDD[@specialized VD: ClassTag](
* @return a new VertexRDD with values obtained by applying `f` to each of the entries in the
* original VertexRDD. The resulting VertexRDD retains the same index.
*/
- def mapValues[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexRDD[VD2] =
+ def mapValues[VD2: ClassTag](f: (VertexId, VD) => VD2): VertexRDD[VD2] =
this.mapVertexPartitions(_.map(f))
/**
@@ -172,7 +172,7 @@ class VertexRDD[@specialized VD: ClassTag](
* @return a VertexRDD containing the results of `f`
*/
def leftZipJoin[VD2: ClassTag, VD3: ClassTag]
- (other: VertexRDD[VD2])(f: (VertexID, VD, Option[VD2]) => VD3): VertexRDD[VD3] = {
+ (other: VertexRDD[VD2])(f: (VertexId, VD, Option[VD2]) => VD3): VertexRDD[VD3] = {
val newPartitionsRDD = partitionsRDD.zipPartitions(
other.partitionsRDD, preservesPartitioning = true
) { (thisIter, otherIter) =>
@@ -200,8 +200,8 @@ class VertexRDD[@specialized VD: ClassTag](
* by `f`.
*/
def leftJoin[VD2: ClassTag, VD3: ClassTag]
- (other: RDD[(VertexID, VD2)])
- (f: (VertexID, VD, Option[VD2]) => VD3)
+ (other: RDD[(VertexId, VD2)])
+ (f: (VertexId, VD, Option[VD2]) => VD3)
: VertexRDD[VD3] = {
// Test if the other vertex is a VertexRDD to choose the optimal join strategy.
// If the other set is a VertexRDD then we use the much more efficient leftZipJoin
@@ -225,7 +225,7 @@ class VertexRDD[@specialized VD: ClassTag](
* [[innerJoin]] for the behavior of the join.
*/
def innerZipJoin[U: ClassTag, VD2: ClassTag](other: VertexRDD[U])
- (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = {
+ (f: (VertexId, VD, U) => VD2): VertexRDD[VD2] = {
val newPartitionsRDD = partitionsRDD.zipPartitions(
other.partitionsRDD, preservesPartitioning = true
) { (thisIter, otherIter) =>
@@ -247,8 +247,8 @@ class VertexRDD[@specialized VD: ClassTag](
* @return a VertexRDD co-indexed with `this`, containing only vertices that appear in both `this`
* and `other`, with values supplied by `f`
*/
- def innerJoin[U: ClassTag, VD2: ClassTag](other: RDD[(VertexID, U)])
- (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = {
+ def innerJoin[U: ClassTag, VD2: ClassTag](other: RDD[(VertexId, U)])
+ (f: (VertexId, VD, U) => VD2): VertexRDD[VD2] = {
// Test if the other vertex is a VertexRDD to choose the optimal join strategy.
// If the other set is a VertexRDD then we use the much more efficient innerZipJoin
other match {
@@ -278,7 +278,7 @@ class VertexRDD[@specialized VD: ClassTag](
* messages.
*/
def aggregateUsingIndex[VD2: ClassTag](
- messages: RDD[(VertexID, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2] = {
+ messages: RDD[(VertexId, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2] = {
val shuffled = MsgRDDFunctions.partitionForAggregation(messages, this.partitioner.get)
val parts = partitionsRDD.zipPartitions(shuffled, true) { (thisIter, msgIter) =>
val vertexPartition: VertexPartition[VD] = thisIter.next()
@@ -303,8 +303,8 @@ object VertexRDD {
*
* @param rdd the collection of vertex-attribute pairs
*/
- def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)]): VertexRDD[VD] = {
- val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match {
+ def apply[VD: ClassTag](rdd: RDD[(VertexId, VD)]): VertexRDD[VD] = {
+ val partitioned: RDD[(VertexId, VD)] = rdd.partitioner match {
case Some(p) => rdd
case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size))
}
@@ -323,8 +323,8 @@ object VertexRDD {
* @param rdd the collection of vertex-attribute pairs
* @param mergeFunc the associative, commutative merge function.
*/
- def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)], mergeFunc: (VD, VD) => VD): VertexRDD[VD] = {
- val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match {
+ def apply[VD: ClassTag](rdd: RDD[(VertexId, VD)], mergeFunc: (VD, VD) => VD): VertexRDD[VD] = {
+ val partitioned: RDD[(VertexId, VD)] = rdd.partitioner match {
case Some(p) => rdd
case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size))
}
@@ -338,7 +338,7 @@ object VertexRDD {
* Constructs a VertexRDD from the vertex IDs in `vids`, taking attributes from `rdd` and using
* `defaultVal` otherwise.
*/
- def apply[VD: ClassTag](vids: RDD[VertexID], rdd: RDD[(VertexID, VD)], defaultVal: VD)
+ def apply[VD: ClassTag](vids: RDD[VertexId], rdd: RDD[(VertexId, VD)], defaultVal: VD)
: VertexRDD[VD] = {
VertexRDD(vids.map(vid => (vid, defaultVal))).leftJoin(rdd) { (vid, default, value) =>
value.getOrElse(default)
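An illustrative use of the three-argument constructor above; the names and values are made up, and `sc` is assumed to be a `SparkContext`:
{% highlight scala %}
val vids: RDD[VertexId] = sc.parallelize(0L until 10L)
val attrs: RDD[(VertexId, String)] = sc.parallelize(Seq((0L, "a"), (3L, "b")))
// Vertices 0 and 3 take their attributes from attrs; every other id gets the default.
val vset: VertexRDD[String] = VertexRDD(vids, attrs, defaultVal = "unknown")
{% endhighlight %}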
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
index 6067ee8c7e0fb..57fa5eefd5e09 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala
@@ -34,10 +34,10 @@ import org.apache.spark.graphx.util.collection.PrimitiveKeyOpenHashMap
*/
private[graphx]
class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassTag](
- val srcIds: Array[VertexID],
- val dstIds: Array[VertexID],
+ val srcIds: Array[VertexId],
+ val dstIds: Array[VertexId],
val data: Array[ED],
- val index: PrimitiveKeyOpenHashMap[VertexID, Int]) extends Serializable {
+ val index: PrimitiveKeyOpenHashMap[VertexId, Int]) extends Serializable {
/**
* Reverse all the edges in this partition.
@@ -118,8 +118,8 @@ class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double)
*/
def groupEdges(merge: (ED, ED) => ED): EdgePartition[ED] = {
val builder = new EdgePartitionBuilder[ED]
- var currSrcId: VertexID = null.asInstanceOf[VertexID]
- var currDstId: VertexID = null.asInstanceOf[VertexID]
+ var currSrcId: VertexId = null.asInstanceOf[VertexId]
+ var currDstId: VertexId = null.asInstanceOf[VertexId]
var currAttr: ED = null.asInstanceOf[ED]
var i = 0
while (i < size) {
@@ -153,7 +153,7 @@ class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double)
*/
def innerJoin[ED2: ClassTag, ED3: ClassTag]
(other: EdgePartition[ED2])
- (f: (VertexID, VertexID, ED, ED2) => ED3): EdgePartition[ED3] = {
+ (f: (VertexId, VertexId, ED, ED2) => ED3): EdgePartition[ED3] = {
val builder = new EdgePartitionBuilder[ED3]
var i = 0
var j = 0
@@ -210,14 +210,14 @@ class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double)
* iterator is generated using an index scan, so it is efficient at skipping edges that don't
* match srcIdPred.
*/
- def indexIterator(srcIdPred: VertexID => Boolean): Iterator[Edge[ED]] =
+ def indexIterator(srcIdPred: VertexId => Boolean): Iterator[Edge[ED]] =
index.iterator.filter(kv => srcIdPred(kv._1)).flatMap(Function.tupled(clusterIterator))
/**
* Get an iterator over the cluster of edges in this partition with source vertex id `srcId`. The
* cluster must start at position `index`.
*/
- private def clusterIterator(srcId: VertexID, index: Int) = new Iterator[Edge[ED]] {
+ private def clusterIterator(srcId: VertexId, index: Int) = new Iterator[Edge[ED]] {
private[this] val edge = new Edge[ED]
private[this] var pos = index
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
index 960eeaccf1352..63ccccb056b48 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala
@@ -29,22 +29,22 @@ class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag](size: I
var edges = new PrimitiveVector[Edge[ED]](size)
/** Add a new edge to the partition. */
- def add(src: VertexID, dst: VertexID, d: ED) {
+ def add(src: VertexId, dst: VertexId, d: ED) {
edges += Edge(src, dst, d)
}
def toEdgePartition: EdgePartition[ED] = {
val edgeArray = edges.trim().array
Sorting.quickSort(edgeArray)(Edge.lexicographicOrdering)
- val srcIds = new Array[VertexID](edgeArray.size)
- val dstIds = new Array[VertexID](edgeArray.size)
+ val srcIds = new Array[VertexId](edgeArray.size)
+ val dstIds = new Array[VertexId](edgeArray.size)
val data = new Array[ED](edgeArray.size)
- val index = new PrimitiveKeyOpenHashMap[VertexID, Int]
+ val index = new PrimitiveKeyOpenHashMap[VertexId, Int]
// Copy edges into columnar structures, tracking the beginnings of source vertex id clusters and
// adding them to the index
if (edgeArray.length > 0) {
index.update(srcIds(0), 0)
- var currSrcId: VertexID = srcIds(0)
+ var currSrcId: VertexId = srcIds(0)
var i = 0
while (i < edgeArray.size) {
srcIds(i) = edgeArray(i).srcId
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
index 819e3ba93ac9b..886c250d7cffd 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala
@@ -41,7 +41,7 @@ class EdgeTripletIterator[VD: ClassTag, ED: ClassTag](
// allocating too many temporary Java objects.
private val triplet = new EdgeTriplet[VD, ED]
- private val vmap = new PrimitiveKeyOpenHashMap[VertexID, VD](vidToIndex, vertexArray)
+ private val vmap = new PrimitiveKeyOpenHashMap[VertexId, VD](vidToIndex, vertexArray)
override def hasNext: Boolean = pos < edgePartition.size
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index eee2d58c3d8e1..1d029bf009e8c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -105,7 +105,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
new GraphImpl(vertices, newETable, routingTable, replicatedVertexView)
}
- override def mapVertices[VD2: ClassTag](f: (VertexID, VD) => VD2): Graph[VD2, ED] = {
+ override def mapVertices[VD2: ClassTag](f: (VertexId, VD) => VD2): Graph[VD2, ED] = {
if (classTag[VD] equals classTag[VD2]) {
// The map preserves type, so we can use incremental replication
val newVerts = vertices.mapVertexPartitions(_.map(f)).cache()
@@ -153,7 +153,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
override def subgraph(
epred: EdgeTriplet[VD, ED] => Boolean = x => true,
- vpred: (VertexID, VD) => Boolean = (a, b) => true): Graph[VD, ED] = {
+ vpred: (VertexId, VD) => Boolean = (a, b) => true): Graph[VD, ED] = {
// Filter the vertices, reusing the partitioner and the index from this graph
val newVerts = vertices.mapVertexPartitions(_.filter(vpred))
@@ -195,7 +195,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
//////////////////////////////////////////////////////////////////////////////////////////////////
override def mapReduceTriplets[A: ClassTag](
- mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)],
+ mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
reduceFunc: (A, A) => A,
activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None) = {
@@ -225,7 +225,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
val edgeIter = activeDirectionOpt match {
case Some(EdgeDirection.Both) =>
if (activeFraction < 0.8) {
- edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID))
+ edgePartition.indexIterator(srcVertexId => vPart.isActive(srcVertexId))
.filter(e => vPart.isActive(e.dstId))
} else {
edgePartition.iterator.filter(e => vPart.isActive(e.srcId) && vPart.isActive(e.dstId))
@@ -236,7 +236,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
edgePartition.iterator.filter(e => vPart.isActive(e.srcId) || vPart.isActive(e.dstId))
case Some(EdgeDirection.Out) =>
if (activeFraction < 0.8) {
- edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID))
+ edgePartition.indexIterator(srcVertexId => vPart.isActive(srcVertexId))
} else {
edgePartition.iterator.filter(e => vPart.isActive(e.srcId))
}
@@ -267,8 +267,8 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
} // end of mapReduceTriplets
override def outerJoinVertices[U: ClassTag, VD2: ClassTag]
- (other: RDD[(VertexID, U)])
- (updateF: (VertexID, VD, Option[U]) => VD2): Graph[VD2, ED] =
+ (other: RDD[(VertexId, U)])
+ (updateF: (VertexId, VD, Option[U]) => VD2): Graph[VD2, ED] =
{
if (classTag[VD] equals classTag[VD2]) {
// updateF preserves type, so we can use incremental replication
@@ -312,7 +312,7 @@ object GraphImpl {
}
def apply[VD: ClassTag, ED: ClassTag](
- vertices: RDD[(VertexID, VD)],
+ vertices: RDD[(VertexId, VD)],
edges: RDD[Edge[ED]],
defaultVertexAttr: VD): GraphImpl[VD, ED] =
{
@@ -321,7 +321,7 @@ object GraphImpl {
// Get the set of all vids
val partitioner = Partitioner.defaultPartitioner(vertices)
val vPartitioned = vertices.partitionBy(partitioner)
- val vidsFromEdges = collectVertexIDsFromEdges(edgeRDD, partitioner)
+ val vidsFromEdges = collectVertexIdsFromEdges(edgeRDD, partitioner)
val vids = vPartitioned.zipPartitions(vidsFromEdges) { (vertexIter, vidsFromEdgesIter) =>
vertexIter.map(_._1) ++ vidsFromEdgesIter.map(_._1)
}
@@ -355,7 +355,7 @@ object GraphImpl {
/**
* Create the edge RDD, which is much more efficient for Java heap storage than the normal edges
- * data structure (RDD[(VertexID, VertexID, ED)]).
+ * data structure (RDD[(VertexId, VertexId, ED)]).
*
* The edge RDD contains multiple partitions, and each partition contains only one RDD key-value
* pair: the key is the partition id, and the value is an EdgePartition object containing all the
@@ -378,19 +378,19 @@ object GraphImpl {
defaultVertexAttr: VD): GraphImpl[VD, ED] = {
edges.cache()
// Get the set of all vids
- val vids = collectVertexIDsFromEdges(edges, new HashPartitioner(edges.partitions.size))
+ val vids = collectVertexIdsFromEdges(edges, new HashPartitioner(edges.partitions.size))
// Create the VertexRDD.
val vertices = VertexRDD(vids.mapValues(x => defaultVertexAttr))
GraphImpl(vertices, edges)
}
/** Collects all vids mentioned in edges and partitions them by partitioner. */
- private def collectVertexIDsFromEdges(
+ private def collectVertexIdsFromEdges(
edges: EdgeRDD[_],
- partitioner: Partitioner): RDD[(VertexID, Int)] = {
+ partitioner: Partitioner): RDD[(VertexId, Int)] = {
// TODO: Consider doing map side distinct before shuffle.
- new ShuffledRDD[VertexID, Int, (VertexID, Int)](
- edges.collectVertexIDs.map(vid => (vid, 0)), partitioner)
- .setSerializer(classOf[VertexIDMsgSerializer].getName)
+ new ShuffledRDD[VertexId, Int, (VertexId, Int)](
+ edges.collectVertexIds.map(vid => (vid, 0)), partitioner)
+ .setSerializer(classOf[VertexIdMsgSerializer].getName)
}
} // end of object GraphImpl
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
index cea9d11ebe8cd..e9ee09c3614c1 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala
@@ -20,16 +20,16 @@ package org.apache.spark.graphx.impl
import scala.reflect.{classTag, ClassTag}
import org.apache.spark.Partitioner
-import org.apache.spark.graphx.{PartitionID, VertexID}
+import org.apache.spark.graphx.{PartitionID, VertexId}
import org.apache.spark.rdd.{ShuffledRDD, RDD}
private[graphx]
class VertexBroadcastMsg[@specialized(Int, Long, Double, Boolean) T](
@transient var partition: PartitionID,
- var vid: VertexID,
+ var vid: VertexId,
var data: T)
- extends Product2[PartitionID, (VertexID, T)] with Serializable {
+ extends Product2[PartitionID, (VertexId, T)] with Serializable {
override def _1 = partition
@@ -61,7 +61,7 @@ class MessageToPartition[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef
private[graphx]
class VertexBroadcastMsgRDDFunctions[T: ClassTag](self: RDD[VertexBroadcastMsg[T]]) {
def partitionBy(partitioner: Partitioner): RDD[VertexBroadcastMsg[T]] = {
- val rdd = new ShuffledRDD[PartitionID, (VertexID, T), VertexBroadcastMsg[T]](self, partitioner)
+ val rdd = new ShuffledRDD[PartitionID, (VertexId, T), VertexBroadcastMsg[T]](self, partitioner)
// Set a custom serializer if the data is of int or double type.
if (classTag[T] == ClassTag.Int) {
@@ -99,8 +99,8 @@ object MsgRDDFunctions {
new VertexBroadcastMsgRDDFunctions(rdd)
}
- def partitionForAggregation[T: ClassTag](msgs: RDD[(VertexID, T)], partitioner: Partitioner) = {
- val rdd = new ShuffledRDD[VertexID, T, (VertexID, T)](msgs, partitioner)
+ def partitionForAggregation[T: ClassTag](msgs: RDD[(VertexId, T)], partitioner: Partitioner) = {
+ val rdd = new ShuffledRDD[VertexId, T, (VertexId, T)](msgs, partitioner)
// Set a custom serializer if the data is of int or double type.
if (classTag[T] == ClassTag.Int) {
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
index 5bdc9339e9fec..a8154b63ce5fb 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala
@@ -50,9 +50,9 @@ class ReplicatedVertexView[VD: ClassTag](
* vids from both the source and destination of edges. It must always include both source and
* destination vids because some operations, such as GraphImpl.mapReduceTriplets, rely on this.
*/
- private val localVertexIDMap: RDD[(Int, VertexIdToIndexMap)] = prevViewOpt match {
+ private val localVertexIdMap: RDD[(Int, VertexIdToIndexMap)] = prevViewOpt match {
case Some(prevView) =>
- prevView.localVertexIDMap
+ prevView.localVertexIdMap
case None =>
edges.partitionsRDD.mapPartitions(_.map {
case (pid, epart) =>
@@ -62,7 +62,7 @@ class ReplicatedVertexView[VD: ClassTag](
vidToIndex.add(e.dstId)
}
(pid, vidToIndex)
- }, preservesPartitioning = true).cache().setName("ReplicatedVertexView localVertexIDMap")
+ }, preservesPartitioning = true).cache().setName("ReplicatedVertexView localVertexIdMap")
}
private lazy val bothAttrs: RDD[(PartitionID, VertexPartition[VD])] = create(true, true)
@@ -75,7 +75,7 @@ class ReplicatedVertexView[VD: ClassTag](
srcAttrOnly.unpersist(blocking)
dstAttrOnly.unpersist(blocking)
noAttrs.unpersist(blocking)
- // Don't unpersist localVertexIDMap because a future ReplicatedVertexView may be using it
+ // Don't unpersist localVertexIdMap because a future ReplicatedVertexView may be using it
// without modification
this
}
@@ -133,8 +133,8 @@ class ReplicatedVertexView[VD: ClassTag](
case None =>
// Within each edge partition, place the shipped vertex attributes into the correct
- // locations specified in localVertexIDMap
- localVertexIDMap.zipPartitions(shippedVerts) { (mapIter, shippedVertsIter) =>
+ // locations specified in localVertexIdMap
+ localVertexIdMap.zipPartitions(shippedVerts) { (mapIter, shippedVertsIter) =>
val (pid, vidToIndex) = mapIter.next()
assert(!mapIter.hasNext)
// Populate the vertex array using the vidToIndex map
@@ -157,15 +157,15 @@ class ReplicatedVertexView[VD: ClassTag](
private object ReplicatedVertexView {
protected def buildBuffer[VD: ClassTag](
- pid2vidIter: Iterator[Array[Array[VertexID]]],
+ pid2vidIter: Iterator[Array[Array[VertexId]]],
vertexPartIter: Iterator[VertexPartition[VD]]) = {
- val pid2vid: Array[Array[VertexID]] = pid2vidIter.next()
+ val pid2vid: Array[Array[VertexId]] = pid2vidIter.next()
val vertexPart: VertexPartition[VD] = vertexPartIter.next()
Iterator.tabulate(pid2vid.size) { pid =>
val vidsCandidate = pid2vid(pid)
val size = vidsCandidate.length
- val vids = new PrimitiveVector[VertexID](pid2vid(pid).size)
+ val vids = new PrimitiveVector[VertexId](pid2vid(pid).size)
val attrs = new PrimitiveVector[VD](pid2vid(pid).size)
var i = 0
while (i < size) {
@@ -181,16 +181,16 @@ private object ReplicatedVertexView {
}
protected def buildActiveBuffer(
- pid2vidIter: Iterator[Array[Array[VertexID]]],
+ pid2vidIter: Iterator[Array[Array[VertexId]]],
activePartIter: Iterator[VertexPartition[_]])
- : Iterator[(Int, Array[VertexID])] = {
- val pid2vid: Array[Array[VertexID]] = pid2vidIter.next()
+ : Iterator[(Int, Array[VertexId])] = {
+ val pid2vid: Array[Array[VertexId]] = pid2vidIter.next()
val activePart: VertexPartition[_] = activePartIter.next()
Iterator.tabulate(pid2vid.size) { pid =>
val vidsCandidate = pid2vid(pid)
val size = vidsCandidate.length
- val actives = new PrimitiveVector[VertexID](vidsCandidate.size)
+ val actives = new PrimitiveVector[VertexId](vidsCandidate.size)
var i = 0
while (i < size) {
val vid = vidsCandidate(i)
@@ -205,8 +205,8 @@ private object ReplicatedVertexView {
}
private[graphx]
-class VertexAttributeBlock[VD: ClassTag](val vids: Array[VertexID], val attrs: Array[VD])
+class VertexAttributeBlock[VD: ClassTag](val vids: Array[VertexId], val attrs: Array[VD])
extends Serializable {
- def iterator: Iterator[(VertexID, VD)] =
+ def iterator: Iterator[(VertexId, VD)] =
(0 until vids.size).iterator.map { i => (vids(i), attrs(i)) }
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
index b365d4914e95b..fe44e1ee0c391 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala
@@ -32,12 +32,12 @@ import org.apache.spark.util.collection.PrimitiveVector
private[impl]
class RoutingTable(edges: EdgeRDD[_], vertices: VertexRDD[_]) {
- val bothAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(true, true)
- val srcAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(true, false)
- val dstAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(false, true)
- val noAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(false, false)
+ val bothAttrs: RDD[Array[Array[VertexId]]] = createPid2Vid(true, true)
+ val srcAttrOnly: RDD[Array[Array[VertexId]]] = createPid2Vid(true, false)
+ val dstAttrOnly: RDD[Array[Array[VertexId]]] = createPid2Vid(false, true)
+ val noAttrs: RDD[Array[Array[VertexId]]] = createPid2Vid(false, false)
- def get(includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] =
+ def get(includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexId]]] =
(includeSrcAttr, includeDstAttr) match {
case (true, true) => bothAttrs
case (true, false) => srcAttrOnly
@@ -46,9 +46,9 @@ class RoutingTable(edges: EdgeRDD[_], vertices: VertexRDD[_]) {
}
private def createPid2Vid(
- includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] = {
+ includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexId]]] = {
// Determine which vertices each edge partition needs by creating a mapping from vid to pid.
- val vid2pid: RDD[(VertexID, PartitionID)] = edges.partitionsRDD.mapPartitions { iter =>
+ val vid2pid: RDD[(VertexId, PartitionID)] = edges.partitionsRDD.mapPartitions { iter =>
val (pid: PartitionID, edgePartition: EdgePartition[_]) = iter.next()
val numEdges = edgePartition.size
val vSet = new VertexSet
@@ -71,7 +71,7 @@ class RoutingTable(edges: EdgeRDD[_], vertices: VertexRDD[_]) {
val numPartitions = vertices.partitions.size
vid2pid.partitionBy(vertices.partitioner.get).mapPartitions { iter =>
- val pid2vid = Array.fill(numPartitions)(new PrimitiveVector[VertexID])
+ val pid2vid = Array.fill(numPartitions)(new PrimitiveVector[VertexId])
for ((vid, pid) <- iter) {
pid2vid(pid) += vid
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
index bcad1fbc58802..c74d487e206db 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala
@@ -25,12 +25,12 @@ import org.apache.spark.graphx._
import org.apache.spark.serializer._
private[graphx]
-class VertexIDMsgSerializer(conf: SparkConf) extends Serializer {
+class VertexIdMsgSerializer(conf: SparkConf) extends Serializer {
override def newInstance(): SerializerInstance = new ShuffleSerializerInstance {
override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) {
def writeObject[T](t: T) = {
- val msg = t.asInstanceOf[(VertexID, _)]
+ val msg = t.asInstanceOf[(VertexId, _)]
writeVarLong(msg._1, optimizePositive = false)
this
}
@@ -123,7 +123,7 @@ class IntAggMsgSerializer(conf: SparkConf) extends Serializer {
override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) {
def writeObject[T](t: T) = {
- val msg = t.asInstanceOf[(VertexID, Int)]
+ val msg = t.asInstanceOf[(VertexId, Int)]
writeVarLong(msg._1, optimizePositive = false)
writeUnsignedVarInt(msg._2)
this
@@ -147,7 +147,7 @@ class LongAggMsgSerializer(conf: SparkConf) extends Serializer {
override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) {
def writeObject[T](t: T) = {
- val msg = t.asInstanceOf[(VertexID, Long)]
+ val msg = t.asInstanceOf[(VertexId, Long)]
writeVarLong(msg._1, optimizePositive = false)
writeVarLong(msg._2, optimizePositive = true)
this
@@ -171,7 +171,7 @@ class DoubleAggMsgSerializer(conf: SparkConf) extends Serializer {
override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) {
def writeObject[T](t: T) = {
- val msg = t.asInstanceOf[(VertexID, Double)]
+ val msg = t.asInstanceOf[(VertexId, Double)]
writeVarLong(msg._1, optimizePositive = false)
writeDouble(msg._2)
this
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
index f13bdded7564d..7a54b413dc8ca 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala
@@ -26,18 +26,18 @@ import org.apache.spark.util.collection.BitSet
private[graphx] object VertexPartition {
- def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)]): VertexPartition[VD] = {
- val map = new PrimitiveKeyOpenHashMap[VertexID, VD]
+ def apply[VD: ClassTag](iter: Iterator[(VertexId, VD)]): VertexPartition[VD] = {
+ val map = new PrimitiveKeyOpenHashMap[VertexId, VD]
iter.foreach { case (k, v) =>
map(k) = v
}
new VertexPartition(map.keySet, map._values, map.keySet.getBitSet)
}
- def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)], mergeFunc: (VD, VD) => VD)
+ def apply[VD: ClassTag](iter: Iterator[(VertexId, VD)], mergeFunc: (VD, VD) => VD)
: VertexPartition[VD] =
{
- val map = new PrimitiveKeyOpenHashMap[VertexID, VD]
+ val map = new PrimitiveKeyOpenHashMap[VertexId, VD]
iter.foreach { case (k, v) =>
map.setMerge(k, v, mergeFunc)
}
@@ -60,15 +60,15 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
def size: Int = mask.cardinality()
/** Return the vertex attribute for the given vertex ID. */
- def apply(vid: VertexID): VD = values(index.getPos(vid))
+ def apply(vid: VertexId): VD = values(index.getPos(vid))
- def isDefined(vid: VertexID): Boolean = {
+ def isDefined(vid: VertexId): Boolean = {
val pos = index.getPos(vid)
pos >= 0 && mask.get(pos)
}
/** Look up vid in activeSet, throwing an exception if it is None. */
- def isActive(vid: VertexID): Boolean = {
+ def isActive(vid: VertexId): Boolean = {
activeSet.get.contains(vid)
}
@@ -88,7 +88,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
* each of the entries in the original VertexRDD. The resulting
* VertexPartition retains the same index.
*/
- def map[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexPartition[VD2] = {
+ def map[VD2: ClassTag](f: (VertexId, VD) => VD2): VertexPartition[VD2] = {
// Construct a view of the map transformation
val newValues = new Array[VD2](capacity)
var i = mask.nextSetBit(0)
@@ -108,7 +108,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
* RDD can be easily joined with the original vertex-set. Furthermore, the filter only
* modifies the bitmap index and so no new values are allocated.
*/
- def filter(pred: (VertexID, VD) => Boolean): VertexPartition[VD] = {
+ def filter(pred: (VertexId, VD) => Boolean): VertexPartition[VD] = {
// Allocate the array to store the results into
val newMask = new BitSet(capacity)
// Iterate over the active bits in the old mask and evaluate the predicate
@@ -146,7 +146,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
/** Left outer join another VertexPartition. */
def leftJoin[VD2: ClassTag, VD3: ClassTag]
(other: VertexPartition[VD2])
- (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = {
+ (f: (VertexId, VD, Option[VD2]) => VD3): VertexPartition[VD3] = {
if (index != other.index) {
logWarning("Joining two VertexPartitions with different indexes is slow.")
leftJoin(createUsingIndex(other.iterator))(f)
@@ -165,14 +165,14 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
/** Left outer join another iterator of messages. */
def leftJoin[VD2: ClassTag, VD3: ClassTag]
- (other: Iterator[(VertexID, VD2)])
- (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = {
+ (other: Iterator[(VertexId, VD2)])
+ (f: (VertexId, VD, Option[VD2]) => VD3): VertexPartition[VD3] = {
leftJoin(createUsingIndex(other))(f)
}
/** Inner join another VertexPartition. */
def innerJoin[U: ClassTag, VD2: ClassTag](other: VertexPartition[U])
- (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = {
+ (f: (VertexId, VD, U) => VD2): VertexPartition[VD2] = {
if (index != other.index) {
logWarning("Joining two VertexPartitions with different indexes is slow.")
innerJoin(createUsingIndex(other.iterator))(f)
@@ -192,15 +192,15 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
* Inner join an iterator of messages.
*/
def innerJoin[U: ClassTag, VD2: ClassTag]
- (iter: Iterator[Product2[VertexID, U]])
- (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = {
+ (iter: Iterator[Product2[VertexId, U]])
+ (f: (VertexId, VD, U) => VD2): VertexPartition[VD2] = {
innerJoin(createUsingIndex(iter))(f)
}
/**
* Similar effect as aggregateUsingIndex((a, b) => a)
*/
- def createUsingIndex[VD2: ClassTag](iter: Iterator[Product2[VertexID, VD2]])
+ def createUsingIndex[VD2: ClassTag](iter: Iterator[Product2[VertexId, VD2]])
: VertexPartition[VD2] = {
val newMask = new BitSet(capacity)
val newValues = new Array[VD2](capacity)
@@ -218,7 +218,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
* Similar to innerJoin, but vertices from the left side that don't appear in iter will remain in
* the partition, hidden by the bitmask.
*/
- def innerJoinKeepLeft(iter: Iterator[Product2[VertexID, VD]]): VertexPartition[VD] = {
+ def innerJoinKeepLeft(iter: Iterator[Product2[VertexId, VD]]): VertexPartition[VD] = {
val newMask = new BitSet(capacity)
val newValues = new Array[VD](capacity)
System.arraycopy(values, 0, newValues, 0, newValues.length)
@@ -233,7 +233,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
}
def aggregateUsingIndex[VD2: ClassTag](
- iter: Iterator[Product2[VertexID, VD2]],
+ iter: Iterator[Product2[VertexId, VD2]],
reduceFunc: (VD2, VD2) => VD2): VertexPartition[VD2] = {
val newMask = new BitSet(capacity)
val newValues = new Array[VD2](capacity)
@@ -253,7 +253,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
new VertexPartition[VD2](index, newValues, newMask)
}
- def replaceActives(iter: Iterator[VertexID]): VertexPartition[VD] = {
+ def replaceActives(iter: Iterator[VertexId]): VertexPartition[VD] = {
val newActiveSet = new VertexSet
iter.foreach(newActiveSet.add(_))
new VertexPartition(index, values, mask, Some(newActiveSet))
@@ -263,7 +263,7 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
* Construct a new VertexPartition whose index contains only the vertices in the mask.
*/
def reindex(): VertexPartition[VD] = {
- val hashMap = new PrimitiveKeyOpenHashMap[VertexID, VD]
+ val hashMap = new PrimitiveKeyOpenHashMap[VertexId, VD]
val arbitraryMerge = (a: VD, b: VD) => a
for ((k, v) <- this.iterator) {
hashMap.setMerge(k, v, arbitraryMerge)
@@ -271,8 +271,8 @@ class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag](
new VertexPartition(hashMap.keySet, hashMap._values, hashMap.keySet.getBitSet)
}
- def iterator: Iterator[(VertexID, VD)] =
+ def iterator: Iterator[(VertexId, VD)] =
mask.iterator.map(ind => (index.getValue(ind), values(ind)))
- def vidIterator: Iterator[VertexID] = mask.iterator.map(ind => index.getValue(ind))
+ def vidIterator: Iterator[VertexId] = mask.iterator.map(ind => index.getValue(ind))
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
index f493d2dd01541..79549fe060457 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/package.scala
@@ -20,5 +20,5 @@ package org.apache.spark.graphx
import org.apache.spark.util.collection.OpenHashSet
package object impl {
- private[graphx] type VertexIdToIndexMap = OpenHashSet[VertexID]
+ private[graphx] type VertexIdToIndexMap = OpenHashSet[VertexId]
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
index 2a6c0aa6b554c..e2f6cc138958e 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/ConnectedComponents.scala
@@ -35,9 +35,9 @@ object ConnectedComponents {
  * @return a graph with vertex attributes containing the smallest vertex id in each
* connected component
*/
- def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Graph[VertexID, ED] = {
+ def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Graph[VertexId, ED] = {
val ccGraph = graph.mapVertices { case (vid, _) => vid }
- def sendMessage(edge: EdgeTriplet[VertexID, ED]) = {
+ def sendMessage(edge: EdgeTriplet[VertexId, ED]) = {
if (edge.srcAttr < edge.dstAttr) {
Iterator((edge.dstId, edge.srcAttr))
} else if (edge.srcAttr > edge.dstAttr) {
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 2bdd8c9f985d7..614555a054dfb 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -92,7 +92,7 @@ object PageRank extends Logging {
// Define the three functions needed to implement PageRank in the GraphX
// version of Pregel
- def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double =
+ def vertexProgram(id: VertexId, attr: Double, msgSum: Double): Double =
resetProb + (1.0 - resetProb) * msgSum
def sendMessage(edge: EdgeTriplet[Double, Double]) =
Iterator((edge.dstId, edge.srcAttr * edge.attr))
@@ -137,7 +137,7 @@ object PageRank extends Logging {
// Define the three functions needed to implement PageRank in the GraphX
// version of Pregel
- def vertexProgram(id: VertexID, attr: (Double, Double), msgSum: Double): (Double, Double) = {
+ def vertexProgram(id: VertexId, attr: (Double, Double), msgSum: Double): (Double, Double) = {
val (oldPR, lastDelta) = attr
val newPR = oldPR + (1.0 - resetProb) * msgSum
(newPR, newPR - oldPR)
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index 9c7a212c5a3bb..c327ce7935147 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -79,13 +79,13 @@ object SVDPlusPlus {
(g1: (Long, Double), g2: (Long, Double)) => (g1._1 + g2._1, g1._2 + g2._2))
g = g.outerJoinVertices(t0) {
- (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[(Long, Double)]) =>
+ (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[(Long, Double)]) =>
(vd._1, vd._2, msg.get._2 / msg.get._1, 1.0 / scala.math.sqrt(msg.get._1))
}
def mapTrainF(conf: Conf, u: Double)
(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double])
- : Iterator[(VertexID, (RealVector, RealVector, Double))] = {
+ : Iterator[(VertexId, (RealVector, RealVector, Double))] = {
val (usr, itm) = (et.srcAttr, et.dstAttr)
val (p, q) = (usr._1, itm._1)
var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2)
@@ -112,7 +112,7 @@ object SVDPlusPlus {
et => Iterator((et.srcId, et.dstAttr._2)),
(g1: RealVector, g2: RealVector) => g1.add(g2))
g = g.outerJoinVertices(t1) {
- (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[RealVector]) =>
+ (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[RealVector]) =>
if (msg.isDefined) (vd._1, vd._1.add(msg.get.mapMultiply(vd._4)), vd._3, vd._4) else vd
}
@@ -123,7 +123,7 @@ object SVDPlusPlus {
(g1: (RealVector, RealVector, Double), g2: (RealVector, RealVector, Double)) =>
(g1._1.add(g2._1), g1._2.add(g2._2), g1._3 + g2._3))
g = g.outerJoinVertices(t2) {
- (vid: VertexID,
+ (vid: VertexId,
vd: (RealVector, RealVector, Double, Double),
msg: Option[(RealVector, RealVector, Double)]) =>
(vd._1.add(msg.get._1), vd._2.add(msg.get._2), vd._3 + msg.get._3, vd._4)
@@ -133,7 +133,7 @@ object SVDPlusPlus {
// calculate error on training set
def mapTestF(conf: Conf, u: Double)
(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double])
- : Iterator[(VertexID, Double)] =
+ : Iterator[(VertexId, Double)] =
{
val (usr, itm) = (et.srcAttr, et.dstAttr)
val (p, q) = (usr._1, itm._1)
@@ -146,7 +146,7 @@ object SVDPlusPlus {
g.cache()
val t3 = g.mapReduceTriplets(mapTestF(conf, u), (g1: Double, g2: Double) => g1 + g2)
g = g.outerJoinVertices(t3) {
- (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[Double]) =>
+ (vid: VertexId, vd: (RealVector, RealVector, Double, Double), msg: Option[Double]) =>
if (msg.isDefined) (vd._1, vd._2, vd._3, msg.get) else vd
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
index ed84f72156a55..46da38eeb725a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/StronglyConnectedComponents.scala
@@ -35,7 +35,7 @@ object StronglyConnectedComponents {
*
* @return a graph with vertex attributes containing the smallest vertex id in each SCC
*/
- def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], numIter: Int): Graph[VertexID, ED] = {
+ def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], numIter: Int): Graph[VertexId, ED] = {
// the graph we update with final SCC ids, and the graph we return at the end
var sccGraph = graph.mapVertices { case (vid, _) => vid }
@@ -71,7 +71,7 @@ object StronglyConnectedComponents {
// collect min of all my neighbor's scc values, update if it's smaller than mine
// then notify any neighbors with scc values larger than mine
- sccWorkGraph = Pregel[(VertexID, Boolean), ED, VertexID](
+ sccWorkGraph = Pregel[(VertexId, Boolean), ED, VertexId](
sccWorkGraph, Long.MaxValue, activeDirection = EdgeDirection.Out)(
(vid, myScc, neighborScc) => (math.min(myScc._1, neighborScc), myScc._2),
e => {
@@ -85,7 +85,7 @@ object StronglyConnectedComponents {
// start at root of SCCs. Traverse values in reverse, notify all my neighbors
// do not propagate if colors do not match!
- sccWorkGraph = Pregel[(VertexID, Boolean), ED, Boolean](
+ sccWorkGraph = Pregel[(VertexId, Boolean), ED, Boolean](
sccWorkGraph, false, activeDirection = EdgeDirection.In)(
// vertex is final if it is the root of a color
// or it has the same color as a neighbor that is final
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
index a124c892dcba5..7c396e6e66a28 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
@@ -61,7 +61,7 @@ object TriangleCount {
(vid, _, optSet) => optSet.getOrElse(null)
}
// Edge function computes intersection of smaller vertex with larger vertex
- def edgeFunc(et: EdgeTriplet[VertexSet, ED]): Iterator[(VertexID, Int)] = {
+ def edgeFunc(et: EdgeTriplet[VertexSet, ED]): Iterator[(VertexId, Int)] = {
assert(et.srcAttr != null)
assert(et.dstAttr != null)
val (smallSet, largeSet) = if (et.srcAttr.size < et.dstAttr.size) {
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
index e1ff3ea0d1d42..425a5164cad24 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
@@ -25,11 +25,11 @@ package object graphx {
* A 64-bit vertex identifier that uniquely identifies a vertex within a graph. It does not need
* to follow any ordering or any constraints other than uniqueness.
*/
- type VertexID = Long
+ type VertexId = Long
/** Integer identifer of a graph partition. */
// TODO: Consider using Char.
type PartitionID = Int
- private[graphx] type VertexSet = OpenHashSet[VertexID]
+ private[graphx] type VertexSet = OpenHashSet[VertexId]
}
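
The hunk above renames the public `VertexID` alias to `VertexId` while keeping it a plain `Long`. As a minimal, hypothetical sketch (not part of the patch) of what user code looks like against the renamed alias:

    import org.apache.spark.SparkContext
    import org.apache.spark.graphx.{Edge, Graph, VertexId}
    import org.apache.spark.rdd.RDD

    // Sketch only: VertexId is just an alias for Long, so vertex RDDs are
    // keyed by ordinary 64-bit ids and existing Long-based code keeps working.
    def tinyGraph(sc: SparkContext): Graph[String, Int] = {
      val vertices: RDD[(VertexId, String)] =
        sc.parallelize(Seq((1L, "alice"), (2L, "bob"), (3L, "carol")))
      val edges: RDD[Edge[Int]] =
        sc.parallelize(Seq(Edge(1L, 2L, 1), Edge(2L, 3L, 1)))
      Graph(vertices, edges, "missing") // default attribute for vertices only referenced by edges
    }
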
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
index 9805eb3285d69..7677641bfede6 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
@@ -50,7 +50,7 @@ object GraphGenerators {
val mu = 4
val sigma = 1.3
- val vertices: RDD[(VertexID, Int)] = sc.parallelize(0 until numVertices).map{
+ val vertices: RDD[(VertexId, Int)] = sc.parallelize(0 until numVertices).map{
src => (src, sampleLogNormal(mu, sigma, numVertices))
}
val edges = vertices.flatMap { v =>
@@ -59,9 +59,9 @@ object GraphGenerators {
Graph(vertices, edges, 0)
}
- def generateRandomEdges(src: Int, numEdges: Int, maxVertexID: Int): Array[Edge[Int]] = {
+ def generateRandomEdges(src: Int, numEdges: Int, maxVertexId: Int): Array[Edge[Int]] = {
val rand = new Random()
- Array.fill(maxVertexID) { Edge[Int](src, rand.nextInt(maxVertexID), 1) }
+ Array.fill(maxVertexId) { Edge[Int](src, rand.nextInt(maxVertexId), 1) }
}
/**
@@ -206,9 +206,9 @@ object GraphGenerators {
*/
def gridGraph(sc: SparkContext, rows: Int, cols: Int): Graph[(Int,Int), Double] = {
// Convert row column address into vertex ids (row major order)
- def sub2ind(r: Int, c: Int): VertexID = r * cols + c
+ def sub2ind(r: Int, c: Int): VertexId = r * cols + c
- val vertices: RDD[(VertexID, (Int,Int))] =
+ val vertices: RDD[(VertexId, (Int,Int))] =
sc.parallelize(0 until rows).flatMap( r => (0 until cols).map( c => (sub2ind(r,c), (r,c)) ) )
val edges: RDD[Edge[Double]] =
vertices.flatMap{ case (vid, (r,c)) =>
@@ -228,7 +228,7 @@ object GraphGenerators {
* being the center vertex.
*/
def starGraph(sc: SparkContext, nverts: Int): Graph[Int, Int] = {
- val edges: RDD[(VertexID, VertexID)] = sc.parallelize(1 until nverts).map(vid => (vid, 0))
+ val edges: RDD[(VertexId, VertexId)] = sc.parallelize(1 until nverts).map(vid => (vid, 0))
Graph.fromEdgeTuples(edges, 1)
} // end of starGraph
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
index 4a792c0dabeac..bc2ad5677f806 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala
@@ -28,12 +28,12 @@ class GraphOpsSuite extends FunSuite with LocalSparkContext {
test("joinVertices") {
withSpark { sc =>
val vertices =
- sc.parallelize(Seq[(VertexID, String)]((1, "one"), (2, "two"), (3, "three")), 2)
+ sc.parallelize(Seq[(VertexId, String)]((1, "one"), (2, "two"), (3, "three")), 2)
val edges = sc.parallelize((Seq(Edge(1, 2, "onetwo"))))
val g: Graph[String, String] = Graph(vertices, edges)
- val tbl = sc.parallelize(Seq[(VertexID, Int)]((1, 10), (2, 20)))
- val g1 = g.joinVertices(tbl) { (vid: VertexID, attr: String, u: Int) => attr + u }
+ val tbl = sc.parallelize(Seq[(VertexId, Int)]((1, 10), (2, 20)))
+ val g1 = g.joinVertices(tbl) { (vid: VertexId, attr: String, u: Int) => attr + u }
val v = g1.vertices.collect().toSet
assert(v === Set((1, "one10"), (2, "two20"), (3, "three")))
@@ -60,7 +60,7 @@ class GraphOpsSuite extends FunSuite with LocalSparkContext {
test ("filter") {
withSpark { sc =>
val n = 5
- val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x)))
+ val vertices = sc.parallelize((0 to n).map(x => (x:VertexId, x)))
val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x)))
val graph: Graph[Int, Int] = Graph(vertices, edges).cache()
val filteredGraph = graph.filter(
@@ -68,7 +68,7 @@ class GraphOpsSuite extends FunSuite with LocalSparkContext {
val degrees: VertexRDD[Int] = graph.outDegrees
graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)}
},
- vpred = (vid: VertexID, deg:Int) => deg > 0
+ vpred = (vid: VertexId, deg:Int) => deg > 0
).cache()
val v = filteredGraph.vertices.collect().toSet
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
index b18bc98e6d579..28d34dd9a1a41 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.rdd._
class GraphSuite extends FunSuite with LocalSparkContext {
def starGraph(sc: SparkContext, n: Int): Graph[String, Int] = {
- Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID)), 3), "v")
+ Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexId, x: VertexId)), 3), "v")
}
test("Graph.fromEdgeTuples") {
@@ -57,7 +57,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
withSpark { sc =>
val rawEdges = (0L to 98L).zip((1L to 99L) :+ 0L)
val edges: RDD[Edge[Int]] = sc.parallelize(rawEdges).map { case (s, t) => Edge(s, t, 1) }
- val vertices: RDD[(VertexID, Boolean)] = sc.parallelize((0L until 10L).map(id => (id, true)))
+ val vertices: RDD[(VertexId, Boolean)] = sc.parallelize((0L until 10L).map(id => (id, true)))
val graph = Graph(vertices, edges, false)
assert( graph.edges.count() === rawEdges.size )
// Vertices not explicitly provided but referenced by edges should be created automatically
@@ -74,7 +74,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
val n = 5
val star = starGraph(sc, n)
assert(star.triplets.map(et => (et.srcId, et.dstId, et.srcAttr, et.dstAttr)).collect.toSet ===
- (1 to n).map(x => (0: VertexID, x: VertexID, "v", "v")).toSet)
+ (1 to n).map(x => (0: VertexId, x: VertexId, "v", "v")).toSet)
}
}
@@ -110,7 +110,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
val p = 100
val verts = 1 to n
val graph = Graph.fromEdgeTuples(sc.parallelize(verts.flatMap(x =>
- verts.filter(y => y % x == 0).map(y => (x: VertexID, y: VertexID))), p), 0)
+ verts.filter(y => y % x == 0).map(y => (x: VertexId, y: VertexId))), p), 0)
assert(graph.edges.partitions.length === p)
val partitionedGraph = graph.partitionBy(EdgePartition2D)
assert(graph.edges.partitions.length === p)
@@ -136,10 +136,10 @@ class GraphSuite extends FunSuite with LocalSparkContext {
val star = starGraph(sc, n)
// mapVertices preserving type
val mappedVAttrs = star.mapVertices((vid, attr) => attr + "2")
- assert(mappedVAttrs.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, "v2")).toSet)
+ assert(mappedVAttrs.vertices.collect.toSet === (0 to n).map(x => (x: VertexId, "v2")).toSet)
// mapVertices changing type
val mappedVAttrs2 = star.mapVertices((vid, attr) => attr.length)
- assert(mappedVAttrs2.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, 1)).toSet)
+ assert(mappedVAttrs2.vertices.collect.toSet === (0 to n).map(x => (x: VertexId, 1)).toSet)
}
}
@@ -168,7 +168,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
withSpark { sc =>
val n = 5
val star = starGraph(sc, n)
- assert(star.reverse.outDegrees.collect.toSet === (1 to n).map(x => (x: VertexID, 1)).toSet)
+ assert(star.reverse.outDegrees.collect.toSet === (1 to n).map(x => (x: VertexId, 1)).toSet)
}
}
@@ -191,7 +191,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
test("mask") {
withSpark { sc =>
val n = 5
- val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x)))
+ val vertices = sc.parallelize((0 to n).map(x => (x:VertexId, x)))
val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x)))
val graph: Graph[Int, Int] = Graph(vertices, edges).cache()
@@ -218,7 +218,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
val star = starGraph(sc, n)
val doubleStar = Graph.fromEdgeTuples(
sc.parallelize((1 to n).flatMap(x =>
- List((0: VertexID, x: VertexID), (0: VertexID, x: VertexID))), 1), "v")
+ List((0: VertexId, x: VertexId), (0: VertexId, x: VertexId))), 1), "v")
val star2 = doubleStar.groupEdges { (a, b) => a}
assert(star2.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int]) ===
star.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int]))
@@ -237,7 +237,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
assert(neighborDegreeSums.collect().toSet === (0 to n).map(x => (x, n)).toSet)
// activeSetOpt
- val allPairs = for (x <- 1 to n; y <- 1 to n) yield (x: VertexID, y: VertexID)
+ val allPairs = for (x <- 1 to n; y <- 1 to n) yield (x: VertexId, y: VertexId)
val complete = Graph.fromEdgeTuples(sc.parallelize(allPairs, 3), 0)
val vids = complete.mapVertices((vid, attr) => vid).cache()
val active = vids.vertices.filter { case (vid, attr) => attr % 2 == 0 }
@@ -248,10 +248,10 @@ class GraphSuite extends FunSuite with LocalSparkContext {
}
Iterator((et.srcId, 1))
}, (a: Int, b: Int) => a + b, Some((active, EdgeDirection.In))).collect.toSet
- assert(numEvenNeighbors === (1 to n).map(x => (x: VertexID, n / 2)).toSet)
+ assert(numEvenNeighbors === (1 to n).map(x => (x: VertexId, n / 2)).toSet)
// outerJoinVertices followed by mapReduceTriplets(activeSetOpt)
- val ringEdges = sc.parallelize((0 until n).map(x => (x: VertexID, (x+1) % n: VertexID)), 3)
+ val ringEdges = sc.parallelize((0 until n).map(x => (x: VertexId, (x+1) % n: VertexId)), 3)
val ring = Graph.fromEdgeTuples(ringEdges, 0) .mapVertices((vid, attr) => vid).cache()
val changed = ring.vertices.filter { case (vid, attr) => attr % 2 == 1 }.mapValues(-_).cache()
val changedGraph = ring.outerJoinVertices(changed) { (vid, old, newOpt) => newOpt.getOrElse(old) }
@@ -262,7 +262,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
}
Iterator((et.dstId, 1))
}, (a: Int, b: Int) => a + b, Some(changed, EdgeDirection.Out)).collect.toSet
- assert(numOddNeighbors === (2 to n by 2).map(x => (x: VertexID, 1)).toSet)
+ assert(numOddNeighbors === (2 to n by 2).map(x => (x: VertexId, 1)).toSet)
}
}
@@ -277,7 +277,7 @@ class GraphSuite extends FunSuite with LocalSparkContext {
val neighborDegreeSums = reverseStarDegrees.mapReduceTriplets(
et => Iterator((et.srcId, et.dstAttr), (et.dstId, et.srcAttr)),
(a: Int, b: Int) => a + b).collect.toSet
- assert(neighborDegreeSums === Set((0: VertexID, n)) ++ (1 to n).map(x => (x: VertexID, 0)))
+ assert(neighborDegreeSums === Set((0: VertexId, n)) ++ (1 to n).map(x => (x: VertexId, 0)))
// outerJoinVertices preserving type
val messages = reverseStar.vertices.mapValues { (vid, attr) => vid.toString }
val newReverseStar =
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
index 936e5c9c86fb7..490b94429ea1f 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala
@@ -27,7 +27,7 @@ class PregelSuite extends FunSuite with LocalSparkContext {
test("1 iteration") {
withSpark { sc =>
val n = 5
- val starEdges = (1 to n).map(x => (0: VertexID, x: VertexID))
+ val starEdges = (1 to n).map(x => (0: VertexId, x: VertexId))
val star = Graph.fromEdgeTuples(sc.parallelize(starEdges, 3), "v").cache()
val result = Pregel(star, 0)(
(vid, attr, msg) => attr,
@@ -41,12 +41,12 @@ class PregelSuite extends FunSuite with LocalSparkContext {
withSpark { sc =>
val n = 5
val chain = Graph.fromEdgeTuples(
- sc.parallelize((1 until n).map(x => (x: VertexID, x + 1: VertexID)), 3),
+ sc.parallelize((1 until n).map(x => (x: VertexId, x + 1: VertexId)), 3),
0).cache()
- assert(chain.vertices.collect.toSet === (1 to n).map(x => (x: VertexID, 0)).toSet)
+ assert(chain.vertices.collect.toSet === (1 to n).map(x => (x: VertexId, 0)).toSet)
val chainWithSeed = chain.mapVertices { (vid, attr) => if (vid == 1) 1 else 0 }.cache()
assert(chainWithSeed.vertices.collect.toSet ===
- Set((1: VertexID, 1)) ++ (2 to n).map(x => (x: VertexID, 0)).toSet)
+ Set((1: VertexId, 1)) ++ (2 to n).map(x => (x: VertexId, 0)).toSet)
val result = Pregel(chainWithSeed, 0)(
(vid, attr, msg) => math.max(msg, attr),
et => if (et.dstAttr != et.srcAttr) Iterator((et.dstId, et.srcAttr)) else Iterator.empty,
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
index 0c756400f4eff..e5a582b47ba05 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala
@@ -99,7 +99,7 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
test("IntAggMsgSerializer") {
val conf = new SparkConf(false)
- val outMsg = (4: VertexID, 5)
+ val outMsg = (4: VertexId, 5)
val bout = new ByteArrayOutputStream
val outStrm = new IntAggMsgSerializer(conf).newInstance().serializeStream(bout)
outStrm.writeObject(outMsg)
@@ -107,8 +107,8 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
bout.flush()
val bin = new ByteArrayInputStream(bout.toByteArray)
val inStrm = new IntAggMsgSerializer(conf).newInstance().deserializeStream(bin)
- val inMsg1: (VertexID, Int) = inStrm.readObject()
- val inMsg2: (VertexID, Int) = inStrm.readObject()
+ val inMsg1: (VertexId, Int) = inStrm.readObject()
+ val inMsg2: (VertexId, Int) = inStrm.readObject()
assert(outMsg === inMsg1)
assert(outMsg === inMsg2)
@@ -119,7 +119,7 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
test("LongAggMsgSerializer") {
val conf = new SparkConf(false)
- val outMsg = (4: VertexID, 1L << 32)
+ val outMsg = (4: VertexId, 1L << 32)
val bout = new ByteArrayOutputStream
val outStrm = new LongAggMsgSerializer(conf).newInstance().serializeStream(bout)
outStrm.writeObject(outMsg)
@@ -127,8 +127,8 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
bout.flush()
val bin = new ByteArrayInputStream(bout.toByteArray)
val inStrm = new LongAggMsgSerializer(conf).newInstance().deserializeStream(bin)
- val inMsg1: (VertexID, Long) = inStrm.readObject()
- val inMsg2: (VertexID, Long) = inStrm.readObject()
+ val inMsg1: (VertexId, Long) = inStrm.readObject()
+ val inMsg2: (VertexId, Long) = inStrm.readObject()
assert(outMsg === inMsg1)
assert(outMsg === inMsg2)
@@ -139,7 +139,7 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
test("DoubleAggMsgSerializer") {
val conf = new SparkConf(false)
- val outMsg = (4: VertexID, 5.0)
+ val outMsg = (4: VertexId, 5.0)
val bout = new ByteArrayOutputStream
val outStrm = new DoubleAggMsgSerializer(conf).newInstance().serializeStream(bout)
outStrm.writeObject(outMsg)
@@ -147,8 +147,8 @@ class SerializerSuite extends FunSuite with LocalSparkContext {
bout.flush()
val bin = new ByteArrayInputStream(bout.toByteArray)
val inStrm = new DoubleAggMsgSerializer(conf).newInstance().deserializeStream(bin)
- val inMsg1: (VertexID, Double) = inStrm.readObject()
- val inMsg2: (VertexID, Double) = inStrm.readObject()
+ val inMsg1: (VertexId, Double) = inStrm.readObject()
+ val inMsg2: (VertexId, Double) = inStrm.readObject()
assert(outMsg === inMsg1)
assert(outMsg === inMsg2)
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
index 1195beba5873c..e135d1d7ad6a3 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala
@@ -79,7 +79,7 @@ class EdgePartitionSuite extends FunSuite {
test("innerJoin") {
def makeEdgePartition[A: ClassTag](xs: Iterable[(Int, Int, A)]): EdgePartition[A] = {
val builder = new EdgePartitionBuilder[A]
- for ((src, dst, attr) <- xs) { builder.add(src: VertexID, dst: VertexID, attr) }
+ for ((src, dst, attr) <- xs) { builder.add(src: VertexId, dst: VertexId, attr) }
builder.toEdgePartition
}
val aList = List((0, 1, 0), (1, 0, 0), (1, 2, 0), (5, 4, 0), (5, 5, 0))
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
index eba8d7b716284..3915be15b3434 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala
@@ -100,7 +100,7 @@ class ConnectedComponentsSuite extends FunSuite with LocalSparkContext {
test("Connected Components on a Toy Connected Graph") {
withSpark { sc =>
// Create an RDD for the vertices
- val users: RDD[(VertexID, (String, String))] =
+ val users: RDD[(VertexId, (String, String))] =
sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
(5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
(4L, ("peter", "student"))))
From e12c374d223c67f57ac2ec4af55a9e413272dd10 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 10:00:50 -0800
Subject: [PATCH 017/133] Merge pull request #433 from markhamstra/debFix
Updated Debian packaging
(cherry picked from commit 494d3c077496735e6ebca3217de4f0cc6b6419f2)
Signed-off-by: Patrick Wendell
---
assembly/pom.xml | 116 ++++++++++-
assembly/src/deb/RELEASE | 2 +
.../src/deb/control/control | 0
docs/building-with-maven.md | 6 +-
pom.xml | 10 -
repl-bin/pom.xml | 184 ------------------
repl-bin/src/deb/bin/run | 57 ------
repl-bin/src/deb/bin/spark-executor | 22 ---
repl-bin/src/deb/bin/spark-shell | 21 --
9 files changed, 120 insertions(+), 298 deletions(-)
create mode 100644 assembly/src/deb/RELEASE
rename {repl-bin => assembly}/src/deb/control/control (100%)
delete mode 100644 repl-bin/pom.xml
delete mode 100755 repl-bin/src/deb/bin/run
delete mode 100755 repl-bin/src/deb/bin/spark-executor
delete mode 100755 repl-bin/src/deb/bin/spark-shell
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 54a25910ced7d..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -30,6 +30,13 @@
Spark Project Assembly
http://spark.incubator.apache.org/
+
+ ${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar
+ spark
+ /usr/share/spark
+ root
+
+
@@ -79,7 +86,7 @@
maven-shade-plugin
false
- ${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar
+ ${spark.jar}
*:*
@@ -171,5 +178,112 @@
+
+ deb
+
+
+
+ org.codehaus.mojo
+ buildnumber-maven-plugin
+ 1.1
+
+
+ validate
+
+ create
+
+
+ 8
+
+
+
+
+
+ org.vafer
+ jdeb
+ 0.11
+
+
+ package
+
+ jdeb
+
+
+ ${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb
+ false
+ gzip
+
+
+ ${spark.jar}
+ file
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}/jars
+
+
+
+ ${basedir}/src/deb/RELEASE
+ file
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}
+
+
+
+ ${basedir}/../conf
+ directory
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}/conf
+ 744
+
+
+
+ ${basedir}/../bin
+ directory
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}/bin
+ 744
+
+
+
+ ${basedir}/../sbin
+ directory
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}/sbin
+ 744
+
+
+
+ ${basedir}/../python
+ directory
+
+ perm
+ ${deb.user}
+ ${deb.user}
+ ${deb.install.path}/python
+ 744
+
+
+
+
+
+
+
+
+
+
diff --git a/assembly/src/deb/RELEASE b/assembly/src/deb/RELEASE
new file mode 100644
index 0000000000000..aad50ee73aa45
--- /dev/null
+++ b/assembly/src/deb/RELEASE
@@ -0,0 +1,2 @@
+compute-classpath.sh uses the existence of this file to decide whether to put the assembly jar on the
+classpath or instead to use classfiles in the source tree.
\ No newline at end of file
diff --git a/repl-bin/src/deb/control/control b/assembly/src/deb/control/control
similarity index 100%
rename from repl-bin/src/deb/control/control
rename to assembly/src/deb/control/control
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index b9ff0af76f647..6a9a8d681742f 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -71,8 +71,8 @@ This setup works fine in IntelliJ IDEA 11.1.4. After opening the project via the
## Building Spark Debian Packages ##
-It includes support for building a Debian package containing a 'fat-jar' which includes the repl, the examples and bagel. This can be created by specifying the following profiles:
+The maven build includes support for building a Debian package containing the assembly 'fat-jar', PySpark, and the necessary scripts and configuration files. This can be created by specifying the following:
- $ mvn -Prepl-bin -Pdeb clean package
+ $ mvn -Pdeb -DskipTests clean package
-The debian package can then be found under repl/target. We added the short commit hash to the file name so that we can distinguish individual packages build for SNAPSHOT versions.
+The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions.
diff --git a/pom.xml b/pom.xml
index f14d9667ccc7d..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -762,15 +762,5 @@
-
-
- repl-bin
-
- false
-
-
- repl-bin
-
-
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
deleted file mode 100644
index 869dbdb9b095a..0000000000000
--- a/repl-bin/pom.xml
+++ /dev/null
@@ -1,184 +0,0 @@
-
-
-
-
- 4.0.0
-
- org.apache.spark
- spark-parent
- 0.9.0-incubating-SNAPSHOT
- ../pom.xml
-
-
- org.apache.spark
- spark-repl-bin_2.10
- pom
- Spark Project REPL binary packaging
- http://spark.incubator.apache.org/
-
-
- spark
- /usr/share/spark
- root
-
-
-
-
- org.apache.spark
- spark-core_${scala.binary.version}
- ${project.version}
-
-
- org.apache.spark
- spark-bagel_${scala.binary.version}
- ${project.version}
- runtime
-
-
- org.apache.spark
- spark-repl_${scala.binary.version}
- ${project.version}
- runtime
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
-
- false
- ${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar
-
-
- *:*
-
-
-
-
- *:*
-
- META-INF/*.SF
- META-INF/*.DSA
- META-INF/*.RSA
-
-
-
-
-
-
- package
-
- shade
-
-
-
-
-
- reference.conf
-
-
- spark.repl.Main
-
-
-
-
-
-
-
-
-
-
-
- deb
-
-
-
- org.codehaus.mojo
- buildnumber-maven-plugin
- 1.1
-
-
- validate
-
- create
-
-
- 8
-
-
-
-
-
- org.vafer
- jdeb
- 0.11
-
-
- package
-
- jdeb
-
-
- ${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb
- false
- gzip
-
-
- ${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar
- file
-
- perm
- ${deb.user}
- ${deb.user}
- ${deb.install.path}
-
-
-
- ${basedir}/src/deb/bin
- directory
-
- perm
- ${deb.user}
- ${deb.user}
- ${deb.install.path}
- 744
-
-
-
- ${basedir}/../conf
- directory
-
- perm
- ${deb.user}
- ${deb.user}
- ${deb.install.path}/conf
- 744
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/repl-bin/src/deb/bin/run b/repl-bin/src/deb/bin/run
deleted file mode 100755
index 3a6f22f41fca5..0000000000000
--- a/repl-bin/src/deb/bin/run
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-SCALA_VERSION=2.10
-
-# Figure out where the Scala framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
-
-# Export this as SPARK_HOME
-export SPARK_HOME="$FWDIR"
-
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e $FWDIR/conf/spark-env.sh ] ; then
- . $FWDIR/conf/spark-env.sh
-fi
-
-# Figure out how much memory to use per executor and set it as an environment
-# variable so that our process sees it and can report it to Mesos
-if [ -z "$SPARK_MEM" ] ; then
- SPARK_MEM="512m"
-fi
-export SPARK_MEM
-
-# Set JAVA_OPTS to be able to load native libraries and to set heap size
-JAVA_OPTS="$SPARK_JAVA_OPTS"
-JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH"
-JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM"
-# Load extra JAVA_OPTS from conf/java-opts, if it exists
-if [ -e $FWDIR/conf/java-opts ] ; then
- JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`"
-fi
-export JAVA_OPTS
-
-# Build up classpath
-CLASSPATH=":$FWDIR/conf"
-for jar in `find $FWDIR -name '*jar'`; do
- CLASSPATH+=":$jar"
-done
-export CLASSPATH
-
-exec java -Dscala.usejavacp=true -Djline.shutdownhook=true -cp "$CLASSPATH" $JAVA_OPTS $EXTRA_ARGS "$@"
diff --git a/repl-bin/src/deb/bin/spark-executor b/repl-bin/src/deb/bin/spark-executor
deleted file mode 100755
index 052d76fb8d81c..0000000000000
--- a/repl-bin/src/deb/bin/spark-executor
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-FWDIR="$(cd `dirname $0`; pwd)"
-echo "Running spark-executor with framework dir = $FWDIR"
-exec $FWDIR/run org.apache.spark.executor.MesosExecutorBackend
diff --git a/repl-bin/src/deb/bin/spark-shell b/repl-bin/src/deb/bin/spark-shell
deleted file mode 100755
index 118349d7c30f2..0000000000000
--- a/repl-bin/src/deb/bin/spark-shell
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-FWDIR="$(cd `dirname $0`; pwd)"
-exec $FWDIR/run org.apache.spark.repl.Main "$@"
From aca40aae87179b248da7d96290d19d4c59c01558 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 11:15:07 -0800
Subject: [PATCH 018/133] Merge pull request #441 from pwendell/graphx-build
GraphX shouldn't list Spark as provided.
I noticed this when building an application against GraphX to audit the released artifacts.
(cherry picked from commit 5fecd2516dc8de28b76fe6e0fbdca7922cc28d1c)
Signed-off-by: Patrick Wendell
---
graphx/pom.xml | 1 -
1 file changed, 1 deletion(-)
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 3e5faf230dbc9..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -36,7 +36,6 @@
org.apache.spark
spark-core_${scala.binary.version}
${project.version}
- provided
org.eclipse.jetty
From 29c76d96b2489823a7ad4781129b707c73108bf8 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 13:54:45 -0800
Subject: [PATCH 019/133] Merge pull request #443 from tdas/filestream-fix
Made some classes private[streaming] and deprecated a method in JavaStreamingContext.
The classes `RawTextHelper`, `RawTextSender` and `RateLimitedOutputStream` are not useful in the streaming API. They are not used by the core functionality and were only there as support classes for an obscure example. One of them, RawTextSender, has a main function that can still be executed using bin/spark-class even after it is made private[streaming]. In the future, I will probably remove these classes completely; for the time being, I am just converting them to private[streaming].
The underlying JavaSparkContext in JavaStreamingContext was previously accessed through `JavaStreamingContext.sc`. This is now deprecated, and the preferred accessor is `JavaStreamingContext.sparkContext`, keeping it consistent with `StreamingContext.sparkContext`.
(cherry picked from commit 2a05403a7ced4ecf6084c96f582ee3a24f3cc874)
Signed-off-by: Patrick Wendell
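
A minimal sketch (not part of the patch) of how caller code migrates off the deprecated field, assuming a locally constructed context:

    import org.apache.spark.streaming.Seconds
    import org.apache.spark.streaming.api.java.JavaStreamingContext

    // Hypothetical example: both accessors return the underlying JavaSparkContext,
    // but `sc` now carries @deprecated("use sparkContext", "0.9.0").
    val jssc = new JavaStreamingContext("local[2]", "deprecation-demo", Seconds(1))
    val old  = jssc.sc            // still compiles, emits a deprecation warning
    val ctx  = jssc.sparkContext  // preferred, mirrors StreamingContext.sparkContext
    val rdd  = ctx.parallelize(java.util.Arrays.asList(1, 2, 3))
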
---
.../spark/streaming/api/java/JavaStreamingContext.scala | 6 +++++-
.../spark/streaming/util/RateLimitedOutputStream.scala | 1 +
.../org/apache/spark/streaming/util/RawTextHelper.scala | 1 +
.../org/apache/spark/streaming/util/RawTextSender.scala | 1 +
.../test/java/org/apache/spark/streaming/JavaAPISuite.java | 6 +++---
5 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 4edf8fa13a205..613683ca40501 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -141,8 +141,12 @@ class JavaStreamingContext(val ssc: StreamingContext) {
*/
def this(path: String, hadoopConf: Configuration) = this(new StreamingContext(path, hadoopConf))
+
+ @deprecated("use sparkContext", "0.9.0")
+ val sc: JavaSparkContext = sparkContext
+
/** The underlying SparkContext */
- val sc: JavaSparkContext = new JavaSparkContext(ssc.sc)
+ val sparkContext = new JavaSparkContext(ssc.sc)
/**
* Create a input stream from network source hostname:port. Data is received using
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala
index b9c0596378b4f..179fd7593982c 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala
@@ -22,6 +22,7 @@ import scala.annotation.tailrec
import java.io.OutputStream
import java.util.concurrent.TimeUnit._
+private[streaming]
class RateLimitedOutputStream(out: OutputStream, bytesPerSec: Int) extends OutputStream {
val SYNC_INTERVAL = NANOSECONDS.convert(10, SECONDS)
val CHUNK_SIZE = 8192
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
index 5b6c048a39620..07021ebb5802a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala
@@ -22,6 +22,7 @@ import org.apache.spark.SparkContext._
import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap}
import scala.collection.JavaConversions.mapAsScalaMap
+private[streaming]
object RawTextHelper {
/**
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala
index 463617a713b22..684b38e8b3102 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala
@@ -33,6 +33,7 @@ import org.apache.spark.util.IntParam
* A helper program that sends blocks of Kryo-serialized text strings out on a socket at a
* specified rate. Used to feed data into RawInputDStream.
*/
+private[streaming]
object RawTextSender extends Logging {
def main(args: Array[String]) {
if (args.length != 4) {
diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
index 8b7d7709bf2c5..4fbbce9b8b90e 100644
--- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
+++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
@@ -297,9 +297,9 @@ public void testQueueStream() {
Arrays.asList(7,8,9));
JavaSparkContext jsc = new JavaSparkContext(ssc.ssc().sc());
- JavaRDD<Integer> rdd1 = ssc.sc().parallelize(Arrays.asList(1, 2, 3));
- JavaRDD<Integer> rdd2 = ssc.sc().parallelize(Arrays.asList(4, 5, 6));
- JavaRDD<Integer> rdd3 = ssc.sc().parallelize(Arrays.asList(7,8,9));
+ JavaRDD<Integer> rdd1 = ssc.sparkContext().parallelize(Arrays.asList(1, 2, 3));
+ JavaRDD<Integer> rdd2 = ssc.sparkContext().parallelize(Arrays.asList(4, 5, 6));
+ JavaRDD<Integer> rdd3 = ssc.sparkContext().parallelize(Arrays.asList(7,8,9));
LinkedList<JavaRDD<Integer>> rdds = Lists.newLinkedList();
rdds.add(rdd1);
From e3fa36f259b7ede73bc148891e2635bf41221660 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 13:55:14 -0800
Subject: [PATCH 020/133] Merge pull request #442 from pwendell/standalone
Workers should use working directory as spark home if it's not specified
If users don't set SPARK_HOME in their environment file when launching an application, the standalone cluster should default to the spark home of the worker.
(cherry picked from commit 59f475c79fc8fd6d3485e4d0adf6768b6a9225a4)
Signed-off-by: Patrick Wendell
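
The diff below boils down to an Option-based fallback; as a standalone sketch of the pattern (names hypothetical, not the actual Worker code):

    // Minimal sketch of the fallback: prefer the Spark home sent with the
    // application description, otherwise fall back to the worker's own Spark home.
    def effectiveSparkHome(execSparkHome: String, workerSparkHome: java.io.File): String =
      Option(execSparkHome).getOrElse(workerSparkHome.getAbsolutePath)
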
---
.../main/scala/org/apache/spark/deploy/worker/Worker.scala | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 5182dcbb2abfd..312560d7063a4 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -209,8 +209,11 @@ private[spark] class Worker(
logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
} else {
logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
+ // TODO (pwendell): We shuld make sparkHome an Option[String] in
+ // ApplicationDescription to be more explicit about this.
+ val effectiveSparkHome = Option(execSparkHome_).getOrElse(sparkHome.getAbsolutePath)
val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_,
- self, workerId, host, new File(execSparkHome_), workDir, akkaUrl, ExecutorState.RUNNING)
+ self, workerId, host, new File(effectiveSparkHome), workDir, akkaUrl, ExecutorState.RUNNING)
executors(appId + "/" + execId) = manager
manager.start()
coresUsed += cores_
From 4ccedb3d1d8c40d3ec914b2d17be9b43aa4744f4 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 14:25:45 -0800
Subject: [PATCH 021/133] Merge pull request #444 from mateiz/py-version
Clarify that Python 2.7 is only needed for MLlib
(cherry picked from commit 4f0c361b0e140f5f6879f019b2e1a16c683c705c)
Signed-off-by: Patrick Wendell
---
docs/mllib-guide.md | 3 ++-
docs/python-programming-guide.md | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 1a5c640d10df4..a22a22184b5c6 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -21,7 +21,8 @@ depends on native Fortran routines. You may need to install the
if it is not already present on your nodes. MLlib will throw a linking error if it cannot
detect these libraries automatically.
-To use MLlib in Python, you will also need [NumPy](http://www.numpy.org) version 1.7 or newer.
+To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.7 or newer
+and Python 2.7.
# Binary Classification
diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md
index b07899c2e176d..7c5283fb0b6fb 100644
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@ -52,7 +52,7 @@ In addition, PySpark fully supports interactive use---simply run `./bin/pyspark`
# Installing and Configuring PySpark
-PySpark requires Python 2.7 or higher.
+PySpark requires Python 2.6 or higher.
PySpark applications are executed using a standard CPython interpreter in order to support Python modules that use C extensions.
We have not tested PySpark with Python 3 or with alternative Python interpreters, such as [PyPy](http://pypy.org/) or [Jython](http://www.jython.org/).
@@ -152,7 +152,7 @@ Many of the methods also contain [doctests](http://docs.python.org/2/library/doc
# Libraries
[MLlib](mllib-guide.html) is also available in PySpark. To use it, you'll need
-[NumPy](http://www.numpy.org) version 1.7 or newer. The [MLlib guide](mllib-guide.html) contains
+[NumPy](http://www.numpy.org) version 1.7 or newer, and Python 2.7. The [MLlib guide](mllib-guide.html) contains
some example applications.
# Where to Go from Here
From 7749b988dcc6f3abfd1d5b42f07f82eb680010e0 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 14:33:37 -0800
Subject: [PATCH 022/133] Change log for release 0.9.0-incubating
---
CHANGES.txt | 2967 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 2967 insertions(+)
create mode 100644 CHANGES.txt
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000000000..d5ca405dcc198
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,2967 @@
+Spark Change Log
+
+Release 0.9.0-incubating
+
+ 4ccedb3 Wed Jan 15 14:26:48 2014 -0800
+ Merge pull request #444 from mateiz/py-version
+ [Clarify that Python 2.7 is only needed for MLlib]
+
+ e3fa36f Wed Jan 15 13:56:04 2014 -0800
+ Merge pull request #442 from pwendell/standalone
+ [Workers should use working directory as spark home if it's not specified]
+
+ 29c76d9 Wed Jan 15 13:55:48 2014 -0800
+ Merge pull request #443 from tdas/filestream-fix
+ [Made some classes private[stremaing] and deprecated a method in JavaStreamingContext.]
+
+ aca40aa Wed Jan 15 11:15:47 2014 -0800
+ Merge pull request #441 from pwendell/graphx-build
+ [GraphX shouldn't list Spark as provided.]
+
+ e12c374 Wed Jan 15 10:01:43 2014 -0800
+ Merge pull request #433 from markhamstra/debFix
+ [Updated Debian packaging]
+
+ 2f015c2 Tue Jan 14 23:17:28 2014 -0800
+ Merge pull request #436 from ankurdave/VertexId-case
+ [Rename VertexID -> VertexId in GraphX]
+
+ 2859cab Tue Jan 14 23:08:19 2014 -0800
+ Merge pull request #435 from tdas/filestream-fix
+ [Fixed the flaky tests by making SparkConf not serializable]
+
+ fbfbb33 Tue Jan 14 23:06:29 2014 -0800
+ Merge pull request #434 from rxin/graphxmaven
+ [Fixed SVDPlusPlusSuite in Maven build.]
+
+ 2c6c07f Tue Jan 14 21:53:05 2014 -0800
+ Merge pull request #424 from jegonzal/GraphXProgrammingGuide
+ [Additional edits for clarity in the graphx programming guide.]
+
+ 6fa4e02 Tue Jan 14 21:51:25 2014 -0800
+ Merge pull request #431 from ankurdave/graphx-caching-doc
+ [Describe caching and uncaching in GraphX programming guide]
+
+ 2f930d5 Tue Jan 14 15:00:11 2014 -0800
+ Merge pull request #428 from pwendell/writeable-objects
+ [Don't clone records for text files]
+
+ 329c9df Tue Jan 14 14:53:36 2014 -0800
+ Merge pull request #429 from ankurdave/graphx-examples-pom.xml
+ [Add GraphX dependency to examples/pom.xml]
+
+ a14933d Tue Jan 14 14:52:42 2014 -0800
+ Merge pull request #427 from pwendell/deprecate-aggregator
+ [Deprecate rather than remove old combineValuesByKey function]
+
+ 119b6c5 Tue Jan 14 13:29:08 2014 -0800
+ Merge pull request #425 from rxin/scaladoc
+ [API doc update & make Broadcast public]
+
+ bf3b150 Tue Jan 14 09:45:22 2014 -0800
+ Merge pull request #423 from jegonzal/GraphXProgrammingGuide
+ [Improving the graphx-programming-guide]
+
+ 1b4adc2 Tue Jan 14 01:19:24 2014 -0800
+ Merge pull request #420 from pwendell/header-files
+ [Add missing header files]
+
+ b60840e Tue Jan 14 00:48:34 2014 -0800
+ Merge pull request #418 from pwendell/0.9-versions
+ [Version changes for release 0.9.0.]
+
+ 980250b Tue Jan 14 00:05:37 2014 -0800
+ Merge pull request #416 from tdas/filestream-fix
+ [Removed unnecessary DStream operations and updated docs]
+
+ 055be5c Mon Jan 13 23:26:44 2014 -0800
+ Merge pull request #415 from pwendell/shuffle-compress
+ [Enable compression by default for spills]
+
+ fdaabdc Mon Jan 13 23:08:26 2014 -0800
+ Merge pull request #380 from mateiz/py-bayes
+ [Add Naive Bayes to Python MLlib, and some API fixes]
+
+ 4a805af Mon Jan 13 22:58:38 2014 -0800
+ Merge pull request #367 from ankurdave/graphx
+ [GraphX: Unifying Graphs and Tables]
+
+ 945fe7a Mon Jan 13 22:56:12 2014 -0800
+ Merge pull request #408 from pwendell/external-serializers
+ [Improvements to external sorting]
+
+ 68641bc Mon Jan 13 22:54:13 2014 -0800
+ Merge pull request #413 from rxin/scaladoc
+ [Adjusted visibility of various components and documentation for 0.9.0 release.]
+
+ 0ca0d4d Mon Jan 13 22:32:21 2014 -0800
+ Merge pull request #401 from andrewor14/master
+ [External sorting - Add number of bytes spilled to Web UI]
+
+ 08b9fec Mon Jan 13 22:29:03 2014 -0800
+ Merge pull request #409 from tdas/unpersist
+ [Automatically unpersisting RDDs that have been cleaned up from DStreams]
+
+ b07bc02 Mon Jan 13 20:45:22 2014 -0800
+ Merge pull request #412 from harveyfeng/master
+ [Add default value for HadoopRDD's `cloneRecords` constructor arg]
+
+ a2fee38 Mon Jan 13 19:45:26 2014 -0800
+ Merge pull request #411 from tdas/filestream-fix
+ [Improved logic of finding new files in FileInputDStream]
+
+ 01c0d72 Mon Jan 13 16:24:30 2014 -0800
+ Merge pull request #410 from rxin/scaladoc1
+ [Updated JavaStreamingContext to make scaladoc compile.]
+
+ 8038da2 Mon Jan 13 14:59:30 2014 -0800
+ Merge pull request #2 from jegonzal/GraphXCCIssue
+ [Improving documentation and identifying potential bug in CC calculation.]
+
+ b93f9d4 Mon Jan 13 12:18:05 2014 -0800
+ Merge pull request #400 from tdas/dstream-move
+ [Moved DStream and PairDSream to org.apache.spark.streaming.dstream]
+
+ e6ed13f Sun Jan 12 22:35:14 2014 -0800
+ Merge pull request #397 from pwendell/host-port
+ [Remove now un-needed hostPort option]
+
+ 0b96d85 Sun Jan 12 21:31:43 2014 -0800
+ Merge pull request #399 from pwendell/consolidate-off
+ [Disable shuffle file consolidation by default]
+
+ 0ab505a Sun Jan 12 21:31:04 2014 -0800
+ Merge pull request #395 from hsaputra/remove_simpleredundantreturn_scala
+ [Remove simple redundant return statements for Scala methods/functions]
+
+ 405bfe8 Sun Jan 12 20:04:21 2014 -0800
+ Merge pull request #394 from tdas/error-handling
+ [Better error handling in Spark Streaming and more API cleanup]
+
+ 28a6b0c Sun Jan 12 19:49:36 2014 -0800
+ Merge pull request #398 from pwendell/streaming-api
+ [Rename DStream.foreach to DStream.foreachRDD]
+
+ 074f502 Sun Jan 12 17:01:13 2014 -0800
+ Merge pull request #396 from pwendell/executor-env
+ [Setting load defaults to true in executor]
+
+ 82e2b92 Sun Jan 12 16:55:11 2014 -0800
+ Merge pull request #392 from rxin/listenerbus
+ [Stop SparkListenerBus daemon thread when DAGScheduler is stopped.]
+
+ 288a878 Sat Jan 11 21:53:19 2014 -0800
+ Merge pull request #389 from rxin/clone-writables
+ [Minor update for clone writables and more documentation.]
+
+ dbc11df Sat Jan 11 18:07:13 2014 -0800
+ Merge pull request #388 from pwendell/master
+ [Fix UI bug introduced in #244.]
+
+ 409866b Sat Jan 11 17:12:06 2014 -0800
+ Merge pull request #393 from pwendell/revert-381
+ [Revert PR 381]
+
+ 6510f04 Sat Jan 11 12:48:26 2014 -0800
+ Merge pull request #387 from jerryshao/conf-fix
+ [Fix configure didn't work small problem in ALS]
+
+ ee6e7f9 Sat Jan 11 12:07:55 2014 -0800
+ Merge pull request #359 from ScrapCodes/clone-writables
+ [We clone hadoop key and values by default and reuse objects if asked to.]
+
+ 4216178 Sat Jan 11 09:46:48 2014 -0800
+ Merge pull request #373 from jerryshao/kafka-upgrade
+ [Upgrade Kafka dependecy to 0.8.0 release version]
+
+ 92ad18b Fri Jan 10 23:25:15 2014 -0800
+ Merge pull request #376 from prabeesh/master
+ [Change clientId to random clientId]
+
+ 0b5ce7a Fri Jan 10 23:23:21 2014 -0800
+ Merge pull request #386 from pwendell/typo-fix
+ [Small typo fix]
+
+ 1d7bef0 Fri Jan 10 18:53:03 2014 -0800
+ Merge pull request #381 from mateiz/default-ttl
+ [Fix default TTL for metadata cleaner]
+
+ 44d6a8e Fri Jan 10 17:51:50 2014 -0800
+ Merge pull request #382 from RongGu/master
+ [Fix a type error in comment lines]
+
+ 88faa30 Fri Jan 10 17:14:22 2014 -0800
+ Merge pull request #385 from shivaram/add-i2-instances
+ [Add i2 instance types to Spark EC2.]
+
+ f265531 Fri Jan 10 16:25:44 2014 -0800
+ Merge pull request #383 from tdas/driver-test
+ [API for automatic driver recovery for streaming programs and other bug fixes]
+
+ d37408f Fri Jan 10 16:25:01 2014 -0800
+ Merge pull request #377 from andrewor14/master
+ [External Sorting for Aggregator and CoGroupedRDDs (Revisited)]
+
+ 0eaf01c Fri Jan 10 15:32:19 2014 -0800
+ Merge pull request #369 from pillis/master
+ [SPARK-961 Add a Vector.random() method]
+
+ 7cef843 Fri Jan 10 15:34:15 2014 -0600
+ Merge pull request #371 from tgravescs/yarn_client_addjar_misc_fixes
+ [Yarn client addjar and misc fixes]
+
+ 7b58f11 Fri Jan 10 12:47:46 2014 -0800
+ Merge pull request #384 from pwendell/debug-logs
+ [Make DEBUG-level logs consummable.]
+
+ 23d2995 Fri Jan 10 10:20:02 2014 -0800
+ Merge pull request #1 from jegonzal/graphx
+ [ProgrammingGuide]
+
+ 0ebc973 Thu Jan 9 23:58:49 2014 -0800
+ Merge pull request #375 from mateiz/option-fix
+ [Fix bug added when we changed AppDescription.maxCores to an Option]
+
+ dd03cea Thu Jan 9 23:38:03 2014 -0800
+ Merge pull request #378 from pwendell/consolidate_on
+ [Enable shuffle consolidation by default.]
+
+ 997c830 Thu Jan 9 22:22:20 2014 -0800
+ Merge pull request #363 from pwendell/streaming-logs
+ [Set default logging to WARN for Spark streaming examples.]
+
+ 300eaa9 Thu Jan 9 20:29:51 2014 -0800
+ Merge pull request #353 from pwendell/ipython-simplify
+ [Simplify and fix pyspark script.]
+
+ 4b074fa Thu Jan 9 19:03:55 2014 -0800
+ Merge pull request #374 from mateiz/completeness
+ [Add some missing Java API methods]
+
+ a9d5333 Thu Jan 9 18:46:46 2014 -0800
+ Merge pull request #294 from RongGu/master
+ [Bug fixes for updating the RDD block's memory and disk usage information]
+
+ d86a85e Thu Jan 9 18:37:52 2014 -0800
+ Merge pull request #293 from pwendell/standalone-driver
+ [SPARK-998: Support Launching Driver Inside of Standalone Mode]
+
+ 26cdb5f Thu Jan 9 17:16:34 2014 -0800
+ Merge pull request #372 from pwendell/log4j-fix-1
+ [Send logs to stderr by default (instead of stdout).]
+
+ 12f414e Thu Jan 9 15:31:30 2014 -0800
+ Merge pull request #362 from mateiz/conf-getters
+ [Use typed getters for configuration settings]
+
+ 365cac9 Thu Jan 9 00:56:16 2014 -0800
+ Merge pull request #361 from rxin/clean
+ [Minor style cleanup. Mostly on indenting & line width changes.]
+
+ 73c724e Thu Jan 9 00:32:19 2014 -0800
+ Merge pull request #368 from pwendell/sbt-fix
+ [Don't delegate to users `sbt`.]
+
+ dceedb4 Wed Jan 8 23:19:28 2014 -0800
+ Merge pull request #364 from pwendell/fix
+ [Fixing config option "retained_stages" => "retainedStages".]
+
+ 04d83fc Wed Jan 8 11:55:37 2014 -0800
+ Merge pull request #360 from witgo/master
+ [fix make-distribution.sh show version: command not found]
+
+ 56ebfea Wed Jan 8 11:50:06 2014 -0800
+ Merge pull request #357 from hsaputra/set_boolean_paramname
+ [Set boolean param name for call to SparkHadoopMapReduceUtil.newTaskAttemptID]
+
+ bdeaeaf Wed Jan 8 11:48:39 2014 -0800
+ Merge pull request #358 from pwendell/add-cdh
+ [Add CDH Repository to Maven Build]
+
+ 5cae05f Wed Jan 8 11:47:28 2014 -0800
+ Merge pull request #356 from hsaputra/remove_deprecated_cleanup_method
+ [Remove calls to deprecated mapred's OutputCommitter.cleanupJob]
+
+ 6eef78d Wed Jan 8 08:49:20 2014 -0600
+ Merge pull request #345 from colorant/yarn
+ [support distributing extra files to worker for yarn client mode]
+
+ bb6a39a Tue Jan 7 22:32:18 2014 -0800
+ Merge pull request #322 from falaki/MLLibDocumentationImprovement
+ [SPARK-1009 Updated MLlib docs to show how to use it in Python]
+
+ cb1b927 Tue Jan 7 22:26:28 2014 -0800
+ Merge pull request #355 from ScrapCodes/patch-1
+ [Update README.md]
+
+ c0f0155 Tue Jan 7 22:21:52 2014 -0800
+ Merge pull request #313 from tdas/project-refactor
+ [Refactored the streaming project to separate external libraries like Twitter, Kafka, Flume, etc.]
+
+ f5f12dc Tue Jan 7 21:56:35 2014 -0800
+ Merge pull request #336 from liancheng/akka-remote-lookup
+ [Get rid of `Either[ActorRef, ActorSelection]']
+
+ 11891e6 Wed Jan 8 00:32:18 2014 -0500
+ Merge pull request #327 from lucarosellini/master
+ [Added ‘-i’ command line option to Spark REPL]
+
+ 7d0aac9 Wed Jan 8 00:30:45 2014 -0500
+ Merge pull request #354 from hsaputra/addasfheadertosbt
+ [Add ASF header to the new sbt script.]
+
+ d75dc42 Wed Jan 8 00:30:03 2014 -0500
+ Merge pull request #350 from mateiz/standalone-limit
+ [Add way to limit default # of cores used by apps in standalone mode]
+
+ 61674bc Tue Jan 7 18:32:13 2014 -0800
+ Merge pull request #352 from markhamstra/oldArch
+ [Don't leave os.arch unset after BlockManagerSuite]
+
+ b2e690f Tue Jan 7 16:57:08 2014 -0800
+ Merge pull request #328 from falaki/MatrixFactorizationModel-fix
+ [SPARK-1012: DAGScheduler Exception Fix]
+
+ 6ccf8ce Tue Jan 7 15:49:14 2014 -0800
+ Merge pull request #351 from pwendell/maven-fix
+ [Add log4j exclusion rule to maven.]
+
+ 7d5fa17 Tue Jan 7 11:31:34 2014 -0800
+ Merge pull request #337 from yinxusen/mllib-16-bugfix
+ [Mllib 16 bugfix]
+
+ 71fc113 Tue Jan 7 11:30:35 2014 -0800
+ Merge pull request #349 from CodingCat/support-worker_dir
+ [add the comments about SPARK_WORKER_DIR]
+
+ 15d9534 Tue Jan 7 08:10:02 2014 -0800
+ Merge pull request #318 from srowen/master
+ [Suggested small changes to Java code for slightly more standard style, encapsulation and in some cases performance]
+
+ 468af0f Tue Jan 7 08:09:01 2014 -0800
+ Merge pull request #348 from prabeesh/master
+ [spark -> org.apache.spark]
+
+ c3cf047 Tue Jan 7 00:54:25 2014 -0800
+ Merge pull request #339 from ScrapCodes/conf-improvements
+ [Conf improvements]
+
+ a862caf Tue Jan 7 00:18:20 2014 -0800
+ Merge pull request #331 from holdenk/master
+ [Add a script to download sbt if not present on the system]
+
+ b97ef21 Mon Jan 6 20:12:57 2014 -0800
+ Merge pull request #346 from sproblvem/patch-1
+ [Update stop-slaves.sh]
+
+ 7210257 Mon Jan 6 18:25:44 2014 -0800
+ Merge pull request #128 from adamnovak/master
+ [Fix failing "sbt/sbt publish-local" by adding a no-argument PrimitiveKeyOpenHashMap constructor
]
+
+ e4d6057 Mon Jan 6 14:56:54 2014 -0800
+ Merge pull request #343 from pwendell/build-fix
+ [Fix test breaking downstream builds]
+
+ 93bf962 Mon Jan 6 11:42:41 2014 -0800
+ Merge pull request #340 from ScrapCodes/sbt-fixes
+ [Made java options to be applied during tests so that they become self explanatory.]
+
+ 60edeb3 Mon Jan 6 11:40:32 2014 -0800
+ Merge pull request #338 from ScrapCodes/ning-upgrade
+ [SPARK-1005 Ning upgrade]
+
+ c708e81 Mon Jan 6 11:35:48 2014 -0800
+ Merge pull request #341 from ash211/patch-5
+ [Clarify spark.cores.max in docs]
+
+ 33fcb91 Mon Jan 6 11:19:23 2014 -0800
+ Merge pull request #342 from tgravescs/fix_maven_protobuf
+ [Change protobuf version for yarn alpha back to 2.4.1]
+
+ 357083c Mon Jan 6 10:29:04 2014 -0800
+ Merge pull request #330 from tgravescs/fix_addjars_null_handling
+ [Fix handling of empty SPARK_EXAMPLES_JAR]
+
+ a2e7e04 Sun Jan 5 22:37:36 2014 -0800
+ Merge pull request #333 from pwendell/logging-silence
+ [Quiet ERROR-level Akka Logs]
+
+ 5b0986a Sun Jan 5 19:25:09 2014 -0800
+ Merge pull request #334 from pwendell/examples-fix
+ [Removing SPARK_EXAMPLES_JAR in the code]
+
+ f4b924f Sun Jan 5 17:11:47 2014 -0800
+ Merge pull request #335 from rxin/ser
+ [Fall back to zero-arg constructor for Serializer initialization if there is no constructor that accepts SparkConf.]
+
+ d43ad3e Sat Jan 4 16:29:30 2014 -0800
+ Merge pull request #292 from soulmachine/naive-bayes
+ [standard Naive Bayes classifier]
+
+ 86404da Sat Jan 4 14:55:54 2014 -0800
+ Merge pull request #127 from jegonzal/MapByPartition
+ [Adding mapEdges and mapTriplets by Partition]
+
+ e68cdb1 Sat Jan 4 13:46:02 2014 -0800
+ Merge pull request #124 from jianpingjwang/master
+ [refactor and bug fix]
+
+ 280ddf6 Sat Jan 4 12:54:41 2014 -0800
+ Merge pull request #121 from ankurdave/more-simplify
+ [Simplify GraphImpl internals further]
+
+ 10fe23b Fri Jan 3 23:50:14 2014 -0800
+ Merge pull request #329 from pwendell/remove-binaries
+ [SPARK-1002: Remove Binaries from Spark Source]
+
+ c4d6145 Fri Jan 3 16:30:53 2014 -0800
+ Merge pull request #325 from witgo/master
+ [Modify spark on yarn to create SparkConf process]
+
+ 4ae101f Fri Jan 3 11:24:35 2014 -0800
+ Merge pull request #317 from ScrapCodes/spark-915-segregate-scripts
+ [Spark-915 segregate scripts]
+
+ 87248bd Fri Jan 3 00:45:31 2014 -0800
+ Merge pull request #1 from apache/master
+ [Merge latest Spark changes]
+
+ 30b9db0 Thu Jan 2 23:15:55 2014 -0800
+ Merge pull request #285 from colorant/yarn-refactor
+ [Yarn refactor]
+
+ 498a5f0 Thu Jan 2 19:06:40 2014 -0800
+ Merge pull request #323 from tgravescs/sparkconf_yarn_fix
+ [fix spark on yarn after the sparkConf changes]
+
+ 0475ca8 Thu Jan 2 15:17:08 2014 -0800
+ Merge pull request #320 from kayousterhout/erroneous_failed_msg
+ [Remove erroneous FAILED state for killed tasks.]
+
+ 588a169 Thu Jan 2 13:20:54 2014 -0800
+ Merge pull request #297 from tdas/window-improvement
+ [Improvements to DStream window ops and refactoring of Spark's CheckpointSuite]
+
+ 5e67cdc Thu Jan 2 12:56:28 2014 -0800
+ Merge pull request #319 from kayousterhout/remove_error_method
+ [Removed redundant TaskSetManager.error() function.]
+
+ ca67909 Thu Jan 2 15:54:54 2014 -0500
+ Merge pull request #311 from tmyklebu/master
+ [SPARK-991: Report information gleaned from a Python stacktrace in the UI]
+
+ 3713f81 Wed Jan 1 21:29:12 2014 -0800
+ Merge pull request #309 from mateiz/conf2
+ [SPARK-544. Migrate configuration to a SparkConf class]
+
+ c1d928a Wed Jan 1 17:03:48 2014 -0800
+ Merge pull request #312 from pwendell/log4j-fix-2
+   [SPARK-1008: Logging improvements]
+
+ dc9cb83 Wed Jan 1 13:28:34 2014 -0800
+ Merge pull request #126 from jegonzal/FixingPersist
+ [Fixing Persist Behavior]
+
+ 9a0ff72 Tue Dec 31 21:50:24 2013 -0800
+ Merge pull request #314 from witgo/master
+ [restore core/pom.xml file modification]
+
+ 8b8e70e Tue Dec 31 17:48:24 2013 -0800
+ Merge pull request #73 from falaki/ApproximateDistinctCount
+ [Approximate distinct count]
+
+ 63b411d Tue Dec 31 14:31:28 2013 -0800
+ Merge pull request #238 from ngbinh/upgradeNetty
+ [upgrade Netty from 4.0.0.Beta2 to 4.0.13.Final]
+
+ 32d6ae9 Tue Dec 31 13:51:07 2013 -0800
+ Merge pull request #120 from ankurdave/subgraph-reuses-view
+ [Reuse VTableReplicated in GraphImpl.subgraph]
+
+ 55b7e2f Tue Dec 31 10:12:51 2013 -0800
+ Merge pull request #289 from tdas/filestream-fix
+ [Bug fixes for file input stream and checkpointing]
+
+ 2b71ab9 Mon Dec 30 11:01:30 2013 -0800
+ Merge pull request from aarondav: Utilize DiskBlockManager pathway for temp file writing
+ [This gives us a couple advantages:]
+
+ 50e3b8e Mon Dec 30 07:44:26 2013 -0800
+ Merge pull request #308 from kayousterhout/stage_naming
+ [Changed naming of StageCompleted event to be consistent]
+
+ 72a17b6 Sat Dec 28 21:25:40 2013 -1000
+ Revert "Merge pull request #310 from jyunfan/master"
+ [This reverts commit 79b20e4dbe3dcd8559ec8316784d3334bb55868b, reversing]
+
+ 79b20e4 Sat Dec 28 21:13:36 2013 -1000
+ Merge pull request #310 from jyunfan/master
+ [Fix typo in the Accumulators section]
+
+ 7375047 Sat Dec 28 13:25:06 2013 -0800
+ Merge pull request #304 from kayousterhout/remove_unused
+ [Removed unused failed and causeOfFailure variables (in TaskSetManager)]
+
+ ad3dfd1 Fri Dec 27 22:10:14 2013 -0500
+ Merge pull request #307 from kayousterhout/other_failure
+ [Removed unused OtherFailure TaskEndReason.]
+
+ b579b83 Fri Dec 27 22:09:04 2013 -0500
+ Merge pull request #306 from kayousterhout/remove_pending
+ [Remove unused hasPendingTasks methods]
+
+ 19672dc Fri Dec 27 13:37:10 2013 -0800
+ Merge pull request #305 from kayousterhout/line_spacing
+ [Fixed >100char lines in DAGScheduler.scala]
+
+ 7be1e57 Thu Dec 26 23:41:40 2013 -1000
+ Merge pull request #298 from aarondav/minor
+ [Minor: Decrease margin of left side of Log page]
+
+ 7d811ba Thu Dec 26 23:39:58 2013 -1000
+ Merge pull request #302 from pwendell/SPARK-1007
+ [SPARK-1007: spark-class2.cmd should change SCALA_VERSION to be 2.10]
+
+ 5e69fc5 Thu Dec 26 19:10:39 2013 -0500
+ Merge pull request #295 from markhamstra/JobProgressListenerNPE
+ [Avoid a lump of coal (NPE) in JobProgressListener's stocking.]
+
+ da20270 Thu Dec 26 12:11:52 2013 -0800
+ Merge pull request #1 from aarondav/driver
+ [Refactor DriverClient to be more Actor-based]
+
+ e240bad Thu Dec 26 12:30:48 2013 -0500
+ Merge pull request #296 from witgo/master
+ [Renamed ClusterScheduler to TaskSchedulerImpl for yarn and new-yarn package]
+
+ c344ed0 Thu Dec 26 01:31:06 2013 -0500
+ Merge pull request #283 from tmyklebu/master
+ [Python bindings for mllib]
+
+ 56094bc Wed Dec 25 13:14:33 2013 -0500
+ Merge pull request #290 from ash211/patch-3
+ [Typo: avaiable -> available]
+
+ 4842a07 Wed Dec 25 01:52:15 2013 -0800
+ Merge pull request #287 from azuryyu/master
+ [Fixed job name in the java streaming example.]
+
+ 85a344b Tue Dec 24 16:35:06 2013 -0800
+ Merge pull request #127 from kayousterhout/consolidate_schedulers
+ [Deduplicate Local and Cluster schedulers.]
+
+ c2dd6bc Tue Dec 24 14:36:47 2013 -0800
+ Merge pull request #279 from aarondav/shuffle-cleanup0
+ [Clean up shuffle files once their metadata is gone]
+
+ 3bf7c70 Tue Dec 24 16:37:13 2013 -0500
+ Merge pull request #275 from ueshin/wip/changeclasspathorder
+ [Change the order of CLASSPATH.]
+
+ d63856c Mon Dec 23 22:07:26 2013 -0800
+ Merge pull request #286 from rxin/build
+ [Show full stack trace and time taken in unit tests.]
+
+ 23a9ae6 Tue Dec 24 00:08:48 2013 -0500
+ Merge pull request #277 from tdas/scheduler-update
+ [Refactored the streaming scheduler and added StreamingListener interface]
+
+ 11107c9 Mon Dec 23 10:38:20 2013 -0800
+ Merge pull request #244 from leftnoteasy/master
+ [Added SPARK-968 implementation for review]
+
+ 44e4205 Sun Dec 22 11:44:18 2013 -0800
+ Merge pull request #116 from jianpingjwang/master
+ [remove unused variables and fix a bug]
+
+ 4797c22 Fri Dec 20 13:30:39 2013 -0800
+ Merge pull request #118 from ankurdave/VertexPartitionSuite
+ [Test VertexPartition and fix bugs]
+
+ 0bc57c5 Fri Dec 20 11:56:54 2013 -0800
+ Merge pull request #280 from aarondav/minor
+ [Minor cleanup for standalone scheduler]
+
+ ac70b8f Fri Dec 20 10:56:10 2013 -0800
+ Merge pull request #117 from ankurdave/more-tests
+ [More tests]
+
+ 45310d4 Thu Dec 19 22:08:20 2013 -0800
+ Merge pull request #115 from ankurdave/test-reorg
+ [Reorganize unit tests; improve GraphSuite test coverage]
+
+ 9228ec8 Thu Dec 19 21:37:15 2013 -0800
+ Merge pull request #1 from aarondav/127
+ [Merge master into 127]
+
+ eca68d4 Thu Dec 19 18:12:22 2013 -0800
+ Merge pull request #272 from tmyklebu/master
+ [Track and report task result serialisation time.]
+
+ 7990c56 Thu Dec 19 13:35:09 2013 -0800
+ Merge pull request #276 from shivaram/collectPartition
+ [Add collectPartition to JavaRDD interface.]
+
+ 440e531 Thu Dec 19 10:38:56 2013 -0800
+ Merge pull request #278 from MLnick/java-python-tostring
+ [Add toString to Java RDD, and __repr__ to Python RDD]
+
+ d8d3f3e Thu Dec 19 00:06:43 2013 -0800
+ Merge pull request #183 from aarondav/spark-959
+ [[SPARK-959] Explicitly depend on org.eclipse.jetty.orbit jar]
+
+ bfba532 Wed Dec 18 22:22:21 2013 -0800
+ Merge pull request #247 from aarondav/minor
+ [Increase spark.akka.askTimeout default to 30 seconds]
+
+ da301b5 Wed Dec 18 20:03:29 2013 -0800
+ Merge pull request #112 from amatsukawa/scc
+ [Strongly connected component algorithm]
+
+ c64a53a Wed Dec 18 16:56:26 2013 -0800
+ Merge pull request #267 from JoshRosen/cygwin
+ [Fix Cygwin support in several scripts.]
+
+ a645ef6 Wed Dec 18 16:07:52 2013 -0800
+ Merge pull request #48 from amatsukawa/add_project_to_graph
+ [Add mask operation on graph and filter graph primitive]
+
+ d7ebff0 Wed Dec 18 15:38:48 2013 -0800
+ Merge pull request #1 from ankurdave/add_project_to_graph
+ [Merge current master and reimplement Graph.mask using innerJoin]
+
+ 5ea1872 Wed Dec 18 15:27:24 2013 -0800
+ Merge pull request #274 from azuryy/master
+   [Fixed the example link in the Scala programming guide.]
+
+ 3fd2e09 Wed Dec 18 12:52:36 2013 -0800
+ Merge pull request #104 from jianpingjwang/master
+ [SVD++ demo]
+
+ f4effb3 Tue Dec 17 22:26:21 2013 -0800
+ Merge pull request #273 from rxin/top
+ [Fixed a performance problem in RDD.top and BoundedPriorityQueue]
+
+ 1b5eacb Tue Dec 17 13:49:17 2013 -0800
+ Merge pull request #102 from ankurdave/clustered-edge-index
+ [Add clustered index on edges by source vertex]
+
+ 7a8169b Mon Dec 16 22:42:21 2013 -0800
+ Merge pull request #268 from pwendell/shaded-protobuf
+ [Add support for 2.2. to master (via shaded jars)]
+
+ 0476c84 Mon Dec 16 17:19:25 2013 -0800
+ Merge pull request #100 from ankurdave/mrTriplets-active-set
+ [Support activeSet option in mapReduceTriplets]
+
+ 964a3b6 Mon Dec 16 15:23:51 2013 -0800
+ Merge pull request #270 from ewencp/really-force-ssh-pseudo-tty-master
+ [Force pseudo-tty allocation in spark-ec2 script.]
+
+ 5192ef3 Mon Dec 16 15:08:08 2013 -0800
+ Merge pull request #94 from ankurdave/load-edges-columnar
+ [Load edges in columnar format]
+
+ 883e034 Mon Dec 16 14:16:02 2013 -0800
+ Merge pull request #245 from gregakespret/task-maxfailures-fix
+ [Fix for spark.task.maxFailures not enforced correctly.]
+
+ a51f340 Sun Dec 15 22:02:30 2013 -0800
+ Merge pull request #265 from markhamstra/scala.binary.version
+ [DRY out the POMs with scala.binary.version]
+
+ ded10ce Sun Dec 15 17:25:33 2013 -0800
+ Merge pull request #103 from amplab/optimizations
+ [Optimizations cherry-picked from SIGMOD branches]
+
+ d2ced6d Sun Dec 15 14:11:34 2013 -0800
+ Merge pull request #256 from MLnick/master
+ [Fix 'IPYTHON=1 ./pyspark' throwing ValueError]
+
+ c55e698 Sun Dec 15 12:49:02 2013 -0800
+ Merge pull request #257 from tgravescs/sparkYarnFixName
+ [Fix the --name option for Spark on Yarn]
+
+ ab85f88 Sun Dec 15 12:48:32 2013 -0800
+ Merge pull request #264 from shivaram/spark-class-fix
+ [Use CoarseGrainedExecutorBackend in spark-class]
+
+ 8a56c1f Sat Dec 14 16:29:24 2013 -0800
+ Merge pull request #84 from amatsukawa/graphlab_enhancements
+ [GraphLab bug fix & set start vertex]
+
+ 7db9165 Sat Dec 14 14:16:34 2013 -0800
+ Merge pull request #251 from pwendell/master
+ [Fix list rendering in YARN markdown docs.]
+
+ 2fd781d Sat Dec 14 12:59:37 2013 -0800
+ Merge pull request #249 from ngbinh/partitionInJavaSortByKey
+ [Expose numPartitions parameter in JavaPairRDD.sortByKey()]
+
+ 9bf192b Sat Dec 14 12:52:18 2013 -0800
+ Merge pull request #91 from amplab/standalone-pagerank
+ [Standalone PageRank]
+
+ 840af5e Sat Dec 14 12:51:51 2013 -0800
+ Merge pull request #99 from ankurdave/only-dynamic-pregel
+ [Remove static Pregel; take maxIterations in dynamic Pregel]
+
+ 97ac060 Sat Dec 14 00:22:45 2013 -0800
+ Merge pull request #259 from pwendell/scala-2.10
+ [Migration to Scala 2.10]
+
+ 7ac944f Fri Dec 13 23:22:08 2013 -0800
+ Merge pull request #262 from pwendell/mvn-fix
+ [Fix maven build issues in 2.10 branch]
+
+ 6defb06 Fri Dec 13 21:18:57 2013 -0800
+ Merge pull request #261 from ScrapCodes/scala-2.10
+ [Added a comment about ActorRef and ActorSelection difference.]
+
+ 76566b1 Fri Dec 13 10:11:02 2013 -0800
+ Merge pull request #260 from ScrapCodes/scala-2.10
+ [Review comments on the PR for scala 2.10 migration.]
+
+ 0aeb182 Thu Dec 12 21:14:42 2013 -0800
+ Merge pull request #255 from ScrapCodes/scala-2.10
+ [Disabled yarn 2.2 in sbt and mvn build and added a message in the sbt build.]
+
+ 2e89398 Wed Dec 11 23:10:53 2013 -0800
+ Merge pull request #254 from ScrapCodes/scala-2.10
+ [Scala 2.10 migration]
+
+ ce6ca4e Wed Dec 11 22:30:54 2013 -0800
+ Merge pull request #97 from dcrankshaw/fix-rddtop
+ [Added BoundedPriorityQueue kryo registrator. Fixes top issue.]
+
+ d2efe13 Tue Dec 10 13:01:26 2013 -0800
+ Merge pull request #250 from pwendell/master
+ [README incorrectly suggests build sources spark-env.sh]
+
+ 6169fe1 Mon Dec 9 16:51:36 2013 -0800
+ Merge pull request #246 from pwendell/master
+ [Add missing license headers]
+
+ d992ec6 Sun Dec 8 20:49:20 2013 -0800
+ Merge pull request #195 from dhardy92/fix_DebScriptPackage
+ [[Deb] fix package of Spark classes adding org.apache prefix in scripts embeded in .deb]
+
+ 1f4a4bc Sat Dec 7 22:34:34 2013 -0800
+ Merge pull request #242 from pwendell/master
+ [Update broken links and add HDP 2.0 version string]
+
+ 6494d62 Sat Dec 7 11:56:16 2013 -0800
+ Merge pull request #240 from pwendell/master
+ [SPARK-917 Improve API links in nav bar]
+
+ f466f79 Sat Dec 7 11:51:52 2013 -0800
+ Merge pull request #239 from aarondav/nit
+   [Correct spelling error in configuration.md]
+
+ 3abfbfb Sat Dec 7 11:24:19 2013 -0800
+ Merge pull request #92 from ankurdave/rdd-names
+ [Set RDD names for easy debugging]
+
+ 31e8a14 Fri Dec 6 21:49:55 2013 -0800
+ Merge pull request #90 from amplab/pregel-replicate-changed
+ [Replicate only changed vertices]
+
+ 10c3c0c Fri Dec 6 20:29:45 2013 -0800
+ Merge pull request #237 from pwendell/formatting-fix
+ [Formatting fix]
+
+ 1b38f5f Fri Dec 6 20:16:15 2013 -0800
+ Merge pull request #236 from pwendell/shuffle-docs
+ [Adding disclaimer for shuffle file consolidation]
+
+ e5d5728 Fri Dec 6 20:14:56 2013 -0800
+ Merge pull request #235 from pwendell/master
+ [Minor doc fixes and updating README]
+
+ 241336a Fri Dec 6 17:29:03 2013 -0800
+ Merge pull request #234 from alig/master
+ [Updated documentation about the YARN v2.2 build process]
+
+ e039234 Fri Dec 6 11:49:59 2013 -0800
+ Merge pull request #190 from markhamstra/Stages4Jobs
+ [stageId <--> jobId mapping in DAGScheduler]
+
+ bfa6860 Fri Dec 6 11:04:03 2013 -0800
+ Merge pull request #233 from hsaputra/changecontexttobackend
+ [Change the name of input argument in ClusterScheduler#initialize from context to backend.]
+
+ 3fb302c Fri Dec 6 11:03:32 2013 -0800
+ Merge pull request #205 from kayousterhout/logging
+ [Added logging of scheduler delays to UI]
+
+ 87676a6 Fri Dec 6 11:01:42 2013 -0800
+ Merge pull request #220 from rxin/zippart
+ [Memoize preferred locations in ZippedPartitionsBaseRDD]
+
+ 0780498 Thu Dec 5 23:29:42 2013 -0800
+ Merge pull request #232 from markhamstra/FiniteWait
+ [jobWaiter.synchronized before jobWaiter.wait]
+
+ 1c8500e Thu Dec 5 16:25:44 2013 -0800
+ Merge pull request #88 from amplab/varenc
+ [Fixed a bug that variable encoding doesn't work for ints that use all 64 bits.]
+
+ e0bcaa0 Thu Dec 5 12:37:02 2013 -0800
+ Merge pull request #86 from ankurdave/vid-varenc
+ [Finish work on #85]
+
+ 5d46025 Thu Dec 5 12:31:24 2013 -0800
+ Merge pull request #228 from pwendell/master
+ [Document missing configs and set shuffle consolidation to false.]
+
+ 3e96b9a Thu Dec 5 12:07:36 2013 -0800
+ Merge pull request #85 from ankurdave/vid-varenc
+ [Always write Vids using variable encoding]
+
+ 72b6961 Wed Dec 4 23:33:04 2013 -0800
+ Merge pull request #199 from harveyfeng/yarn-2.2
+ [Hadoop 2.2 migration]
+
+ e0347ba Wed Dec 4 17:38:06 2013 -0800
+ Merge pull request #83 from ankurdave/fix-tests
+ [Fix compile errors in GraphSuite and SerializerSuite]
+
+ 182f9ba Wed Dec 4 15:52:07 2013 -0800
+ Merge pull request #227 from pwendell/master
+ [Fix small bug in web UI and minor clean-up.]
+
+ cbd3b75 Wed Dec 4 15:35:26 2013 -0800
+ Merge pull request #81 from amplab/clean1
+ [Codebase refactoring]
+
+ b9e7609 Wed Dec 4 14:42:09 2013 -0800
+ Merge pull request #225 from ash211/patch-3
+ [Add missing space after "Serialized" in StorageLevel]
+
+ 055462c Wed Dec 4 14:02:11 2013 -0800
+ Merge pull request #226 from ash211/patch-4
+ [Typo: applicaton]
+
+ d6e5473 Wed Dec 4 10:28:50 2013 -0800
+ Merge pull request #223 from rxin/transient
+ [Mark partitioner, name, and generator field in RDD as @transient.]
+
+ 8a3475a Tue Dec 3 14:21:40 2013 -0800
+ Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error
+ [Fix UnicodeEncodeError in PySpark saveAsTextFile() (SPARK-970)]
+
+ 46b87b8 Tue Dec 3 00:41:11 2013 -0800
+ Merge pull request #2 from colorant/yarn-client-2.2
+ [Fix pom.xml for maven build]
+
+ 58d9bbc Mon Dec 2 21:58:53 2013 -0800
+ Merge pull request #217 from aarondav/mesos-urls
+ [Re-enable zk:// urls for Mesos SparkContexts]
+
+ 740922f Sun Dec 1 12:46:58 2013 -0800
+ Merge pull request #219 from sundeepn/schedulerexception
+ [Scheduler quits when newStage fails]
+
+ 60e23a5 Sat Nov 30 23:38:49 2013 -0800
+ Merge pull request #216 from liancheng/fix-spark-966
+ [Bugfix: SPARK-965 & SPARK-966]
+
+ 34ee814 Sat Nov 30 15:10:30 2013 -0800
+ Merged Ankur's pull request #80 and fixed subgraph.
+ []
+
+ 743a31a Wed Nov 27 18:24:39 2013 -0800
+ Merge pull request #210 from haitaoyao/http-timeout
+ [add http timeout for httpbroadcast]
+
+ 993e293 Wed Nov 27 00:57:54 2013 -0800
+ Merge pull request #1 from colorant/yarn-client-2.2
+ [Port yarn-client mode for new-yarn]
+
+ fb6875d Tue Nov 26 20:55:40 2013 -0800
+ Merge pull request #146 from JoshRosen/pyspark-custom-serializers
+ [Custom Serializers for PySpark]
+
+ 330ada1 Tue Nov 26 19:08:33 2013 -0800
+ Merge pull request #207 from henrydavidge/master
+ [Log a warning if a task's serialized size is very big]
+
+ 615213f Tue Nov 26 19:07:20 2013 -0800
+ Merge pull request #212 from markhamstra/SPARK-963
+ [[SPARK-963] Fixed races in JobLoggerSuite]
+
+ cb976df Tue Nov 26 10:23:19 2013 -0800
+ Merge pull request #209 from pwendell/better-docs
+ [Improve docs for shuffle instrumentation]
+
+ 18d6df0 Tue Nov 26 00:00:07 2013 -0800
+ Merge pull request #86 from holdenk/master
+ [Add histogram functionality to DoubleRDDFunctions]
+
+ 0e2109d Mon Nov 25 20:48:37 2013 -0800
+ Merge pull request #204 from rxin/hash
+ [OpenHashSet fixes]
+
+ c46067f Mon Nov 25 19:09:31 2013 -0800
+ Merge pull request #206 from ash211/patch-2
+ [Update tuning.md]
+
+ 14bb465 Mon Nov 25 18:50:18 2013 -0800
+ Merge pull request #201 from rxin/mappartitions
+   [Use the proper partition index in mapPartitionsWithIndex]
+
+ eb4296c Mon Nov 25 15:25:29 2013 -0800
+ Merge pull request #101 from colorant/yarn-client-scheduler
+ [For SPARK-527, Support spark-shell when running on YARN]
+
+ 466fd06 Mon Nov 25 18:27:26 2013 +0800
+ Incorporated ideas from pull request #200. - Use Murmur Hash 3 finalization step to scramble the bits of HashCode instead of the simpler version in java.util.HashMap; the latter one had trouble with ranges of consecutive integers. Murmur Hash 3 is used by fastutil.
+ [- Don't check keys for equality when re-inserting due to growing the]
+
+ 088995f Mon Nov 25 00:57:51 2013 -0800
+ Merge pull request #77 from amplab/upgrade
+ [Sync with Spark master]
+
+ 62889c4 Mon Nov 25 11:27:45 2013 +0800
+ Merge pull request #203 from witgo/master
+ [ Fix Maven build for metrics-graphite]
+
+ 6af03ed Sun Nov 24 16:42:37 2013 -0800
+ Merge pull request #76 from dcrankshaw/fix_partitioners
+ [Actually use partitioner command line args in Analytics.]
+
+ 859d62d Sun Nov 24 16:19:51 2013 -0800
+ Merge pull request #151 from russellcardullo/add-graphite-sink
+ [Add graphite sink for metrics]
+
+ 65de73c Sun Nov 24 15:52:33 2013 -0800
+ Merge pull request #185 from mkolod/random-number-generator
+ [XORShift RNG with unit tests and benchmark]
+
+ 972171b Mon Nov 25 07:50:46 2013 +0800
+ Merge pull request #197 from aarondav/patrick-fix
+ [Fix 'timeWriting' stat for shuffle files]
+
+ a1a7e36 Sun Nov 24 05:15:09 2013 -0800
+ Merge pull request #75 from amplab/simplify
+ [Simplify GraphImpl internals]
+
+ 718cc80 Sun Nov 24 11:02:02 2013 +0800
+ Merge pull request #200 from mateiz/hash-fix
+ [AppendOnlyMap fixes]
+
+ 51aa9d6 Sat Nov 23 19:46:46 2013 +0800
+ Merge pull request #198 from ankurdave/zipPartitions-preservesPartitioning
+ [Support preservesPartitioning in RDD.zipPartitions]
+
+ 18ce7e9 Fri Nov 22 17:02:40 2013 -0800
+ Merge pull request #73 from jegonzal/TriangleCount
+ [Triangle count]
+
+ 086b097 Fri Nov 22 10:26:39 2013 +0800
+ Merge pull request #193 from aoiwelle/patch-1
+ [Fix Kryo Serializer buffer documentation inconsistency]
+
+ f20093c Fri Nov 22 10:12:13 2013 +0800
+ Merge pull request #196 from pwendell/master
+ [TimeTrackingOutputStream should pass on calls to close() and flush().]
+
+ 4b89501 Wed Nov 20 10:36:10 2013 -0800
+ Merge pull request #191 from hsaputra/removesemicolonscala
+ [Cleanup to remove semicolons (;) from Scala code]
+
+ 202f8e6 Wed Nov 20 03:26:08 2013 -0800
+ Merge pull request #74 from dcrankshaw/remove_sleep
+ [Removed sleep from pagerank in Analytics]
+
+ 74ade9e Tue Nov 19 16:53:58 2013 -0800
+ Merge pull request #62 from dcrankshaw/partitioners
+ [Allow user to choose a partitioner at runtime]
+
+ f568912 Tue Nov 19 16:11:31 2013 -0800
+ Merge pull request #181 from BlackNiuza/fix_tasks_number
+ [correct number of tasks in ExecutorsUI]
+
+ aa638ed Tue Nov 19 16:05:44 2013 -0800
+ Merge pull request #189 from tgravescs/sparkYarnErrorHandling
+   [Improve Spark on Yarn error handling]
+
+ 5592580 Tue Nov 19 16:04:01 2013 -0800
+ Merge pull request #187 from aarondav/example-bcast-test
+ [Enable the Broadcast examples to work in a cluster setting]
+
+ 99cfe89 Mon Nov 18 22:00:36 2013 -0500
+ Updates to reflect pull request code review
+ []
+
+ e2ebc3a Sun Nov 17 18:42:18 2013 -0800
+ Merge pull request #182 from rxin/vector
+ [Slightly enhanced PrimitiveVector:]
+
+ 26f616d Sun Nov 17 18:18:16 2013 -0800
+ Merge pull request #3 from aarondav/pv-test
+ [Add PrimitiveVectorSuite and fix bug in resize()]
+
+ 1b5b358 Sat Nov 16 11:44:10 2013 -0800
+ Merge pull request #178 from hsaputra/simplecleanupcode
+ [Simple cleanup on Spark's Scala code]
+
+ 62a2a71 Fri Nov 15 13:12:07 2013 -0800
+ Merge pull request #65 from amplab/varenc
+ [Use variable encoding for ints, longs, and doubles in the specialized serializers.]
+
+ f6b2e59 Thu Nov 14 23:04:55 2013 -0800
+ Merge pull request #1 from aarondav/scala210-master
+ [Various merge corrections]
+
+ 96e0fb4 Thu Nov 14 22:29:28 2013 -0800
+ Merge pull request #173 from kayousterhout/scheduler_hang
+ [Fix bug where scheduler could hang after task failure.]
+
+ dfd40e9 Thu Nov 14 19:44:50 2013 -0800
+ Merge pull request #175 from kayousterhout/no_retry_not_serializable
+ [Don't retry tasks when they fail due to a NotSerializableException]
+
+ ed25105 Thu Nov 14 19:43:55 2013 -0800
+ Merge pull request #174 from ahirreddy/master
+ [Write Spark UI url to driver file on HDFS]
+
+ 1a4cfbe Thu Nov 14 10:32:11 2013 -0800
+ Merge pull request #169 from kayousterhout/mesos_fix
+ [Don't ignore spark.cores.max when using Mesos Coarse mode]
+
+ 5a4f483 Thu Nov 14 10:30:36 2013 -0800
+ Merge pull request #170 from liancheng/hadooprdd-doc-typo
+ [Fixed a scaladoc typo in HadoopRDD.scala]
+
+ d76f520 Thu Nov 14 10:25:48 2013 -0800
+ Merge pull request #171 from RIA-pierre-borckmans/master
+ [Fixed typos in the CDH4 distributions version codes.]
+
+ 2c39d80 Wed Nov 13 23:28:01 2013 -0800
+ Merge pull request #69 from jegonzal/MissingVertices
+ [Addressing issue in Graph creation]
+
+ 33b2dea Wed Nov 13 17:55:58 2013 -0800
+ Merge pull request #1 from ankurdave/MissingVertices
+ [During graph creation, create eTable earlier]
+
+ 2054c61 Wed Nov 13 16:49:55 2013 -0800
+ Merge pull request #159 from liancheng/dagscheduler-actor-refine
+ [Migrate the daemon thread started by DAGScheduler to Akka actor]
+
+ 9290e5b Wed Nov 13 16:48:44 2013 -0800
+ Merge pull request #165 from NathanHowell/kerberos-master
+ [spark-assembly.jar fails to authenticate with YARN ResourceManager]
+
+ a81fcb7 Wed Nov 13 10:41:01 2013 -0800
+ Merge pull request #68 from jegonzal/BitSetSetUntilBug
+ [Addressing bug in BitSet.setUntil(ind)]
+
+ 39af914 Wed Nov 13 08:39:05 2013 -0800
+ Merge pull request #166 from ahirreddy/simr-spark-ui
+ [SIMR Backend Scheduler will now write Spark UI URL to HDFS, which is to ...]
+
+ f49ea28 Tue Nov 12 19:13:39 2013 -0800
+ Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase
+ [Allow spark on yarn to be run from HDFS.]
+
+ 87f2f4e Tue Nov 12 16:26:09 2013 -0800
+ Merge pull request #153 from ankurdave/stop-spot-cluster
+ [Enable stopping and starting a spot cluster]
+
+ b8bf04a Tue Nov 12 16:19:50 2013 -0800
+ Merge pull request #160 from xiajunluan/JIRA-923
+ [Fix bug JIRA-923]
+
+ dfd1ebc Tue Nov 12 09:10:05 2013 -0800
+ Merge pull request #164 from tdas/kafka-fix
+ [Made block generator thread safe to fix Kafka bug.]
+
+ 2e8d450 Mon Nov 11 17:34:09 2013 -0800
+ Merge pull request #63 from jegonzal/VertexSetCleanup
+ [Cleanup of VertexSetRDD]
+
+ b8e294a Mon Nov 11 16:25:42 2013 -0800
+ Merge pull request #61 from ankurdave/pid2vid
+ [Shuffle replicated vertex attributes efficiently in columnar format]
+
+ 3d7277c Mon Nov 11 15:49:28 2013 -0800
+ Merge pull request #55 from ankurdave/aggregateNeighbors-variants
+ [Specialize mapReduceTriplets for accessing subsets of vertex attributes]
+
+ 23b53ef Mon Nov 11 12:30:02 2013 -0800
+ Merge pull request #156 from haoyuan/master
+ [add tachyon module]
+
+ 1a06f70 Sun Nov 10 10:54:44 2013 -0800
+ Merge pull request #60 from amplab/rxin
+ [Looks good to me.]
+
+ 58d4f6c Sun Nov 10 09:23:56 2013 -0800
+ Merge pull request #157 from rxin/kryo
+ [3 Kryo related changes.]
+
+ 3efc019 Sat Nov 9 17:53:49 2013 -0800
+ Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker
+   [Add spark-tools assembly to spark-class's classpath]
+
+ 87954d4 Sat Nov 9 17:53:25 2013 -0800
+ Merge pull request #154 from soulmachine/ClusterScheduler
+ [Replace the thread inside ClusterScheduler.start() with an Akka scheduler]
+
+ f6c9462 Sat Nov 9 16:14:45 2013 -0800
+ Merge pull request #58 from jegonzal/KryoMessages
+ [Kryo messages]
+
+ 83bf192 Sat Nov 9 15:40:29 2013 -0800
+ Merge pull request #155 from rxin/jobgroup
+ [Don't reset job group when a new job description is set.]
+
+ 8af99f2 Sat Nov 9 13:48:00 2013 -0800
+ Merge pull request #149 from tgravescs/fixSecureHdfsAccess
+ [Fix secure hdfs access for spark on yarn]
+
+ 72a601e Sat Nov 9 11:55:16 2013 -0800
+ Merge pull request #152 from rxin/repl
+ [Propagate SparkContext local properties from spark-repl caller thread to the repl execution thread.]
+
+ 6ee05be Thu Nov 7 19:12:41 2013 -0800
+ Merge pull request #49 from jegonzal/graphxshell
+ [GraphX Console with Logo Text]
+
+ a9f96b5 Thu Nov 7 18:56:56 2013 -0800
+ Merge pull request #56 from jegonzal/PregelAPIChanges
+ [Changing Pregel API to use mapReduceTriplets instead of aggregateNeighbors]
+
+ 5907137 Thu Nov 7 16:58:31 2013 -0800
+ Merge pull request #54 from amplab/rxin
+ [Converted for loops to while loops in EdgePartition.]
+
+ edf4164 Thu Nov 7 16:22:43 2013 -0800
+ Merge pull request #53 from amplab/rxin
+ [Added GraphX to classpath.]
+
+ c379e10 Thu Nov 7 16:01:47 2013 -0800
+ Merge pull request #51 from jegonzal/VertexSetRDD
+ [Reverting to Array based (materialized) output in VertexSetRDD]
+
+ 3d4ad84 Thu Nov 7 11:08:27 2013 -0800
+ Merge pull request #148 from squito/include_appId
+ [Include appId in executor cmd line args]
+
+ be7e8da Wed Nov 6 23:22:47 2013 -0800
+ Merge pull request #23 from jerryshao/multi-user
+ [Add Spark multi-user support for standalone mode and Mesos]
+
+ aadeda5 Wed Nov 6 13:27:47 2013 -0800
+ Merge pull request #144 from liancheng/runjob-clean
+ [Removed unused return value in SparkContext.runJob]
+
+ 951024f Wed Nov 6 09:36:14 2013 -0800
+ Merge pull request #145 from aarondav/sls-fix
+ [Attempt to fix SparkListenerSuite breakage]
+
+ bf4e613 Tue Nov 5 23:14:09 2013 -0800
+ Merge pull request #143 from rxin/scheduler-hang
+ [Ignore a task update status if the executor doesn't exist anymore.]
+
+ 9f7b9bb Tue Nov 5 10:42:19 2013 -0800
+ Merge pull request #142 from liancheng/dagscheduler-pattern-matching
+ [Using case class deep match to simplify code in DAGScheduler.processEvent]
+
+ ca44b51 Tue Nov 5 01:32:55 2013 -0800
+ Merge pull request #50 from amplab/mergemerge
+ [Merge Spark master into graphx]
+
+ 8106532 Mon Nov 4 20:47:14 2013 -0800
+ Merge pull request #139 from aarondav/shuffle-next
+ [Never store shuffle blocks in BlockManager]
+
+ 0b26a39 Mon Nov 4 18:22:06 2013 -0800
+ Merge pull request #128 from shimingfei/joblogger-doc
+ [add javadoc to JobLogger, and some small fix]
+
+ 7a26104 Mon Nov 4 17:54:06 2013 -0800
+ Merge pull request #130 from aarondav/shuffle
+ [Memory-optimized shuffle file consolidation]
+
+ b5dc339 Sun Nov 3 20:43:15 2013 -0800
+ Merge pull request #70 from rxin/hash1
+ [Fast, memory-efficient hash set, hash table implementations optimized for primitive data types.]
+
+ 41ead7a Sat Nov 2 14:41:50 2013 -0700
+ Merge pull request #133 from Mistobaan/link_fix
+ [update default github]
+
+ d407c07 Sat Nov 2 14:36:37 2013 -0700
+ Merge pull request #134 from rxin/readme
+ [Fixed a typo in Hadoop version in README.]
+
+ e7c7b80 Fri Nov 1 17:58:10 2013 -0700
+ Merge pull request #132 from Mistobaan/doc_fix
+ [fix persistent-hdfs]
+
+ d6d11c2 Fri Nov 1 15:40:33 2013 -0700
+ Merge pull request #129 from velvia/2013-11/document-local-uris
+ [Document & finish support for local: URIs]
+
+ 99bfcc9 Thu Oct 31 21:38:10 2013 -0700
+ Merge pull request #46 from jegonzal/VertexSetWithHashSet
+ [Switched VertexSetRDD and GraphImpl to use OpenHashSet]
+
+ fcaaf86 Thu Oct 31 18:27:30 2013 -0700
+ Merge pull request #44 from jegonzal/rxinBitSet
+ [Switching to VertexSetRDD to use @rxin BitSet and OpenHash ]
+
+ 3f3c727 Thu Oct 31 09:52:25 2013 -0700
+ Merge pull request #41 from jegonzal/LineageTracking
+ [Optimizing Graph Lineage]
+
+ 944f6b8 Thu Oct 31 09:40:35 2013 -0700
+ Merge pull request #43 from amplab/FixBitSetCastException
+ [Fix BitSet cast exception]
+
+ 8f1098a Wed Oct 30 20:11:48 2013 -0700
+ Merge pull request #117 from stephenh/avoid_concurrent_modification_exception
+ [Handle ConcurrentModificationExceptions in SparkContext init.]
+
+ dc9ce16 Wed Oct 30 17:01:56 2013 -0700
+ Merge pull request #126 from kayousterhout/local_fix
+ [Fixed incorrect log message in local scheduler]
+
+ 33de11c Wed Oct 30 16:58:27 2013 -0700
+ Merge pull request #124 from tgravescs/sparkHadoopUtilFix
+ [Pull SparkHadoopUtil out of SparkEnv (jira SPARK-886)]
+
+ a0c86c3 Wed Oct 30 15:34:39 2013 -0700
+ Merge pull request #38 from jegonzal/Documentation
+ [Improving Documentation]
+
+ 618c1f6 Wed Oct 30 12:03:44 2013 -0700
+ Merge pull request #125 from velvia/2013-10/local-jar-uri
+ [Add support for local:// URI scheme for addJars()]
+
+ 745dc42 Tue Oct 29 23:47:10 2013 -0700
+ Merge pull request #118 from JoshRosen/blockinfo-memory-usage
+ [Reduce the memory footprint of BlockInfo objects]
+
+ 06adf63 Tue Oct 29 16:43:46 2013 -0700
+ Merge pull request #33 from kellrott/master
+ [Fixing graph/pom.xml]
+
+ 098768e Tue Oct 29 15:08:36 2013 -0700
+ Merge pull request #37 from jegonzal/AnalyticsCleanup
+ [Updated Connected Components and Pregel Docs]
+
+ f0e23a0 Tue Oct 29 01:41:44 2013 -0400
+ Merge pull request #119 from soulmachine/master
+ [A little revise for the document]
+
+ aec9bf9 Sun Oct 27 19:32:00 2013 -0700
+ Merge pull request #112 from kayousterhout/ui_task_attempt_id
+ [Display both task ID and task attempt ID in UI, and rename taskId to taskAttemptId]
+
+ d4df474 Sun Oct 27 22:11:21 2013 -0400
+ Merge pull request #115 from aarondav/shuffle-fix
+ [Eliminate extra memory usage when shuffle file consolidation is disabled]
+
+ e018f2d Sat Oct 26 11:39:15 2013 -0700
+ Merge pull request #113 from pwendell/master
+ [Improve error message when multiple assembly jars are present.]
+
+ 662ee9f Sat Oct 26 11:35:59 2013 -0700
+ Merge pull request #114 from soulmachine/master
+ [A little revise for the document]
+
+ bab496c Fri Oct 25 18:28:43 2013 -0700
+ Merge pull request #108 from alig/master
+ [Changes to enable executing by using HDFS as a synchronization point between driver and executors, as well as ensuring executors exit properly.]
+
+ d307db6 Fri Oct 25 17:26:06 2013 -0700
+ Merge pull request #102 from tdas/transform
+ [Added new Spark Streaming operations]
+
+ 85e2cab Fri Oct 25 14:46:06 2013 -0700
+ Merge pull request #111 from kayousterhout/ui_name
+ [Properly display the name of a stage in the UI.]
+
+ ab35ec4 Fri Oct 25 10:16:18 2013 -0700
+ Merge pull request #110 from pwendell/master
+ [Exclude jopt from kafka dependency.]
+
+ 4f2c943 Thu Oct 24 22:32:02 2013 -0700
+ Merge pull request #109 from pwendell/master
+ [Adding Java/Java Streaming versions of `repartition` with associated tests]
+
+ 99ad4a6 Thu Oct 24 17:08:39 2013 -0700
+ Merge pull request #106 from pwendell/master
+ [Add a `repartition` operator.]
+
+ 5429d62 Thu Oct 24 11:15:55 2013 -0700
+ Merge pull request #107 from ScrapCodes/scala-2.10
+ [Updating to latest akka 2.2.3, which fixes our only failing test Driver Suite.]
+
+ 6f82c42 Thu Oct 24 11:09:46 2013 -0700
+ Merge pull request #34 from jegonzal/AnalyticsCleanup
+ [Analytics Cleanup]
+
+ 1dc776b Wed Oct 23 22:05:52 2013 -0700
+ Merge pull request #93 from kayousterhout/ui_new_state
+ [Show "GETTING_RESULTS" state in UI.]
+
+ c4b187d Wed Oct 23 21:56:18 2013 -0700
+ Merge pull request #105 from pwendell/doc-fix
+ [Fixing broken links in programming guide]
+
+ a098438 Wed Oct 23 18:03:08 2013 -0700
+ Merge pull request #103 from JoshRosen/unpersist-fix
+ [Add unpersist() to JavaDoubleRDD and JavaPairRDD.]
+
+ dd65964 Wed Oct 23 15:07:59 2013 -0700
+ Merge pull request #64 from prabeesh/master
+ [MQTT Adapter for Spark Streaming]
+
+ 452aa36 Tue Oct 22 23:15:33 2013 -0700
+ Merge pull request #97 from ewencp/pyspark-system-properties
+ [Add classmethod to SparkContext to set system properties.]
+
+ 9dfcf53 Tue Oct 22 16:01:42 2013 -0700
+ Merge pull request #100 from JoshRosen/spark-902
+ [Remove redundant Java Function call() definitions]
+
+ 49d5cda Tue Oct 22 15:38:02 2013 -0700
+ Merge pull request #30 from jegonzal/VertexSetRDD_Tests
+ [Testing and Documenting VertexSetRDD]
+
+ 97184de Tue Oct 22 13:10:14 2013 -0700
+ Merge pull request #99 from pwendell/master
+ [Use correct formatting for comments in StoragePerfTester]
+
+ c404adb Tue Oct 22 11:30:19 2013 -0700
+ Merge pull request #90 from pwendell/master
+ [SPARK-940: Do not directly pass Stage objects to SparkListener.]
+
+ aa9019f Tue Oct 22 10:30:02 2013 -0700
+ Merge pull request #98 from aarondav/docs
+ [Docs: Fix links to RDD API documentation]
+
+ a0e08f0 Tue Oct 22 10:20:43 2013 -0700
+ Merge pull request #82 from JoshRosen/map-output-tracker-refactoring
+ [Split MapOutputTracker into Master/Worker classes]
+
+ b84193c Mon Oct 21 23:35:13 2013 -0700
+ Merge pull request #92 from tgravescs/sparkYarnFixClasspath
+ [Fix the Worker to use CoarseGrainedExecutorBackend and modify classpath ...]
+
+ 731c94e Mon Oct 21 23:31:38 2013 -0700
+ Merge pull request #56 from jerryshao/kafka-0.8-dev
+ [Upgrade Kafka 0.7.2 to Kafka 0.8.0-beta1 for Spark Streaming]
+
+ 48952d6 Mon Oct 21 22:45:00 2013 -0700
+ Merge pull request #87 from aarondav/shuffle-base
+ [Basic shuffle file consolidation]
+
+ a51359c Mon Oct 21 20:33:29 2013 -0700
+ Merge pull request #95 from aarondav/perftest
+ [Minor: Put StoragePerfTester in org/apache/]
+
+ 39d2e9b Mon Oct 21 18:58:48 2013 -0700
+ Merge pull request #94 from aarondav/mesos-fix
+ [Fix mesos urls]
+
+ aa61bfd Mon Oct 21 11:57:05 2013 -0700
+ Merge pull request #88 from rxin/clean
+ [Made the following traits/interfaces/classes non-public:]
+
+ 35886f3 Sun Oct 20 22:20:32 2013 -0700
+ Merge pull request #41 from pwendell/shuffle-benchmark
+ [Provide Instrumentation for Shuffle Write Performance]
+
+ 5b9380e Sun Oct 20 21:03:51 2013 -0700
+ Merge pull request #89 from rxin/executor
+ [Don't setup the uncaught exception handler in local mode.]
+
+ 261bcf2 Sun Oct 20 17:59:51 2013 -0700
+ Merge pull request #80 from rxin/build
+ [Exclusion rules for Maven build files.]
+
+ edc5e3f Sun Oct 20 17:18:06 2013 -0700
+ Merge pull request #75 from JoshRosen/block-manager-cleanup
+ [Code de-duplication in BlockManager]
+
+ 2a7ae17 Sun Oct 20 11:45:21 2013 -0700
+ Merge pull request #84 from rxin/kill1
+ [Added documentation for setJobGroup. Also some minor cleanup in SparkContext.]
+
+ e4abb75 Sun Oct 20 09:38:37 2013 -0700
+ Merge pull request #85 from rxin/clean
+ [Moved the top level spark package object from spark to org.apache.spark]
+
+ 136b9b3 Sun Oct 20 02:58:26 2013 -0700
+ Basic shuffle file consolidation
+ [The Spark shuffle phase can produce a large number of files, as one file is created]
+
+ 747f538 Sat Oct 19 23:40:40 2013 -0700
+ Merge pull request #83 from ewencp/pyspark-accumulator-add-method
+ [Add an add() method to pyspark accumulators.]
+
+ 6511bbe Sat Oct 19 11:34:56 2013 -0700
+ Merge pull request #78 from mosharaf/master
+ [Removed BitTorrentBroadcast and TreeBroadcast.]
+
+ f628804 Fri Oct 18 23:19:42 2013 -0700
+ Merge pull request #76 from pwendell/master
+ [Clarify compression property.]
+
+ 599dcb0 Fri Oct 18 22:49:00 2013 -0700
+ Merge pull request #74 from rxin/kill
+ [Job cancellation via job group id.]
+
+ 9cf43cf Fri Oct 18 22:07:21 2013 -0700
+ Merge pull request #28 from jegonzal/VertexSetRDD
+ [Refactoring IndexedRDD to VertexSetRDD.]
+
+ f888a5b Fri Oct 18 22:06:58 2013 -0700
+ Merge pull request #29 from ankurdave/unit-tests
+ [Unit tests for Graph and GraphOps]
+
+ 8de9706 Fri Oct 18 20:32:39 2013 -0700
+ Merge pull request #66 from shivaram/sbt-assembly-deps
+ [Add SBT target to assemble dependencies]
+
+ e5316d0 Fri Oct 18 20:30:56 2013 -0700
+ Merge pull request #68 from mosharaf/master
+ [Faster and stable/reliable broadcast]
+
+ 8d528af Fri Oct 18 20:24:10 2013 -0700
+ Merge pull request #71 from aarondav/scdefaults
+ [Spark shell exits if it cannot create SparkContext]
+
+ 0794bd7 Fri Oct 18 18:59:58 2013 -0700
+ Merge pull request #27 from jegonzal/removed_indexedrdd_from_core
+ [Removing IndexedRDD changes for spark/core]
+
+ 099977f Thu Oct 17 14:17:08 2013 -0700
+ Merge pull request #26 from ankurdave/split-vTableReplicated
+ [Great work!]
+
+ fc26e5b Thu Oct 17 13:21:07 2013 -0700
+ Merge pull request #69 from KarthikTunga/master
+ [Fix for issue SPARK-627. Implementing --config argument in the scripts.]
+
+ cf64f63 Thu Oct 17 11:12:28 2013 -0700
+ Merge pull request #67 from kayousterhout/remove_tsl
+ [Removed TaskSchedulerListener interface.]
+
+ f9973ca Wed Oct 16 15:58:41 2013 -0700
+ Merge pull request #65 from tgravescs/fixYarn
+ [Fix yarn build]
+
+ 28e9c2a Tue Oct 15 23:59:56 2013 -0700
+ Merge pull request #63 from pwendell/master
+ [Fixing spark streaming example and a bug in examples build.]
+
+ 4e46fde Tue Oct 15 23:14:27 2013 -0700
+ Merge pull request #62 from harveyfeng/master
+ [Make TaskContext's stageId publicly accessible.]
+
+ b534606 Tue Oct 15 21:25:03 2013 -0700
+ Merge pull request #8 from vchekan/checkpoint-ttl-restore
+ [Serialize and restore spark.cleaner.ttl to savepoint]
+
+ 6dbd220 Tue Oct 15 19:02:57 2013 -0700
+ Merge pull request #34 from kayousterhout/rename
+ [Renamed StandaloneX to CoarseGrainedX.]
+
+ 983b83f Tue Oct 15 19:02:46 2013 -0700
+ Merge pull request #61 from kayousterhout/daemon_thread
+ [Unified daemon thread pools]
+
+ 3249e0e Tue Oct 15 14:12:33 2013 -0700
+ Merge pull request #59 from rxin/warning
+ [Bump up logging level to warning for failed tasks.]
+
+ 678dec6 Tue Oct 15 10:51:46 2013 -0700
+ Merge pull request #58 from hsaputra/update-pom-asf
+ [Update pom.xml to use version 13 of the ASF parent pom]
+
+ e33b183 Mon Oct 14 22:25:47 2013 -0700
+ Merge pull request #29 from rxin/kill
+ [Job killing]
+
+ 3b11f43 Mon Oct 14 14:20:01 2013 -0700
+ Merge pull request #57 from aarondav/bid
+ [Refactor BlockId into an actual type]
+
+ 9979690 Sat Oct 12 21:23:26 2013 -0700
+ Merge pull request #52 from harveyfeng/hadoop-closure
+ [Add an optional closure parameter to HadoopRDD instantiation to use when creating local JobConfs.]
+
+ dca8009 Fri Oct 11 16:08:15 2013 -0700
+ Merge pull request #54 from aoiwelle/remove_unused_imports
+ [Remove unnecessary mutable imports]
+
+ 0e5052b Fri Oct 11 15:45:16 2013 -0700
+ Merge pull request #51 from ScrapCodes/scala-2.10
+ [Scala 2.10]
+
+ fb25f32 Fri Oct 11 15:44:43 2013 -0700
+ Merge pull request #53 from witgo/master
+ [Add a zookeeper compile dependency to fix build in maven]
+
+ d6ead47 Fri Oct 11 15:43:01 2013 -0700
+ Merge pull request #32 from mridulm/master
+ [Address review comments, move to incubator spark]
+
+ c71499b Thu Oct 10 17:16:42 2013 -0700
+ Merge pull request #19 from aarondav/master-zk
+ [Standalone Scheduler fault tolerance using ZooKeeper]
+
+ 5867a82 Thu Oct 10 14:02:37 2013 -0700
+ Merge pull request #19 from dcrankshaw/master
+ [Merge canonical 2d partitioner and group edges into benchmarks]
+
+ cd08f73 Thu Oct 10 13:55:47 2013 -0700
+ Merge pull request #44 from mateiz/fast-map
+ [A fast and low-memory append-only map for shuffle operations]
+
+ 4b46d51 Thu Oct 10 13:35:36 2013 -0700
+ Merge pull request #17 from amplab/product2
+ [product 2 change]
+
+ 320418f Wed Oct 9 16:55:30 2013 -0700
+ Merge pull request #49 from mateiz/kryo-fix-2
+ [Fix Chill serialization of Range objects]
+
+ 215238c Wed Oct 9 16:49:44 2013 -0700
+ Merge pull request #50 from kayousterhout/SPARK-908
+ [Fix race condition in SparkListenerSuite (fixes SPARK-908).]
+
+ 7827efc Wed Oct 9 15:07:25 2013 -0700
+ Merge pull request #46 from mateiz/py-sort-update
+ [Fix PySpark docs and an overly long line of code after #38]
+
+ 7b3ae04 Wed Oct 9 12:14:19 2013 -0700
+ Merge pull request #45 from pwendell/metrics_units
+ [Use standard abbreviation in metrics description (MBytes -> MB)]
+
+ b4fa11f Wed Oct 9 11:59:47 2013 -0700
+ Merge pull request #38 from AndreSchumacher/pyspark_sorting
+ [SPARK-705: implement sortByKey() in PySpark]
+
+ 19d445d Wed Oct 9 11:08:34 2013 -0700
+ Merge pull request #22 from GraceH/metrics-naming
+ [SPARK-900 Use coarser grained naming for metrics]
+
+ 7d50f9f Wed Oct 9 10:32:42 2013 -0700
+ Merge pull request #35 from MartinWeindel/scala-2.10
+ [Fixing inconsistencies and warnings on Scala 2.10 branch]
+
+ 3218fa7 Tue Oct 8 23:44:55 2013 -0700
+ Merge pull request #4 from MLnick/implicit-als
+ [Adding algorithm for implicit feedback data to ALS]
+
+ e67d5b9 Tue Oct 8 22:57:38 2013 -0700
+ Merge pull request #43 from mateiz/kryo-fix
+ [Don't allocate Kryo buffers unless needed]
+
+ ea34c52 Mon Oct 7 20:45:58 2013 -0700
+ Merge pull request #42 from pwendell/shuffle-read-perf
+ [Fix inconsistent and incorrect log messages in shuffle read path]
+
+ 02f37ee Mon Oct 7 15:48:52 2013 -0700
+ Merge pull request #39 from pwendell/master
+ [Adding Shark 0.7.1 to EC2 scripts]
+
+ 213b70a Mon Oct 7 10:54:22 2013 -0700
+ Merge pull request #31 from sundeepn/branch-0.8
+ [Resolving package conflicts with hadoop 0.23.9]
+
+ d585613 Sat Oct 5 22:57:05 2013 -0700
+ Merge pull request #37 from pwendell/merge-0.8
+ [merge in remaining changes from `branch-0.8`]
+
+ 4a25b11 Sat Oct 5 19:28:55 2013 -0700
+ Merge pull request #20 from harveyfeng/hadoop-config-cache
+ [Allow users to pass broadcasted Configurations and cache InputFormats across Hadoop file reads.]
+
+ 8fc68d0 Sat Oct 5 17:24:35 2013 -0700
+ Merge pull request #36 from pwendell/versions
+ [Bumping EC2 default version in master to .]
+
+ 100222b Sat Oct 5 13:38:59 2013 -0700
+ Merge pull request #27 from davidmccauley/master
+ [SPARK-920/921 - JSON endpoint updates]
+
+ 0864193 Sat Oct 5 13:25:18 2013 -0700
+ Merge pull request #33 from AndreSchumacher/pyspark_partition_key_change
+ [Fixing SPARK-602: PythonPartitioner]
+
+ 61ffcde Fri Oct 4 10:52:17 2013 -0700
+ Merge pull request #15 from dcrankshaw/master
+ [Add synthetic generators]
+
+ 3fe12cc Fri Oct 4 10:51:28 2013 -0700
+ Merge pull request #946 from ScrapCodes/scala-2.10
+   [Fixed non-termination of Executor backend when sc.stop is not called and System.exit is used instead.]
+
+ 232765f Thu Oct 3 12:00:48 2013 -0700
+ Merge pull request #26 from Du-Li/master
+ [fixed a wildcard bug in make-distribution.sh; ask sbt to check local]
+
+ 405e69b Thu Oct 3 10:52:41 2013 -0700
+ Merge pull request #25 from CruncherBigData/master
+ [Update README: updated the link]
+
+ 49dbfcc Thu Oct 3 10:52:06 2013 -0700
+ Merge pull request #28 from tgravescs/sparYarnAppName
+ [Allow users to set the application name for Spark on Yarn]
+
+ e597ea3 Wed Oct 2 21:14:24 2013 -0700
+ Merge pull request #10 from kayousterhout/results_through-bm
+ [Send Task results through the block manager when larger than Akka frame size (fixes SPARK-669).]
+
+ 714fdab Thu Sep 26 14:28:55 2013 -0700
+ Merge pull request #17 from rxin/optimize
+ [Remove -optimize flag]
+
+ 13eced7 Thu Sep 26 14:18:19 2013 -0700
+ Merge pull request #16 from pwendell/master
+ [Bug fix in master build]
+
+ 70a0b99 Thu Sep 26 14:11:54 2013 -0700
+ Merge pull request #14 from kayousterhout/untangle_scheduler
+ [Improved organization of scheduling packages.]
+
+ afd03b2 Thu Sep 26 14:09:55 2013 -0700
+ Merge pull request #943 from ScrapCodes/scala-2.10
+ [Scala 2.10 with akka 2.2]
+
+ 76677b8 Thu Sep 26 14:03:46 2013 -0700
+ Merge pull request #670 from jey/ec2-ssh-improvements
+ [EC2 SSH improvements]
+
+ c514cd1 Thu Sep 26 13:48:20 2013 -0700
+ Merge pull request #930 from holdenk/master
+ [Add mapPartitionsWithIndex]
+
+ 560ee5c Thu Sep 26 11:27:34 2013 -0700
+ Merge pull request #7 from wannabeast/memorystore-fixes
+ [some minor fixes to MemoryStore]
+
+ 6566a19 Thu Sep 26 08:01:04 2013 -0700
+ Merge pull request #9 from rxin/limit
+ [Smarter take/limit implementation.]
+
+ 834686b Sun Sep 22 15:06:48 2013 -0700
+ Merge pull request #928 from jerryshao/fairscheduler-refactor
+ [Refactor FairSchedulableBuilder]
+
+ a2ea069 Sat Sep 21 23:04:42 2013 -0700
+ Merge pull request #937 from jerryshao/localProperties-fix
+ [Fix PR926 local properties issues in Spark Streaming like scenarios]
+
+ f06f2da Sat Sep 21 22:43:34 2013 -0700
+ Merge pull request #941 from ilikerps/master
+ [Add "org.apache." prefix to packages in spark-class]
+
+ 7bb12a2 Sat Sep 21 22:42:46 2013 -0700
+ Merge pull request #940 from ankurdave/clear-port-properties-after-tests
+ [After unit tests, clear port properties unconditionally]
+
+ a00317b Fri Sep 20 11:29:31 2013 -0700
+ Merge pull request #1 from ankurdave/aggregateNeighbors-returns-graph
+ [Return Graph from Graph.aggregateNeighbors]
+
+ 6a5e665 Thu Sep 19 22:41:44 2013 -0700
+ Merge pull request #3 from ankurdave/clear-port-properties-after-tests
+   [After unit tests, clear port properties unconditionally]
+
+ 68ad33a Thu Sep 19 21:30:27 2013 -0700
+ Merge pull request #2 from ankurdave/package-fixes
+ [Package fixes (spark.graph -> org.apache.spark.graph)]
+
+ cd7222c Thu Sep 19 14:21:24 2013 -0700
+ Merge pull request #938 from ilikerps/master
+ [Fix issue with spark_ec2 seeing empty security groups]
+
+ e0dd24d Sat Aug 31 17:54:15 2013 -0700
+ Merge pull request #879 from AndreSchumacher/scala-2.10
+ [PySpark: replacing class manifest by class tag for Scala 2.10.2 in rdd.py]
+
+ ad61349 Thu Jul 18 13:53:48 2013 -0700
+ Merge pull request #709 from ScrapCodes/scala-2.10
+ [Fixed warnings in scala 2.10 branch.]
+
+ a289ded Mon Jul 15 15:59:43 2013 -0700
+ Merge pull request #700 from ScrapCodes/scala-2.10
+ [Scala 2.10 ]
+
+ 1044a95 Fri Jun 14 20:04:24 2013 -0700
+ Merge pull request #652 from ScrapCodes/scala-2.10
+ [Fixed maven build without netty fix]
+
+ 4b57f83 Sat Apr 20 10:40:07 2013 -0700
+ Merge pull request #535 from ScrapCodes/scala-2.10-repl-port
+ [porting of repl to scala-2.10]
+
+ 73b3fee Sun Jan 20 10:11:49 2013 -0800
+ Merge pull request #388 from folone/master
+ [Updated maven build configuration for Scala 2.10]
+
+ 20adf27 Tue Jan 15 11:03:49 2013 -0800
+ Merge pull request #371 from folone/master
+ [Scala 2.10.0]
+
+Release 0.8.0-incubating
+
+ 2aff798 Sun Sep 15 14:05:04 2013 -0700
+ Merge pull request #933 from jey/yarn-typo-fix
+ [Fix typo in Maven build docs]
+
+ dbd2c4f Sun Sep 15 13:20:41 2013 -0700
+ Merge pull request #932 from pwendell/mesos-version
+ [Bumping Mesos version to 0.13.0]
+
+ 9fb0b9d Sun Sep 15 13:02:53 2013 -0700
+ Merge pull request #931 from pwendell/yarn-docs
+ [Explain yarn.version in Maven build docs]
+
+ c4c1db2 Fri Sep 13 19:52:12 2013 -0700
+ Merge pull request #929 from pwendell/master
+ [Use different Hadoop version for YARN artifacts.]
+
+ a310de6 Wed Sep 11 19:36:11 2013 -0700
+ Merge pull request #926 from kayousterhout/dynamic
+ [Changed localProperties to use ThreadLocal (not DynamicVariable).]
+
+ 58c7d8b Wed Sep 11 17:33:42 2013 -0700
+ Merge pull request #927 from benh/mesos-docs
+ [Updated Spark on Mesos documentation.]
+
+ 91a59e6 Wed Sep 11 10:21:48 2013 -0700
+ Merge pull request #919 from mateiz/jets3t
+ [Add explicit jets3t dependency, which is excluded in hadoop-client]
+
+ b9128d3 Wed Sep 11 10:03:06 2013 -0700
+ Merge pull request #922 from pwendell/port-change
+ [Change default port number from 3030 to 4030.]
+
+ e07eef8 Wed Sep 11 07:35:39 2013 -0700
+ Merge pull request #925 from davidmccauley/master
+ [SPARK-894 - Not all WebUI fields delivered VIA JSON]
+
+ 8432f27 Tue Sep 10 23:19:53 2013 -0700
+ Merge pull request #923 from haoyuan/master
+ [fix run-example script]
+
+ d40f140 Tue Sep 10 23:05:29 2013 -0700
+ Merge pull request #921 from pwendell/master
+ [Fix HDFS access bug with assembly build.]
+
+ 0a6c051 Mon Sep 9 23:37:57 2013 -0700
+ Merge pull request #918 from pwendell/branch-0.8
+ [Update versions for 0.8.0 release.]
+
+ 8c14f4b Mon Sep 9 22:07:58 2013 -0700
+ Merge pull request #917 from pwendell/master
+ [Document libgfortran dependency for MLBase]
+
+ c81377b Mon Sep 9 20:16:19 2013 -0700
+ Merge pull request #915 from ooyala/master
+ [Get rid of / improve ugly NPE when Utils.deleteRecursively() fails]
+
+ 61d2a01 Mon Sep 9 18:21:01 2013 -0700
+ Merge pull request #916 from mateiz/mkdist-fix
+ [Fix copy issue in https://github.com/mesos/spark/pull/899]
+
+ a85758c Mon Sep 9 13:45:40 2013 -0700
+ Merge pull request #907 from stephenh/document_coalesce_shuffle
+ [Add better docs for coalesce.]
+
+ 084fc36 Mon Sep 9 12:01:35 2013 -0700
+ Merge pull request #912 from tgravescs/ganglia-pom
+ [Add metrics-ganglia to core pom file]
+
+ 0456384 Mon Sep 9 09:57:54 2013 -0700
+ Merge pull request #911 from pwendell/ganglia-sink
+   [Adding Maven dependency for Ganglia]
+
+ bf984e2 Sun Sep 8 23:50:24 2013 -0700
+ Merge pull request #890 from mridulm/master
+ [Fix hash bug]
+
+ e9d4f44 Sun Sep 8 23:36:48 2013 -0700
+ Merge pull request #909 from mateiz/exec-id-fix
+ [Fix an instance where full standalone mode executor IDs were passed to]
+
+ 2447b1c Sun Sep 8 22:27:49 2013 -0700
+ Merge pull request #910 from mateiz/ml-doc-tweaks
+ [Small tweaks to MLlib docs]
+
+ 7d3204b Sun Sep 8 21:39:12 2013 -0700
+ Merge pull request #905 from mateiz/docs2
+ [Job scheduling and cluster mode docs]
+
+ f1f8371 Sun Sep 8 21:26:11 2013 -0700
+ Merge pull request #896 from atalwalkar/master
+ [updated content]
+
+ f68848d Sun Sep 8 18:32:16 2013 -0700
+ Merge pull request #906 from pwendell/ganglia-sink
+ [Clean-up of Metrics Code/Docs and Add Ganglia Sink]
+
+ 0b95799 Sun Sep 8 15:30:16 2013 -0700
+ Merge pull request #908 from pwendell/master
+ [Fix target JVM version in scala build]
+
+ 04cfb3a Sun Sep 8 10:33:20 2013 -0700
+ Merge pull request #898 from ilikerps/660
+ [SPARK-660: Add StorageLevel support in Python]
+
+ 38488ac Sun Sep 8 00:28:53 2013 -0700
+ Merge pull request #900 from pwendell/cdh-docs
+ [Provide docs to describe running on CDH/HDP cluster.]
+
+ a8e376e Sat Sep 7 21:16:01 2013 -0700
+ Merge pull request #904 from pwendell/master
+ [Adding Apache license to two files]
+
+ cfde85e Sat Sep 7 13:53:08 2013 -0700
+ Merge pull request #901 from ooyala/2013-09/0.8-doc-changes
+ [0.8 Doc changes for make-distribution.sh]
+
+ 4a7813a Sat Sep 7 13:52:24 2013 -0700
+ Merge pull request #903 from rxin/resulttask
+ [Fixed the bug that ResultTask was not properly deserializing outputId.]
+
+ afe46ba Sat Sep 7 07:28:51 2013 -0700
+ Merge pull request #892 from jey/fix-yarn-assembly
+ [YARN build fixes]
+
+ 2eebeff Fri Sep 6 15:25:22 2013 -0700
+ Merge pull request #897 from pwendell/master
+ [Docs describing Spark monitoring and instrumentation]
+
+ ddcb9d3 Thu Sep 5 23:54:09 2013 -0700
+ Merge pull request #895 from ilikerps/821
+ [SPARK-821: Don't cache results when action run locally on driver]
+
+ 699c331 Thu Sep 5 20:21:53 2013 -0700
+ Merge pull request #891 from xiajunluan/SPARK-864
+   [[SPARK-864] DAGScheduler Exception if we delete Worker and StandaloneExecutorBackend then add Worker]
+
+ 5c7494d Wed Sep 4 22:47:03 2013 -0700
+ Merge pull request #893 from ilikerps/master
+ [SPARK-884: Add unit test to validate Spark JSON output]
+
+ a547866 Wed Sep 4 21:11:56 2013 -0700
+ Merge pull request #894 from c0s/master
+ [Updating assembly README to reflect recent changes in the build.]
+
+ 19f7027 Tue Sep 3 14:29:10 2013 -0700
+ Merge pull request #878 from tgravescs/yarnUILink
+ [Link the Spark UI up to the Yarn UI ]
+
+ 68df246 Tue Sep 3 13:01:17 2013 -0700
+ Merge pull request #889 from alig/master
+ [Return the port the WebUI is bound to (useful if port 0 was used)]
+
+ d3dd48f Mon Sep 2 16:44:54 2013 -0700
+ Merge pull request #887 from mateiz/misc-fixes
+ [Miscellaneous fixes for 0.8]
+
+ 636fc0c Mon Sep 2 11:20:39 2013 -0700
+ Merge pull request #886 from mateiz/codec
+ [Fix spark.io.compression.codec and change default codec to LZF]
+
+ d9a53b9 Sun Sep 1 22:12:30 2013 -0700
+ Merge pull request #885 from mateiz/win-py
+ [Allow PySpark to run on Windows]
+
+ 3c520fe Sun Sep 1 17:26:55 2013 -0700
+ Merge pull request #884 from mateiz/win-fixes
+ [Run script fixes for Windows after package & assembly change]
+
+ f957c26 Sun Sep 1 14:53:57 2013 -0700
+ Merge pull request #882 from mateiz/package-rename
+ [Rename spark package to org.apache.spark]
+
+ a30fac1 Sun Sep 1 12:27:50 2013 -0700
+ Merge pull request #883 from alig/master
+ [Don't require the spark home environment variable to be set for standalone mode (change needed by SIMR)]
+
+ 03cc765 Sun Sep 1 10:20:56 2013 -0700
+ Merge pull request #881 from pwendell/master
+ [Extend QuickStart to include next steps]
+
+ 0e9565a Sat Aug 31 18:55:41 2013 -0700
+ Merge pull request #880 from mateiz/ui-tweaks
+ [Various UI tweaks]
+
+ 2b29a1d Sat Aug 31 17:49:45 2013 -0700
+ Merge pull request #877 from mateiz/docs
+ [Doc improvements for 0.8]
+
+ 6edef9c Sat Aug 31 13:39:24 2013 -0700
+ Merge pull request #861 from AndreSchumacher/pyspark_sampling_function
+ [Pyspark sampling function]
+
+ fd89835 Sat Aug 31 13:18:12 2013 -0700
+ Merge pull request #870 from JoshRosen/spark-885
+ [Don't send SIGINT / ctrl-c to Py4J gateway subprocess]
+
+ 618f0ec Fri Aug 30 18:17:13 2013 -0700
+ Merge pull request #869 from AndreSchumacher/subtract
+ [PySpark: implementing subtractByKey(), subtract() and keyBy()]
+
+ 94bb7fd Fri Aug 30 12:05:13 2013 -0700
+ Merge pull request #876 from mbautin/master_hadoop_rdd_conf
+ [Make HadoopRDD's configuration accessible]
+
+ 9e17e45 Fri Aug 30 00:22:53 2013 -0700
+ Merge pull request #875 from shivaram/build-fix
+ [Fix broken build by removing addIntercept]
+
+ 016787d Thu Aug 29 22:15:14 2013 -0700
+ Merge pull request #863 from shivaram/etrain-ridge
+ [Adding linear regression and refactoring Ridge regression to use SGD]
+
+ 852d810 Thu Aug 29 22:13:15 2013 -0700
+ Merge pull request #819 from shivaram/sgd-cleanup
+ [Change SVM to use {0,1} labels]
+
+ ca71620 Thu Aug 29 21:51:14 2013 -0700
+ Merge pull request #857 from mateiz/assembly
+ [Change build and run instructions to use assemblies]
+
+ 1528776 Thu Aug 29 21:30:47 2013 -0700
+ Merge pull request #874 from jerryshao/fix-report-bug
+ [Fix removed block zero size log reporting]
+
+ abdbacf Wed Aug 28 21:11:31 2013 -0700
+ Merge pull request #871 from pwendell/expose-local
+ [Expose `isLocal` in SparkContext.]
+
+ afcade3 Wed Aug 28 20:15:40 2013 -0700
+ Merge pull request #873 from pwendell/master
+ [Hot fix for command runner]
+
+ baa84e7 Wed Aug 28 12:44:46 2013 -0700
+ Merge pull request #865 from tgravescs/fixtmpdir
+ [Spark on Yarn should use yarn approved directories for spark.local.dir and tmp]
+
+ cd043cf Tue Aug 27 19:50:32 2013 -0700
+ Merge pull request #867 from tgravescs/yarnenvconfigs
+ [Spark on Yarn allow users to specify environment variables ]
+
+ 898da7e Mon Aug 26 20:40:49 2013 -0700
+ Merge pull request #859 from ianbuss/sbt_opts
+ [Pass SBT_OPTS environment through to sbt_launcher]
+
+ 17bafea Mon Aug 26 11:59:32 2013 -0700
+ Merge pull request #864 from rxin/json1
+ [Revert json library change]
+
+ f9fc5c1 Sat Aug 24 15:19:56 2013 -0700
+ Merge pull request #603 from pwendell/ec2-updates
+ [Several Improvements to EC2 Scripts]
+
+ d282c1e Fri Aug 23 11:20:20 2013 -0700
+ Merge pull request #860 from jey/sbt-ide-fixes
+ [Fix IDE project generation under SBT]
+
+ 5a6ac12 Thu Aug 22 22:08:03 2013 -0700
+ Merge pull request #701 from ScrapCodes/documentation-suggestions
+ [Documentation suggestions for spark streaming.]
+
+ 46ea0c1 Thu Aug 22 15:57:28 2013 -0700
+ Merge pull request #814 from holdenk/master
+ [Create less instances of the random class during ALS initialization.]
+
+ 9ac3d62 Thu Aug 22 15:51:10 2013 -0700
+ Merge pull request #856 from jey/sbt-fix-hadoop-0.23.9
+ [Re-add removed dependency to fix build under Hadoop 0.23.9]
+
+ ae8ba83 Thu Aug 22 10:14:54 2013 -0700
+ Merge pull request #855 from jey/update-build-docs
+ [Update build docs]
+
+ 8a36fd0 Thu Aug 22 10:13:35 2013 -0700
+ Merge pull request #854 from markhamstra/pomUpdate
+ [Synced sbt and maven builds to use the same dependencies, etc.]
+
+ c2d00f1 Thu Aug 22 10:13:03 2013 -0700
+ Merge pull request #832 from alig/coalesce
+ [Coalesced RDD with locality]
+
+ e6d66c8 Wed Aug 21 17:44:31 2013 -0700
+ Merge pull request #853 from AndreSchumacher/double_rdd
+ [Implementing SPARK-838: Add DoubleRDDFunctions methods to PySpark]
+
+ 2905611 Tue Aug 20 17:36:14 2013 -0700
+ Merge pull request #851 from markhamstra/MutablePairTE
+ [Removed meaningless types]
+
+ d61337f Tue Aug 20 10:06:06 2013 -0700
+ Merge pull request #844 from markhamstra/priorityRename
+ [Renamed 'priority' to 'jobId' and assorted minor changes]
+
+ 8cae72e Mon Aug 19 23:40:04 2013 -0700
+ Merge pull request #828 from mateiz/sched-improvements
+ [Scheduler fixes and improvements]
+
+ efeb142 Mon Aug 19 19:23:50 2013 -0700
+ Merge pull request #849 from mateiz/web-fixes
+ [Small fixes to web UI]
+
+ abdc1f8 Mon Aug 19 18:30:56 2013 -0700
+ Merge pull request #847 from rxin/rdd
+ [Allow subclasses of Product2 in all key-value related classes]
+
+ 8fa0747 Sun Aug 18 17:02:54 2013 -0700
+ Merge pull request #840 from AndreSchumacher/zipegg
+ [Implementing SPARK-878 for PySpark: adding zip and egg files to context ...]
+
+ 1e137a5 Sat Aug 17 22:22:32 2013 -0700
+ Merge pull request #846 from rxin/rdd
+ [Two minor RDD refactoring]
+
+ e89ffc7 Fri Aug 16 14:02:34 2013 -0700
+ Merge pull request #839 from jegonzal/zip_partitions
+ [Currying RDD.zipPartitions ]
+
+ 1fb1b09 Thu Aug 15 22:15:05 2013 -0700
+ Merge pull request #841 from rxin/json
+ [Use the JSON formatter from Scala library and removed dependency on lift-json.]
+
+ c69c489 Thu Aug 15 20:55:09 2013 -0700
+ Merge pull request #843 from Reinvigorate/bug-879
+ [fixing typo in conf/slaves]
+
+ 230ab27 Thu Aug 15 17:45:17 2013 -0700
+ Merge pull request #834 from Daemoen/master
+ [Updated json output to allow for display of worker state]
+
+ 659553b Thu Aug 15 16:56:31 2013 -0700
+ Merge pull request #836 from pwendell/rename
+ [Rename `memoryBytesToString` and `memoryMegabytesToString`]
+
+ 28369ff Thu Aug 15 16:44:02 2013 -0700
+ Merge pull request #829 from JoshRosen/pyspark-unit-tests-python-2.6
+ [Fix PySpark unit tests on Python 2.6]
+
+ 1a13460 Thu Aug 15 15:50:44 2013 -0700
+ Merge pull request #833 from rxin/ui
+ [Various UI improvements.]
+
+ 044a088 Wed Aug 14 20:43:49 2013 -0700
+ Merge pull request #831 from rxin/scheduler
+ [A few small scheduler / job description changes.]
+
+ 839f2d4 Wed Aug 14 16:17:23 2013 -0700
+ Merge pull request #822 from pwendell/ui-features
+ [Adding GC Stats to TaskMetrics (and three small fixes)]
+
+ 63446f9 Wed Aug 14 00:17:07 2013 -0700
+ Merge pull request #826 from kayousterhout/ui_fix
+ [Fixed 2 bugs in executor UI (incl. SPARK-877)]
+
+ 3f14cba Tue Aug 13 20:09:51 2013 -0700
+ Merge pull request #825 from shivaram/maven-repl-fix
+ [Set SPARK_CLASSPATH for maven repl tests]
+
+ 596adc6 Tue Aug 13 19:41:34 2013 -0700
+ Merge pull request #824 from mateiz/mesos-0.12.1
+ [Update to Mesos 0.12.1]
+
+ d316af9 Tue Aug 13 15:31:01 2013 -0700
+ Merge pull request #821 from pwendell/print-launch-command
+ [Print run command to stderr rather than stdout]
+
+ 1f79d21 Tue Aug 13 15:23:54 2013 -0700
+ Merge pull request #818 from kayousterhout/killed_fix
+ [Properly account for killed tasks.]
+
+ 622f83c Tue Aug 13 09:58:52 2013 -0700
+ Merge pull request #817 from pwendell/pr_784
+ [Minor clean-up in metrics servlet code]
+
+ a0133bf Tue Aug 13 09:28:18 2013 -0700
+ Merge pull request #784 from jerryshao/dev-metrics-servlet
+ [Add MetricsServlet for Spark metrics system]
+
+ e2fdac6 Mon Aug 12 21:26:59 2013 -0700
+ Merge pull request #802 from stayhf/SPARK-760-Python
+ [Simple PageRank algorithm implementation in Python for SPARK-760]
+
+ d3525ba Mon Aug 12 21:02:39 2013 -0700
+ Merge pull request #813 from AndreSchumacher/add_files_pyspark
+ [Implementing SPARK-865: Add the equivalent of ADD_JARS to PySpark]
+
+ 9e02da2 Mon Aug 12 20:22:27 2013 -0700
+ Merge pull request #812 from shivaram/maven-mllib-tests
+ [Create SparkContext in beforeAll for MLLib tests]
+
+ 65d0d91 Mon Aug 12 19:00:57 2013 -0700
+ Merge pull request #807 from JoshRosen/guava-optional
+ [Change scala.Option to Guava Optional in Java APIs]
+
+ 4346f0a Mon Aug 12 12:12:12 2013 -0700
+ Merge pull request #809 from shivaram/sgd-cleanup
+ [Clean up scaladoc in ML Lib.]
+
+ ea1b4ba Mon Aug 12 08:09:58 2013 -0700
+ Merge pull request #806 from apivovarov/yarn-205
+ [Changed yarn.version to 2.0.5 in pom.xml]
+
+ 2a39d2c Sun Aug 11 20:35:09 2013 -0700
+ Merge pull request #810 from pwendell/dead_doc_code
+ [Remove now dead code inside of docs]
+
+ e5b9ed2 Sun Aug 11 17:22:47 2013 -0700
+ Merge pull request #808 from pwendell/ui_compressed_bytes
+ [Report compressed bytes read when calculating TaskMetrics]
+
+ 3796486 Sun Aug 11 14:51:47 2013 -0700
+ Merge pull request #805 from woggle/hadoop-rdd-jobconf
+ [Use new Configuration() instead of slower new JobConf() in SerializableWritable]
+
+ ff9ebfa Sun Aug 11 10:52:55 2013 -0700
+ Merge pull request #762 from shivaram/sgd-cleanup
+ [Refactor SGD options into a new class.]
+
+ 95c62ca Sun Aug 11 10:30:52 2013 -0700
+ Merge pull request #804 from apivovarov/master
+ [Fixed path to JavaALS.java and JavaKMeans.java, fixed hadoop2-yarn profi...]
+
+ 06e4f2a Sat Aug 10 18:06:23 2013 -0700
+ Merge pull request #789 from MLnick/master
+ [Adding Scala version of PageRank example]
+
+ 71c63de Sat Aug 10 10:21:20 2013 -0700
+ Merge pull request #795 from mridulm/master
+ [Fix bug reported in PR 791 : a race condition in ConnectionManager and Connection]
+
+ d17eeb9 Sat Aug 10 09:02:27 2013 -0700
+ Merge pull request #785 from anfeng/master
+ [expose HDFS file system stats via Executor metrics]
+
+ dce5e47 Fri Aug 9 21:53:45 2013 -0700
+ Merge pull request #800 from dlyubimov/HBASE_VERSION
+ [Pull HBASE_VERSION in the head of sbt build]
+
+ cd247ba Fri Aug 9 20:41:13 2013 -0700
+ Merge pull request #786 from shivaram/mllib-java
+ [Java fixes, tests and examples for ALS, KMeans]
+
+ b09d4b7 Fri Aug 9 13:17:08 2013 -0700
+ Merge pull request #799 from woggle/sync-fix
+ [Remove extra synchronization in ResultTask]
+
+ 0bc63bf Fri Aug 9 13:16:25 2013 -0700
+ Merge pull request #801 from pwendell/print-launch-command
+ [Print launch command [Branch 0.8 version]]
+
+ cc6b92e Fri Aug 9 13:00:33 2013 -0700
+ Merge pull request #775 from pwendell/print-launch-command
+ [Log the launch command for Spark daemons]
+
+ f94fc75 Fri Aug 9 10:04:03 2013 -0700
+ Merge pull request #788 from shane-huang/sparkjavaopts
+ [For standalone mode, add worker local env setting of SPARK_JAVA_OPTS as ...]
+
+ 63b6e02 Thu Aug 8 14:02:02 2013 -0700
+ Merge pull request #797 from mateiz/chill-0.3.1
+ [Update to Chill 0.3.1]
+
+ 9955e5a Thu Aug 8 11:03:38 2013 -0700
+ Merge pull request #796 from pwendell/bootstrap-design
+ [Bootstrap re-design]
+
+ 5133e4b Wed Aug 7 15:50:45 2013 -0700
+ Merge pull request #790 from kayousterhout/fix_throughput
+ [Fixed issue in UI that decreased scheduler throughput by 5x or more]
+
+ 3c8478e Tue Aug 6 23:25:03 2013 -0700
+ Merge pull request #747 from mateiz/improved-lr
+ [Update the Python logistic regression example]
+
+ 6b043a6 Tue Aug 6 22:31:02 2013 -0700
+ Merge pull request #724 from dlyubimov/SPARK-826
+ [SPARK-826: fold(), reduce(), collect() always attempt to use java serialization]
+
+ de6c4c9 Tue Aug 6 17:09:50 2013 -0700
+ Merge pull request #787 from ash211/master
+ [Update spark-standalone.md]
+
+ df4d10d Tue Aug 6 15:44:05 2013 -0700
+ Merge pull request #779 from adatao/adatao-global-SparkEnv
+ [[HOTFIX] Extend thread safety for SparkEnv.get()]
+
+ d2b0f0c Tue Aug 6 14:49:39 2013 -0700
+ Merge pull request #770 from stayhf/SPARK-760-Java
+ [Simple PageRank algorithm implementation in Java for SPARK-760]
+
+ d031f73 Mon Aug 5 22:33:00 2013 -0700
+ Merge pull request #782 from WANdisco/master
+ [SHARK-94 Log the files computed by HadoopRDD and NewHadoopRDD]
+
+ 1b63dea Mon Aug 5 22:21:26 2013 -0700
+ Merge pull request #769 from markhamstra/NegativeCores
+ [SPARK-847 + SPARK-845: Zombie workers and negative cores]
+
+ 828aff7 Mon Aug 5 21:37:33 2013 -0700
+ Merge pull request #776 from gingsmith/master
+ [adding matrix factorization data generator]
+
+ 8b27789 Mon Aug 5 19:14:52 2013 -0700
+ Merge pull request #774 from pwendell/job-description
+ [Show user-defined job name in UI]
+
+ 550b0cf Mon Aug 5 12:10:32 2013 -0700
+ Merge pull request #780 from cybermaster/master
+ [SPARK-850]
+
+ 22abbc1 Fri Aug 2 16:37:59 2013 -0700
+ Merge pull request #772 from karenfeng/ui-843
+ [Show app duration]
+
+ 9d7dfd2 Thu Aug 1 17:41:58 2013 -0700
+ Merge pull request #743 from pwendell/app-metrics
+ [Add application metrics to standalone master]
+
+ 6d7afd7 Thu Aug 1 17:13:28 2013 -0700
+ Merge pull request #768 from pwendell/pr-695
+ [Minor clean-up of fair scheduler UI]
+
+ 5e7b38f Thu Aug 1 14:59:33 2013 -0700
+ Merge pull request #695 from xiajunluan/pool_ui
+ [Enhance job ui in spark ui system with adding pool information]
+
+ 0a96493 Thu Aug 1 11:27:17 2013 -0700
+ Merge pull request #760 from karenfeng/heading-update
+ [Clean up web UI page headers]
+
+ cb7dd86 Thu Aug 1 11:06:10 2013 -0700
+ Merge pull request #758 from pwendell/master-json
+ [Add JSON path to master index page]
+
+ 58756b7 Wed Jul 31 23:45:41 2013 -0700
+ Merge pull request #761 from mateiz/kmeans-generator
+ [Add data generator for K-means]
+
+ ecab635 Wed Jul 31 18:16:55 2013 -0700
+ Merge pull request #763 from c0s/assembly
+ [SPARK-842. Maven assembly is including examples libs and dependencies]
+
+ 39c75f3 Wed Jul 31 15:52:36 2013 -0700
+ Merge pull request #757 from BlackNiuza/result_task_generation
+ [Bug fix: SPARK-837]
+
+ b2b86c2 Wed Jul 31 15:51:39 2013 -0700
+ Merge pull request #753 from shivaram/glm-refactor
+ [Build changes for ML lib]
+
+ 14bf2fe Wed Jul 31 14:18:16 2013 -0700
+ Merge pull request #749 from benh/spark-executor-uri
+ [Added property 'spark.executor.uri' for launching on Mesos.]
+
+ 4ba4c3f Wed Jul 31 13:14:49 2013 -0700
+ Merge pull request #759 from mateiz/split-fix
+ [Use the Char version of split() instead of the String one in MLUtils]
+
+ a386ced Wed Jul 31 11:22:50 2013 -0700
+ Merge pull request #754 from rxin/compression
+ [Compression codec change]
+
+ 0be071a Wed Jul 31 11:11:59 2013 -0700
+ Merge pull request #756 from cdshines/patch-1
+ [Refactored Vector.apply(length, initializer) replacing excessive code with library method]
+
+ d4556f4 Wed Jul 31 08:48:14 2013 -0700
+ Merge pull request #751 from cdshines/master
+ [Cleaned Partitioner & PythonPartitioner source by taking out non-related logic to Utils]
+
+ 29b8cd3 Tue Jul 30 21:30:33 2013 -0700
+ Merge pull request #755 from jerryshao/add-apache-header
+ [Add Apache license header to metrics system]
+
+ e87de03 Tue Jul 30 15:00:08 2013 -0700
+ Merge pull request #744 from karenfeng/bootstrap-update
+ [Use Bootstrap progress bars in web UI]
+
+ ae57020 Tue Jul 30 14:56:41 2013 -0700
+ Merge pull request #752 from rxin/master
+ [Minor mllib cleanup]
+
+ 8aee118 Tue Jul 30 10:27:54 2013 -0700
+ Merge pull request #748 from atalwalkar/master
+ [made SimpleUpdater consistent with other updaters]
+
+ 468a36c Mon Jul 29 19:44:33 2013 -0700
+ Merge pull request #746 from rxin/cleanup
+ [Internal cleanup]
+
+ 1e1ffb1 Mon Jul 29 19:26:19 2013 -0700
+ Merge pull request #745 from shivaram/loss-update-fix
+ [Remove duplicate loss history in Gradient Descent]
+
+ c99b674 Mon Jul 29 16:32:55 2013 -0700
+ Merge pull request #735 from karenfeng/ui-807
+ [Totals for shuffle data and CPU time]
+
+ fe7298b Mon Jul 29 14:01:00 2013 -0700
+ Merge pull request #741 from pwendell/usability
+ [Fix two small usability issues]
+
+ c34c0f6 Mon Jul 29 13:18:10 2013 -0700
+ Merge pull request #731 from pxinghao/master
+ [Adding SVM and Lasso]
+
+ f3d72ff Fri Jul 26 17:19:27 2013 -0700
+ Merge pull request #739 from markhamstra/toolsPom
+ [Missing tools/pom.xml scalatest dependency]
+
+ cb36677 Fri Jul 26 16:59:30 2013 -0700
+ Merge pull request #738 from harsha2010/pruning
+ [Fix bug in Partition Pruning.]
+
+ f3cf094 Thu Jul 25 14:53:21 2013 -0700
+ Merge pull request #734 from woggle/executor-env2
+ [Get more env vars from driver rather than worker]
+
+ 51c2427 Thu Jul 25 00:03:11 2013 -0700
+ Merge pull request #732 from ryanlecompte/master
+ [Refactor Kryo serializer support to use chill/chill-java]
+
+ 52723b9 Wed Jul 24 14:33:02 2013 -0700
+ Merge pull request #728 from jey/examples-jar-env
+ [Fix setting of SPARK_EXAMPLES_JAR]
+
+ 20338c2 Wed Jul 24 14:32:24 2013 -0700
+ Merge pull request #729 from karenfeng/ui-811
+ [Stage Page updates]
+
+ 5584ebc Wed Jul 24 11:46:46 2013 -0700
+ Merge pull request #675 from c0s/assembly
+ [Building spark assembly for further consumption of the Spark project with a deployed cluster]
+
+ a73f3ee Wed Jul 24 08:59:14 2013 -0700
+ Merge pull request #671 from jerryshao/master
+ [Add metrics system for Spark]
+
+ b011329 Tue Jul 23 22:50:09 2013 -0700
+ Merge pull request #727 from rxin/scheduler
+ [Scheduler code style cleanup.]
+
+ 876125b Tue Jul 23 22:28:21 2013 -0700
+ Merge pull request #726 from rxin/spark-826
+ [SPARK-829: scheduler shouldn't hang if a task contains unserializable objects in its closure]
+
+ 2f1736c Tue Jul 23 15:53:30 2013 -0700
+ Merge pull request #725 from karenfeng/task-start
+ [Creates task start events]
+
+ 5364f64 Tue Jul 23 13:40:34 2013 -0700
+ Merge pull request #723 from rxin/mllib
+ [Made RegressionModel serializable and added unit tests to make sure predict methods would work.]
+
+ f369e0e Tue Jul 23 13:22:27 2013 -0700
+ Merge pull request #720 from ooyala/2013-07/persistent-rdds-api
+ [Add a public method getCachedRdds to SparkContext]
+
+ 401aac8 Mon Jul 22 16:57:16 2013 -0700
+ Merge pull request #719 from karenfeng/ui-808
+ [Creates Executors tab for Jobs UI]
+
+ 8ae1436 Mon Jul 22 16:03:04 2013 -0700
+ Merge pull request #722 from JoshRosen/spark-825
+ [Fix bug: DoubleRDDFunctions.sampleStdev() computed non-sample stdev()]
+
+ 15fb394 Sun Jul 21 10:33:38 2013 -0700
+ Merge pull request #716 from c0s/webui-port
+ [Regression: default webui-port can't be set via command line "--webui-port" anymore]
+
+ c40f0f2 Fri Jul 19 13:33:04 2013 -0700
+ Merge pull request #711 from shivaram/ml-generators
+ [Move ML lib data generator files to util/]
+
+ 413b841 Fri Jul 19 13:31:38 2013 -0700
+ Merge pull request #717 from viirya/dev1
+ [Do not copy local jars given to SparkContext in yarn mode]
+
+ 0d0a47c Thu Jul 18 12:06:37 2013 -0700
+ Merge pull request #710 from shivaram/ml-updates
+ [Updates to LogisticRegression]
+
+ c6235b5 Thu Jul 18 11:43:48 2013 -0700
+ Merge pull request #714 from adatao/master
+ [[BUGFIX] Fix for sbt/sbt script SPARK_HOME setting]
+
+ 009c79e Thu Jul 18 11:41:52 2013 -0700
+ Merge pull request #715 from viirya/dev1
+ [fix a bug in build process that pulls in two versions of ASM.]
+
+ 985a9e3 Wed Jul 17 22:27:19 2013 -0700
+ Merge pull request #712 from stayhf/SPARK-817
+ [Consistently invoke bash with /usr/bin/env bash in scripts to make code ...]
+
+ cad48ed Tue Jul 16 21:41:28 2013 -0700
+ Merge pull request #708 from ScrapCodes/dependencies-upgrade
+ [Dependency upgrade Akka 2.0.3 -> 2.0.5]
+
+ 8a8a8f2 Mon Jul 15 23:09:21 2013 -0700
+ Merge pull request #705 from rxin/errormessages
+ [Throw a more meaningful message when runJob is called to launch tasks on non-existent partitions.]
+
+ ed8415b Mon Jul 15 16:41:04 2013 -0700
+ Merge pull request #703 from karenfeng/ui-802
+ [Link to job UI from standalone deploy cluster web UI]
+
+ e3d3e6f Mon Jul 15 14:59:44 2013 -0700
+ Merge pull request #702 from karenfeng/ui-fixes
+ [Adds app name in HTML page titles on job web UI]
+
+ c7877d5 Sun Jul 14 12:58:13 2013 -0700
+ Merge pull request #689 from BlackNiuza/application_status
+ [Bug fix: SPARK-796]
+
+ 10c0593 Sun Jul 14 11:45:18 2013 -0700
+ Merge pull request #699 from pwendell/ui-env
+ [Add `Environment` tab to SparkUI.]
+
+ 89e8549 Sat Jul 13 16:11:08 2013 -0700
+ Merge pull request #698 from Reinvigorate/sm-deps-change
+ [changing com.google.code.findbugs maven coordinates]
+
+ 77c69ae Fri Jul 12 23:05:21 2013 -0700
+ Merge pull request #697 from pwendell/block-locations
+ [Show block locations in Web UI.]
+
+ 5a7835c Fri Jul 12 20:28:21 2013 -0700
+ Merge pull request #691 from karenfeng/logpaging
+ [Create log pages]
+
+ 71ccca0 Fri Jul 12 20:25:06 2013 -0700
+ Merge pull request #696 from woggle/executor-env
+ [Pass executor env vars (e.g. SPARK_CLASSPATH) to compute-classpath.sh]
+
+ 90fc3f3 Fri Jul 12 20:23:36 2013 -0700
+ Merge pull request #692 from Reinvigorate/takeOrdered
+ [adding takeOrdered() to RDD]
+
+ 018d04c Thu Jul 11 12:48:37 2013 -0700
+ Merge pull request #684 from woggle/mesos-classloader
+ [Explicitly set class loader for MesosSchedulerDriver callbacks.]
+
+ bc19477 Wed Jul 10 22:29:41 2013 -0700
+ Merge pull request #693 from c0s/readme
+ [Updating README to reflect Scala 2.9.3 requirements]
+
+ 7dcda9a Mon Jul 8 23:24:23 2013 -0700
+ Merge pull request #688 from markhamstra/scalaDependencies
+ [Fixed SPARK-795 with explicit dependencies]
+
+ 638927b Mon Jul 8 22:58:50 2013 -0700
+ Merge pull request #683 from shivaram/sbt-test-fix
+ [Remove some stack traces from sbt test output]
+
+ 3c13178 Mon Jul 8 14:50:34 2013 -0700
+ Merge pull request #687 from atalwalkar/master
+ [Added "Labeled" to util functions for labeled data]
+
+ 744da8e Sun Jul 7 17:42:25 2013 -0700
+ Merge pull request #679 from ryanlecompte/master
+ [Make binSearch method tail-recursive for RidgeRegression]
+
+ 3cc6818 Sat Jul 6 19:51:20 2013 -0700
+ Merge pull request #668 from shimingfei/guava-14.0.1
+ [update guava version from 11.0.1 to 14.0.1]
+
+ 2216188 Sat Jul 6 16:18:15 2013 -0700
+ Merge pull request #676 from c0s/asf-avro
+ [Use standard ASF published avro module instead of a proprietory built one]
+
+ 94871e4 Sat Jul 6 15:26:19 2013 -0700
+ Merge pull request #655 from tgravescs/master
+ [Add support for running Spark on Yarn on a secure Hadoop Cluster]
+
+ 3f918b3 Sat Jul 6 12:45:18 2013 -0700
+ Merge pull request #672 from holdenk/master
+ [s/ActorSystemImpl/ExtendedActorSystem/ as ActorSystemImpl results in a warning]
+
+ 2a36e54 Sat Jul 6 12:43:21 2013 -0700
+ Merge pull request #673 from xiajunluan/master
+ [Add config template file for fair scheduler feature]
+
+ 7ba7fa1 Sat Jul 6 11:45:08 2013 -0700
+ Merge pull request #674 from liancheng/master
+ [Bug fix: SPARK-789]
+
+ f4416a1 Sat Jul 6 11:41:58 2013 -0700
+ Merge pull request #681 from BlackNiuza/memory_leak
+ [Remove active job from idToActiveJob when job finished or aborted]
+
+ e063e29 Fri Jul 5 21:54:52 2013 -0700
+ Merge pull request #680 from tdas/master
+ [Fixed major performance bug in Network Receiver]
+
+ bf1311e Fri Jul 5 17:32:44 2013 -0700
+ Merge pull request #678 from mateiz/ml-examples
+ [Start of ML package]
+
+ 6ad85d0 Thu Jul 4 21:32:29 2013 -0700
+ Merge pull request #677 from jerryshao/fix_stage_clean
+ [Clean StageToInfos periodically when spark.cleaner.ttl is enabled]
+
+ 2e32fc8 Thu Jul 4 12:18:20 2013 -0700
+ Merge pull request #666 from c0s/master
+     [hbase dependency is missed in hadoop2-yarn profile of examples module]
+
+ 6d60fe5 Mon Jul 1 18:24:03 2013 -0700
+ Merge pull request #666 from c0s/master
+ [hbase dependency is missed in hadoop2-yarn profile of examples module]
+
+ ccfe953 Sat Jun 29 17:57:53 2013 -0700
+ Merge pull request #577 from skumargithub/master
+ [Example of cumulative counting using updateStateByKey]
+
+ 50ca176 Thu Jun 27 22:24:52 2013 -0700
+ Merge pull request #664 from pwendell/test-fix
+ [Removing incorrect test statement]
+
+ e49bc8c Wed Jun 26 11:13:33 2013 -0700
+ Merge pull request #663 from stephenh/option_and_getenv
+ [Be cute with Option and getenv.]
+
+ f5e32ed Tue Jun 25 09:16:57 2013 -0700
+ Merge pull request #661 from mesos/streaming
+ [Kafka fixes and DStream.count fix for master]
+
+ 1249e91 Mon Jun 24 21:46:33 2013 -0700
+ Merge pull request #572 from Reinvigorate/sm-block-interval
+ [Adding spark.streaming.blockInterval property]
+
+ cfcda95 Mon Jun 24 21:44:50 2013 -0700
+ Merge pull request #571 from Reinvigorate/sm-kafka-serializers
+ [Surfacing decoders on KafkaInputDStream]
+
+ 575aff6 Mon Jun 24 21:35:50 2013 -0700
+ Merge pull request #567 from Reinvigorate/sm-count-fix
+ [Fixing count() in Spark Streaming]
+
+ 3e61bef Sat Jun 22 16:22:47 2013 -0700
+ Merge pull request #648 from shivaram/netty-dbg
+ [Shuffle fixes and cleanup]
+
+ 1ef5d0d Sat Jun 22 09:35:57 2013 -0700
+ Merge pull request #644 from shimingfei/joblogger
+ [add Joblogger to Spark (on new Spark code)]
+
+ 7e4b266 Sat Jun 22 07:53:18 2013 -0700
+ Merge pull request #563 from jey/python-optimization
+ [Optimize PySpark worker invocation]
+
+ 71030ba Wed Jun 19 15:21:03 2013 -0700
+ Merge pull request #654 from lyogavin/enhance_pipe
+ [fix typo and coding style in #638]
+
+ 73f4c7d Tue Jun 18 04:21:17 2013 -0700
+ Merge pull request #605 from esjewett/SPARK-699
+ [Add hBase example (retry of pull request #596)]
+
+ 9933836 Tue Jun 18 02:41:10 2013 -0700
+ Merge pull request #647 from jerryshao/master
+ [Reduce ZippedPartitionsRDD's getPreferredLocations complexity from O(2^2n) to O(2^n)]
+
+ db42451 Mon Jun 17 15:26:36 2013 -0700
+ Merge pull request #643 from adatao/master
+ [Bug fix: Zero-length partitions result in NaN for overall mean & variance]
+
+ e82a2ff Mon Jun 17 15:13:15 2013 -0700
+ Merge pull request #653 from rxin/logging
+ [SPARK-781: Log the temp directory path when Spark says "Failed to create temp directory."]
+
+ e6d1277 Mon Jun 17 12:56:25 2013 -0700
+ Merge pull request #638 from lyogavin/enhance_pipe
+ [Enhance pipe to support more features we can do in hadoop streaming]
+
+ f961aac Sat Jun 15 00:53:41 2013 -0700
+ Merge pull request #649 from ryanlecompte/master
+ [Add top K method to RDD using a bounded priority queue]
+
+ 6602d94 Fri Jun 14 10:41:31 2013 -0700
+ Merge pull request #651 from rxin/groupbykey
+ [SPARK-772 / SPARK-774: groupByKey and cogroup should disable map side combine]
+
+ d93851a Thu Jun 13 13:38:45 2013 -0700
+ Merge pull request #645 from pwendell/compression
+ [Adding compression to Hadoop save functions]
+
+ f1da591 Wed Jun 12 17:55:08 2013 -0700
+ Merge pull request #646 from markhamstra/jvmArgs
+ [Fixed jvmArgs in maven build.]
+
+ 0e94b73 Mon Jun 10 13:00:31 2013 -0700
+ Merge pull request #625 from stephenh/fix-start-slave
+ [Fix start-slave not passing instance number to spark-daemon.]
+
+ 74b91d5 Sat Jun 8 01:19:40 2013 -0700
+ Merge pull request #629 from c0s/master
+ [Sometime Maven build runs out of PermGen space.]
+
+ c8fc423 Fri Jun 7 22:43:18 2013 -0700
+ Merge pull request #631 from jerryshao/master
+ [Fix block manager UI display issue when enable spark.cleaner.ttl]
+
+ 1ae60bc Fri Jun 7 22:39:06 2013 -0700
+ Merge pull request #634 from xiajunluan/master
+ [[Spark-753] Fix ClusterSchedulSuite unit test failed ]
+
+ fff3728 Tue Jun 4 16:09:50 2013 -0700
+ Merge pull request #640 from pwendell/timeout-update
+ [Fixing bug in BlockManager timeout]
+
+ f420d4f Tue Jun 4 15:25:58 2013 -0700
+ Merge pull request #639 from pwendell/timeout-update
+ [Bump akka and blockmanager timeouts to 60 seconds]
+
+ 84530ba Fri May 31 17:06:13 2013 -0700
+ Merge pull request #636 from rxin/unpersist
+ [Unpersist More block manager cleanup.]
+
+ ef77bb7 Thu May 30 14:50:06 2013 -0700
+ Merge pull request #627 from shivaram/master
+ [Netty and shuffle bug fixes]
+
+ 8cb8178 Thu May 30 14:17:44 2013 -0700
+ Merge pull request #628 from shivaram/zero-block-size
+ [Skip fetching zero-sized blocks in NIO.]
+
+ 6ed7139 Wed May 29 10:14:22 2013 -0700
+ Merge pull request #626 from stephenh/remove-add-if-no-port
+ [Remove unused addIfNoPort.]
+
+ 41d230c Tue May 28 23:35:24 2013 -0700
+ Merge pull request #611 from squito/classloader
+ [Use default classloaders for akka & deserializing task results]
+
+ 3db1e17 Mon May 27 21:31:43 2013 -0700
+ Merge pull request #620 from jerryshao/master
+ [Fix CheckpointRDD java.io.FileNotFoundException when calling getPreferredLocations]
+
+ 3d4891d Sat May 25 23:38:05 2013 -0700
+ Merge pull request #621 from JoshRosen/spark-613
+ [Use ec2-metadata in start-slave.sh to detect if running on EC2]
+
+ e8d4b6c Sat May 25 21:09:03 2013 -0700
+ Merge pull request #529 from xiajunluan/master
+ [[SPARK-663]Implement Fair Scheduler in Spark Cluster Scheduler ]
+
+ 9a3c344 Sat May 25 17:53:43 2013 -0700
+ Merge pull request #624 from rxin/master
+ [NonJavaSerializableClass should not be Java serializable...]
+
+ 24e41aa Fri May 24 16:48:52 2013 -0700
+ Merge pull request #623 from rxin/master
+ [Automatically configure Netty port.]
+
+ 69161f9 Fri May 24 14:42:13 2013 -0700
+ Merge pull request #622 from rxin/master
+ [bug fix: Shuffle block iterator is ignoring the shuffle serializer setting.]
+
+ dbbedfc Thu May 23 23:11:06 2013 -0700
+ Merge pull request #616 from jey/maven-netty-exclusion
+ [Exclude old versions of Netty from Maven-based build]
+
+ a2b0a79 Tue May 21 18:16:20 2013 -0700
+ Merge pull request #619 from woggling/adjust-sampling
+ [Use ARRAY_SAMPLE_SIZE constant instead of hard-coded 100.0 in SizeEstimator]
+
+ 66dac44 Tue May 21 11:41:42 2013 -0700
+ Merge pull request #618 from woggling/dead-code-disttest
+ [DistributedSuite: remove dead code]
+
+ 5912cc4 Fri May 17 19:58:40 2013 -0700
+ Merge pull request #610 from JoshRosen/spark-747
+ [Throw exception if TaskResult exceeds Akka frame size]
+
+ 6c27c38 Thu May 16 17:33:56 2013 -0700
+ Merge pull request #615 from rxin/build-fix
+ [Maven build fix & two other small changes]
+
+ 2f576ab Wed May 15 18:06:24 2013 -0700
+ Merge pull request #602 from rxin/shufflemerge
+ [Manual merge & cleanup of Shane's Shuffle Performance Optimization]
+
+ 48c6f46 Wed May 15 10:47:19 2013 -0700
+ Merge pull request #612 from ash211/patch-4
+ [Docs: Mention spark shell's default for MASTER]
+
+ 203d7b7 Wed May 15 00:47:20 2013 -0700
+ Merge pull request #593 from squito/driver_ui_link
+ [Master UI has link to Application UI]
+
+ 016ac86 Mon May 13 21:45:36 2013 -0700
+ Merge pull request #601 from rxin/emptyrdd-master
+ [EmptyRDD (master branch 0.8)]
+
+ 4b354e0 Mon May 13 17:39:19 2013 -0700
+ Merge pull request #589 from mridulm/master
+ [Add support for instance local scheduling]
+
+ 5dbc9b2 Sun May 12 11:03:10 2013 -0700
+ Merge pull request #608 from pwendell/SPARK-738
+ [SPARK-738: Spark should detect and wrap nonserializable exceptions]
+
+ 63e1999 Fri May 10 13:54:03 2013 -0700
+ Merge pull request #606 from markhamstra/foreachPartition_fix
+ [Actually use the cleaned closure in foreachPartition]
+
+ 42bbe89 Wed May 8 22:30:31 2013 -0700
+ Merge pull request #599 from JoshRosen/spark-670
+ [Fix SPARK-670: EC2 'start' command should require -i option.]
+
+ 0f1b7a0 Wed May 8 13:38:50 2013 -0700
+ Merge pull request #596 from esjewett/master
+ [hBase example]
+
+ 7af92f2 Sat May 4 22:29:17 2013 -0700
+ Merge pull request #597 from JoshRosen/webui-fixes
+ [Two minor bug fixes for Spark Web UI]
+
+ c74ce60 Sat May 4 22:26:35 2013 -0700
+ Merge pull request #598 from rxin/blockmanager
+ [Fixed flaky unpersist test in DistributedSuite.]
+
+ 3bf2c86 Fri May 3 18:27:30 2013 -0700
+ Merge pull request #594 from shivaram/master
+ [Add zip partitions to Java API]
+
+ 2484ad7 Fri May 3 17:08:55 2013 -0700
+ Merge pull request #587 from rxin/blockmanager
+ [A set of shuffle map output related changes]
+
+ 6fe9d4e Thu May 2 21:33:56 2013 -0700
+ Merge pull request #592 from woggling/localdir-fix
+ [Don't accept generated local directory names that can't be created]
+
+ 538ee75 Thu May 2 09:01:42 2013 -0700
+ Merge pull request #581 from jerryshao/master
+ [fix [SPARK-740] block manage UI throws exception when enabling Spark Streaming]
+
+ 9abcbcc Wed May 1 22:45:10 2013 -0700
+ Merge pull request #591 from rxin/removerdd
+ [RDD.unpersist: probably the most desired feature of Spark]
+
+ aa8fe1a Tue Apr 30 22:30:18 2013 -0700
+ Merge pull request #586 from mridulm/master
+ [Pull request to address issues Reynold Xin reported]
+
+ f708dda Tue Apr 30 07:51:40 2013 -0700
+ Merge pull request #585 from pwendell/listener-perf
+ [[Fix SPARK-742] Task Metrics should not employ per-record timing by default]
+
+ 68c07ea Sun Apr 28 20:19:33 2013 -0700
+ Merge pull request #582 from shivaram/master
+ [Add zip partitions interface]
+
+ f6ee9a8 Sun Apr 28 15:36:04 2013 -0700
+ Merge pull request #583 from mridulm/master
+ [Fix issues with streaming test cases after yarn branch merge]
+
+ cf54b82 Thu Apr 25 11:45:58 2013 -0700
+ Merge pull request #580 from pwendell/quickstart
+     [SPARK-739 Have quickstart standalone job use README]
+
+ 118a6c7 Wed Apr 24 08:42:30 2013 -0700
+ Merge pull request #575 from mridulm/master
+ [Manual merge of yarn branch to trunk]
+
+ 5d8a71c Tue Apr 16 19:48:02 2013 -0700
+ Merge pull request #570 from jey/increase-codecache-size
+ [Increase ReservedCodeCacheSize for sbt]
+
+ ec5e553 Sun Apr 14 08:20:13 2013 -0700
+ Merge pull request #558 from ash211/patch-jackson-conflict
+ [Don't pull in old versions of Jackson via hadoop-core]
+
+ c1c219e Sun Apr 14 08:11:23 2013 -0700
+ Merge pull request #564 from maspotts/master
+ [Allow latest scala in PATH, with SCALA_HOME as override (instead of vice-versa)]
+
+ 7c10b3e Fri Apr 12 20:55:22 2013 -0700
+ Merge pull request #565 from andyk/master
+ [Update wording of section on RDD operations in quick start guide in docs]
+
+ 077ae0a Thu Apr 11 19:34:14 2013 -0700
+ Merge pull request #561 from ash211/patch-4
+ [Add details when BlockManager heartbeats time out]
+
+ c91ff8d Wed Apr 10 15:08:23 2013 -0700
+ Merge pull request #560 from ash211/patch-3
+ [Typos: cluser -> cluster]
+
+ 7cd83bf Tue Apr 9 22:07:35 2013 -0700
+ Merge pull request #559 from ash211/patch-example-whitespace
+ [Uniform whitespace across scala examples]
+
+ 271a4f3 Tue Apr 9 22:04:52 2013 -0700
+ Merge pull request #555 from holdenk/master
+ [Retry failed ssh commands in the ec2 python script.]
+
+ 8ac9efb Tue Apr 9 13:50:50 2013 -0700
+ Merge pull request #527 from Reinvigorate/sm-kafka-cleanup
+ [KafkaInputDStream fixes and improvements]
+
+ eed54a2 Mon Apr 8 09:44:30 2013 -0700
+ Merge pull request #553 from pwendell/akka-standalone
+ [SPARK-724 - Have Akka logging enabled by default for standalone daemons]
+
+ b362df3 Sun Apr 7 17:17:52 2013 -0700
+ Merge pull request #552 from MLnick/master
+ [Bumping version for Twitter Algebird to latest]
+
+ 4b30190 Sun Apr 7 17:15:10 2013 -0700
+ Merge pull request #554 from andyk/scala2.9.3
+ [Fixes SPARK-723 - Update build to Scala 2.9.3]
+
+ dfe98ca Tue Apr 2 19:24:12 2013 -0700
+ Merge pull request #550 from erikvanoosten/master
+ [corrected Algebird example]
+
+ b5d7830 Tue Apr 2 19:23:45 2013 -0700
+ Merge pull request #551 from jey/python-bugfixes
+ [Python bugfixes]
+
+ 2be2295 Sun Mar 31 18:09:14 2013 -0700
+ Merge pull request #548 from markhamstra/getWritableClass_filter
+ [Fixed broken filter in getWritableClass[T]]
+
+ 9831bc1 Fri Mar 29 22:16:22 2013 -0700
+ Merge pull request #539 from cgrothaus/fix-webui-workdirpath
+ [Bugfix: WorkerWebUI must respect workDirPath from Worker]
+
+ 3cc8ab6 Fri Mar 29 22:14:07 2013 -0700
+ Merge pull request #541 from stephenh/shufflecoalesce
+ [Add a shuffle parameter to coalesce.]
+
+ cad507a Fri Mar 29 22:13:12 2013 -0700
+ Merge pull request #547 from jey/maven-streaming-tests-initialization-fix
+ [Move streaming test initialization into 'before' blocks]
+
+ a98996d Fri Mar 29 22:12:15 2013 -0700
+ Merge pull request #545 from ash211/patch-1
+ [Don't use deprecated Application in example]
+
+ 104c694 Fri Mar 29 22:11:50 2013 -0700
+ Merge pull request #546 from ash211/patch-2
+ [Update tuning.md]
+
+ bc36ee4 Tue Mar 26 15:05:13 2013 -0700
+ Merge pull request #543 from holdenk/master
+ [Re-enable deprecation warnings and fix deprecated warning.]
+
+ b8949ca Sat Mar 23 07:19:34 2013 -0700
+ Merge pull request #505 from stephenh/volatile
+ [Make Executor fields volatile since they're read from the thread pool.]
+
+ fd53f2f Sat Mar 23 07:13:21 2013 -0700
+ Merge pull request #510 from markhamstra/WithThing
+ [mapWith, flatMapWith and filterWith]
+
+ 4c5efcf Wed Mar 20 19:29:23 2013 -0700
+ Merge pull request #532 from andyk/master
+ [SPARK-715: Adds instructions for building with Maven to documentation]
+
+ 3558849 Wed Mar 20 19:27:47 2013 -0700
+ Merge pull request #538 from rxin/cogroup
+ [Added mapSideCombine flag to CoGroupedRDD. Added unit test for CoGroupedRDD.]
+
+ ca4d083 Wed Mar 20 11:22:36 2013 -0700
+ Merge pull request #528 from MLnick/java-examples
+ [[SPARK-707] Adding Java versions of Pi, LogQuery and K-Means examples]
+
+ b812e6b Wed Mar 20 11:21:02 2013 -0700
+ Merge pull request #526 from markhamstra/foldByKey
+ [Add foldByKey]
+
+ 945d1e7 Tue Mar 19 21:59:06 2013 -0700
+ Merge pull request #536 from sasurfer/master
+ [CoalescedRDD for many partitions]
+
+ 1cbbe94 Tue Mar 19 21:34:34 2013 -0700
+ Merge pull request #534 from stephenh/removetrycatch
+ [Remove try/catch block that can't be hit.]
+
+ 71e53f8 Tue Mar 19 21:31:41 2013 -0700
+ Merge pull request #537 from wishbear/configurableInputFormat
+ [call setConf from input format if it is Configurable]
+
+ c1e9cdc Sat Mar 16 11:47:45 2013 -0700
+ Merge pull request #525 from stephenh/subtractByKey
+ [Add PairRDDFunctions.subtractByKey.]
+
+ cdbfd1e Fri Mar 15 15:13:28 2013 -0700
+ Merge pull request #516 from squito/fix_local_metrics
+ [Fix local metrics]
+
+ f9fa2ad Fri Mar 15 15:12:43 2013 -0700
+ Merge pull request #530 from mbautin/master-update-log4j-and-make-compile-in-IntelliJ
+ [Add a log4j compile dependency to fix build in IntelliJ]
+
+ 4032beb Wed Mar 13 19:29:46 2013 -0700
+ Merge pull request #521 from stephenh/earlyclose
+ [Close the reader in HadoopRDD as soon as iteration end.]
+
+ 3c97276 Wed Mar 13 19:25:08 2013 -0700
+ Merge pull request #524 from andyk/master
+ [Fix broken link to YARN documentation]
+
+ 1c3d981 Wed Mar 13 19:23:48 2013 -0700
+ Merge pull request #517 from Reinvigorate/sm-build-fixes
+ [Build fixes for streaming /w SBT]
+
+ 2d477fd Wed Mar 13 06:49:16 2013 -0700
+ Merge pull request #523 from andyk/master
+ [Fix broken link in Quick Start]
+
+ 00c4d23 Tue Mar 12 22:19:00 2013 -0700
+ Merge pull request #518 from woggling/long-bm-sizes
+ [Send block sizes as longs in BlockManager updates]
+
+ cbf8f0d Mon Mar 11 00:23:57 2013 -0700
+ Merge pull request #513 from MLnick/bagel-caching
+ [Adds choice of persistence level to Bagel.]
+
+ 91a9d09 Sun Mar 10 15:48:23 2013 -0700
+ Merge pull request #512 from patelh/fix-kryo-serializer
+ [Fix reference bug in Kryo serializer, add test, update version]
+
+ 557cfd0 Sun Mar 10 15:44:57 2013 -0700
+ Merge pull request #515 from woggling/deploy-app-death
+ [Notify standalone deploy client of application death.]
+
+ 04fb81f Sun Mar 3 17:20:07 2013 -0800
+ Merge pull request #506 from rxin/spark-706
+ [Fixed SPARK-706: Failures in block manager put leads to read task hanging.]
+
+ 6cf4be4 Sun Mar 3 17:16:22 2013 -0800
+ Merge pull request #462 from squito/stageInfo
+ [Track assorted metrics for each task, report summaries to user at stage completion]
+
+ 6bfc7ca Sat Mar 2 22:14:49 2013 -0800
+ Merge pull request #504 from mosharaf/master
+ [Worker address was getting removed when removing an app.]
+
+ 94b3db1 Sat Mar 2 22:13:52 2013 -0800
+ Merge pull request #508 from markhamstra/TestServerInUse
+ [Avoid bind failure in InputStreamsSuite]
+
+ 25c71d3 Fri Mar 1 08:00:18 2013 -0800
+ Merge pull request #507 from markhamstra/poms271
+ [bump version to 0.7.1-SNAPSHOT in the subproject poms]
+
From 7348893f0edd96dacce2f00970db1976266f7008 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 14:53:02 -0800
Subject: [PATCH 023/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 4ae8a4b39e738daf3f06c38de869b07927a927e1 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 15 Jan 2014 14:53:11 -0800
Subject: [PATCH 024/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From 7b0d5a5f306b0adb6590eb551096939d0c617142 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 16 Jan 2014 23:18:15 -0800
Subject: [PATCH 025/133] Merge pull request #451 from Qiuzhuang/master
Fixed Windows spark shell launch script error.
JIRA SPARK-1029:https://spark-project.atlassian.net/browse/SPARK-1029
(cherry picked from commit d749d472b37448edb322bc7208a3db925c9a4fc2)
Signed-off-by: Patrick Wendell
---
bin/spark-class2.cmd | 2 +-
bin/spark-shell.cmd | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
mode change 100644 => 100755 bin/spark-class2.cmd
mode change 100644 => 100755 bin/spark-shell.cmd
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
old mode 100644
new mode 100755
index 460e6614766f8..80818c78ec24b
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -73,7 +73,7 @@ for %%d in ("%TOOLS_DIR%\target\scala-%SCALA_VERSION%\spark-tools*assembly*.jar"
rem Compute classpath using external script
set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%sbin\compute-classpath.cmd"
+call "%FWDIR%bin\compute-classpath.cmd"
set DONT_PRINT_CLASSPATH=0
set CLASSPATH=%CLASSPATH%;%SPARK_TOOLS_JAR%
diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd
old mode 100644
new mode 100755
index 23973e3e3dd43..99799128eb734
--- a/bin/spark-shell.cmd
+++ b/bin/spark-shell.cmd
@@ -18,6 +18,6 @@ rem limitations under the License.
rem
rem Find the path of sbin
-set SBIN=%~dp0..\sbin\
+set BIN=%~dp0..\bin\
-cmd /V /E /C %SBIN%spark-class2.cmd org.apache.spark.repl.Main %*
+cmd /V /E /C %BIN%spark-class2.cmd org.apache.spark.repl.Main %*
From c8f92730f886fd91a96eb1090ac97cb34cf3709a Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sat, 18 Jan 2014 12:48:49 -0800
Subject: [PATCH 026/133] Remove Typesafe Config usage and conf files to fix
nested property names
With Typesafe Config we had the subtle problem of no longer allowing
nested property names, which are used for a few of our properties:
http://apache-spark-developers-list.1001551.n3.nabble.com/Config-properties-broken-in-master-td208.html
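[Editorial note, not part of the patch: a minimal Scala sketch of the behavior this change restores, modeled on the "nested property names" test added below. With plain spark.* Java system properties, dotted keys that nest under one another can coexist, whereas the HOCON model could not represent a path as both a value and an object.]

    import org.apache.spark.SparkConf

    object NestedPropsSketch {
      def main(args: Array[String]): Unit = {
        // Dotted keys that nest under one another -- the case the old parsing rejected.
        System.setProperty("spark.test.a", "a")
        System.setProperty("spark.test.a.b", "a.b")
        System.setProperty("spark.test.a.b.c", "a.b.c")

        // loadDefaults = true: SparkConf copies every spark.* system property.
        val conf = new SparkConf()
        assert(conf.get("spark.test.a") == "a")
        assert(conf.get("spark.test.a.b") == "a.b")
        assert(conf.get("spark.test.a.b.c") == "a.b.c")
      }
    }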
---
.../scala/org/apache/spark/SparkConf.scala | 18 +++----
core/src/test/resources/spark.conf | 8 ----
.../org/apache/spark/SparkConfSuite.scala | 47 +++++++++++--------
docs/configuration.md | 28 +----------
project/SparkBuild.scala | 1 -
python/pyspark/conf.py | 10 ++--
6 files changed, 41 insertions(+), 71 deletions(-)
delete mode 100644 core/src/test/resources/spark.conf
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 369c6ce78fa71..951bfd79d0d6a 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -20,19 +20,17 @@ package org.apache.spark
import scala.collection.JavaConverters._
import scala.collection.mutable.HashMap
-import com.typesafe.config.ConfigFactory
import java.io.{ObjectInputStream, ObjectOutputStream, IOException}
/**
* Configuration for a Spark application. Used to set various Spark parameters as key-value pairs.
*
* Most of the time, you would create a SparkConf object with `new SparkConf()`, which will load
- * values from both the `spark.*` Java system properties and any `spark.conf` on your application's
- * classpath (if it has one). In this case, system properties take priority over `spark.conf`, and
- * any parameters you set directly on the `SparkConf` object take priority over both of those.
+ * values from any `spark.*` Java system properties set in your application as well. In this case,
+ * parameters you set directly on the `SparkConf` object take priority over system properties.
*
* For unit tests, you can also call `new SparkConf(false)` to skip loading external settings and
- * get the same configuration no matter what is on the classpath.
+ * get the same configuration no matter what the system properties are.
*
* All setter methods in this class support chaining. For example, you can write
* `new SparkConf().setMaster("local").setAppName("My app")`.
@@ -40,7 +38,7 @@ import java.io.{ObjectInputStream, ObjectOutputStream, IOException}
* Note that once a SparkConf object is passed to Spark, it is cloned and can no longer be modified
* by the user. Spark does not support modifying the configuration at runtime.
*
- * @param loadDefaults whether to load values from the system properties and classpath
+ * @param loadDefaults whether to also load values from Java system properties
*/
class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
@@ -50,11 +48,9 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
private val settings = new HashMap[String, String]()
if (loadDefaults) {
- ConfigFactory.invalidateCaches()
- val typesafeConfig = ConfigFactory.systemProperties()
- .withFallback(ConfigFactory.parseResources("spark.conf"))
- for (e <- typesafeConfig.entrySet().asScala if e.getKey.startsWith("spark.")) {
- settings(e.getKey) = e.getValue.unwrapped.toString
+ // Load any spark.* system properties
+ for ((k, v) <- System.getProperties.asScala if k.startsWith("spark.")) {
+ settings(k) = v
}
}
diff --git a/core/src/test/resources/spark.conf b/core/src/test/resources/spark.conf
deleted file mode 100644
index aa4e7512354d3..0000000000000
--- a/core/src/test/resources/spark.conf
+++ /dev/null
@@ -1,8 +0,0 @@
-# A simple spark.conf file used only in our unit tests
-
-spark.test.intTestProperty = 1
-
-spark.test {
- stringTestProperty = "hi"
- listTestProperty = ["a", "b"]
-}
diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
index fa49974db445b..87e9012622456 100644
--- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala
@@ -20,35 +20,23 @@ package org.apache.spark
import org.scalatest.FunSuite
class SparkConfSuite extends FunSuite with LocalSparkContext {
- // This test uses the spark.conf in core/src/test/resources, which has a few test properties
- test("loading from spark.conf") {
- val conf = new SparkConf()
- assert(conf.get("spark.test.intTestProperty") === "1")
- assert(conf.get("spark.test.stringTestProperty") === "hi")
- // NOTE: we don't use list properties yet, but when we do, we'll have to deal with this syntax
- assert(conf.get("spark.test.listTestProperty") === "[a, b]")
- }
-
- // This test uses the spark.conf in core/src/test/resources, which has a few test properties
- test("system properties override spark.conf") {
+ test("loading from system properties") {
try {
- System.setProperty("spark.test.intTestProperty", "2")
+ System.setProperty("spark.test.testProperty", "2")
val conf = new SparkConf()
- assert(conf.get("spark.test.intTestProperty") === "2")
- assert(conf.get("spark.test.stringTestProperty") === "hi")
+ assert(conf.get("spark.test.testProperty") === "2")
} finally {
- System.clearProperty("spark.test.intTestProperty")
+ System.clearProperty("spark.test.testProperty")
}
}
test("initializing without loading defaults") {
try {
- System.setProperty("spark.test.intTestProperty", "2")
+ System.setProperty("spark.test.testProperty", "2")
val conf = new SparkConf(false)
- assert(!conf.contains("spark.test.intTestProperty"))
- assert(!conf.contains("spark.test.stringTestProperty"))
+ assert(!conf.contains("spark.test.testProperty"))
} finally {
- System.clearProperty("spark.test.intTestProperty")
+ System.clearProperty("spark.test.testProperty")
}
}
@@ -124,4 +112,25 @@ class SparkConfSuite extends FunSuite with LocalSparkContext {
assert(sc.master === "local[2]")
assert(sc.appName === "My other app")
}
+
+ test("nested property names") {
+ // This wasn't supported by some external conf parsing libraries
+ try {
+ System.setProperty("spark.test.a", "a")
+ System.setProperty("spark.test.a.b", "a.b")
+ System.setProperty("spark.test.a.b.c", "a.b.c")
+ val conf = new SparkConf()
+ assert(conf.get("spark.test.a") === "a")
+ assert(conf.get("spark.test.a.b") === "a.b")
+ assert(conf.get("spark.test.a.b.c") === "a.b.c")
+ conf.set("spark.test.a.b", "A.B")
+ assert(conf.get("spark.test.a") === "a")
+ assert(conf.get("spark.test.a.b") === "A.B")
+ assert(conf.get("spark.test.a.b.c") === "a.b.c")
+ } finally {
+ System.clearProperty("spark.test.a")
+ System.clearProperty("spark.test.a.b")
+ System.clearProperty("spark.test.a.b.c")
+ }
+ }
}
diff --git a/docs/configuration.md b/docs/configuration.md
index da70cabba2d9b..00864906b3c7b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -18,8 +18,8 @@ Spark provides three locations to configure the system:
Spark properties control most application settings and are configured separately for each application.
The preferred way to set them is by passing a [SparkConf](api/core/index.html#org.apache.spark.SparkConf)
class to your SparkContext constructor.
-Alternatively, Spark will also load them from Java system properties (for compatibility with old versions
-of Spark) and from a [`spark.conf` file](#configuration-files) on your classpath.
+Alternatively, Spark will also load them from Java system properties, for compatibility with old versions
+of Spark.
SparkConf lets you configure most of the common properties to initialize a cluster (e.g., master URL and
application name), as well as arbitrary key-value pairs through the `set()` method. For example, we could
@@ -468,30 +468,6 @@ Apart from these, the following properties are also available, and may be useful
The application web UI at `http://:4040` lists Spark properties in the "Environment" tab.
This is a useful place to check to make sure that your properties have been set correctly.
-## Configuration Files
-
-You can also configure Spark properties through a `spark.conf` file on your Java classpath.
-Because these properties are usually application-specific, we recommend putting this fine *only* on your
-application's classpath, and not in a global Spark classpath.
-
-The `spark.conf` file uses Typesafe Config's [HOCON format](https://github.com/typesafehub/config#json-superset),
-which is a superset of Java properties files and JSON. For example, the following is a simple config file:
-
-{% highlight awk %}
-# Comments are allowed
-spark.executor.memory = 512m
-spark.serializer = org.apache.spark.serializer.KryoSerializer
-{% endhighlight %}
-
-The format also allows hierarchical nesting, as follows:
-
-{% highlight awk %}
-spark.akka {
- threads = 8
- timeout = 200
-}
-{% endhighlight %}
-
# Environment Variables
Certain Spark settings can be configured through environment variables, which are read from the `conf/spark-env.sh`
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index bcc286d7ea190..075e912f2d96c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -277,7 +277,6 @@ object SparkBuild extends Build {
"com.codahale.metrics" % "metrics-graphite" % "3.0.0",
"com.twitter" %% "chill" % "0.3.1",
"com.twitter" % "chill-java" % "0.3.1",
- "com.typesafe" % "config" % "1.0.2",
"com.clearspring.analytics" % "stream" % "2.5.1"
)
)
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index d72aed6a30ec1..3870cd8f2b097 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -61,14 +61,12 @@ class SparkConf(object):
Most of the time, you would create a SparkConf object with
C{SparkConf()}, which will load values from C{spark.*} Java system
- properties and any C{spark.conf} on your Spark classpath. In this
- case, system properties take priority over C{spark.conf}, and any
- parameters you set directly on the C{SparkConf} object take priority
- over both of those.
+ properties as well. In this case, any parameters you set directly on
+ the C{SparkConf} object take priority over system properties.
For unit tests, you can also call C{SparkConf(false)} to skip
loading external settings and get the same configuration no matter
- what is on the classpath.
+ what the system properties are.
All setter methods in this class support chaining. For example,
you can write C{conf.setMaster("local").setAppName("My app")}.
@@ -82,7 +80,7 @@ def __init__(self, loadDefaults=True, _jvm=None):
Create a new Spark configuration.
@param loadDefaults: whether to load values from Java system
- properties and classpath (True by default)
+ properties (True by default)
@param _jvm: internal parameter used to pass a handle to the
Java VM; does not need to be set by users
"""
From ff7201cf9e4f3af6943d6ff5d4b625404a174fec Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 12:49:21 -0800
Subject: [PATCH 027/133] Merge pull request #461 from pwendell/master
Use renamed shuffle spill config in CoGroupedRDD.scala
This one got missed when it was renamed.
(cherry picked from commit aa981e4e97a11dbd5a4d012bfbdb395982968372)
Signed-off-by: Patrick Wendell
---
core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index f2feb406f7783..0e47f2e022610 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -106,7 +106,7 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part:
override def compute(s: Partition, context: TaskContext): Iterator[(K, CoGroupCombiner)] = {
val sparkConf = SparkEnv.get.conf
- val externalSorting = sparkConf.getBoolean("spark.shuffle.externalSorting", true)
+ val externalSorting = sparkConf.getBoolean("spark.shuffle.spill", true)
val split = s.asInstanceOf[CoGroupPartition]
val numRdds = split.deps.size
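
As a quick, hedged illustration of the renamed setting (the value shown is just an example), the spill behavior read in CoGroupedRDD.compute above is driven by SparkConf:

    import org.apache.spark.SparkConf

    // Disable on-disk spilling during co-group aggregation (enabled by default).
    val conf = new SparkConf().set("spark.shuffle.spill", "false")

    // Same lookup the patched code performs, with true as the fallback default.
    val externalSorting = conf.getBoolean("spark.shuffle.spill", true)
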
From 4ac8cab08141302c82c0388ccb98d627a144445d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 16:21:43 -0800
Subject: [PATCH 028/133] Merge pull request #426 from mateiz/py-ml-tests
Re-enable Python MLlib tests (require Python 2.7 and NumPy 1.7+)
We disabled these earlier because Jenkins didn't have these versions.
(cherry picked from commit 4c16f79ce45a68ee613a3d565b0e8676b724f867)
Signed-off-by: Patrick Wendell
---
python/pyspark/mllib/__init__.py | 10 ++++++++++
python/run-tests | 10 +++++-----
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index b1a5df109b46e..b420d7a7f23ba 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -18,3 +18,13 @@
"""
Python bindings for MLlib.
"""
+
+# MLlib currently needs Python 2.7+ and NumPy 1.7+, so complain if lower
+
+import sys
+if sys.version_info[0:2] < (2, 7):
+ raise Exception("MLlib requires Python 2.7+")
+
+import numpy
+if numpy.version.version < '1.7':
+ raise Exception("MLlib requires NumPy 1.7+")
diff --git a/python/run-tests b/python/run-tests
index 2005f610b43b4..a986ac9380be4 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -40,11 +40,11 @@ run_test "-m doctest pyspark/broadcast.py"
run_test "-m doctest pyspark/accumulators.py"
run_test "-m doctest pyspark/serializers.py"
run_test "pyspark/tests.py"
-#run_test "pyspark/mllib/_common.py"
-#run_test "pyspark/mllib/classification.py"
-#run_test "pyspark/mllib/clustering.py"
-#run_test "pyspark/mllib/recommendation.py"
-#run_test "pyspark/mllib/regression.py"
+run_test "pyspark/mllib/_common.py"
+run_test "pyspark/mllib/classification.py"
+run_test "pyspark/mllib/clustering.py"
+run_test "pyspark/mllib/recommendation.py"
+run_test "pyspark/mllib/regression.py"
if [[ $FAILED != 0 ]]; then
echo -en "\033[31m" # Red
From 76147a290327c8ff76c845cbe347b5fe09de3da7 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 16:23:56 -0800
Subject: [PATCH 029/133] Merge pull request #437 from mridulm/master
Minor api usability changes
- Expose the checkpoint directory, since it is now auto-generated
- Null-check the jars argument
- Expose SparkHadoopUtil, so that Hadoop configuration creation is abstracted away from user code and does not duplicate functionality already in Spark.
(cherry picked from commit 73dfd42fba5e526cc57e2a2ed78be323b63cb8fa)
Signed-off-by: Patrick Wendell
---
core/src/main/scala/org/apache/spark/SparkContext.scala | 4 +++-
.../scala/org/apache/spark/api/java/JavaSparkContext.scala | 2 ++
.../main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 1 -
3 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index ba3e91effbdb4..ddd7d60d96bd5 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -956,6 +956,8 @@ class SparkContext(
}
}
+ def getCheckpointDir = checkpointDir
+
/** Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD). */
def defaultParallelism: Int = taskScheduler.defaultParallelism
@@ -1125,7 +1127,7 @@ object SparkContext {
if (sparkHome != null) {
res.setSparkHome(sparkHome)
}
- if (!jars.isEmpty) {
+ if (jars != null && !jars.isEmpty) {
res.setJars(jars)
}
res.setExecutorEnv(environment.toSeq)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 8041163e3d748..33c931b1a7c8b 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -401,6 +401,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
sc.setCheckpointDir(dir)
}
+ def getCheckpointDir = JavaUtils.optionToOptional(sc.getCheckpointDir)
+
protected def checkpointFile[T](path: String): JavaRDD[T] = {
implicit val cm: ClassTag[T] =
implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 27dc42bf7e50e..b479225b45ee9 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -28,7 +28,6 @@ import org.apache.spark.{SparkContext, SparkException}
/**
* Contains util methods to interact with Hadoop from Spark.
*/
-private[spark]
class SparkHadoopUtil {
val conf = newConfiguration()
UserGroupInformation.setConfiguration(conf)
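
To make the newly exposed accessor concrete, a small usage sketch follows; the master, app name, and checkpoint path are illustrative assumptions, not part of the patch:

    import org.apache.spark.{SparkConf, SparkContext}

    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("checkpoint-demo"))

    sc.setCheckpointDir("/tmp/spark-checkpoints")   // illustrative path

    // New in this patch: the directory can be read back, which matters now
    // that it may be auto-generated rather than user-supplied.
    val dir: Option[String] = sc.getCheckpointDir
    println("Checkpointing to: " + dir.getOrElse("<unset>"))

    sc.stop()
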
From 03019d106becae3cca95428b462d661c1afac37e Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 16:29:23 -0800
Subject: [PATCH 030/133] Merge pull request #459 from
srowen/UpdaterL2Regularization
Correct L2 regularized weight update with canonical form
Per a thread on the user@ mailing list, and comments from Ameet, I believe the weight update for L2 regularization needs to be corrected. See http://mail-archives.apache.org/mod_mbox/spark-user/201401.mbox/%3CCAH3_EVMetuQuhj3__NdUniDLc4P-FMmmrmxw9TS14or8nT4BNQ%40mail.gmail.com%3E
(cherry picked from commit fe8a3546f40394466a41fc750cb60f6fc73d8bbb)
Signed-off-by: Patrick Wendell
---
.../scala/org/apache/spark/mllib/optimization/Updater.scala | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
index 4c51f4f881f76..37124f261eeb9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
@@ -86,13 +86,17 @@ class L1Updater extends Updater {
/**
* Updater that adjusts the learning rate and performs L2 regularization
+ *
+ * See, for example, the explanation of gradient and loss with L2 regularization on
+ * slides 21-22 of these slides.
*/
class SquaredL2Updater extends Updater {
override def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix,
stepSize: Double, iter: Int, regParam: Double): (DoubleMatrix, Double) = {
val thisIterStepSize = stepSize / math.sqrt(iter)
val normGradient = gradient.mul(thisIterStepSize)
- val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0)
+ val newWeights = weightsOld.mul(1.0 - 2.0 * thisIterStepSize * regParam).sub(normGradient)
(newWeights, pow(newWeights.norm2, 2.0) * regParam)
}
}
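
A worked form of the correction, as a sketch of the reasoning rather than a formal derivation (notation: \eta_t is thisIterStepSize, \lambda is regParam, g_t the gradient, w_t the old weights):

    % L2-regularized objective: f(w) = L(w) + \lambda \|w\|^2, with gradient \nabla L(w) + 2\lambda w.
    % The canonical explicit SGD step is therefore
    w_{t+1} = w_t - \eta_t (g_t + 2\lambda w_t) = (1 - 2\eta_t\lambda)\, w_t - \eta_t g_t
    % which is what the new line computes:
    %   weightsOld.mul(1.0 - 2.0 * thisIterStepSize * regParam).sub(normGradient)
    % The previous expression, (w_t - \eta_t g_t) / (1 + 2\eta_t\lambda), instead solves
    % w_{t+1} = w_t - \eta_t (g_t + 2\lambda w_{t+1}), i.e. an implicit (proximal-style) step
    % rather than the canonical explicit update the patch title refers to.
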
From a4b316f27c0bf30fac941fb3e6e595ec7cada7a2 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 16:37:23 -0800
Subject: [PATCH 031/133] Rolling back versions for 0.9.0 release
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 2 +-
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..292140acf4464 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..6f42dd9c90f5c 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..4f51a0e377db2 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..231fc6dab211f 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..9c658c16ec02a 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..63fc9e8c9a9dc 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..a71a0dd5dcae3 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..3fd15c65b532c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..8e044a906533f 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..4e053a19092cd 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..cdf0c38094990 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..154995ffaa10c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..700d453299019 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..2064b9e7f9cdf 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..7d828296b361c 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..f58687ebf9889 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..0ea48e15abad6 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating-SNASPHOT
../pom.xml
From 49a2c8180271f438f7db57323ba4e95e0eafa9eb Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 16:58:44 -0800
Subject: [PATCH 032/133] Typo fix in build versions
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 2 +-
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 292140acf4464..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 6f42dd9c90f5c..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 4f51a0e377db2..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 231fc6dab211f..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 9c658c16ec02a..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 63fc9e8c9a9dc..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index a71a0dd5dcae3..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 3fd15c65b532c..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 8e044a906533f..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4e053a19092cd..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index cdf0c38094990..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index 154995ffaa10c..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
diff --git a/repl/pom.xml b/repl/pom.xml
index 700d453299019..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 2064b9e7f9cdf..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 7d828296b361c..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index f58687ebf9889..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 0ea48e15abad6..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNASPHOT
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From 77c32470a1b02d6f1475bda2cfb9ae5bd4b53dde Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 17:14:22 -0800
Subject: [PATCH 033/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 4f8f86c2c66dc2f6a17d5b0e4fdeeb06a71ba52f Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 17:14:28 -0800
Subject: [PATCH 034/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From 3368699c7c965cb42d7fa0064d8056ae16f172b2 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 21:29:31 -0800
Subject: [PATCH 035/133] Revert "[maven-release-plugin] prepare for next
development iteration"
This reverts commit 4f8f86c2c66dc2f6a17d5b0e4fdeeb06a71ba52f.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 91c970904782c71f9fd25c899d163a8c57321f88 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 21:29:40 -0800
Subject: [PATCH 036/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit 77c32470a1b02d6f1475bda2cfb9ae5bd4b53dde.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From eddd3476a741b5be5b15a1672a59ae965715647b Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 21:31:37 -0800
Subject: [PATCH 037/133] Updating CHANGES.txt file
---
CHANGES.txt | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index d5ca405dcc198..e6baf7bee61f2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,30 @@ Spark Change Log
Release 0.9.0-incubating
+ 03019d1 Sat Jan 18 16:29:43 2014 -0800
+ Merge pull request #459 from srowen/UpdaterL2Regularization
+ [Correct L2 regularized weight update with canonical form]
+
+ 76147a2 Sat Jan 18 16:24:16 2014 -0800
+ Merge pull request #437 from mridulm/master
+ [Minor api usability changes]
+
+ 4ac8cab Sat Jan 18 16:22:46 2014 -0800
+ Merge pull request #426 from mateiz/py-ml-tests
+ [Re-enable Python MLlib tests (require Python 2.7 and NumPy 1.7+)]
+
+ 34e911c Sat Jan 18 16:17:34 2014 -0800
+ Merge pull request #462 from mateiz/conf-file-fix
+ [Remove Typesafe Config usage and conf files to fix nested property names]
+
+ ff7201c Sat Jan 18 12:50:02 2014 -0800
+ Merge pull request #461 from pwendell/master
+ [Use renamed shuffle spill config in CoGroupedRDD.scala]
+
+ 7b0d5a5 Thu Jan 16 23:18:48 2014 -0800
+ Merge pull request #451 from Qiuzhuang/master
+ [Fixed Window spark shell launch script error.]
+
4ccedb3 Wed Jan 15 14:26:48 2014 -0800
Merge pull request #444 from mateiz/py-version
[Clarify that Python 2.7 is only needed for MLlib]
From 00c847af1d4be2fe5fad887a57857eead1e517dc Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 21:45:13 -0800
Subject: [PATCH 038/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 34ae65b06128077751ec2b923c9740a429d8299d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 18 Jan 2014 21:45:20 -0800
Subject: [PATCH 039/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From 0f077b5b480cd6034b1e6c6f34d69e0c3c3854f2 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 10:29:54 -0800
Subject: [PATCH 040/133] Merge pull request #458 from tdas/docs-update
Updated java API docs for streaming, along with very minor changes in the code examples.
Docs updated for:
Scala: StreamingContext, DStream, PairDStreamFunctions
Java: JavaStreamingContext, JavaDStream, JavaPairDStream
Examples updated:
JavaQueueStream: Avoid using a deprecated method
ActorWordCount: Use the public interface the right way.
(cherry picked from commit 256a3553c447db0865ea8807a8fdbccb66a97b28)
Signed-off-by: Patrick Wendell
---
.../streaming/examples/JavaQueueStream.java | 3 +-
.../streaming/examples/ActorWordCount.scala | 2 +-
.../spark/streaming/StreamingContext.scala | 17 ++++--
.../streaming/api/java/JavaDStream.scala | 22 ++------
.../streaming/api/java/JavaPairDStream.scala | 4 ++
.../api/java/JavaStreamingContext.scala | 55 +++++++++----------
.../spark/streaming/dstream/DStream.scala | 6 +-
.../dstream/PairDStreamFunctions.scala | 11 ++--
.../streaming/receivers/ActorReceiver.scala | 35 +++++++-----
9 files changed, 79 insertions(+), 76 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
index 7ef9c6c8f4aaf..e2d55f1a4e180 100644
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
@@ -58,10 +58,9 @@ public static void main(String[] args) throws Exception {
}
for (int i = 0; i < 30; i++) {
- rddQueue.add(ssc.sc().parallelize(list));
+ rddQueue.add(ssc.sparkContext().parallelize(list));
}
-
// Create the QueueInputDStream and use it do some processing
JavaDStream inputStream = ssc.queueStream(rddQueue);
JavaPairDStream mappedStream = inputStream.map(
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
index 57e1b1f806e82..5a4aa7f3a2524 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
@@ -88,7 +88,7 @@ extends Actor with Receiver {
override def preStart = remotePublisher ! SubscribeReceiver(context.self)
def receive = {
- case msg ⇒ context.parent ! pushBlock(msg.asInstanceOf[T])
+ case msg ⇒ pushBlock(msg.asInstanceOf[T])
}
override def postStop() = remotePublisher ! UnsubscribeReceiver(context.self)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 26257e652e537..5847b95e3f5d1 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -42,9 +42,15 @@ import org.apache.spark.streaming.scheduler._
import org.apache.hadoop.conf.Configuration
/**
- * A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic
- * information (such as, cluster URL and job name) to internally create a SparkContext, it provides
- * methods used to create DStream from various input sources.
+ * Main entry point for Spark Streaming functionality. It provides methods used to create
+ * [[org.apache.spark.streaming.dstream.DStream]]s from various input sources. It can be created
+ * either by providing a Spark master URL and an appName, or from an org.apache.spark.SparkConf
+ * configuration (see core Spark documentation), or from an existing org.apache.spark.SparkContext.
+ * The associated SparkContext can be accessed using `context.sparkContext`. After
+ * creating and transforming DStreams, the streaming computation can be started and stopped
+ * using `context.start()` and `context.stop()`, respectively.
+ * `context.awaitTermination()` allows the current thread to wait for the termination
+ * of the context by `stop()` or by an exception.
*/
class StreamingContext private[streaming] (
sc_ : SparkContext,
@@ -63,7 +69,7 @@ class StreamingContext private[streaming] (
/**
* Create a StreamingContext by providing the configuration necessary for a new SparkContext.
- * @param conf a [[org.apache.spark.SparkConf]] object specifying Spark parameters
+ * @param conf a org.apache.spark.SparkConf object specifying Spark parameters
* @param batchDuration the time interval at which streaming data will be divided into batches
*/
def this(conf: SparkConf, batchDuration: Duration) = {
@@ -88,7 +94,7 @@ class StreamingContext private[streaming] (
}
/**
- * Re-create a StreamingContext from a checkpoint file.
+ * Recreate a StreamingContext from a checkpoint file.
* @param path Path to the directory that was specified as the checkpoint directory
* @param hadoopConf Optional, configuration object if necessary for reading from
* HDFS compatible filesystems
@@ -151,6 +157,7 @@ class StreamingContext private[streaming] (
private[streaming] val scheduler = new JobScheduler(this)
private[streaming] val waiter = new ContextWaiter
+
/**
* Return the associated Spark context
*/
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
index c92854ccd9a28..e23b725052864 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala
@@ -27,22 +27,12 @@ import scala.reflect.ClassTag
import org.apache.spark.streaming.dstream.DStream
/**
- * A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
- * sequence of RDDs (of the same type) representing a continuous stream of data (see [[org.apache.spark.rdd.RDD]]
- * for more details on RDDs). DStreams can either be created from live data (such as, data from
- * HDFS, Kafka or Flume) or it can be generated by transformation existing DStreams using operations
- * such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
- * DStream periodically generates a RDD, either from live data or by transforming the RDD generated
- * by a parent DStream.
- *
- * This class contains the basic operations available on all DStreams, such as `map`, `filter` and
- * `window`. In addition, [[org.apache.spark.streaming.api.java.JavaPairDStream]] contains operations available
- * only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`.
- *
- * DStreams internally is characterized by a few basic properties:
- * - A list of other DStreams that the DStream depends on
- * - A time interval at which the DStream generates an RDD
- * - A function that is used to generate an RDD after each time interval
+ * A Java-friendly interface to [[org.apache.spark.streaming.dstream.DStream]], the basic
+ * abstraction in Spark Streaming that represents a continuous stream of data.
+ * DStreams can either be created from live data (such as data from TCP sockets, Kafka, Flume,
+ * etc.) or they can be generated by transforming existing DStreams using operations such as
+ * `map` and `window`. For operations applicable to key-value pair DStreams, see
+ * [[org.apache.spark.streaming.api.java.JavaPairDStream]].
*/
class JavaDStream[T](val dstream: DStream[T])(implicit val classTag: ClassTag[T])
extends JavaDStreamLike[T, JavaDStream[T], JavaRDD[T]] {
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index 6bb985ca540ff..79fa6a623d290 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -37,6 +37,10 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.PairRDDFunctions
import org.apache.spark.streaming.dstream.DStream
+/**
+ * A Java-friendly interface to a DStream of key-value pairs, which provides extra methods
+ * like `reduceByKey` and `join`.
+ */
class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
implicit val kManifest: ClassTag[K],
implicit val vManifest: ClassTag[V])
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 613683ca40501..921b56143af25 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -22,7 +22,6 @@ import scala.collection.JavaConversions._
import scala.reflect.ClassTag
import java.io.InputStream
-import java.lang.{Integer => JInt}
import java.util.{List => JList, Map => JMap}
import akka.actor.{Props, SupervisorStrategy}
@@ -39,19 +38,20 @@ import org.apache.hadoop.conf.Configuration
import org.apache.spark.streaming.dstream.DStream
/**
- * A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic
- * information (such as, cluster URL and job name) to internally create a SparkContext, it provides
- * methods used to create DStream from various input sources.
+ * A Java-friendly version of [[org.apache.spark.streaming.StreamingContext]] which is the main
+ * entry point for Spark Streaming functionality. It provides methods to create
+ * [[org.apache.spark.streaming.api.java.JavaDStream]] and
+ * [[org.apache.spark.streaming.api.java.JavaPairDStream]] from input sources. The internal
+ * org.apache.spark.api.java.JavaSparkContext (see core Spark documentation) can be accessed
+ * using `context.sparkContext`. After creating and transforming DStreams, the streaming
+ * computation can be started and stopped using `context.start()` and `context.stop()`,
+ * respectively. `context.awaitTermination()` allows the current thread to wait for the
+ * termination of a context by `stop()` or by an exception.
*/
class JavaStreamingContext(val ssc: StreamingContext) {
- // TODOs:
- // - Test to/from Hadoop functions
- // - Support creating and registering InputStreams
-
-
/**
- * Creates a StreamingContext.
+ * Create a StreamingContext.
* @param master Name of the Spark Master
* @param appName Name to be used when registering with the scheduler
* @param batchDuration The time interval at which streaming data will be divided into batches
@@ -60,7 +60,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(master, appName, batchDuration, null, Nil, Map()))
/**
- * Creates a StreamingContext.
+ * Create a StreamingContext.
* @param master Name of the Spark Master
* @param appName Name to be used when registering with the scheduler
* @param batchDuration The time interval at which streaming data will be divided into batches
@@ -77,7 +77,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(master, appName, batchDuration, sparkHome, Seq(jarFile), Map()))
/**
- * Creates a StreamingContext.
+ * Create a StreamingContext.
* @param master Name of the Spark Master
* @param appName Name to be used when registering with the scheduler
* @param batchDuration The time interval at which streaming data will be divided into batches
@@ -94,7 +94,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(master, appName, batchDuration, sparkHome, jars, Map()))
/**
- * Creates a StreamingContext.
+ * Create a StreamingContext.
* @param master Name of the Spark Master
* @param appName Name to be used when registering with the scheduler
* @param batchDuration The time interval at which streaming data will be divided into batches
@@ -113,7 +113,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(master, appName, batchDuration, sparkHome, jars, environment))
/**
- * Creates a StreamingContext using an existing SparkContext.
+ * Create a JavaStreamingContext using an existing JavaSparkContext.
* @param sparkContext The underlying JavaSparkContext to use
* @param batchDuration The time interval at which streaming data will be divided into batches
*/
@@ -121,7 +121,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(sparkContext.sc, batchDuration))
/**
- * Creates a StreamingContext using an existing SparkContext.
+ * Create a JavaStreamingContext using a SparkConf configuration.
* @param conf A Spark application configuration
* @param batchDuration The time interval at which streaming data will be divided into batches
*/
@@ -129,19 +129,18 @@ class JavaStreamingContext(val ssc: StreamingContext) {
this(new StreamingContext(conf, batchDuration))
/**
- * Re-creates a StreamingContext from a checkpoint file.
+ * Recreate a JavaStreamingContext from a checkpoint file.
* @param path Path to the directory that was specified as the checkpoint directory
*/
def this(path: String) = this(new StreamingContext(path, new Configuration))
/**
- * Re-creates a StreamingContext from a checkpoint file.
+ * Recreate a JavaStreamingContext from a checkpoint file.
* @param path Path to the directory that was specified as the checkpoint directory
*
*/
def this(path: String, hadoopConf: Configuration) = this(new StreamingContext(path, hadoopConf))
-
@deprecated("use sparkContext", "0.9.0")
val sc: JavaSparkContext = sparkContext
@@ -149,7 +148,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
val sparkContext = new JavaSparkContext(ssc.sc)
/**
- * Create a input stream from network source hostname:port. Data is received using
+ * Create an input stream from network source hostname:port. Data is received using
* a TCP socket and the received bytes are interpreted as UTF8 encoded \n delimited
* lines.
* @param hostname Hostname to connect to for receiving data
@@ -162,7 +161,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream from network source hostname:port. Data is received using
+ * Create an input stream from network source hostname:port. Data is received using
* a TCP socket and the received bytes are interpreted as UTF8 encoded \n delimited
* lines. Storage level of the data will be the default StorageLevel.MEMORY_AND_DISK_SER_2.
* @param hostname Hostname to connect to for receiving data
@@ -173,7 +172,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream from network source hostname:port. Data is received using
+ * Create an input stream from network source hostname:port. Data is received using
* a TCP socket and the received bytes are interpreted as objects using the given
* converter.
* @param hostname Hostname to connect to for receiving data
@@ -195,7 +194,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them as text files (using key as LongWritable, value
* as Text and input format as TextInputFormat). Files must be written to the
* monitored directory by "moving" them from another location within the same
@@ -207,7 +206,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream from network source hostname:port, where data is received
+ * Create an input stream from network source hostname:port, where data is received
* as serialized blocks (serialized using Spark's serializer) that can be directly
* pushed into the block manager without deserializing them. This is the most efficient
* way to receive data.
@@ -226,7 +225,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream from network source hostname:port, where data is received
+ * Create an input stream from network source hostname:port, where data is received
* as serialized blocks (serialized using Spark's serializer) that can be directly
* pushed into the block manager without deserializing them. This is the most efficient
* way to receive data.
@@ -241,7 +240,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them using the given key-value types and input format.
* Files must be written to the monitored directory by "moving" them from another
* location within the same file system. File names starting with . are ignored.
@@ -324,7 +323,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Creates a input stream from an queue of RDDs. In each batch,
+ * Creates an input stream from a queue of RDDs. In each batch,
* it will process either one or all of the RDDs returned by the queue.
*
* NOTE: changes to the queue after the stream is created will not be recognized.
@@ -340,7 +339,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Creates a input stream from an queue of RDDs. In each batch,
+ * Creates an input stream from a queue of RDDs. In each batch,
* it will process either one or all of the RDDs returned by the queue.
*
* NOTE: changes to the queue after the stream is created will not be recognized.
@@ -357,7 +356,7 @@ class JavaStreamingContext(val ssc: StreamingContext) {
}
/**
- * Creates a input stream from an queue of RDDs. In each batch,
+ * Creates an input stream from a queue of RDDs. In each batch,
* it will process either one or all of the RDDs returned by the queue.
*
* NOTE: changes to the queue after the stream is created will not be recognized.
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index 71a4c5c93e76a..6bff56a9d332a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -37,8 +37,9 @@ import org.apache.spark.streaming.Duration
* A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
* sequence of RDDs (of the same type) representing a continuous stream of data (see
* org.apache.spark.rdd.RDD in the Spark core documentation for more details on RDDs).
- * DStreams can either be created from live data (such as, data from Kafka, Flume, sockets, HDFS)
- * or it can be generated by transforming existing DStreams using operations such as `map`,
+ * DStreams can either be created from live data (such as data from TCP sockets, Kafka, Flume,
+ * etc.) using a [[org.apache.spark.streaming.StreamingContext]] or they can be generated by
+ * transforming existing DStreams using operations such as `map`,
* `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each DStream
* periodically generates a RDD, either from live data or by transforming the RDD generated by a
* parent DStream.
@@ -540,7 +541,6 @@ abstract class DStream[T: ClassTag] (
* on each RDD of 'this' DStream.
*/
def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = {
- //new TransformedDStream(this, context.sparkContext.clean(transformFunc))
val cleanedF = context.sparkContext.clean(transformFunc)
val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => {
assert(rdds.length == 1)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index f57762321c40e..fb9df2f48eae3 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -18,20 +18,17 @@
package org.apache.spark.streaming.dstream
import org.apache.spark.streaming.StreamingContext._
-import org.apache.spark.streaming.dstream._
import org.apache.spark.{Partitioner, HashPartitioner}
import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.{ClassTags, RDD, PairRDDFunctions}
-import org.apache.spark.storage.StorageLevel
+import org.apache.spark.rdd.RDD
import scala.collection.mutable.ArrayBuffer
-import scala.reflect.{ClassTag, classTag}
+import scala.reflect.ClassTag
-import org.apache.hadoop.mapred.{JobConf, OutputFormat}
+import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.hadoop.mapred.OutputFormat
-import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.conf.Configuration
import org.apache.spark.streaming.{Time, Duration}
@@ -108,7 +105,7 @@ extends Serializable {
/**
* Combine elements of each key in DStream's RDDs using custom functions. This is similar to the
* combineByKey for RDDs. Please refer to combineByKey in
- * [[org.apache.spark.rdd.PairRDDFunctions]] for more information.
+ * org.apache.spark.rdd.PairRDDFunctions in the Spark core documentation for more information.
*/
def combineByKey[C: ClassTag](
createCombiner: V => C,
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala
index fdf5371a89587..79ed696814f07 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ActorReceiver.scala
@@ -44,40 +44,49 @@ object ReceiverSupervisorStrategy {
/**
* A receiver trait to be mixed in with your Actor to gain access to
- * pushBlock API.
+ * the API for pushing received data into Spark Streaming to be processed.
*
* Find more details at: http://spark-project.org/docs/latest/streaming-custom-receivers.html
*
* @example {{{
* class MyActor extends Actor with Receiver{
* def receive {
- * case anything :String => pushBlock(anything)
+ * case anything: String => pushBlock(anything)
* }
* }
- * //Can be plugged in actorStream as follows
+ *
+ * // Can be used with an actorStream as follows
* ssc.actorStream[String](Props(new MyActor),"MyActorReceiver")
*
* }}}
*
- * @note An important point to note:
- * Since Actor may exist outside the spark framework, It is thus user's responsibility
+ * @note Since an Actor may exist outside the Spark framework, it is the user's responsibility
* to ensure type safety, i.e. the parametrized type of pushBlock and the InputDStream
* should be the same.
- *
*/
-trait Receiver { self: Actor ⇒
+trait Receiver {
+
+ self: Actor ⇒ // to ensure that this can be added to Actor classes only
+
+ /**
+ * Push an iterator of received data into Spark Streaming for processing
+ */
def pushBlock[T: ClassTag](iter: Iterator[T]) {
context.parent ! Data(iter)
}
+ /**
+ * Push a single item of received data into Spark Streaming for processing
+ */
def pushBlock[T: ClassTag](data: T) {
context.parent ! Data(data)
}
-
}
/**
- * Statistics for querying the supervisor about state of workers
+ * Statistics for querying the supervisor about the state of workers. Used in
+ * conjunction with `StreamingContext.actorStream` and
+ * [[org.apache.spark.streaming.receivers.Receiver]].
*/
case class Statistics(numberOfMsgs: Int,
numberOfWorkers: Int,
@@ -96,17 +105,15 @@ private[streaming] case class Data[T: ClassTag](data: T)
* his own Actor to run as receiver for Spark Streaming input source.
*
* This starts a supervisor actor which starts workers and also provides
- * [http://doc.akka.io/docs/akka/2.0.5/scala/fault-tolerance.html fault-tolerance].
+ * [http://doc.akka.io/docs/akka/snapshot/scala/fault-tolerance.html fault-tolerance].
*
- * Here's a way to start more supervisor/workers as its children.
+ * Here's a way to start more supervisor/workers as its children.
*
* @example {{{
* context.parent ! Props(new Supervisor)
* }}} OR {{{
- * context.parent ! Props(new Worker,"Worker")
+ * context.parent ! Props(new Worker, "Worker")
* }}}
- *
- *
*/
private[streaming] class ActorReceiver[T: ClassTag](
props: Props,
From 94ae25d4e66762e11881cdc9ed5c7cab6325eebb Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 11:33:11 -0800
Subject: [PATCH 041/133] Merge pull request #470 from
tgravescs/fix_spark_examples_yarn
Only log an error on a missing jar, to allow the Spark examples to run on YARN.
Right now, to run the Spark examples on YARN you have to use the --addJars option and put the jar in HDFS. To make that nicer, so the user doesn't have to specify the --addJars option, this changes it to simply log an error instead of throwing.
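For clarity, a self-contained sketch of the relaxed control flow (the object name, the serveJar helper, and the local addedJars map are placeholders standing in for SparkContext internals so the snippet compiles on its own; it is not the actual implementation):

    import java.io.File
    import scala.collection.mutable

    object AddJarSketch {
      private val addedJars = mutable.Map[String, Long]()
      // Placeholder for env.httpFileServer.addJar; may throw if the file is missing.
      private def serveJar(f: File): String = f.toURI.toString

      def addJar(path: String): Unit = {
        // Log-and-continue instead of throwing, so jobs whose jar is already
        // the app jar (like the examples on YARN) still run.
        val key =
          try serveJar(new File(path))
          catch {
            case e: Exception =>
              Console.err.println(s"Error adding jar ($e), was the --addJars option used?")
              null
          }
        if (key != null) {
          addedJars(key) = System.currentTimeMillis
          println(s"Added JAR $path at $key with timestamp ${addedJars(key)}")
        }
      }
    }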
(cherry picked from commit 792d9084e2bc9f778a00a56fa7dcfe4084153aea)
Signed-off-by: Patrick Wendell
---
.../main/scala/org/apache/spark/SparkContext.scala | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index ddd7d60d96bd5..923b4ed68839c 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -708,8 +708,11 @@ class SparkContext(
env.httpFileServer.addJar(new File(fileName))
} catch {
case e: Exception => {
+ // For now just log an error but allow it to go through so the Spark examples work.
+ // The Spark examples don't really need the jar distributed since it's also
+ // the app jar.
logError("Error adding jar (" + e + "), was the --addJars option used?")
- throw e
+ null
}
}
} else {
@@ -722,8 +725,10 @@ class SparkContext(
path
}
}
- addedJars(key) = System.currentTimeMillis
- logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key))
+ if (key != null) {
+ addedJars(key) = System.currentTimeMillis
+ logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key))
+ }
}
}
From 4b4011b5df8320fbd6e5009101df8b95aa106139 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 12:59:20 -0800
Subject: [PATCH 042/133] Revert "[maven-release-plugin] prepare for next
development iteration"
This reverts commit 34ae65b06128077751ec2b923c9740a429d8299d.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 303b33f89167d891293479eb9c980b6f52664878 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 12:59:22 -0800
Subject: [PATCH 043/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit 00c847af1d4be2fe5fad887a57857eead1e517dc.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From 130b543573e480e625f975ba0192ae78de69e963 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 13:00:44 -0800
Subject: [PATCH 044/133] Updating CHANGES.txt
---
CHANGES.txt | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index e6baf7bee61f2..2f234d205cf5c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,14 @@ Spark Change Log
Release 0.9.0-incubating
+ 94ae25d Sun Jan 19 11:33:51 2014 -0800
+ Merge pull request #470 from tgravescs/fix_spark_examples_yarn
+ [Only log error on missing jar to allow spark examples to jar.]
+
+ 0f077b5 Sun Jan 19 10:30:29 2014 -0800
+ Merge pull request #458 from tdas/docs-update
+ [Updated java API docs for streaming, along with very minor changes in the code examples.]
+
03019d1 Sat Jan 18 16:29:43 2014 -0800
Merge pull request #459 from srowen/UpdaterL2Regularization
[Correct L2 regularized weight update with canonical form]
From a7760eff4ea6a474cab68896a88550f63bae8b0d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 13:15:33 -0800
Subject: [PATCH 045/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 50b88ffcc6f80c86438b19788ec0eaf8f3a10ee4 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sun, 19 Jan 2014 13:15:39 -0800
Subject: [PATCH 046/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From f1379479998a40d8774b014459e58a90c82b2feb Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Mon, 20 Jan 2014 21:44:29 -0800
Subject: [PATCH 047/133] Merge pull request #483 from pwendell/gitignore
Restricting /lib to top level directory in .gitignore
This patch was proposed by Sean Mackrory.
(cherry picked from commit 7373ffb5e794d3163d3f8d1801836c891e0d6cca)
Signed-off-by: Patrick Wendell
---
.gitignore | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index 39635d7eefbe7..3d178992123da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,4 +44,4 @@ derby.log
dist/
spark-*-bin.tar.gz
unit-tests.log
-lib/
+/lib/
From 410ba06ff0d7c7bfd31621f6d7d95d7eab00cb1a Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Mon, 20 Jan 2014 22:25:50 -0800
Subject: [PATCH 048/133] Merge pull request #482 from
tdas/streaming-example-fix
Added StreamingContext.awaitTermination to streaming examples
StreamingContext.start() currently starts a non-daemon thread, which prevents termination of a Spark Streaming program even after the main function has exited. Since the expected behavior of a streaming program is to run until explicitly killed, this was more or less fine when Spark Streaming applications were launched from the command line. However, when launched in Yarn-standalone mode, this did not work, as the driver effectively got terminated when the main function exited. So the Spark Streaming examples did not work on YARN.
This addition to the examples ensures that they work on YARN, and also makes it clear that calling StreamingContext.awaitTermination() is necessary for Spark Streaming programs to keep running.
The true bug fix of making sure all threads started by Spark Streaming are daemon threads is left for post-0.9.
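To make the required pattern concrete, a minimal sketch of what the examples now do (the object name, master URL, app name, host, and port are placeholders):

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object AwaitTerminationSketch {
      def main(args: Array[String]) {
        val conf = new SparkConf().setMaster("local[2]").setAppName("AwaitTerminationSketch")
        val ssc = new StreamingContext(conf, Seconds(1))
        val lines = ssc.socketTextStream("localhost", 9999)
        lines.count().print()
        ssc.start()
        // Block the main thread until the context is stopped or fails, so the
        // driver does not exit prematurely (e.g. in Yarn-standalone mode).
        ssc.awaitTermination()
      }
    }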
(cherry picked from commit 0367981d47761cdccd8a44fc6fe803079979c5e3)
Signed-off-by: Patrick Wendell
---
.../org/apache/spark/streaming/examples/JavaFlumeEventCount.java | 1 +
.../org/apache/spark/streaming/examples/JavaKafkaWordCount.java | 1 +
.../apache/spark/streaming/examples/JavaNetworkWordCount.java | 1 +
.../org/apache/spark/streaming/examples/JavaQueueStream.java | 1 +
.../org/apache/spark/streaming/examples/ActorWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/FlumeEventCount.scala | 1 +
.../org/apache/spark/streaming/examples/HdfsWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/KafkaWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/MQTTWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/NetworkWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/RawNetworkGrep.scala | 1 +
.../spark/streaming/examples/RecoverableNetworkWordCount.scala | 1 +
.../spark/streaming/examples/StatefulNetworkWordCount.scala | 1 +
.../org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala | 1 +
.../org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala | 1 +
.../org/apache/spark/streaming/examples/TwitterPopularTags.scala | 1 +
.../org/apache/spark/streaming/examples/ZeroMQWordCount.scala | 1 +
17 files changed, 17 insertions(+)
diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
index 7b5a243e26414..f061001dd264d 100644
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaFlumeEventCount.java
@@ -70,5 +70,6 @@ public String call(Long in) {
}).print();
ssc.start();
+ ssc.awaitTermination();
}
}
diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java
index 04f62ee204145..2ffd351b4e498 100644
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java
@@ -104,5 +104,6 @@ public Integer call(Integer i1, Integer i2) {
wordCounts.print();
jssc.start();
+ jssc.awaitTermination();
}
}
diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
index 349d826ab5df7..7777c9832abd3 100644
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java
@@ -84,5 +84,6 @@ public Integer call(Integer i1, Integer i2) {
wordCounts.print();
ssc.start();
+ ssc.awaitTermination();
}
}
diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
index e2d55f1a4e180..26c44620abec1 100644
--- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
+++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java
@@ -80,5 +80,6 @@ public Integer call(Integer i1, Integer i2) {
reducedStream.print();
ssc.start();
+ ssc.awaitTermination();
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
index 5a4aa7f3a2524..a5888811cc5ea 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ActorWordCount.scala
@@ -171,5 +171,6 @@ object ActorWordCount {
lines.flatMap(_.split("\\s+")).map(x => (x, 1)).reduceByKey(_ + _).print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
index a59be7899dd37..11c3aaad3c8a8 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/FlumeEventCount.scala
@@ -60,5 +60,6 @@ object FlumeEventCount {
stream.count().map(cnt => "Received " + cnt + " flume events." ).print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
index 704b315ef8b22..954bcc9b6ef5d 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/HdfsWordCount.scala
@@ -50,6 +50,7 @@ object HdfsWordCount {
val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
wordCounts.print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
index 4a3d81c09a122..d9cb7326bb97d 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/KafkaWordCount.scala
@@ -61,6 +61,7 @@ object KafkaWordCount {
wordCounts.print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala
index 78b49fdcf1eb3..eb61caf8c85b9 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/MQTTWordCount.scala
@@ -101,5 +101,6 @@ object MQTTWordCount {
val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
wordCounts.print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
index 02264757123db..5656d487a57cc 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
@@ -54,5 +54,6 @@ object NetworkWordCount {
val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
wordCounts.print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
index 99b79c3949a4e..cdd7547d0d3b4 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RawNetworkGrep.scala
@@ -61,5 +61,6 @@ object RawNetworkGrep {
union.filter(_.contains("the")).count().foreachRDD(r =>
println("Grep count: " + r.collect().mkString))
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
index 8c5d0bd56845b..aa82bf3c6bd8e 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
@@ -114,5 +114,6 @@ object RecoverableNetworkWordCount {
createContext(master, ip, port, outputPath)
})
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
index 1183eba84686b..88f1cef89b318 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/StatefulNetworkWordCount.scala
@@ -65,5 +65,6 @@ object StatefulNetworkWordCount {
val stateDstream = wordDstream.updateStateByKey[Int](updateFunc)
stateDstream.print()
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
index 483c4d311810f..bbd44948b6fa5 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdCMS.scala
@@ -110,5 +110,6 @@ object TwitterAlgebirdCMS {
})
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
index 94c2bf29ac433..a0094d460feec 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterAlgebirdHLL.scala
@@ -87,5 +87,6 @@ object TwitterAlgebirdHLL {
})
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
index 8a70d4a978cd4..896d010c68f18 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/TwitterPopularTags.scala
@@ -69,5 +69,6 @@ object TwitterPopularTags {
})
ssc.start()
+ ssc.awaitTermination()
}
}
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
index 12d2a1084f900..85b4ce5e81950 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/ZeroMQWordCount.scala
@@ -91,5 +91,6 @@ object ZeroMQWordCount {
val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
wordCounts.print()
ssc.start()
+ ssc.awaitTermination()
}
}
From e5f8917fd75e1c6f596db7f1bbca5760e3b6c301 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Mon, 20 Jan 2014 23:34:35 -0800
Subject: [PATCH 049/133] Merge pull request #484 from tdas/run-example-fix
Made run-example respect SPARK_JAVA_OPTS and SPARK_MEM.
The bin/run-example script was not passing Java properties set through SPARK_JAVA_OPTS on to the example. This is important for examples like Twitter**, as the Twitter authentication information must be set through Java properties (a short illustration follows below). Hence, the same JAVA_OPTS code used in the bin/spark-class script was added to run-example.
Also added SPARK_MEM, in case someone wants to run the example with a different amount of memory. This can be removed if it is not in tune with the intended semantics of the run-example scripts.
@matei Please check this soon, I want this to go in 0.9-rc4.
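As a brief illustration of why the forwarding matters: the Twitter examples read their credentials from Java system properties. The property name below is the usual twitter4j one and is an assumption here, not something taken from this patch; the object name is a placeholder.

    // Illustrative only: fails fast if SPARK_JAVA_OPTS did not carry the
    // credential through to the example's JVM.
    object TwitterCredentialCheck {
      def main(args: Array[String]) {
        val consumerKey = System.getProperty("twitter4j.oauth.consumerKey")
        require(consumerKey != null,
          "Pass -Dtwitter4j.oauth.consumerKey=... via SPARK_JAVA_OPTS before bin/run-example")
      }
    }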
(cherry picked from commit c67d3d8beb101fff2ea6397b759dd1bfdf9fcfa5)
Signed-off-by: Patrick Wendell
---
bin/run-example | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/bin/run-example b/bin/run-example
index 2e9d51440bd5d..adba7dd97aaf8 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -76,11 +76,20 @@ else
fi
fi
+# Set JAVA_OPTS to be able to load native libraries and to set heap size
+JAVA_OPTS="$SPARK_JAVA_OPTS"
+JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
+# Load extra JAVA_OPTS from conf/java-opts, if it exists
+if [ -e "$FWDIR/conf/java-opts" ] ; then
+ JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+fi
+export JAVA_OPTS
+
if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
echo -n "Spark Command: "
- echo "$RUNNER" -cp "$CLASSPATH" "$@"
+ echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
echo "========================================"
echo
fi
-exec "$RUNNER" -cp "$CLASSPATH" "$@"
+exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
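
With JAVA_OPTS forwarded as above, an example launched through run-example can read -D flags supplied via SPARK_JAVA_OPTS from its JVM system properties. A small sketch of the reading side; the property name below is hypothetical and used purely for illustration, not one the Twitter examples actually read:

    // Sketch of consuming a Java system property forwarded through SPARK_JAVA_OPTS,
    // e.g. SPARK_JAVA_OPTS="-Dexample.twitter.consumerKey=abc" bin/run-example ...
    val consumerKey = Option(System.getProperty("example.twitter.consumerKey")).getOrElse {
      sys.error("Pass -Dexample.twitter.consumerKey=... via SPARK_JAVA_OPTS")
    }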
From b6fd3cd33d667e6fda517c7c491462b68c48145c Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 00:09:42 -0800
Subject: [PATCH 050/133] Merge pull request #480 from pwendell/0.9-fixes
Handful of 0.9 fixes
This patch contains a handful of fixes for Spark 0.9.0 based on the last release candidate.
@mridulm gets credit for reporting most of the issues here. Many of the fixes are based on his work in #477 and follow-up discussion with him.
(cherry picked from commit 77b986f6616e6f7e0be9e46bb355829686f9845b)
Signed-off-by: Patrick Wendell
---
.../scala/org/apache/spark/SparkConf.scala | 10 ++++-
.../org/apache/spark/rdd/CheckpointRDD.scala | 4 +-
.../spark/scheduler/TaskSetManager.scala | 2 +-
.../spark/storage/BlockObjectWriter.scala | 5 ++-
.../spark/storage/ShuffleBlockManager.scala | 16 ++++++--
.../collection/ExternalAppendOnlyMap.scala | 41 +++++++++++++++----
docs/configuration.md | 11 ++---
7 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index 951bfd79d0d6a..45d19bcbfa6f2 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -192,7 +192,15 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
}
/** Get all akka conf variables set on this SparkConf */
- def getAkkaConf: Seq[(String, String)] = getAll.filter {case (k, v) => k.startsWith("akka.")}
+ def getAkkaConf: Seq[(String, String)] =
+ /* This is currently undocumented. If we want to make this public we should consider
+ * nesting options under the spark namespace to avoid conflicts with user akka options.
+ * Otherwise users configuring their own akka code via system properties could mess up
+ * spark's akka options.
+ *
+ * E.g. spark.akka.option.x.y.x = "value"
+ */
+ getAll.filter {case (k, v) => k.startsWith("akka.")}
/** Does the configuration contain a given parameter? */
def contains(key: String): Boolean = settings.contains(key)
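
The filter above is essentially a key-prefix scan over the settings map. A standalone sketch of the same idea, with sample keys made up purely for illustration:

    // Prefix-based filtering of configuration entries, mirroring getAkkaConf's shape.
    val settings = Map(
      "akka.loglevel" -> "INFO",          // picked up by the "akka." prefix filter
      "spark.executor.memory" -> "2g")    // ignored by it
    val akkaConf: Seq[(String, String)] =
      settings.toSeq.filter { case (k, _) => k.startsWith("akka.") }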
diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
index 83109d1a6f853..30e578dd93e8d 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
@@ -43,8 +43,8 @@ class CheckpointRDD[T: ClassTag](sc: SparkContext, val checkpointPath: String)
val numPartitions =
// listStatus can throw exception if path does not exist.
if (fs.exists(cpath)) {
- val dirContents = fs.listStatus(cpath)
- val partitionFiles = dirContents.map(_.getPath.toString).filter(_.contains("part-")).sorted
+ val dirContents = fs.listStatus(cpath).map(_.getPath)
+ val partitionFiles = dirContents.filter(_.getName.startsWith("part-")).map(_.toString).sorted
val numPart = partitionFiles.size
if (numPart > 0 && (! partitionFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) ||
! partitionFiles(numPart-1).endsWith(CheckpointRDD.splitIdToFile(numPart-1)))) {
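
The change above matters because a checkpoint directory can contain auxiliary entries whose full paths merely contain "part-"; matching on the file name prefix is stricter. A minimal sketch of the listing logic, assuming a Hadoop FileSystem and a directory path chosen only for illustration:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}

    // List only real partition files ("part-00000", "part-00001", ...) under a directory.
    def listPartitionFiles(dir: String): Array[String] = {
      val path = new Path(dir)
      val fs = path.getFileSystem(new Configuration())
      fs.listStatus(path)
        .map(_.getPath)
        .filter(_.getName.startsWith("part-"))  // prefix match on the file name, not the full path
        .map(_.toString)
        .sorted
    }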
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
index fc0ee070897dd..73d6972bb4204 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
@@ -233,7 +233,7 @@ private[spark] class TaskSetManager(
/** Check whether a task is currently running an attempt on a given host */
private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = {
- !taskAttempts(taskIndex).exists(_.host == host)
+ taskAttempts(taskIndex).exists(_.host == host)
}
/**
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
index 48cec4be4111c..530712b5df4a8 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
@@ -138,6 +138,7 @@ private[spark] class DiskBlockObjectWriter(
fos = null
ts = null
objOut = null
+ initialized = false
}
}
@@ -145,7 +146,8 @@ private[spark] class DiskBlockObjectWriter(
override def commit(): Long = {
if (initialized) {
- // NOTE: Flush the serializer first and then the compressed/buffered output stream
+ // NOTE: Because Kryo doesn't flush the underlying stream we explicitly flush both the
+ // serializer stream and the lower level stream.
objOut.flush()
bs.flush()
val prevPos = lastValidPosition
@@ -175,7 +177,6 @@ private[spark] class DiskBlockObjectWriter(
}
override def fileSegment(): FileSegment = {
- val bytesWritten = lastValidPosition - initialPosition
new FileSegment(file, initialPosition, bytesWritten)
}
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
index e2b24298a55e8..bb07c8cb134cc 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
@@ -23,10 +23,11 @@ import java.util.concurrent.atomic.AtomicInteger
import scala.collection.JavaConversions._
+import org.apache.spark.Logging
import org.apache.spark.serializer.Serializer
-import org.apache.spark.util.{MetadataCleanerType, MetadataCleaner, TimeStampedHashMap}
-import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector}
import org.apache.spark.storage.ShuffleBlockManager.ShuffleFileGroup
+import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashMap}
+import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector}
/** A group of writers for a ShuffleMapTask, one writer per reducer. */
private[spark] trait ShuffleWriterGroup {
@@ -58,7 +59,7 @@ private[spark] trait ShuffleWriterGroup {
* files within a ShuffleFileGroups associated with the block's reducer.
*/
private[spark]
-class ShuffleBlockManager(blockManager: BlockManager) {
+class ShuffleBlockManager(blockManager: BlockManager) extends Logging {
def conf = blockManager.conf
// Turning off shuffle file consolidation causes all shuffle Blocks to get their own file.
@@ -106,6 +107,15 @@ class ShuffleBlockManager(blockManager: BlockManager) {
Array.tabulate[BlockObjectWriter](numBuckets) { bucketId =>
val blockId = ShuffleBlockId(shuffleId, mapId, bucketId)
val blockFile = blockManager.diskBlockManager.getFile(blockId)
+ // Because of previous failures, the shuffle file may already exist on this machine.
+ // If so, remove it.
+ if (blockFile.exists) {
+ if (blockFile.delete()) {
+ logInfo(s"Removed existing shuffle file $blockFile")
+ } else {
+ logWarning(s"Failed to remove existing shuffle file $blockFile")
+ }
+ }
blockManager.getDiskWriter(blockId, blockFile, serializer, bufferSize)
}
}
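
The defensive delete above is a small but important pattern: a writer that appends to a leftover file from a failed attempt would silently corrupt the shuffle output. A standalone sketch of the same idea using plain java.io.File, with the logging calls replaced by stderr purely for illustration:

    import java.io.File

    // Remove any leftover file from a previous failed attempt before handing it to a writer.
    def prepareFreshFile(file: File): File = {
      if (file.exists) {
        if (file.delete()) {
          System.err.println(s"Removed existing file $file")
        } else {
          System.err.println(s"Failed to remove existing file $file")
        }
      }
      file
    }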
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 64e9b436f04a2..fb73636162af9 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -20,14 +20,15 @@ package org.apache.spark.util.collection
import java.io._
import java.util.Comparator
-import it.unimi.dsi.fastutil.io.FastBufferedInputStream
-
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
+import it.unimi.dsi.fastutil.io.FastBufferedInputStream
+
import org.apache.spark.{Logging, SparkEnv}
-import org.apache.spark.serializer.{KryoDeserializationStream, KryoSerializationStream, Serializer}
-import org.apache.spark.storage.{BlockId, BlockManager, DiskBlockManager, DiskBlockObjectWriter}
+import org.apache.spark.io.LZFCompressionCodec
+import org.apache.spark.serializer.{KryoDeserializationStream, Serializer}
+import org.apache.spark.storage.{BlockId, BlockManager, DiskBlockObjectWriter}
/**
* An append-only map that spills sorted content to disk when there is insufficient space for it
@@ -153,9 +154,33 @@ private[spark] class ExternalAppendOnlyMap[K, V, C](
.format(mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else ""))
val (blockId, file) = diskBlockManager.createTempBlock()
- val compressStream: OutputStream => OutputStream = blockManager.wrapForCompression(blockId, _)
+ /* IMPORTANT NOTE: To avoid having to keep large object graphs in memory, this approach
+ * closes and re-opens serialization and compression streams within each file. This makes some
+ * assumptions about the way that serialization and compression streams work, specifically:
+ *
+ * 1) The serializer input streams do not pre-fetch data from the underlying stream.
+ *
+ * 2) Several compression streams can be opened, written to, and flushed on the write path
+ * while only one compression input stream is created on the read path
+ *
+ * In practice (1) is only true for Java, so we add a special fix below to make it work for
+ * Kryo. (2) is only true for LZF and not Snappy, so we coerce this to use LZF.
+ *
+ * To avoid making these assumptions we should create an intermediate stream that batches
+ * objects and sends an EOF to the higher layer streams to make sure they never prefetch data.
+ * This is a bit tricky because, within each segment, you'd need to track the total number
+ * of bytes written and then re-wind and write it at the beginning of the segment. This will
+ * most likely require using the file channel API.
+ */
+
+ val shouldCompress = blockManager.shouldCompress(blockId)
+ val compressionCodec = new LZFCompressionCodec(sparkConf)
+ def wrapForCompression(outputStream: OutputStream) = {
+ if (shouldCompress) compressionCodec.compressedOutputStream(outputStream) else outputStream
+ }
+
def getNewWriter = new DiskBlockObjectWriter(blockId, file, serializer, fileBufferSize,
- compressStream, syncWrites)
+ wrapForCompression, syncWrites)
var writer = getNewWriter
var objectsWritten = 0
@@ -168,6 +193,8 @@ private[spark] class ExternalAppendOnlyMap[K, V, C](
if (objectsWritten == serializerBatchSize) {
writer.commit()
+ writer.close()
+ _diskBytesSpilled += writer.bytesWritten
writer = getNewWriter
objectsWritten = 0
}
@@ -176,8 +203,8 @@ private[spark] class ExternalAppendOnlyMap[K, V, C](
if (objectsWritten > 0) writer.commit()
} finally {
// Partial failures cannot be tolerated; do not revert partial writes
- _diskBytesSpilled += writer.bytesWritten
writer.close()
+ _diskBytesSpilled += writer.bytesWritten
}
currentMap = new SizeTrackingAppendOnlyMap[K, C]
spilledMaps.append(new DiskMapIterator(file, blockId))
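
The core of the change above is the conditional compression wrapper: when compression is enabled, spills always use the LZF codec so that many output streams can be flushed independently while a single input stream reads them back. A standalone sketch of the wrapper's shape, using GZIP from the JDK as a stand-in codec purely for illustration (the patch itself pins Spark's LZFCompressionCodec):

    import java.io.OutputStream
    import java.util.zip.GZIPOutputStream  // stand-in for the LZF codec used by the patch

    // Wrap an output stream in a compression stream only when compression is enabled.
    def wrapForCompression(shouldCompress: Boolean)(out: OutputStream): OutputStream =
      if (shouldCompress) new GZIPOutputStream(out) else out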
diff --git a/docs/configuration.md b/docs/configuration.md
index 00864906b3c7b..be548e372dcd4 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -158,7 +158,9 @@ Apart from these, the following properties are also available, and may be useful
spark.shuffle.spill.compress |
true |
- Whether to compress data spilled during shuffles.
+ Whether to compress data spilled during shuffles. If enabled, spill compression
+ always uses the `org.apache.spark.io.LZFCompressionCodec` codec,
+ regardless of the value of `spark.io.compression.codec`.
|
@@ -379,13 +381,6 @@ Apart from these, the following properties are also available, and may be useful
Too large a value decreases parallelism during broadcast (makes it slower); however, if it is too small, BlockManager
might take a performance hit.
-
- akka.x.y.... |
- value |
-
- An arbitrary akka configuration can be set directly on spark conf and it is applied for all the ActorSystems created spark wide for that SparkContext and its assigned executors as well.
- |
-
spark.shuffle.consolidateFiles |
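
To illustrate the documented behaviour, a user can enable spill compression on a SparkConf as below; per the note above, spills stay on LZF even if spark.io.compression.codec points elsewhere. The master, app name, and codec value are example settings only:

    import org.apache.spark.SparkConf

    // With spill compression enabled, spills always use LZF regardless of the codec configured below.
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("SpillCompressionSketch")
      .set("spark.shuffle.spill.compress", "true")
      .set("spark.io.compression.codec", "org.apache.spark.io.SnappyCompressionCodec")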
From 808a9f0b231aa395cde06ca4b08c2de5bd88d1fe Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 00:12:32 -0800
Subject: [PATCH 051/133] Revert "[maven-release-plugin] prepare for next
development iteration"
This reverts commit 50b88ffcc6f80c86438b19788ec0eaf8f3a10ee4.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 6b31963a7afe6170cbd7517781fa0d7765796aab Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 00:12:35 -0800
Subject: [PATCH 052/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit a7760eff4ea6a474cab68896a88550f63bae8b0d.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From 32545664bec32749f8a6056748cfa7a9dafcd292 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 00:13:09 -0800
Subject: [PATCH 053/133] Updating CHANGES.txt file
---
CHANGES.txt | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index 2f234d205cf5c..f42ed7f9d14d7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,22 @@ Spark Change Log
Release 0.9.0-incubating
+ b6fd3cd Tue Jan 21 00:12:01 2014 -0800
+ Merge pull request #480 from pwendell/0.9-fixes
+ [Handful of 0.9 fixes]
+
+ e5f8917 Mon Jan 20 23:35:07 2014 -0800
+ Merge pull request #484 from tdas/run-example-fix
+ [Made run-example respect SPARK_JAVA_OPTS and SPARK_MEM.]
+
+ 410ba06 Mon Jan 20 22:26:14 2014 -0800
+ Merge pull request #482 from tdas/streaming-example-fix
+ [Added StreamingContext.awaitTermination to streaming examples]
+
+ f137947 Mon Jan 20 22:24:07 2014 -0800
+ Merge pull request #483 from pwendell/gitignore
+ [Restricting /lib to top level directory in .gitignore]
+
94ae25d Sun Jan 19 11:33:51 2014 -0800
Merge pull request #470 from tgravescs/fix_spark_examples_yarn
[Only log error on missing jar to allow spark examples to jar.]
From 7653cf39e851cb63f1fda899cd7904ffab9a7a51 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 01:23:01 -0800
Subject: [PATCH 054/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 51b5e048517eb049f1b2fb515f98923d1dcdff3d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 01:29:55 -0800
Subject: [PATCH 055/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit 7653cf39e851cb63f1fda899cd7904ffab9a7a51.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From cd65c150d7e4be55695ca54a1709d577fdd509ba Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 21 Jan 2014 10:10:06 +0000
Subject: [PATCH 056/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From ab188b57b75267b118a45dc1c5ce35c1839d2ad6 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 21 Jan 2014 10:10:12 +0000
Subject: [PATCH 057/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From a3bd28a7a0ca82db2bd40d09c6cc244e1464b425 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 02:15:50 -0800
Subject: [PATCH 058/133] Revert "[maven-release-plugin] prepare for next
development iteration"
This reverts commit ab188b57b75267b118a45dc1c5ce35c1839d2ad6.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 334a8481f9e8e7e9323bd1141d729231e21aa0a7 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 21 Jan 2014 02:19:04 -0800
Subject: [PATCH 059/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit cd65c150d7e4be55695ca54a1709d577fdd509ba.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..4eca4747ea96a 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From 0771df675363c69622404cb514bd751bc90526af Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 21 Jan 2014 10:30:33 +0000
Subject: [PATCH 060/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 4eca4747ea96a..daadc73774e8d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From e1dc5bedb48d2ac9b9e1c9b3b1a15c41b7d90ad8 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 21 Jan 2014 10:30:40 +0000
Subject: [PATCH 061/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index daadc73774e8d..67a8e015872cc 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From dd533c9e42a01319ebcbc0b01c3190a25784a2e1 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 22 Jan 2014 14:10:07 -0800
Subject: [PATCH 062/133] Merge pull request #478 from sryza/sandy-spark-1033
SPARK-1033. Ask for cores in Yarn container requests
Tested on a pseudo-distributed cluster against the Fair Scheduler and observed a worker taking more than a single core.
(cherry picked from commit 576c4a4c502ccca5fcd6b3552dd93cc2f3c50666)
Signed-off-by: Patrick Wendell
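As a rough sketch of the request shape this change produces (not part of the patch; it uses the Hadoop 2.2 stable YARN client API as in yarn/stable, and the memory/core numbers are illustrative):

    import org.apache.hadoop.yarn.api.records.{Priority, Resource}
    import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest
    import org.apache.hadoop.yarn.util.Records

    // Illustrative values; in Spark these come from the worker memory/core settings.
    val workerMemoryMb   = 1024
    val memoryOverheadMb = 384
    val workerCores      = 2

    // The container capability now carries both memory and vCores instead of memory only.
    val capability = Resource.newInstance(workerMemoryMb + memoryOverheadMb, workerCores)
    val priority = Records.newRecord(classOf[Priority])
    priority.setPriority(1)

    // One such request per requested worker; hosts and racks are left unconstrained (null) here.
    val request = new ContainerRequest(capability, null, null, priority)

Whether the vCores figure actually influences scheduling still depends on the YARN scheduler in use and its configuration, as the docs change below notes.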
---
docs/running-on-yarn.md | 2 +-
.../apache/spark/deploy/yarn/YarnAllocationHandler.scala | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 3bd62646bab06..5dadd54492dca 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -133,7 +133,7 @@ See [Building Spark with Maven](building-with-maven.html) for instructions on ho
# Important Notes
-- We do not requesting container resources based on the number of cores. Thus the numbers of cores given via command line arguments cannot be guaranteed.
+- Before Hadoop 2.2, YARN does not support cores in container resource requests. Thus, when running against an earlier version, the numbers of cores given via command line arguments cannot be passed to YARN. Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
- The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored.
- The --files and --archives options support specifying file names with the # similar to Hadoop. For example you can specify: --files localtest.txt#appSees.txt and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name appSees.txt and your application should use the name as appSees.txt to reference it when running on YARN.
- The --addJars option allows the SparkContext.addJar function to work if you are using it with local files. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.
diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 738ff986d85a5..1ac61124cb028 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -532,15 +532,15 @@ private[yarn] class YarnAllocationHandler(
priority: Int
): ArrayBuffer[ContainerRequest] = {
- val memoryResource = Records.newRecord(classOf[Resource])
- memoryResource.setMemory(workerMemory + YarnAllocationHandler.MEMORY_OVERHEAD)
+ val memoryRequest = workerMemory + YarnAllocationHandler.MEMORY_OVERHEAD
+ val resource = Resource.newInstance(memoryRequest, workerCores)
val prioritySetting = Records.newRecord(classOf[Priority])
prioritySetting.setPriority(priority)
val requests = new ArrayBuffer[ContainerRequest]()
for (i <- 0 until numWorkers) {
- requests += new ContainerRequest(memoryResource, hosts, racks, prioritySetting)
+ requests += new ContainerRequest(resource, hosts, racks, prioritySetting)
}
requests
}
From dc5857a36d2b12208120a0968b8fc1cb38043894 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 22 Jan 2014 14:32:59 -0800
Subject: [PATCH 063/133] Merge pull request #492 from skicavs/master
fixed job name and usage information for the JavaSparkPi example
(cherry picked from commit a1238bb5fcab763d32c729ea7ed99cb3c05c896f)
Signed-off-by: Patrick Wendell
---
.../src/main/java/org/apache/spark/examples/JavaSparkPi.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
index 3ec4a58d48ed6..ac8df02c4630b 100644
--- a/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
@@ -30,11 +30,11 @@ public final class JavaSparkPi {
public static void main(String[] args) throws Exception {
if (args.length == 0) {
- System.err.println("Usage: JavaLogQuery [slices]");
+ System.err.println("Usage: JavaSparkPi [slices]");
System.exit(1);
}
- JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaLogQuery",
+ JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaSparkPi",
System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaSparkPi.class));
int slices = (args.length == 2) ? Integer.parseInt(args[1]) : 2;
From 828f7b46ea7c6f7cf7f72bc03c61eeb9b929ac07 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 22 Jan 2014 15:45:04 -0800
Subject: [PATCH 064/133] Merge pull request #495 from
srowen/GraphXCommonsMathDependency
Fix graphx Commons Math dependency
`graphx` depends on Commons Math (2.x) in `SVDPlusPlus.scala`. However, the module doesn't declare this dependency. It happens to work because the artifact is pulled in transitively by Hadoop, but that stopped being true a month or so ago: building against recent Hadoop fails. (That's how we noticed.)
The simple fix is to declare the dependency, as it should be. It's also worth noting that `commons-math` is the older 2.x line, while `commons-math3` is where the newer 3.x releases live. It is a drop-in replacement, but with a different artifact and package name. Changing the single usage to `commons-math3` works and the tests pass, which isn't surprising, so it's probably worth making that switch as well. (A comment in some test code already references `commons-math3`, FWIW.)
It does raise another question though: `mllib` looks like it uses the `jblas` `DoubleMatrix` for general purpose vector/matrix stuff. Should `graphx` really use Commons Math for this? Beyond the tiny scope here but worth asking.
(cherry picked from commit 3184facdc5b1e9ded89133f9b1e4985c9ac78c55)
Signed-off-by: Patrick Wendell
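Concretely, the change amounts to declaring the artifact explicitly and moving to the 3.x package. A minimal sketch of what that looks like for a build and a caller (the vector example is illustrative, not from the patch):

    // sbt: declare the dependency instead of relying on Hadoop's transitive copy
    libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2"

    // source: the 3.x line lives under the math3 package
    import org.apache.commons.math3.linear.ArrayRealVector

    val v = new ArrayRealVector(Array(1.0, 2.0, 3.0))
    val norm = v.getNorm  // sqrt(14), about 3.742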
---
graphx/pom.xml | 5 +++++
.../main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 2 +-
project/SparkBuild.scala | 5 ++++-
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 67a8e015872cc..9073cf7aa66ec 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -37,6 +37,11 @@
spark-core_${scala.binary.version}
${project.version}
+
+ org.apache.commons
+ commons-math3
+ 3.2
+
org.eclipse.jetty
jetty-server
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index c327ce7935147..79280f836f21d 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -18,7 +18,7 @@
package org.apache.spark.graphx.lib
import scala.util.Random
-import org.apache.commons.math.linear._
+import org.apache.commons.math3.linear._
import org.apache.spark.rdd._
import org.apache.spark.graphx._
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 075e912f2d96c..b891ffab3259b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -316,7 +316,10 @@ object SparkBuild extends Build {
) ++ assemblySettings ++ extraAssemblySettings
def graphxSettings = sharedSettings ++ Seq(
- name := "spark-graphx"
+ name := "spark-graphx",
+ libraryDependencies ++= Seq(
+ "org.apache.commons" % "commons-math3" % "3.2"
+ )
)
def bagelSettings = sharedSettings ++ Seq(
From 51960b88dcb4b00623ae49d859f9690f42665713 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Wed, 22 Jan 2014 19:37:29 -0800
Subject: [PATCH 065/133] Merge pull request #496 from pwendell/master
Fix bug in worker clean-up in UI
Introduced in d5a96fec (/cc @aarondav).
This should be picked into 0.8 and 0.9 as well. The bug causes old (zombie) workers on a node to not disappear immediately from the UI when a new one registers.
(cherry picked from commit a1cd185122602c96fb8ae16c0b506702283bf6e2)
Signed-off-by: Patrick Wendell
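The cause is visible in the hunk below: `host` and `port` resolve to the Master's own constructor parameters rather than to the registering worker, so the dead-worker filter compared against the wrong endpoint. A compressed sketch of the corrected clean-up, with names as in the hunk:

    // Drop stale (DEAD) entries for the same node as the newly registering worker.
    // The old code accidentally compared against the Master's own host and port.
    workers.filter { w =>
      w.host == worker.host && w.port == worker.port && w.state == WorkerState.DEAD
    }.foreach { w => workers -= w }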
---
core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index d9ea96afcf52a..7be774980afa3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -515,7 +515,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
// There may be one or more refs to dead workers on this same node (w/ different ID's),
// remove them.
workers.filter { w =>
- (w.host == host && w.port == port) && (w.state == WorkerState.DEAD)
+ (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
}.foreach { w =>
workers -= w
}
From 7a62353247a613b6ed1c4942858a311f965b74b4 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 19:08:34 -0800
Subject: [PATCH 066/133] Merge pull request #501 from
JoshRosen/cartesian-rdd-fixes
Fix two bugs in PySpark cartesian(): SPARK-978 and SPARK-1034
This pull request fixes two bugs in PySpark's `cartesian()` method:
- [SPARK-978](https://spark-project.atlassian.net/browse/SPARK-978): PySpark's cartesian method throws ClassCastException exception
- [SPARK-1034](https://spark-project.atlassian.net/browse/SPARK-1034): Py4JException on PySpark Cartesian Result
The JIRAs have more details describing the fixes.
(cherry picked from commit cad3002fead89d3c9a8de4fa989e88f367bc0b05)
Signed-off-by: Patrick Wendell
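The serializer change below relies on a peek-and-prepend idiom: inspect the first element to choose a branch by runtime type, then re-attach it so the full sequence is still written. A stripped-down sketch of the idiom (the describeAll name and the println bodies are illustrative, not from the patch):

    def describeAll[T](iter: Iterator[T]): Unit = {
      if (iter.hasNext) {
        val first = iter.next()
        val all = Seq(first).iterator ++ iter  // put the consumed element back in front
        first match {
          case _: Array[Byte] =>
            all.foreach(e => println("byte array of length " + e.asInstanceOf[Array[Byte]].length))
          case _: String =>
            all.foreach(s => println("string: " + s))
          case other =>
            throw new IllegalArgumentException("Unexpected element type " + other.getClass)
        }
      }
    }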
---
.../apache/spark/api/java/JavaPairRDD.scala | 3 +-
.../apache/spark/api/python/PythonRDD.scala | 59 ++++++++++++-------
python/pyspark/tests.py | 16 +++++
3 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 0fb7e195b34c4..f430a33db1e4a 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -49,8 +49,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kClassTag: ClassTag[K
override def wrapRDD(rdd: RDD[(K, V)]): JavaPairRDD[K, V] = JavaPairRDD.fromRDD(rdd)
- override val classTag: ClassTag[(K, V)] =
- implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[Tuple2[K, V]]]
+ override val classTag: ClassTag[(K, V)] = rdd.elementClassTag
import JavaPairRDD._
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 82527fe663848..57bde8d85f1a8 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -78,9 +78,7 @@ private[spark] class PythonRDD[T: ClassTag](
dataOut.writeInt(command.length)
dataOut.write(command)
// Data values
- for (elem <- parent.iterator(split, context)) {
- PythonRDD.writeToStream(elem, dataOut)
- }
+ PythonRDD.writeIteratorToStream(parent.iterator(split, context), dataOut)
dataOut.flush()
worker.shutdownOutput()
} catch {
@@ -206,20 +204,43 @@ private[spark] object PythonRDD {
JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism))
}
- def writeToStream(elem: Any, dataOut: DataOutputStream) {
- elem match {
- case bytes: Array[Byte] =>
- dataOut.writeInt(bytes.length)
- dataOut.write(bytes)
- case pair: (Array[Byte], Array[Byte]) =>
- dataOut.writeInt(pair._1.length)
- dataOut.write(pair._1)
- dataOut.writeInt(pair._2.length)
- dataOut.write(pair._2)
- case str: String =>
- dataOut.writeUTF(str)
- case other =>
- throw new SparkException("Unexpected element type " + other.getClass)
+ def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) {
+ // The right way to implement this would be to use TypeTags to get the full
+ // type of T. Since I don't want to introduce breaking changes throughout the
+ // entire Spark API, I have to use this hacky approach:
+ if (iter.hasNext) {
+ val first = iter.next()
+ val newIter = Seq(first).iterator ++ iter
+ first match {
+ case arr: Array[Byte] =>
+ newIter.asInstanceOf[Iterator[Array[Byte]]].foreach { bytes =>
+ dataOut.writeInt(bytes.length)
+ dataOut.write(bytes)
+ }
+ case string: String =>
+ newIter.asInstanceOf[Iterator[String]].foreach { str =>
+ dataOut.writeUTF(str)
+ }
+ case pair: Tuple2[_, _] =>
+ pair._1 match {
+ case bytePair: Array[Byte] =>
+ newIter.asInstanceOf[Iterator[Tuple2[Array[Byte], Array[Byte]]]].foreach { pair =>
+ dataOut.writeInt(pair._1.length)
+ dataOut.write(pair._1)
+ dataOut.writeInt(pair._2.length)
+ dataOut.write(pair._2)
+ }
+ case stringPair: String =>
+ newIter.asInstanceOf[Iterator[Tuple2[String, String]]].foreach { pair =>
+ dataOut.writeUTF(pair._1)
+ dataOut.writeUTF(pair._2)
+ }
+ case other =>
+ throw new SparkException("Unexpected Tuple2 element type " + pair._1.getClass)
+ }
+ case other =>
+ throw new SparkException("Unexpected element type " + first.getClass)
+ }
}
}
@@ -230,9 +251,7 @@ private[spark] object PythonRDD {
def writeToFile[T](items: Iterator[T], filename: String) {
val file = new DataOutputStream(new FileOutputStream(filename))
- for (item <- items) {
- writeToStream(item, file)
- }
+ writeIteratorToStream(items, file)
file.close()
}
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 7acb6eaf10931..acd1ca5676209 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -152,6 +152,22 @@ def test_save_as_textfile_with_unicode(self):
raw_contents = ''.join(input(glob(tempFile.name + "/part-0000*")))
self.assertEqual(x, unicode(raw_contents.strip(), "utf-8"))
+ def test_transforming_cartesian_result(self):
+ # Regression test for SPARK-1034
+ rdd1 = self.sc.parallelize([1, 2])
+ rdd2 = self.sc.parallelize([3, 4])
+ cart = rdd1.cartesian(rdd2)
+ result = cart.map(lambda (x, y): x + y).collect()
+
+ def test_cartesian_on_textfile(self):
+ # Regression test for
+ path = os.path.join(SPARK_HOME, "python/test_support/hello.txt")
+ a = self.sc.textFile(path)
+ result = a.cartesian(a).collect()
+ (x, y) = result[0]
+ self.assertEqual("Hello World!", x.strip())
+ self.assertEqual("Hello World!", y.strip())
+
class TestIO(PySparkTestCase):
From e8d3f2b2fbc3bb57f694aad1e8e6667cf7c7318a Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 19:11:59 -0800
Subject: [PATCH 067/133] Merge pull request #502 from pwendell/clone-1
Remove Hadoop object cloning and warn users making Hadoop RDDs.
The code introduced in #359 used Hadoop's WritableUtils.clone() to
duplicate objects when reading from Hadoop files. Some users have
reported exceptions when cloning data in various file formats,
including Avro and another custom format.
This patch removes that functionality to ensure stability for the
0.9 release. Instead, it puts a clear warning in the documentation
that copying may be necessary for Hadoop data sets.
(cherry picked from commit c3196171f3dffde6c9e67e3d35c398a01fbba846)
Conflicts:
core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
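The user-facing consequence, per the scaladoc notes added throughout: copy the reused Writable objects before caching or collecting. A minimal sketch, assuming an existing SparkContext `sc` and an illustrative path and key/value types:

    import org.apache.hadoop.io.{IntWritable, Text}

    // The RecordReader re-uses the same Text/IntWritable instances for every record,
    // so materialize plain values with a map before cache() or collect().
    val pairs  = sc.sequenceFile("hdfs:///data/counts", classOf[Text], classOf[IntWritable])
    val cached = pairs.map { case (k, v) => (k.toString, v.get) }.cache()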
---
.../scala/org/apache/spark/SparkContext.scala | 127 +++++++++++-------
.../spark/api/java/JavaSparkContext.scala | 52 ++++++-
.../org/apache/spark/rdd/HadoopRDD.scala | 28 +---
.../org/apache/spark/rdd/NewHadoopRDD.scala | 24 +---
.../scala/org/apache/spark/util/Utils.scala | 23 +---
5 files changed, 138 insertions(+), 116 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 923b4ed68839c..566472e597958 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -341,7 +341,7 @@ class SparkContext(
*/
def textFile(path: String, minSplits: Int = defaultMinSplits): RDD[String] = {
hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
- minSplits, cloneRecords = false).map(pair => pair._2.toString)
+ minSplits).map(pair => pair._2.toString)
}
/**
@@ -354,33 +354,37 @@ class SparkContext(
* @param keyClass Class of the keys
* @param valueClass Class of the values
* @param minSplits Minimum number of Hadoop Splits to generate.
- * @param cloneRecords If true, Spark will clone the records produced by Hadoop RecordReader.
- * Most RecordReader implementations reuse wrapper objects across multiple
- * records, and can cause problems in RDD collect or aggregation operations.
- * By default the records are cloned in Spark. However, application
- * programmers can explicitly disable the cloning for better performance.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
- def hadoopRDD[K: ClassTag, V: ClassTag](
+ def hadoopRDD[K, V](
conf: JobConf,
inputFormatClass: Class[_ <: InputFormat[K, V]],
keyClass: Class[K],
valueClass: Class[V],
- minSplits: Int = defaultMinSplits,
- cloneRecords: Boolean = true
+ minSplits: Int = defaultMinSplits
): RDD[(K, V)] = {
// Add necessary security credentials to the JobConf before broadcasting it.
SparkHadoopUtil.get.addCredentials(conf)
- new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits, cloneRecords)
+ new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
}
- /** Get an RDD for a Hadoop file with an arbitrary InputFormat */
- def hadoopFile[K: ClassTag, V: ClassTag](
+ /** Get an RDD for a Hadoop file with an arbitrary InputFormat
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ * */
+ def hadoopFile[K, V](
path: String,
inputFormatClass: Class[_ <: InputFormat[K, V]],
keyClass: Class[K],
valueClass: Class[V],
- minSplits: Int = defaultMinSplits,
- cloneRecords: Boolean = true
+ minSplits: Int = defaultMinSplits
): RDD[(K, V)] = {
// A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
@@ -392,8 +396,7 @@ class SparkContext(
inputFormatClass,
keyClass,
valueClass,
- minSplits,
- cloneRecords)
+ minSplits)
}
/**
@@ -403,16 +406,20 @@ class SparkContext(
* {{{
* val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minSplits)
* }}}
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def hadoopFile[K, V, F <: InputFormat[K, V]]
- (path: String, minSplits: Int, cloneRecords: Boolean = true)
+ (path: String, minSplits: Int)
(implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = {
hadoopFile(path,
fm.runtimeClass.asInstanceOf[Class[F]],
km.runtimeClass.asInstanceOf[Class[K]],
vm.runtimeClass.asInstanceOf[Class[V]],
- minSplits,
- cloneRecords)
+ minSplits)
}
/**
@@ -422,68 +429,91 @@ class SparkContext(
* {{{
* val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path)
* }}}
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
- def hadoopFile[K, V, F <: InputFormat[K, V]](path: String, cloneRecords: Boolean = true)
+ def hadoopFile[K, V, F <: InputFormat[K, V]](path: String)
(implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] =
- hadoopFile[K, V, F](path, defaultMinSplits, cloneRecords)
+ hadoopFile[K, V, F](path, defaultMinSplits)
/** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */
def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]
- (path: String, cloneRecords: Boolean = true)
+ (path: String)
(implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = {
newAPIHadoopFile(
path,
fm.runtimeClass.asInstanceOf[Class[F]],
km.runtimeClass.asInstanceOf[Class[K]],
- vm.runtimeClass.asInstanceOf[Class[V]],
- cloneRecords = cloneRecords)
+ vm.runtimeClass.asInstanceOf[Class[V]])
}
/**
* Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
* and extra configuration options to pass to the input format.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
- def newAPIHadoopFile[K: ClassTag, V: ClassTag, F <: NewInputFormat[K, V]](
+ def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]](
path: String,
fClass: Class[F],
kClass: Class[K],
vClass: Class[V],
- conf: Configuration = hadoopConfiguration,
- cloneRecords: Boolean = true): RDD[(K, V)] = {
+ conf: Configuration = hadoopConfiguration): RDD[(K, V)] = {
val job = new NewHadoopJob(conf)
NewFileInputFormat.addInputPath(job, new Path(path))
val updatedConf = job.getConfiguration
- new NewHadoopRDD(this, fClass, kClass, vClass, updatedConf, cloneRecords)
+ new NewHadoopRDD(this, fClass, kClass, vClass, updatedConf)
}
/**
* Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
* and extra configuration options to pass to the input format.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
- def newAPIHadoopRDD[K: ClassTag, V: ClassTag, F <: NewInputFormat[K, V]](
+ def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]](
conf: Configuration = hadoopConfiguration,
fClass: Class[F],
kClass: Class[K],
- vClass: Class[V],
- cloneRecords: Boolean = true): RDD[(K, V)] = {
- new NewHadoopRDD(this, fClass, kClass, vClass, conf, cloneRecords)
- }
-
- /** Get an RDD for a Hadoop SequenceFile with given key and value types. */
- def sequenceFile[K: ClassTag, V: ClassTag](path: String,
+ vClass: Class[V]): RDD[(K, V)] = {
+ new NewHadoopRDD(this, fClass, kClass, vClass, conf)
+ }
+
+ /** Get an RDD for a Hadoop SequenceFile with given key and value types.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ */
+ def sequenceFile[K, V](path: String,
keyClass: Class[K],
valueClass: Class[V],
- minSplits: Int,
- cloneRecords: Boolean = true
+ minSplits: Int
): RDD[(K, V)] = {
val inputFormatClass = classOf[SequenceFileInputFormat[K, V]]
- hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits, cloneRecords)
+ hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits)
}
- /** Get an RDD for a Hadoop SequenceFile with given key and value types. */
- def sequenceFile[K: ClassTag, V: ClassTag](path: String, keyClass: Class[K], valueClass: Class[V],
- cloneRecords: Boolean = true): RDD[(K, V)] =
- sequenceFile(path, keyClass, valueClass, defaultMinSplits, cloneRecords)
+ /** Get an RDD for a Hadoop SequenceFile with given key and value types.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ * */
+ def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]
+ ): RDD[(K, V)] =
+ sequenceFile(path, keyClass, valueClass, defaultMinSplits)
/**
* Version of sequenceFile() for types implicitly convertible to Writables through a
@@ -500,9 +530,14 @@ class SparkContext(
* have a parameterized singleton object). We use functions instead to create a new converter
* for the appropriate type. In addition, we pass the converter a ClassTag of its type to
* allow it to figure out the Writable class to use in the subclass case.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def sequenceFile[K, V]
- (path: String, minSplits: Int = defaultMinSplits, cloneRecords: Boolean = true)
+ (path: String, minSplits: Int = defaultMinSplits)
(implicit km: ClassTag[K], vm: ClassTag[V],
kcf: () => WritableConverter[K], vcf: () => WritableConverter[V])
: RDD[(K, V)] = {
@@ -511,7 +546,7 @@ class SparkContext(
val format = classOf[SequenceFileInputFormat[Writable, Writable]]
val writables = hadoopFile(path, format,
kc.writableClass(km).asInstanceOf[Class[Writable]],
- vc.writableClass(vm).asInstanceOf[Class[Writable]], minSplits, cloneRecords)
+ vc.writableClass(vm).asInstanceOf[Class[Writable]], minSplits)
writables.map { case (k, v) => (kc.convert(k), vc.convert(v)) }
}
@@ -1024,7 +1059,7 @@ object SparkContext {
implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = new AsyncRDDActions(rdd)
implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag](
- rdd: RDD[(K, V)]) =
+ rdd: RDD[(K, V)]) =
new SequenceFileRDDFunctions(rdd)
implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassTag, V: ClassTag](
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 33c931b1a7c8b..c777472cd7310 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -137,7 +137,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
*/
def textFile(path: String, minSplits: Int): JavaRDD[String] = sc.textFile(path, minSplits)
- /**Get an RDD for a Hadoop SequenceFile with given key and value types. */
+ /** Get an RDD for a Hadoop SequenceFile with given key and value types.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ * */
def sequenceFile[K, V](path: String,
keyClass: Class[K],
valueClass: Class[V],
@@ -148,7 +154,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
new JavaPairRDD(sc.sequenceFile(path, keyClass, valueClass, minSplits))
}
- /**Get an RDD for a Hadoop SequenceFile. */
+ /** Get an RDD for a Hadoop SequenceFile.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ */
def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]):
JavaPairRDD[K, V] = {
implicit val kcm: ClassTag[K] = ClassTag(keyClass)
@@ -184,6 +196,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
* Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
* etc).
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def hadoopRDD[K, V, F <: InputFormat[K, V]](
conf: JobConf,
@@ -201,6 +218,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
* Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
* etc).
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def hadoopRDD[K, V, F <: InputFormat[K, V]](
conf: JobConf,
@@ -213,7 +235,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
new JavaPairRDD(sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass))
}
- /** Get an RDD for a Hadoop file with an arbitrary InputFormat */
+ /** Get an RDD for a Hadoop file with an arbitrary InputFormat.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ */
def hadoopFile[K, V, F <: InputFormat[K, V]](
path: String,
inputFormatClass: Class[F],
@@ -226,7 +254,13 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
new JavaPairRDD(sc.hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits))
}
- /** Get an RDD for a Hadoop file with an arbitrary InputFormat */
+ /** Get an RDD for a Hadoop file with an arbitrary InputFormat
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
+ */
def hadoopFile[K, V, F <: InputFormat[K, V]](
path: String,
inputFormatClass: Class[F],
@@ -242,6 +276,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
/**
* Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
* and extra configuration options to pass to the input format.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]](
path: String,
@@ -257,6 +296,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork
/**
* Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
* and extra configuration options to pass to the input format.
+ *
+ * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
+ * record, directly caching the returned RDD will create many references to the same object.
+ * If you plan to directly cache Hadoop writable objects, you should first copy them using
+ * a `map` function.
*/
def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]](
conf: Configuration,
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index dbe76f34316ae..ad74d4636fb1b 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -19,10 +19,7 @@ package org.apache.spark.rdd
import java.io.EOFException
-import scala.reflect.ClassTag
-
import org.apache.hadoop.conf.{Configuration, Configurable}
-import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapred.InputFormat
import org.apache.hadoop.mapred.InputSplit
import org.apache.hadoop.mapred.JobConf
@@ -34,7 +31,6 @@ import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.util.NextIterator
-import org.apache.spark.util.Utils.cloneWritables
/**
@@ -64,21 +60,15 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
* @param keyClass Class of the key associated with the inputFormatClass.
* @param valueClass Class of the value associated with the inputFormatClass.
* @param minSplits Minimum number of Hadoop Splits (HadoopRDD partitions) to generate.
- * @param cloneRecords If true, Spark will clone the records produced by Hadoop RecordReader.
- * Most RecordReader implementations reuse wrapper objects across multiple
- * records, and can cause problems in RDD collect or aggregation operations.
- * By default the records are cloned in Spark. However, application
- * programmers can explicitly disable the cloning for better performance.
*/
-class HadoopRDD[K: ClassTag, V: ClassTag](
+class HadoopRDD[K, V](
sc: SparkContext,
broadcastedConf: Broadcast[SerializableWritable[Configuration]],
initLocalJobConfFuncOpt: Option[JobConf => Unit],
inputFormatClass: Class[_ <: InputFormat[K, V]],
keyClass: Class[K],
valueClass: Class[V],
- minSplits: Int,
- cloneRecords: Boolean = true)
+ minSplits: Int)
extends RDD[(K, V)](sc, Nil) with Logging {
def this(
@@ -87,8 +77,7 @@ class HadoopRDD[K: ClassTag, V: ClassTag](
inputFormatClass: Class[_ <: InputFormat[K, V]],
keyClass: Class[K],
valueClass: Class[V],
- minSplits: Int,
- cloneRecords: Boolean) = {
+ minSplits: Int) = {
this(
sc,
sc.broadcast(new SerializableWritable(conf))
@@ -97,8 +86,7 @@ class HadoopRDD[K: ClassTag, V: ClassTag](
inputFormatClass,
keyClass,
valueClass,
- minSplits,
- cloneRecords)
+ minSplits)
}
protected val jobConfCacheKey = "rdd_%d_job_conf".format(id)
@@ -170,9 +158,7 @@ class HadoopRDD[K: ClassTag, V: ClassTag](
// Register an on-task-completion callback to close the input stream.
context.addOnCompleteCallback{ () => closeIfNeeded() }
val key: K = reader.createKey()
- val keyCloneFunc = cloneWritables[K](jobConf)
val value: V = reader.createValue()
- val valueCloneFunc = cloneWritables[V](jobConf)
override def getNext() = {
try {
finished = !reader.next(key, value)
@@ -180,11 +166,7 @@ class HadoopRDD[K: ClassTag, V: ClassTag](
case eof: EOFException =>
finished = true
}
- if (cloneRecords) {
- (keyCloneFunc(key.asInstanceOf[Writable]), valueCloneFunc(value.asInstanceOf[Writable]))
- } else {
- (key, value)
- }
+ (key, value)
}
override def close() {
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index 992bd4aa0ad5d..d1fff296878c3 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -20,15 +20,11 @@ package org.apache.spark.rdd
import java.text.SimpleDateFormat
import java.util.Date
-import scala.reflect.ClassTag
-
import org.apache.hadoop.conf.{Configurable, Configuration}
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapreduce._
import org.apache.spark.{InterruptibleIterator, Logging, Partition, SerializableWritable, SparkContext, TaskContext}
-import org.apache.spark.util.Utils.cloneWritables
-
private[spark]
class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputSplit with Writable)
@@ -48,19 +44,13 @@ class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputS
* @param keyClass Class of the key associated with the inputFormatClass.
* @param valueClass Class of the value associated with the inputFormatClass.
* @param conf The Hadoop configuration.
- * @param cloneRecords If true, Spark will clone the records produced by Hadoop RecordReader.
- * Most RecordReader implementations reuse wrapper objects across multiple
- * records, and can cause problems in RDD collect or aggregation operations.
- * By default the records are cloned in Spark. However, application
- * programmers can explicitly disable the cloning for better performance.
*/
-class NewHadoopRDD[K: ClassTag, V: ClassTag](
+class NewHadoopRDD[K, V](
sc : SparkContext,
inputFormatClass: Class[_ <: InputFormat[K, V]],
keyClass: Class[K],
valueClass: Class[V],
- @transient conf: Configuration,
- cloneRecords: Boolean)
+ @transient conf: Configuration)
extends RDD[(K, V)](sc, Nil)
with SparkHadoopMapReduceUtil
with Logging {
@@ -107,8 +97,6 @@ class NewHadoopRDD[K: ClassTag, V: ClassTag](
// Register an on-task-completion callback to close the input stream.
context.addOnCompleteCallback(() => close())
- val keyCloneFunc = cloneWritables[K](conf)
- val valueCloneFunc = cloneWritables[V](conf)
var havePair = false
var finished = false
@@ -125,13 +113,7 @@ class NewHadoopRDD[K: ClassTag, V: ClassTag](
throw new java.util.NoSuchElementException("End of stream")
}
havePair = false
- val key = reader.getCurrentKey
- val value = reader.getCurrentValue
- if (cloneRecords) {
- (keyCloneFunc(key.asInstanceOf[Writable]), valueCloneFunc(value.asInstanceOf[Writable]))
- } else {
- (key, value)
- }
+ (reader.getCurrentKey, reader.getCurrentValue)
}
private def close() {
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index caa9bf4c9280e..64acfbd3526f3 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -26,7 +26,7 @@ import scala.collection.JavaConversions._
import scala.collection.Map
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
-import scala.reflect.{classTag, ClassTag}
+import scala.reflect.ClassTag
import com.google.common.io.Files
import com.google.common.util.concurrent.ThreadFactoryBuilder
@@ -46,27 +46,6 @@ import org.apache.spark.{SparkConf, SparkException, Logging}
*/
private[spark] object Utils extends Logging {
- /**
- * We try to clone for most common types of writables and we call WritableUtils.clone otherwise
- * intention is to optimize, for example for NullWritable there is no need and for Long, int and
- * String creating a new object with value set would be faster.
- */
- def cloneWritables[T: ClassTag](conf: Configuration): Writable => T = {
- val cloneFunc = classTag[T] match {
- case ClassTag(_: Text) =>
- (w: Writable) => new Text(w.asInstanceOf[Text].getBytes).asInstanceOf[T]
- case ClassTag(_: LongWritable) =>
- (w: Writable) => new LongWritable(w.asInstanceOf[LongWritable].get).asInstanceOf[T]
- case ClassTag(_: IntWritable) =>
- (w: Writable) => new IntWritable(w.asInstanceOf[IntWritable].get).asInstanceOf[T]
- case ClassTag(_: NullWritable) =>
- (w: Writable) => w.asInstanceOf[T] // TODO: should we clone this ?
- case _ =>
- (w: Writable) => WritableUtils.clone(w, conf).asInstanceOf[T] // slower way of cloning.
- }
- cloneFunc
- }
-
/** Serialize an object using Java serialization */
def serialize[T](o: T): Array[Byte] = {
val bos = new ByteArrayOutputStream()
From e66d4c27cadccaa8bb8b2a9ab486889ce2de37d0 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 19:47:00 -0800
Subject: [PATCH 068/133] Merge pull request #503 from pwendell/master
Fix bug on read-side of external sort when using Snappy.
This case wasn't handled correctly and this patch fixes it.
(cherry picked from commit 3d6e75419330d27435becfdf8cfb0b6d20d56cf8)
Signed-off-by: Patrick Wendell
---
.../spark/util/collection/ExternalAppendOnlyMap.scala | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index fb73636162af9..3d9b09ec33e2a 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -358,7 +358,15 @@ private[spark] class ExternalAppendOnlyMap[K, V, C](
private class DiskMapIterator(file: File, blockId: BlockId) extends Iterator[(K, C)] {
val fileStream = new FileInputStream(file)
val bufferedStream = new FastBufferedInputStream(fileStream, fileBufferSize)
- val compressedStream = blockManager.wrapForCompression(blockId, bufferedStream)
+
+ val shouldCompress = blockManager.shouldCompress(blockId)
+ val compressionCodec = new LZFCompressionCodec(sparkConf)
+ val compressedStream =
+ if (shouldCompress) {
+ compressionCodec.compressedInputStream(bufferedStream)
+ } else {
+ bufferedStream
+ }
var deserializeStream = ser.deserializeStream(compressedStream)
var objectsRead = 0
From d0a105d4e5ec3c84bc5ee8c8d55cca40f43cb9b8 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 20:53:18 -0800
Subject: [PATCH 069/133] Merge pull request #505 from JoshRosen/SPARK-1026
Deprecate mapPartitionsWithSplit in PySpark (SPARK-1026)
This commit deprecates `mapPartitionsWithSplit` in PySpark (see [SPARK-1026](https://spark-project.atlassian.net/browse/SPARK-1026)) and removes the remaining references to it from the docs.
(cherry picked from commit 05be7047744c88e64e7e6bd973f9bcfacd00da5f)
Signed-off-by: Patrick Wendell
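For comparison, the Scala transformation the docs change below now points to, with illustrative values and assuming an existing SparkContext `sc`:

    val rdd = sc.parallelize(1 to 4, numSlices = 2)
    // The function receives the partition index plus an iterator over that partition.
    rdd.mapPartitionsWithIndex { (idx, it) => it.map(x => (idx, x)) }.collect()
    // e.g. Array((0,1), (0,2), (1,3), (1,4))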
---
docs/scala-programming-guide.md | 4 ++--
python/pyspark/rdd.py | 25 +++++++++++++++++++++----
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md
index c1ef46a1cded7..7c0f67bc99e83 100644
--- a/docs/scala-programming-guide.md
+++ b/docs/scala-programming-guide.md
@@ -168,9 +168,9 @@ The following tables list the transformations and actions currently supported (s
Iterator[T] => Iterator[U] when running on an RDD of type T.
- mapPartitionsWithSplit(func) |
+ mapPartitionsWithIndex(func) |
Similar to mapPartitions, but also provides func with an integer value representing the index of
- the split, so func must be of type (Int, Iterator[T]) => Iterator[U] when running on an RDD of type T.
+ the partition, so func must be of type (Int, Iterator[T]) => Iterator[U] when running on an RDD of type T.
|
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 6fb4a7b3be25d..1ad4b5298758b 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -27,6 +27,7 @@
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
+import warnings
from pyspark.serializers import NoOpSerializer, CartesianDeserializer, \
BatchedSerializer, CloudPickleSerializer, pack_long
@@ -179,7 +180,7 @@ def flatMap(self, f, preservesPartitioning=False):
[(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)]
"""
def func(s, iterator): return chain.from_iterable(imap(f, iterator))
- return self.mapPartitionsWithSplit(func, preservesPartitioning)
+ return self.mapPartitionsWithIndex(func, preservesPartitioning)
def mapPartitions(self, f, preservesPartitioning=False):
"""
@@ -191,10 +192,24 @@ def mapPartitions(self, f, preservesPartitioning=False):
[3, 7]
"""
def func(s, iterator): return f(iterator)
- return self.mapPartitionsWithSplit(func)
+ return self.mapPartitionsWithIndex(func)
+
+ def mapPartitionsWithIndex(self, f, preservesPartitioning=False):
+ """
+ Return a new RDD by applying a function to each partition of this RDD,
+ while tracking the index of the original partition.
+
+ >>> rdd = sc.parallelize([1, 2, 3, 4], 4)
+ >>> def f(splitIndex, iterator): yield splitIndex
+ >>> rdd.mapPartitionsWithIndex(f).sum()
+ 6
+ """
+ return PipelinedRDD(self, f, preservesPartitioning)
def mapPartitionsWithSplit(self, f, preservesPartitioning=False):
"""
+ Deprecated: use mapPartitionsWithIndex instead.
+
Return a new RDD by applying a function to each partition of this RDD,
while tracking the index of the original partition.
@@ -203,7 +218,9 @@ def mapPartitionsWithSplit(self, f, preservesPartitioning=False):
>>> rdd.mapPartitionsWithSplit(f).sum()
6
"""
- return PipelinedRDD(self, f, preservesPartitioning)
+ warnings.warn("mapPartitionsWithSplit is deprecated; "
+ "use mapPartitionsWithIndex instead", DeprecationWarning, stacklevel=2)
+ return self.mapPartitionsWithIndex(f, preservesPartitioning)
def filter(self, f):
"""
@@ -235,7 +252,7 @@ def sample(self, withReplacement, fraction, seed):
>>> sc.parallelize(range(0, 100)).sample(False, 0.1, 2).collect() #doctest: +SKIP
[2, 3, 20, 21, 24, 41, 42, 66, 67, 89, 90, 98]
"""
- return self.mapPartitionsWithSplit(RDDSampler(withReplacement, fraction, seed).func, True)
+ return self.mapPartitionsWithIndex(RDDSampler(withReplacement, fraction, seed).func, True)
# this is ported from scala/spark/RDD.scala
def takeSample(self, withReplacement, num, seed):
From 59921578d535873980f579622db8397103a6038d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 21:53:37 -0800
Subject: [PATCH 070/133] Revert "[maven-release-plugin] prepare for next
development iteration"
This reverts commit e1dc5bedb48d2ac9b9e1c9b3b1a15c41b7d90ad8.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a99e3d2a02569..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 42e624402f77e..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index abf48935cd915..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 967556744c1e6..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 978b99f4a7054..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index a3d5fc64f070e..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 1f416dd8c06d4..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index f23091684f95c..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6a250b3916ead..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 9073cf7aa66ec..b872c7a705a9c 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 41600c4c4b561..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 150dba8d636d8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 346c672165d7d..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 7e10ef6f471be..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index bb8f747ae9328..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 508317b5fc01c..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 04b29c76e5830..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.1-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From c91f44ad3ec25d75a5e4d96f339d7f1e499a361d Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 21:53:42 -0800
Subject: [PATCH 071/133] Revert "[maven-release-plugin] prepare release
v0.9.0-incubating"
This reverts commit 0771df675363c69622404cb514bd751bc90526af.
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..dcd9601fe4a90 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..cb8e79f22535b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..9e5a450d57a47 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..7855706389709 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..443910a03a94e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..23b2fead657e6 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..31b4fa87de772 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..216e6c1d8ff44 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..c240d595742cf 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index b872c7a705a9c..d97dbb804bc99 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..dda3900afebdf 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..70c17e9fc74a8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..2dfe7ac900b83 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..459756912dbe5 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..28f5ef14b1a35 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..aea8b0cddefa2 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..62fe3e274250f 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.0-incubating-SNAPSHOT
../pom.xml
From 2ac96e7ee6fa1ab863c5bcc7eef3706a270a13f9 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Thu, 23 Jan 2014 21:55:49 -0800
Subject: [PATCH 072/133] Updating changes file
---
CHANGES.txt | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index f42ed7f9d14d7..8c78d55ccd862 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,38 @@ Spark Change Log
Release 0.9.0-incubating
+ d0a105d Thu Jan 23 20:53:31 2014 -0800
+ Merge pull request #505 from JoshRosen/SPARK-1026
+ [Deprecate mapPartitionsWithSplit in PySpark (SPARK-1026)]
+
+ e66d4c2 Thu Jan 23 19:47:16 2014 -0800
+ Merge pull request #503 from pwendell/master
+ [Fix bug on read-side of external sort when using Snappy.]
+
+ e8d3f2b Thu Jan 23 19:20:22 2014 -0800
+ Merge pull request #502 from pwendell/clone-1
+ [Remove Hadoop object cloning and warn users making Hadoop RDD's.]
+
+ 7a62353 Thu Jan 23 19:09:25 2014 -0800
+ Merge pull request #501 from JoshRosen/cartesian-rdd-fixes
+ [Fix two bugs in PySpark cartesian(): SPARK-978 and SPARK-1034]
+
+ 51960b8 Wed Jan 22 19:37:50 2014 -0800
+ Merge pull request #496 from pwendell/master
+ [Fix bug in worker clean-up in UI]
+
+ 828f7b4 Wed Jan 22 15:45:18 2014 -0800
+ Merge pull request #495 from srowen/GraphXCommonsMathDependency
+ [Fix graphx Commons Math dependency]
+
+ dc5857a Wed Jan 22 14:33:25 2014 -0800
+ Merge pull request #492 from skicavs/master
+ [fixed job name and usage information for the JavaSparkPi example]
+
+ dd533c9 Wed Jan 22 14:15:58 2014 -0800
+ Merge pull request #478 from sryza/sandy-spark-1033
+ [SPARK-1033. Ask for cores in Yarn container requests]
+
b6fd3cd Tue Jan 21 00:12:01 2014 -0800
Merge pull request #480 from pwendell/0.9-fixes
[Handful of 0.9 fixes]
From 95d28ff3d0d20d9c583e184f9e2c5ae842d8a4d9 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 24 Jan 2014 06:15:08 +0000
Subject: [PATCH 073/133] [maven-release-plugin] prepare release
v0.9.0-incubating
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index dcd9601fe4a90..2faa3ff33ca35 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index cb8e79f22535b..0dcc2983c821f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 9e5a450d57a47..62ceba186ab46 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index 7855706389709..a30d0b3fa0906 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 443910a03a94e..f3da4f5abe101 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 23b2fead657e6..f9c97673ad8ae 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 31b4fa87de772..7e438cca101b5 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 216e6c1d8ff44..2ec96be677946 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index c240d595742cf..6e644a5342325 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index d97dbb804bc99..b872c7a705a9c 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index dda3900afebdf..6f024f6f538ec 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/pom.xml b/pom.xml
index 70c17e9fc74a8..e1cf107965fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- HEAD
+ v0.9.0-incubating
diff --git a/repl/pom.xml b/repl/pom.xml
index 2dfe7ac900b83..0d73cfa84126b 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 459756912dbe5..90b65fc118ec6 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 28f5ef14b1a35..be77741d591ae 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index aea8b0cddefa2..3383a227590c5 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index 62fe3e274250f..cf915a1a259d8 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating-SNAPSHOT
+ 0.9.0-incubating
../pom.xml
From 0f60ef2c4d3c1f3da3cf455fc86a8d603a499117 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 24 Jan 2014 06:15:15 +0000
Subject: [PATCH 074/133] [maven-release-plugin] prepare for next development
iteration
---
assembly/pom.xml | 2 +-
bagel/pom.xml | 2 +-
core/pom.xml | 2 +-
examples/pom.xml | 2 +-
external/flume/pom.xml | 2 +-
external/kafka/pom.xml | 2 +-
external/mqtt/pom.xml | 2 +-
external/twitter/pom.xml | 2 +-
external/zeromq/pom.xml | 2 +-
graphx/pom.xml | 2 +-
mllib/pom.xml | 2 +-
pom.xml | 4 ++--
repl/pom.xml | 2 +-
streaming/pom.xml | 2 +-
tools/pom.xml | 2 +-
yarn/pom.xml | 2 +-
yarn/stable/pom.xml | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2faa3ff33ca35..a99e3d2a02569 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 0dcc2983c821f..42e624402f77e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index 62ceba186ab46..abf48935cd915 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a30d0b3fa0906..967556744c1e6 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index f3da4f5abe101..978b99f4a7054 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index f9c97673ad8ae..a3d5fc64f070e 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 7e438cca101b5..1f416dd8c06d4 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index 2ec96be677946..f23091684f95c 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 6e644a5342325..6a250b3916ead 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index b872c7a705a9c..9073cf7aa66ec 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 6f024f6f538ec..41600c4c4b561 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/pom.xml b/pom.xml
index e1cf107965fb7..150dba8d636d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
pom
Spark Project Parent POM
http://spark.incubator.apache.org/
@@ -40,7 +40,7 @@
scm:git:git@github.com:apache/incubator-spark.git
scm:git:https://git-wip-us.apache.org/repos/asf/incubator-spark.git
scm:git:git@github.com:apache/incubator-spark.git
- v0.9.0-incubating
+ HEAD
diff --git a/repl/pom.xml b/repl/pom.xml
index 0d73cfa84126b..346c672165d7d 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 90b65fc118ec6..7e10ef6f471be 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index be77741d591ae..bb8f747ae9328 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 3383a227590c5..508317b5fc01c 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
diff --git a/yarn/stable/pom.xml b/yarn/stable/pom.xml
index cf915a1a259d8..04b29c76e5830 100644
--- a/yarn/stable/pom.xml
+++ b/yarn/stable/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
yarn-parent_2.10
- 0.9.0-incubating
+ 0.9.1-incubating-SNAPSHOT
../pom.xml
From 5edbd175e07dc9704b1babb9c5e8d97fb644be65 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Tue, 28 Jan 2014 21:30:20 -0800
Subject: [PATCH 075/133] Merge pull request #523 from JoshRosen/SPARK-1043
Switch from MUTF8 to UTF8 in PySpark serializers.
This fixes SPARK-1043, a bug introduced in 0.9.0 where PySpark couldn't serialize strings > 64kB.
This fix was written by @tyro89 and @bouk in #512. This commit squashes and rebases their pull request in order to fix some merge conflicts.
(cherry picked from commit f8c742ce274fbae2a9e616d4c97469b6a22069bb)
Signed-off-by: Patrick Wendell
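The 64 kB limit comes from the old framing: DataOutputStream.writeUTF() (mirrored on the Python side by MUTF8Deserializer's struct.unpack('>H', ...) further down in this patch) prefixes the payload with a 2-byte unsigned length, which caps it at 65535 bytes. A minimal, self-contained Python sketch of that cap; the values are illustrative:
    import struct
    # The old MUTF-8 framing wrote a 2-byte unsigned big-endian length ('>H'),
    # so the largest payload it can describe is 2**16 - 1 = 65535 bytes.
    ok = struct.pack('>H', 65535)        # largest representable length
    try:
        struct.pack('>H', 70000)         # a string encoding to more than 64 kB cannot be framed
    except struct.error, e:
        print "length 70000 does not fit in a 2-byte field:", e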
---
.../apache/spark/api/python/PythonRDD.scala | 18 +++++++---
.../spark/api/python/PythonRDDSuite.scala | 35 +++++++++++++++++++
python/pyspark/context.py | 4 +--
python/pyspark/serializers.py | 6 ++--
python/pyspark/worker.py | 8 ++---
5 files changed, 57 insertions(+), 14 deletions(-)
create mode 100644 core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 57bde8d85f1a8..46d53e3e66f7c 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -62,7 +62,7 @@ private[spark] class PythonRDD[T: ClassTag](
// Partition index
dataOut.writeInt(split.index)
// sparkFilesDir
- dataOut.writeUTF(SparkFiles.getRootDirectory)
+ PythonRDD.writeUTF(SparkFiles.getRootDirectory, dataOut)
// Broadcast variables
dataOut.writeInt(broadcastVars.length)
for (broadcast <- broadcastVars) {
@@ -72,7 +72,9 @@ private[spark] class PythonRDD[T: ClassTag](
}
// Python includes (*.zip and *.egg files)
dataOut.writeInt(pythonIncludes.length)
- pythonIncludes.foreach(dataOut.writeUTF)
+ for (include <- pythonIncludes) {
+ PythonRDD.writeUTF(include, dataOut)
+ }
dataOut.flush()
// Serialized command:
dataOut.writeInt(command.length)
@@ -219,7 +221,7 @@ private[spark] object PythonRDD {
}
case string: String =>
newIter.asInstanceOf[Iterator[String]].foreach { str =>
- dataOut.writeUTF(str)
+ writeUTF(str, dataOut)
}
case pair: Tuple2[_, _] =>
pair._1 match {
@@ -232,8 +234,8 @@ private[spark] object PythonRDD {
}
case stringPair: String =>
newIter.asInstanceOf[Iterator[Tuple2[String, String]]].foreach { pair =>
- dataOut.writeUTF(pair._1)
- dataOut.writeUTF(pair._2)
+ writeUTF(pair._1, dataOut)
+ writeUTF(pair._2, dataOut)
}
case other =>
throw new SparkException("Unexpected Tuple2 element type " + pair._1.getClass)
@@ -244,6 +246,12 @@ private[spark] object PythonRDD {
}
}
+ def writeUTF(str: String, dataOut: DataOutputStream) {
+ val bytes = str.getBytes("UTF-8")
+ dataOut.writeInt(bytes.length)
+ dataOut.write(bytes)
+ }
+
def writeToFile[T](items: java.util.Iterator[T], filename: String) {
import scala.collection.JavaConverters._
writeToFile(items.asScala, filename)
diff --git a/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala b/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala
new file mode 100644
index 0000000000000..1bebfe5ec84ec
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.api.python
+
+import org.scalatest.FunSuite
+import org.scalatest.matchers.ShouldMatchers
+import org.apache.spark.api.python.PythonRDD
+
+import java.io.{ByteArrayOutputStream, DataOutputStream}
+
+class PythonRDDSuite extends FunSuite {
+
+ test("Writing large strings to the worker") {
+ val input: List[String] = List("a"*100000)
+ val buffer = new DataOutputStream(new ByteArrayOutputStream)
+ PythonRDD.writeIteratorToStream(input.iterator, buffer)
+ }
+
+}
+
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index f955aad7a4f12..f318b5d9a73d7 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -27,7 +27,7 @@
from pyspark.conf import SparkConf
from pyspark.files import SparkFiles
from pyspark.java_gateway import launch_gateway
-from pyspark.serializers import PickleSerializer, BatchedSerializer, MUTF8Deserializer
+from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
from pyspark.storagelevel import StorageLevel
from pyspark.rdd import RDD
@@ -234,7 +234,7 @@ def textFile(self, name, minSplits=None):
"""
minSplits = minSplits or min(self.defaultParallelism, 2)
return RDD(self._jsc.textFile(name, minSplits), self,
- MUTF8Deserializer())
+ UTF8Deserializer())
def _checkpointFile(self, name, input_deserializer):
jrdd = self._jsc.checkpointFile(name)
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 2a500ab919bea..8c6ad79059c23 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -261,13 +261,13 @@ class MarshalSerializer(FramedSerializer):
loads = marshal.loads
-class MUTF8Deserializer(Serializer):
+class UTF8Deserializer(Serializer):
"""
- Deserializes streams written by Java's DataOutputStream.writeUTF().
+ Deserializes streams written by getBytes.
"""
def loads(self, stream):
- length = struct.unpack('>H', stream.read(2))[0]
+ length = read_int(stream)
return stream.read(length).decode('utf8')
def load_stream(self, stream):
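Taken together, the new PythonRDD.writeUTF helper and UTF8Deserializer define a simple length-prefixed format: a 4-byte big-endian signed length followed by the raw UTF-8 bytes. A minimal round-trip sketch in plain Python; the write_utf/read_utf helper names and the StringIO stand-in for the worker's stream are illustrative only:
    import struct
    from StringIO import StringIO
    def write_utf(s, stream):
        # mirrors PythonRDD.writeUTF: 4-byte big-endian length, then the raw UTF-8 bytes
        data = s.encode("utf-8")
        stream.write(struct.pack(">i", len(data)))
        stream.write(data)
    def read_utf(stream):
        # mirrors UTF8Deserializer.loads: read an int length, then decode that many bytes
        length = struct.unpack(">i", stream.read(4))[0]
        return stream.read(length).decode("utf-8")
    buf = StringIO()
    write_utf(u"x" * 100000, buf)        # strings larger than 64 kB now round-trip
    buf.seek(0)
    assert read_utf(buf) == u"x" * 100000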
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index d77981f61fa36..4be4063dcf602 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -30,11 +30,11 @@
from pyspark.cloudpickle import CloudPickler
from pyspark.files import SparkFiles
from pyspark.serializers import write_with_length, write_int, read_long, \
- write_long, read_int, SpecialLengths, MUTF8Deserializer, PickleSerializer
+ write_long, read_int, SpecialLengths, UTF8Deserializer, PickleSerializer
pickleSer = PickleSerializer()
-mutf8_deserializer = MUTF8Deserializer()
+utf8_deserializer = UTF8Deserializer()
def report_times(outfile, boot, init, finish):
@@ -51,7 +51,7 @@ def main(infile, outfile):
return
# fetch name of workdir
- spark_files_dir = mutf8_deserializer.loads(infile)
+ spark_files_dir = utf8_deserializer.loads(infile)
SparkFiles._root_directory = spark_files_dir
SparkFiles._is_running_on_worker = True
@@ -66,7 +66,7 @@ def main(infile, outfile):
sys.path.append(spark_files_dir) # *.py files that were added will be copied here
num_python_includes = read_int(infile)
for _ in range(num_python_includes):
- filename = mutf8_deserializer.loads(infile)
+ filename = utf8_deserializer.loads(infile)
sys.path.append(os.path.join(spark_files_dir, filename))
command = pickleSer._read_with_length(infile)
From d18fe1f3a32a98b9dfdbe2606ab16d6113a200e1 Mon Sep 17 00:00:00 2001
From: Tathagata Das
Date: Tue, 28 Jan 2014 21:51:05 -0800
Subject: [PATCH 076/133] Merge pull request #497 from tdas/docs-update
Updated Spark Streaming Programming Guide
Here is the updated version of the Spark Streaming Programming Guide. This is still a work in progress, but the major changes are in place, so feedback is most welcome.
In general, I have tried to make the guide easier to understand even if the reader does not know much about Spark. The updated website is hosted here -
http://www.eecs.berkeley.edu/~tdas/spark_docs/streaming-programming-guide.html
The major changes are:
- Overview illustrates the use cases of Spark Streaming - various input sources and various output sources
- An example right after the overview to quickly give an idea of what a Spark Streaming program looks like
- Made the Java API and examples first-class citizens like Scala by using tabs to show both Scala and Java examples (similar to the AMPCamp tutorial's code tabs)
- Highlighted the DStream operations updateStateByKey and transform because of their powerful nature
- Updated driver node failure recovery text to highlight automatic recovery in Spark standalone mode
- Added information about linking and using the external input sources like Kafka and Flume
- In general, reorganized the sections to better show the Basic section and the more advanced sections like Tuning and Recovery.
Todos:
- Links to the docs of external Kafka, Flume, etc
- Illustrate window operation with figure as well as example.
Author: Tathagata Das
== Merge branch commits ==
commit 18ff10556570b39d672beeb0a32075215cfcc944
Author: Tathagata Das
Date: Tue Jan 28 21:49:30 2014 -0800
Fixed a lot of broken links.
commit 34a5a6008dac2e107624c7ff0db0824ee5bae45f
Author: Tathagata Das
Date: Tue Jan 28 18:02:28 2014 -0800
Updated github url to use SPARK_GITHUB_URL variable.
commit f338a60ae8069e0a382d2cb170227e5757cc0b7a
Author: Tathagata Das
Date: Mon Jan 27 22:42:42 2014 -0800
More updates based on Patrick and Harvey's comments.
commit 89a81ff25726bf6d26163e0dd938290a79582c0f
Author: Tathagata Das
Date: Mon Jan 27 13:08:34 2014 -0800
Updated docs based on Patricks PR comments.
commit d5b6196b532b5746e019b959a79ea0cc013a8fc3
Author: Tathagata Das
Date: Sun Jan 26 20:15:58 2014 -0800
Added spark.streaming.unpersist config and info on StreamingListener interface.
commit e3dcb46ab83d7071f611d9b5008ba6bc16c9f951
Author: Tathagata Das
Date: Sun Jan 26 18:41:12 2014 -0800
Fixed docs on StreamingContext.getOrCreate.
commit 6c29524639463f11eec721e4d17a9d7159f2944b
Author: Tathagata Das
Date: Thu Jan 23 18:49:39 2014 -0800
Added example and figure for window operations, and links to Kafka and Flume API docs.
commit f06b964a51bb3b21cde2ff8bdea7d9785f6ce3a9
Author: Tathagata Das
Date: Wed Jan 22 22:49:12 2014 -0800
Fixed missing endhighlight tag in the MLlib guide.
commit 036a7d46187ea3f2a0fb8349ef78f10d6c0b43a9
Merge: eab351d a1cd185
Author: Tathagata Das
Date: Wed Jan 22 22:17:42 2014 -0800
Merge remote-tracking branch 'apache/master' into docs-update
commit eab351d05c0baef1d4b549e1581310087158d78d
Author: Tathagata Das
Date: Wed Jan 22 22:17:15 2014 -0800
Update Spark Streaming Programming Guide.
(cherry picked from commit 793020961489e16e924c4531da3a13884d2b9175)
Conflicts:
docs/mllib-guide.md
---
docs/_config.yml | 1 +
docs/_layouts/global.html | 11 +
docs/_plugins/copy_api_dirs.rb | 5 +-
docs/configuration.md | 11 +-
docs/css/main.css | 44 +-
docs/img/java-sm.png | Bin 0 -> 670 bytes
docs/img/python-sm.png | Bin 0 -> 1455 bytes
docs/img/scala-sm.png | Bin 0 -> 2241 bytes
docs/img/streaming-arch.png | Bin 0 -> 78856 bytes
docs/img/streaming-dstream-ops.png | Bin 0 -> 48429 bytes
docs/img/streaming-dstream-window.png | Bin 0 -> 40938 bytes
docs/img/streaming-dstream.png | Bin 0 -> 26823 bytes
docs/img/streaming-figures.pptx | Bin 0 -> 887545 bytes
docs/img/streaming-flow.png | Bin 0 -> 31544 bytes
docs/index.md | 2 +-
docs/js/main.js | 106 ++
docs/spark-standalone.md | 4 +-
docs/streaming-programming-guide.md | 1278 +++++++++++++++++++------
18 files changed, 1162 insertions(+), 300 deletions(-)
create mode 100644 docs/img/java-sm.png
create mode 100644 docs/img/python-sm.png
create mode 100644 docs/img/scala-sm.png
create mode 100644 docs/img/streaming-arch.png
create mode 100644 docs/img/streaming-dstream-ops.png
create mode 100644 docs/img/streaming-dstream-window.png
create mode 100644 docs/img/streaming-dstream.png
create mode 100644 docs/img/streaming-figures.pptx
create mode 100644 docs/img/streaming-flow.png
diff --git a/docs/_config.yml b/docs/_config.yml
index 3e96d2c1ea136..97a2c4abf2998 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -8,3 +8,4 @@ SPARK_VERSION_SHORT: 0.9.0
SCALA_VERSION: "2.10"
MESOS_VERSION: 0.13.0
SPARK_ISSUE_TRACKER_URL: https://spark-project.atlassian.net
+SPARK_GITHUB_URL: https://github.com/apache/incubator-spark
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index c529d89ffd192..33525953ac4f6 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -82,6 +82,17 @@
MLlib (Machine Learning)
Bagel (Pregel on Spark)
GraphX (Graph Processing)
+
+
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb
index acc6bf08160eb..44d64057f4fb3 100644
--- a/docs/_plugins/copy_api_dirs.rb
+++ b/docs/_plugins/copy_api_dirs.rb
@@ -20,7 +20,10 @@
if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1')
# Build Scaladoc for Java/Scala
- projects = ["core", "examples", "repl", "bagel", "graphx", "streaming", "mllib"]
+ core_projects = ["core", "examples", "repl", "bagel", "graphx", "streaming", "mllib"]
+ external_projects = ["flume", "kafka", "mqtt", "twitter", "zeromq"]
+
+ projects = core_projects + external_projects.map { |project_name| "external/" + project_name }
puts "Moving to project root and building scaladoc."
curr_dir = pwd
diff --git a/docs/configuration.md b/docs/configuration.md
index be548e372dcd4..0cf9d231ab896 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -362,7 +362,16 @@ Apart from these, the following properties are also available, and may be useful
spark.streaming.blockInterval |
200 |
- Duration (milliseconds) of how long to batch new objects coming from network receivers.
+ Duration (milliseconds) of how long to batch new objects coming from network receivers used
+ in Spark Streaming.
+ |
+
+
+ spark.streaming.unpersist |
+ false |
+
+ Force RDDs generated and persisted by Spark Streaming to be automatically unpersisted from
+ Spark's memory. Setting this to true is likely to reduce Spark's RDD memory usage.
|
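The spark.streaming.unpersist property documented above is set like any other Spark property on the application's configuration. A minimal sketch using the SparkConf API added in 0.9; the master URL and application name are placeholders, and Spark Streaming itself is driven from Scala/Java at this point, so the snippet only illustrates the configuration mechanism:
    from pyspark import SparkConf, SparkContext
    # Ask Spark Streaming to unpersist generated RDDs once they are no longer needed.
    conf = (SparkConf()
            .setMaster("local[2]")
            .setAppName("UnpersistExample")
            .set("spark.streaming.unpersist", "true"))
    sc = SparkContext(conf=conf)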
diff --git a/docs/css/main.css b/docs/css/main.css
index 31122d5633801..8566400f071c9 100755
--- a/docs/css/main.css
+++ b/docs/css/main.css
@@ -87,20 +87,54 @@ a:hover code {
max-width: 914px;
}
-/**
- * Make dropdown menus in nav bars show on hover instead of click
- * using solution at http://stackoverflow.com/questions/8878033/how-
- * to-make-twitter-bootstrap-menu-dropdown-on-hover-rather-than-click
- **/
.dropdown-menu {
/* Remove the default 2px top margin which causes a small
gap between the hover trigger area and the popup menu */
margin-top: 0;
+ /* Avoid too much whitespace at the right for shorter menu items */
+ min-width: 50px;
}
+
+/**
+ * Make dropdown menus in nav bars show on hover instead of click
+ * using solution at http://stackoverflow.com/questions/8878033/how-
+ * to-make-twitter-bootstrap-menu-dropdown-on-hover-rather-than-click
+ **/
ul.nav li.dropdown:hover ul.dropdown-menu{
display: block;
}
+
a.menu:after, .dropdown-toggle:after {
content: none;
}
+/** Make the submenus open on hover on the parent menu item */
+ul.nav li.dropdown ul.dropdown-menu li.dropdown-submenu:hover ul.dropdown-menu {
+ display: block;
+}
+
+/** Make the submenus be invisible until the parent menu item is hovered upon */
+ul.nav li.dropdown ul.dropdown-menu li.dropdown-submenu ul.dropdown-menu {
+ display: none;
+}
+
+/**
+ * Made the navigation bar buttons not grey out when clicked.
+ * Essentially making nav bar buttons not react to clicks, only hover events.
+ */
+.navbar .nav li.dropdown.open > .dropdown-toggle {
+ background-color: transparent;
+}
+
+/**
+ * Made the active tab caption blue. Otherwise the active tab is black, and inactive tab is blue.
+ * That looks weird. Changed the colors to active - blue, inactive - black, and
+ * no color change on hover.
+ */
+.nav-tabs > .active > a, .nav-tabs > .active > a:hover {
+ color: #08c;
+}
+
+.nav-tabs > li > a, .nav-tabs > li > a:hover {
+ color: #333;
+}
diff --git a/docs/img/java-sm.png b/docs/img/java-sm.png
new file mode 100644
index 0000000000000000000000000000000000000000..a82ee7d682e497d2c905f6b51dfaf5a41cbb19a1
GIT binary patch (670-byte PNG; base85-encoded binary data omitted)
diff --git a/docs/img/python-sm.png b/docs/img/python-sm.png
new file mode 100644
index 0000000000000000000000000000000000000000..ae01e05252abd38e7d4f3b9a79038ba6cd14e778
GIT binary patch (1455-byte PNG; base85-encoded binary data omitted)
diff --git a/docs/img/scala-sm.png b/docs/img/scala-sm.png
new file mode 100644
index 0000000000000000000000000000000000000000..30db034b70cf9440ca5e5b8a5c57e6a1df65f5a5
GIT binary patch (2241-byte PNG; base85-encoded binary data omitted, along with the remaining base85-encoded image data for the other new docs/img/*.png files listed in the diffstat above)