diff --git a/docs/bagel-programming-guide.md b/docs/bagel-programming-guide.md
index da6d0c9dcd97b..14f43cb6d3946 100644
--- a/docs/bagel-programming-guide.md
+++ b/docs/bagel-programming-guide.md
@@ -46,7 +46,7 @@ import org.apache.spark.bagel.Bagel._
Next, we load a sample graph from a text file as a distributed dataset and package it into `PRVertex` objects. We also cache the distributed dataset because Bagel will use it multiple times and we'd like to avoid recomputing it.

{% highlight scala %}
-val input = sc.textFile("pagerank_data.txt")
+val input = sc.textFile("data/pagerank_data.txt")

val numVerts = input.count()

diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md
index 07c8512bf9294..c34eb28fc06a2 100644
--- a/docs/java-programming-guide.md
+++ b/docs/java-programming-guide.md
@@ -55,7 +55,7 @@ classes. RDD methods like `map` are overloaded by specialized `PairFunction`
and `DoubleFunction` classes, allowing them to return RDDs of the appropriate types. Common methods like `filter` and `sample` are implemented by each specialized RDD class, so filtering a `PairRDD` returns a new `PairRDD`,
-etc (this acheives the "same-result-type" principle used by the [Scala collections
+etc (this achieves the "same-result-type" principle used by the [Scala collections
framework](http://docs.scala-lang.org/overviews/core/architecture-of-scala-collections.html)).

## Function Interfaces

@@ -102,7 +102,7 @@ the following changes:
  `Function` classes will need to use `implements` rather than `extends`.
* Certain transformation functions now have multiple versions depending on the return type. In Spark core, the map functions (`map`, `flatMap`, and
-  `mapPartitons`) have type-specific versions, e.g.
+  `mapPartitions`) have type-specific versions, e.g.
  [`mapToPair`](api/java/org/apache/spark/api/java/JavaRDDLike.html#mapToPair(org.apache.spark.api.java.function.PairFunction))
  and [`mapToDouble`](api/java/org/apache/spark/api/java/JavaRDDLike.html#mapToDouble(org.apache.spark.api.java.function.DoubleFunction)).
  Spark Streaming also uses the same approach, e.g. [`transformToPair`](api/java/org/apache/spark/streaming/api/java/JavaDStreamLike.html#transformToPair(org.apache.spark.api.java.function.Function)).
@@ -115,11 +115,11 @@ As an example, we will implement word count using the Java API.
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;

-JavaSparkContext sc = new JavaSparkContext(...);
-JavaRDD<String> lines = ctx.textFile("hdfs://...");
+JavaSparkContext jsc = new JavaSparkContext(...);
+JavaRDD<String> lines = jsc.textFile("hdfs://...");
JavaRDD<String> words = lines.flatMap(
  new FlatMapFunction<String, String>() {
-    public Iterable<String> call(String s) {
+    @Override public Iterable<String> call(String s) {
      return Arrays.asList(s.split(" "));
    }
  }
@@ -140,10 +140,10 @@ Here, the `FlatMapFunction` was created inline; another option is to subclass
{% highlight java %}
class Split extends FlatMapFunction<String, String> {
-  public Iterable<String> call(String s) {
+  @Override public Iterable<String> call(String s) {
    return Arrays.asList(s.split(" "));
  }
-);
+}

JavaRDD<String> words = lines.flatMap(new Split());
{% endhighlight %}
@@ -162,8 +162,8 @@ Continuing with the word count example, we map each word to a `(word, 1)` pair:
import scala.Tuple2;
JavaPairRDD<String, Integer> ones = words.mapToPair(
  new PairFunction<String, String, Integer>() {
-    public Tuple2<String, Integer> call(String s) {
-      return new Tuple2(s, 1);
+    @Override public Tuple2<String, Integer> call(String s) {
+      return new Tuple2<String, Integer>(s, 1);
    }
  }
);
@@ -178,7 +178,7 @@ occurrences of each word:
{% highlight java %}
JavaPairRDD<String, Integer> counts = ones.reduceByKey(
  new Function2<Integer, Integer, Integer>() {
-    public Integer call(Integer i1, Integer i2) {
+    @Override public Integer call(Integer i1, Integer i2) {
      return i1 + i2;
    }
  }
diff --git a/docs/mllib-basics.md b/docs/mllib-basics.md
index 710ce1721fe25..704308802d65b 100644
--- a/docs/mllib-basics.md
+++ b/docs/mllib-basics.md
@@ -9,7 +9,7 @@ title: MLlib - Basics
MLlib supports local vectors and matrices stored on a single machine, as well as distributed matrices backed by one or more RDDs. In the current implementation, local vectors and matrices are simple data models
-to serve public interfaces. The underly linear algebra operations are provided by
+to serve public interfaces. The underlying linear algebra operations are provided by
[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/). A training example used in supervised learning is called "labeled point" in MLlib.
@@ -205,7 +205,7 @@ import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.rdd.RDDimport;

-RDD[LabeledPoint] training = MLUtils.loadLibSVMData(sc, "mllib/data/sample_libsvm_data.txt")
+RDD<LabeledPoint> training = MLUtils.loadLibSVMData(jsc, "mllib/data/sample_libsvm_data.txt");
{% endhighlight %}
@@ -307,6 +307,7 @@ A [`RowMatrix`](api/mllib/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) can be
created from a `JavaRDD<Vector>` instance. Then we can compute its column summary statistics.

{% highlight java %}
+import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;
@@ -348,10 +349,10 @@ val mat: RowMatrix = ... // a RowMatrix
val summary: MultivariateStatisticalSummary = mat.computeColumnSummaryStatistics()
println(summary.mean) // a dense vector containing the mean value for each column
println(summary.variance) // column-wise variance
-println(summary.numNonzers) // number of nonzeros in each column
+println(summary.numNonzeros) // number of nonzeros in each column

// Compute the covariance matrix.
-val Cov: Matrix = mat.computeCovariance()
+val cov: Matrix = mat.computeCovariance()
{% endhighlight %}
@@ -397,11 +398,12 @@ wrapper over `(long, Vector)`. An `IndexedRowMatrix` can be converted to a `RowMatrix` by dropping
its row indices.
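For reference, that conversion is a one-liner in Scala; a minimal sketch, assuming an existing `IndexedRowMatrix` (the variable names here are illustrative only, not taken from the patch):

{% highlight scala %}
import org.apache.spark.mllib.linalg.distributed.{IndexedRowMatrix, RowMatrix}

val indexedMat: IndexedRowMatrix = ... // an existing IndexedRowMatrix

// Drop the row indices to obtain a plain RowMatrix.
val rowMat: RowMatrix = indexedMat.toRowMatrix()
{% endhighlight %}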
{% highlight java %}
+import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.distributed.IndexedRow;
import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;

-JavaRDD[IndexedRow] rows = ... // a JavaRDD of indexed rows
+JavaRDD<IndexedRow> rows = ... // a JavaRDD of indexed rows
// Create an IndexedRowMatrix from a JavaRDD.
IndexedRowMatrix mat = new IndexedRowMatrix(rows.rdd());
@@ -458,7 +460,9 @@ wrapper over `(long, long, double)`. A `CoordinateMatrix` can be converted to an `IndexedRowMatrix`
with sparse rows by calling `toIndexedRowMatrix`.

{% highlight scala %}
+import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
+import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;
import org.apache.spark.mllib.linalg.distributed.MatrixEntry;

JavaRDD<MatrixEntry> entries = ... // a JavaRDD of matrix entries
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index b3293afe408d0..276868fa8490d 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -18,7 +18,7 @@ models are trained for each cluster).
MLlib supports [k-means](http://en.wikipedia.org/wiki/K-means_clustering) clustering, one of the most commonly used clustering algorithms that clusters the data points into
-predfined number of clusters. The MLlib implementation includes a parallelized
+predefined number of clusters. The MLlib implementation includes a parallelized
variant of the [k-means++](http://en.wikipedia.org/wiki/K-means%2B%2B) method called [kmeans||](http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf). The implementation in MLlib has the following parameters:
@@ -30,7 +30,7 @@ initialization via k-means\|\|.
* *runs* is the number of times to run the k-means algorithm (k-means is not guaranteed to find a globally optimal solution, and when run multiple times on a given dataset, the algorithm returns the best clustering result).
-* *initializiationSteps* determines the number of steps in the k-means\|\| algorithm.
+* *initializationSteps* determines the number of steps in the k-means\|\| algorithm.
* *epsilon* determines the distance threshold within which we consider k-means to have converged.

## Examples
diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md
index 79f5e3a7ca4fb..57df4884fe6b7 100644
--- a/docs/mllib-collaborative-filtering.md
+++ b/docs/mllib-collaborative-filtering.md
@@ -77,7 +77,7 @@ val ratesAndPreds = ratings.map{
}.join(predictions)
val MSE = ratesAndPreds.map{ case ((user, product), (r1, r2)) =>
  math.pow((r1- r2), 2)
-}.reduce(_ + _)/ratesAndPreds.count
+}.sum / ratesAndPreds.count
println("Mean Squared Error = " + MSE)
{% endhighlight %}
diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md
index 0693766990732..ba67450089aa2 100644
--- a/docs/mllib-decision-tree.md
+++ b/docs/mllib-decision-tree.md
@@ -83,14 +83,14 @@ Section 9.2.4 in
[Elements of Statistical Machine Learning](http://statweb.stanford.edu/~tibs/ElemStatLearn/) for details). For example, for a binary classification problem with one categorical feature with three categories A, B and C with corresponding proportion of label 1 as 0.2, 0.6 and 0.4, the categorical
-features are orded as A followed by C followed B or A, C, B. The two split candidates are A \| C, B
+features are ordered as A followed by C followed B or A, C, B. The two split candidates are A \| C, B
and A , C \| B where \| denotes the split.
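As a worked illustration of that ordering heuristic, here is a small, self-contained Scala sketch; the category names and label-1 proportions are the ones from the example above, and the snippet is illustrative only, not MLlib code:

{% highlight scala %}
// Sort the categories by their proportion of label 1, then consider only the
// M-1 "prefix" splits along that ordering.
val proportionOfLabelOne = Seq("A" -> 0.2, "B" -> 0.6, "C" -> 0.4)

val ordered = proportionOfLabelOne.sortBy(_._2).map(_._1) // A, C, B

val splitCandidates = (1 until ordered.size).map(i => (ordered.take(i), ordered.drop(i)))
splitCandidates.foreach { case (left, right) =>
  println(left.mkString(", ") + " | " + right.mkString(", ")) // prints "A | C, B" and "A, C | B"
}
{% endhighlight %}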
### Stopping rule

The recursive tree construction is stopped at a node when one of the two conditions is met:

-1. The node depth is equal to the `maxDepth` training parammeter
+1. The node depth is equal to the `maxDepth` training parameter
2. No split candidate leads to an information gain at the node.

### Practical limitations

@@ -178,7 +178,7 @@ val valuesAndPreds = parsedData.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}
-val MSE = valuesAndPreds.map{ case(v, p) => math.pow((v - p), 2)}.reduce(_ + _)/valuesAndPreds.count
+val MSE = valuesAndPreds.map{ case(v, p) => math.pow((v - p), 2)}.sum / valuesAndPreds.count
println("training Mean Squared Error = " + MSE)
{% endhighlight %}
diff --git a/docs/mllib-dimensionality-reduction.md b/docs/mllib-dimensionality-reduction.md
index 4e9ecf7c006fa..ab24663cfe258 100644
--- a/docs/mllib-dimensionality-reduction.md
+++ b/docs/mllib-dimensionality-reduction.md
@@ -44,6 +44,10 @@ say, less than $1000$, but many rows, which we call *tall-and-skinny*.
{% highlight scala %}
+import org.apache.spark.mllib.linalg.Matrix
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+import org.apache.spark.mllib.linalg.SingularValueDecomposition
+
val mat: RowMatrix = ...

// Compute the top 20 singular values and corresponding singular vectors.
@@ -74,6 +78,9 @@ and use them to project the vectors into a low-dimensional space. The number of columns should be small, e.g, less than 1000.

{% highlight scala %}
+import org.apache.spark.mllib.linalg.Matrix
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+
val mat: RowMatrix = ...

// Compute the top 10 principal components.
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index c49f857d07557..842ca5c8c6d8a 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -94,7 +94,7 @@ import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

double[] array = ... // a double array
-Vector vector = Vectors.dense(array) // a dense vector
+Vector vector = Vectors.dense(array); // a dense vector
{% endhighlight %}

[`Vectors`](api/mllib/index.html#org.apache.spark.mllib.linalg.Vectors$) provides factory methods to
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index ebb555f974bf7..fe132e2abf771 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -225,10 +225,11 @@ algorithm for 200 iterations.
import org.apache.spark.mllib.optimization.L1Updater

val svmAlg = new SVMWithSGD()
-svmAlg.optimizer.setNumIterations(200)
-  .setRegParam(0.1)
-  .setUpdater(new L1Updater)
-val modelL1 = svmAlg.run(parsedData)
+svmAlg.optimizer.
+  setNumIterations(200).
+  setRegParam(0.1).
+  setUpdater(new L1Updater)
+val modelL1 = svmAlg.run(training)
{% endhighlight %}

Similarly, you can use replace `SVMWithSGD` by
@@ -322,7 +323,7 @@ val valuesAndPreds = parsedData.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}
-val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.reduce(_ + _) / valuesAndPreds.count
+val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.sum / valuesAndPreds.count
println("training Mean Squared Error = " + MSE)
{% endhighlight %}
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index 6160fe5b2fe8c..c8786b7dad209 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -7,13 +7,13 @@ Naive Bayes is a simple multiclass classification algorithm with the assumption of independence
between every pair of features. Naive Bayes can be trained very efficiently. Within a single pass to the training data, it computes the conditional probability distribution of each feature given label, and then it applies Bayes' theorem to compute the conditional probability distribution of label
-given an observation and use it for prediction. For more details, please visit the wikipedia page
+given an observation and use it for prediction. For more details, please visit the Wikipedia page
[Naive Bayes classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier).

In MLlib, we implemented multinomial naive Bayes, which is typically used for document classification. Within that context, each observation is a document, each feature represents a term,
-whose value is the frequency of the term. For its formulation, please visit the wikipedia page
-[Multinomial naive Bayes](http://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes)
+whose value is the frequency of the term. For its formulation, please visit the Wikipedia page
+[Multinomial Naive Bayes](http://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes)
or the section [Naive Bayes text classification](http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html) from the book Introduction to Information
@@ -58,29 +58,36 @@ optionally smoothing parameter `lambda` as input, and output a `NaiveBayesModel`, which
can be used for evaluation and prediction.

{% highlight java %}
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.classification.NaiveBayes;
+import org.apache.spark.mllib.classification.NaiveBayesModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import scala.Tuple2;

JavaRDD<LabeledPoint> training = ... // training set
JavaRDD<LabeledPoint> test = ... // test set

NaiveBayesModel model = NaiveBayes.train(training.rdd(), 1.0);

-JavaRDD<Double> prediction = model.predict(test.map(new Function<LabeledPoint, Vector>() {
-    public Vector call(LabeledPoint p) {
-      return p.features();
+JavaRDD<Double> prediction =
+  test.map(new Function<LabeledPoint, Double>() {
+    @Override public Double call(LabeledPoint p) {
+      return model.predict(p.features());
    }
-  })
+  });
JavaPairRDD<Double, Double> predictionAndLabel = prediction.zip(test.map(new Function<LabeledPoint, Double>() {
-    public Double call(LabeledPoint p) {
+    @Override public Double call(LabeledPoint p) {
      return p.label();
    }
-  })
+  }));
double accuracy = 1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {
-    public Boolean call(Tuple2<Double, Double> pl) {
+    @Override public Boolean call(Tuple2<Double, Double> pl) {
      return pl._1() == pl._2();
    }
-  }).count() / test.count()
+  }).count() / test.count();
{% endhighlight %}
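For comparison, a minimal Scala sketch of the same train/predict/evaluate flow, assuming `training` and `test` are already-loaded `RDD[LabeledPoint]`s; this mirrors the Java example above rather than quoting existing doc text:

{% highlight scala %}
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

val training: RDD[LabeledPoint] = ... // training set
val test: RDD[LabeledPoint] = ... // test set

// Train multinomial naive Bayes with smoothing parameter lambda = 1.0.
val model = NaiveBayes.train(training, lambda = 1.0)

// Pair each prediction with the true label.
val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))

// Fraction of test points whose predicted label matches the true label.
val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()
{% endhighlight %}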