Skip to content

Commit

Permalink
code style: max line length <= 100
Browse files Browse the repository at this point in the history
Signed-off-by: Manish Amde <manish9ue@gmail.com>
  • Loading branch information
manishamde committed Feb 28, 2014
1 parent dd0c0d7 commit 9372779
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 100 deletions.

This file was deleted.

216 changes: 154 additions & 62 deletions mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.apache.spark.mllib.tree.configuration.Algo._
object DecisionTreeRunner extends Logging {

val usage = """
Usage: DecisionTreeRunner <master>[slices] --algo <Classification,Regression> --trainDataDir path --testDataDir path --maxDepth num [--impurity <Gini,Entropy,Variance>] [--maxBins num]
Usage: DecisionTreeRunner<master>[slices] --algo <Classification,Regression> --trainDataDir path --testDataDir path --maxDepth num [--impurity <Gini,Entropy,Variance>] [--maxBins num]
"""


Expand Down Expand Up @@ -132,7 +132,8 @@ object DecisionTreeRunner extends Logging {

//TODO: Make these generic MLTable metrics
def meanSquaredError(tree : DecisionTreeModel, data : RDD[LabeledPoint]) : Double = {
val meanSumOfSquares = data.map(y => (tree.predict(y.features) - y.label)*(tree.predict(y.features) - y.label)).mean()
val meanSumOfSquares =
data.map(y => (tree.predict(y.features) - y.label)*(tree.predict(y.features) - y.label)).mean()
meanSumOfSquares
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,7 @@ object Gini extends Impurity {
}
}

def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new OperationNotSupportedException("Gini.calculate")
/**
 * This (count, sum, sumSquares) signature is not implemented for Gini:
 * every call throws OperationNotSupportedException("Gini.calculate").
 */
def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new OperationNotSupportedException("Gini.calculate")

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,12 @@
*/
package org.apache.spark.mllib.tree.model

class InformationGainStats(val gain : Double,
val impurity: Double,
val leftImpurity : Double,
//val leftSamples : Long,
val rightImpurity : Double,
//val rightSamples : Long
val predict : Double) extends Serializable {
class InformationGainStats(
val gain : Double,
val impurity: Double,
val leftImpurity : Double,
val rightImpurity : Double,
val predict : Double) extends Serializable {

override def toString = {
"gain = %f, impurity = %f, left impurity = %f, right impurity = %f, predict = %f"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ class Node ( val id : Int,
val stats : Option[InformationGainStats]
) extends Serializable with Logging{

override def toString = "id = " + id + ", isLeaf = " + isLeaf + ", predict = " + predict + ", split = " + split + ", stats = " + stats
// Renders id, isLeaf, predict, split and stats as a single ", "-separated "name = value" string.
override def toString = "id = " + id + ", isLeaf = " + isLeaf + ", predict = " + predict + ", " +
"split = " + split + ", stats = " + stats

def build(nodes : Array[Node]) : Unit = {

logDebug("building node " + id + " at level " + (scala.math.log(id + 1)/scala.math.log(2)).toInt )
logDebug("building node " + id + " at level " +
(scala.math.log(id + 1)/scala.math.log(2)).toInt )
logDebug("id = " + id + ", split = " + split)
logDebug("stats = " + stats)
logDebug("predict = " + predict)
Expand Down
19 changes: 14 additions & 5 deletions mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,23 @@ package org.apache.spark.mllib.tree.model

import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType

case class Split(feature: Int, threshold : Double, featureType : FeatureType, categories : List[Double]){
case class Split(
feature: Int,
threshold : Double,
featureType : FeatureType,
categories : List[Double]){

override def toString =
"Feature = " + feature + ", threshold = " + threshold + ", featureType = " + featureType + ", categories = " + categories
"Feature = " + feature + ", threshold = " + threshold + ", featureType = " + featureType +
", categories = " + categories
}

class DummyLowSplit(feature: Int, kind : FeatureType) extends Split(feature, Double.MinValue, kind, List())
/**
 * Split for `feature` of type `kind` whose threshold is fixed at Double.MinValue
 * and whose category list is empty.
 */
class DummyLowSplit(feature: Int, kind : FeatureType)
extends Split(feature, Double.MinValue, kind, List())

class DummyHighSplit(feature: Int, kind : FeatureType) extends Split(feature, Double.MaxValue, kind, List())
/**
 * Split for `feature` of type `kind` whose threshold is fixed at Double.MaxValue
 * and whose category list is empty.
 */
class DummyHighSplit(feature: Int, kind : FeatureType)
extends Split(feature, Double.MaxValue, kind, List())

class DummyCategoricalSplit(feature: Int, kind : FeatureType) extends Split(feature, Double.MaxValue, kind, List())
/**
 * Split for `feature` of type `kind` with an empty category list. The threshold passed to
 * Split is Double.MaxValue, i.e. the same constructor arguments DummyHighSplit uses.
 */
class DummyCategoricalSplit(feature: Int, kind : FeatureType)
extends Split(feature, Double.MaxValue, kind, List())

0 comments on commit 9372779

Please sign in to comment.