Skip to content

Commit

Permalink
Fixes after rebasing on master.
Browse files Browse the repository at this point in the history
  • Loading branch information
jkbradley committed Feb 5, 2015
1 parent fc62406 commit 8316d5e
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ object DeveloperApiExample {
val conf = new SparkConf().setAppName("DeveloperApiExample")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
import sqlContext._
import sqlContext.implicits._

// Prepare training data.
val training = sparkContext.parallelize(Seq(
val training = sc.parallelize(Seq(
LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
Expand All @@ -61,7 +61,7 @@ object DeveloperApiExample {
val model = lr.fit(training)

// Prepare test data.
val test = sparkContext.parallelize(Seq(
val test = sc.parallelize(Seq(
LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ object SimpleParamsExample {
println("Model 2 was fit using parameters: " + model2.fittingParamMap)

// Prepare test data.
val test = sparkContext.parallelize(Seq(
val test = sc.parallelize(Seq(
LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ private[ml] object ClassificationModel {
val raw2pred: Vector => Double = (rawPred) => {
rawPred.toArray.zipWithIndex.maxBy(_._1)._2
}
tmpData = tmpData.select($"*",
callUDF(raw2pred, col(map(model.rawPredictionCol))).as(map(model.predictionCol)))
tmpData = tmpData.select($"*", callUDF(raw2pred, DoubleType,
col(map(model.rawPredictionCol))).as(map(model.predictionCol)))
numColsOutput += 1
}
} else if (map(model.predictionCol) != "") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import org.apache.spark.ml.param._
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Dsl._
import org.apache.spark.storage.StorageLevel


Expand Down Expand Up @@ -103,69 +102,6 @@ class LogisticRegressionModel private[ml] (
1.0 / (1.0 + math.exp(-m))
}

/**
 * Transforms the input dataset by appending up to three prediction columns,
 * skipping any column whose configured name (in the merged param map) is "".
 *
 * Columns produced, in order:
 *  - rawPredictionCol: raw margins via `predictRaw` on the features column.
 *  - probabilityCol: class probabilities — derived from the raw-prediction column
 *    when available (sigmoid of rawPreds(1)), otherwise via `predictProbabilities`
 *    on the features column.
 *  - predictionCol: 0.0/1.0 label, thresholded at `map(threshold)` — computed from
 *    the probability column, else the raw-prediction column, else `predict` on features.
 *
 * Each stage prefers a previously appended column to avoid recomputing work.
 *
 * @param dataset input data; must contain the features column named by the param map
 * @param paramMap extra params, merged over this model's own `paramMap`
 * @return the dataset with the requested output columns appended (unchanged, with a
 *         warning logged, if all three output column names are empty)
 */
override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {
// Check schema
transformSchema(dataset.schema, paramMap, logging = true)

// Embedded (model) params are overridden by the explicitly supplied ones.
val map = this.paramMap ++ paramMap

// Output selected columns only.
// This is a bit complicated since it tries to avoid repeated computation.
// rawPrediction (-margin, margin)
// probability (1.0-score, score)
// prediction (max margin)
var tmpData = dataset
var numColsOutput = 0
if (map(rawPredictionCol) != "") {
// Raw margins straight from the features column.
val features2raw: Vector => Vector = predictRaw
tmpData = tmpData.select($"*",
callUDF(features2raw, col(map(featuresCol))).as(map(rawPredictionCol)))
numColsOutput += 1
}
if (map(probabilityCol) != "") {
if (map(rawPredictionCol) != "") {
// Cheaper path: reuse the raw-prediction column just added above.
// Binary case: sigmoid of the positive-class margin gives P(class 1).
val raw2prob: Vector => Vector = (rawPreds) => {
val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1)))
Vectors.dense(1.0 - prob1, prob1)
}
tmpData = tmpData.select($"*",
callUDF(raw2prob, col(map(rawPredictionCol))).as(map(probabilityCol)))
} else {
// No raw column requested; compute probabilities from features directly.
val features2prob: Vector => Vector = predictProbabilities
tmpData = tmpData.select($"*",
callUDF(features2prob, col(map(featuresCol))).as(map(probabilityCol)))
}
numColsOutput += 1
}
if (map(predictionCol) != "") {
// Decision threshold on P(class 1); read once, outside the UDFs.
val t = map(threshold)
if (map(probabilityCol) != "") {
// Preferred path: threshold the already-computed probability column.
val predict: Vector => Double = (probs) => {
if (probs(1) > t) 1.0 else 0.0
}
tmpData = tmpData.select($"*",
callUDF(predict, col(map(probabilityCol))).as(map(predictionCol)))
} else if (map(rawPredictionCol) != "") {
// No probability column: recompute the sigmoid from the raw margin, then threshold.
val predict: Vector => Double = (rawPreds) => {
val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1)))
if (prob1 > t) 1.0 else 0.0
}
tmpData = tmpData.select($"*",
callUDF(predict, col(map(rawPredictionCol))).as(map(predictionCol)))
} else {
// Neither intermediate column exists: predict labels from features.
// NOTE(review): presumably `this.predict` applies the same threshold internally — confirm.
val predict: Vector => Double = this.predict
tmpData = tmpData.select($"*",
callUDF(predict, col(map(featuresCol))).as(map(predictionCol)))
}
numColsOutput += 1
}
if (numColsOutput == 0) {
// All three output column names were "": nothing was appended.
this.logWarning(s"$uid: LogisticRegressionModel.transform() was called as NOOP" +
" since no output columns were set.")
}
tmpData
}

// Logistic regression is a binary classifier: exactly two classes.
override val numClasses: Int = 2

/**
Expand Down

0 comments on commit 8316d5e

Please sign in to comment.