Skip to content

Commit

Permalink
change ML attribute from splits into buckets
Browse files Browse the repository at this point in the history
  • Loading branch information
yinxusen committed May 8, 2015
1 parent c3cc770 commit eacfcfa
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,16 @@ final class Bucketizer private[ml] (override val parent: Estimator[Bucketizer])
}

/**
 * Builds the StructField for the output column: an ordinal NominalAttribute named after
 * `$(outputCol)` whose `values` are string labels.
 *
 * NOTE(review): this span is a rendered diff without +/- markers. The first `val attr`
 * (two lines plus the blank line after it) appears to be the version the commit removes
 * ("3 deletions"); everything from `val innerRanges` through the second `val attr` appears
 * to be what it adds ("10 additions"). Only one of the two `val attr` definitions exists in
 * any real revision of the file.
 *
 * @param schema not referenced anywhere in the visible body
 * @return the attribute converted via `toStructField()`
 */
private def prepOutputField(schema: StructType): StructField = {
// Removed version: one label per split point (each split's toString).
val attr = new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true),
values = Some($(splits).map(_.toString)))

// Added version: one label per bucket. Each consecutive pair of splits becomes "a, b".
// NOTE(review): sliding(2) on a single-element array yields one 1-element window, which
// would produce a label containing just that split; `.head`/`.last` below throw on an empty
// array. Presumably splits is validated elsewhere to hold at least two entries - confirm.
val innerRanges = $(splits).sliding(2).map(bucket => bucket.mkString(", ")).toArray
// lowerInclusive prepends a "-inf, <first split>" label; upperInclusive appends a
// "<last split>, inf" label; with neither flag set only the inner ranges are used.
val values = ($(lowerInclusive), $(upperInclusive)) match {
case (true, true) =>
Array(s"-inf, ${$(splits).head}") ++ innerRanges ++ Array(s"${$(splits).last}, inf")
case (true, false) => Array(s"-inf, ${$(splits).head}") ++ innerRanges
case (false, true) => innerRanges ++ Array(s"${$(splits).last}, inf")
// Only (false, false) remains for a pair of Booleans.
case _ => innerRanges
}
val attr =
new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true), values = Some(values))
attr.toStructField()
}

Expand Down

0 comments on commit eacfcfa

Please sign in to comment.