From c7ad0852084dc28f3ebc144adfd4928b23f1c8ea Mon Sep 17 00:00:00 2001 From: "Joseph E. Gonzalez" Date: Thu, 30 Oct 2014 00:05:57 -0700 Subject: [PATCH] [SPARK-4130][MLlib] Fixing libSVM parser bug with extra whitespace This simple patch filters out extra whitespace entries. Author: Joseph E. Gonzalez Author: Joey Closes #2996 from jegonzal/loadLibSVM and squashes the following commits: e0227ab [Joey] improving readability e028e84 [Joseph E. Gonzalez] fixing whitespace bug in loadLibSVMFile when parsing libSVM files --- mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index dce0adffa6249..b88e08bf148ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -76,7 +76,7 @@ object MLUtils { .map { line => val items = line.split(' ') val label = items.head.toDouble - val (indices, values) = items.tail.map { item => + val (indices, values) = items.tail.filter(_.nonEmpty).map { item => val indexAndValue = item.split(':') val index = indexAndValue(0).toInt - 1 // Convert 1-based indices to 0-based. val value = indexAndValue(1).toDouble