diff --git a/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala index 4a2b2bae05751..f99bff54f5e74 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/export/pmml/KMeansPMMLModelExport.scala @@ -61,32 +61,30 @@ class KMeansPMMLModelExport(model : KMeansModel) extends PMMLModelExport{ var miningSchema = new MiningSchema() - for ( i <- 0 to (clusterCenter.size - 1)) { - fields(i) = FieldName.create("field_" + i) - dataDictionary - .withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE)) - miningSchema - .withMiningFields(new MiningField(fields(i)) - .withUsageType(FieldUsageType.ACTIVE)) - } - var comparisonMeasure = new ComparisonMeasure() .withKind(Kind.DISTANCE) .withMeasure(new SquaredEuclidean() ); - dataDictionary.withNumberOfFields((dataDictionary.getDataFields()).size()); - - pmml.setDataDictionary(dataDictionary); - var clusteringModel = new ClusteringModel(miningSchema, comparisonMeasure, MiningFunctionType.CLUSTERING, ModelClass.CENTER_BASED, model.clusterCenters.length) .withModelName("k-means"); for ( i <- 0 to (clusterCenter.size - 1)) { + fields(i) = FieldName.create("field_" + i) + dataDictionary + .withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE)) + miningSchema + .withMiningFields(new MiningField(fields(i)) + .withUsageType(FieldUsageType.ACTIVE)) clusteringModel.withClusteringFields( new ClusteringField(fields(i)).withCompareFunction(CompareFunctionType.ABS_DIFF) - ) + ) + } + + dataDictionary.withNumberOfFields((dataDictionary.getDataFields()).size()); + + for ( i <- 0 to (model.clusterCenters.size - 1)) { var cluster = new Cluster() .withName("cluster_" + i) .withArray(new org.dmg.pmml.Array() @@ -95,10 +93,10 @@ class KMeansPMMLModelExport(model : KMeansModel) extends PMMLModelExport{ .withValue(model.clusterCenters(i).toArray.mkString(" "))) // we don't have the size of the single cluster but only the centroids (withValue) // .withSize(value) - clusteringModel.withClusters(cluster) } + pmml.setDataDictionary(dataDictionary); pmml.withModels(clusteringModel); }