diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/LPA.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala similarity index 66% rename from graphx/src/main/scala/org/apache/spark/graphx/lib/LPA.scala rename to graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala index 5f89a72e10a48..4745d4ef87c9c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/LPA.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala @@ -20,43 +20,44 @@ package org.apache.spark.graphx.lib import scala.reflect.ClassTag import org.apache.spark.graphx._ -/** LPA algorithm. */ -object LPA { +/** Label Propagation algorithm. */ +object LabelPropagation { /** - * Run LPA (label propogation algorithm) for detecting communities in networks using the pregel framework. - * - * Each node in the network is initially assigned to its own community. At every super step - * nodes send their community affiliation to all neighbors and update their state to the mode - * community affiliation of incomming messages. + * Run static Label Propagation for detecting communities in networks. * - * LPA is a standard community detection algorithm for graphs. It is very inexpensive + * Each node in the network is initially assigned to its own community. At every superstep, nodes + * send their community affiliation to all neighbors and update their state to the mode community + * affiliation of incoming messages. + * + * LPA is a standard community detection algorithm for graphs. It is very inexpensive * computationally, although (1) convergence is not guaranteed and (2) one can end up with * trivial solutions (all nodes are identified into a single community). * - * @tparam VD the vertex attribute type (discarded in the computation) * @tparam ED the edge attribute type (not used in the computation) * * @param graph the graph for which to compute the community affiliation - * @param maxSteps the number of supersteps of LPA to be performed + * @param maxSteps the number of supersteps of LPA to be performed. Because this is a static + * implementation, the algorithm will run for exactly this many supersteps. * * @return a graph with vertex attributes containing the label of community affiliation */ - def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], maxSteps: Int): Graph[VertexId, Long]{ + def run[ED: ClassTag](graph: Graph[_, ED], maxSteps: Int): Graph[VertexId, ED] = { val lpaGraph = graph.mapVertices { case (vid, _) => vid } - def sendMessage(edge: EdgeTriplet[VertexId, ED]) = { - Iterator((e.srcId, Map(e.dstAttr -> 1L)),(e.dstId, Map(e.srcAttr -> 1L))) + def sendMessage(e: EdgeTriplet[VertexId, ED]) = { + Iterator((e.srcId, Map(e.dstAttr -> 1L)), (e.dstId, Map(e.srcAttr -> 1L))) } - def mergeMessage(count1: Map[VertexId, Long], count2: Map[VertexId, Long]): Map[VertexId, Long] = { + def mergeMessage(count1: Map[VertexId, Long], count2: Map[VertexId, Long]) + : Map[VertexId, Long] = { (count1.keySet ++ count2.keySet).map { i => - val count1Val = count1.getOrElse(i,0L) - val count2Val = count2.getOrElse(i,0L) - i -> (count1Val +count2Val) - }.toMap + val count1Val = count1.getOrElse(i, 0L) + val count2Val = count2.getOrElse(i, 0L) + i -> (count1Val + count2Val) + }.toMap } - def vertexProgram(vid: VertexId, attr: Long, message: Map[VertexId, Long])={ - if (message.isEmpty) attr else message.maxBy{_._2}._1), + def vertexProgram(vid: VertexId, attr: Long, message: Map[VertexId, Long]) = { + if (message.isEmpty) attr else message.maxBy(_._2)._1 } - val initialMessage = Map[VertexId,Long]() + val initialMessage = Map[VertexId, Long]() Pregel(lpaGraph, initialMessage, maxIterations = maxSteps)( vprog = vertexProgram, sendMsg = sendMessage,