-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add Clustering Coefficient algorithm (#15)
* add Clustering Coefficient algorithm * format result * remove CC license
- Loading branch information
Showing
19 changed files
with
224 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +0,0 @@ | ||
id,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13 | ||
1,Tom,tom,10,20,30,40,2021-01-27,2021-01-01T12:10:10,43535232,true,1.0,2.0,10:10:10 | ||
2,Jina,Jina,11,21,31,41,2021-01-28,2021-01-02T12:10:10,43535232,false,1.1,2.1,11:10:10 | ||
3,Tim,Tim,12,22,32,42,2021-01-29,2021-01-03T12:10:10,43535232,false,1.2,2.2,12:10:10 | ||
4,张三,张三,13,23,33,43,2021-01-30,2021-01-04T12:10:10,43535232,true,1.3,2.3,13:10:10 | ||
5,李四,李四,14,24,34,44,2021-02-01,2021-01-05T12:10:10,43535232,false,1.4,2.4,14:10:10 | ||
6,王五,王五,15,25,35,45,2021-02-02,2021-01-06T12:10:10,0,false,1.5,2.5,15:10:10 | ||
7,Jina,Jina,16,26,36,46,2021-02-03,2021-01-07T12:10:10,43535232,true,1.6,2.6,16:10:10 | ||
8,Jina,Jina,17,27,37,47,2021-02-04,2021-01-08T12:10:10,43535232,false,1.7,2.7,17:10:10 | ||
9,Jina,Jina,18,28,38,48,2021-02-05,2021-01-09T12:10:10,43535232,true,1.8,2.8,18:10:10 | ||
10,Jina,Jina,19,29,39,49,2021-02-06,2021-01-10T12:10:10,43535232,false,1.9,2.9,19:10:10 | ||
-1,Jina,Jina,20,30,40,50,2021-02-07,2021-02-11T12:10:10,43535232,false,2.0,3.0,20:10:10 | ||
-2,Jina,Jina,21,31,41,51,2021-02-08,2021-03-12T12:10:10,43535232,false,2.1,3.1,21:10:10 | ||
-3,Jina,Jina,22,32,42,52,2021-02-09,2021-04-13T12:10:10,43535232,false,2.2,3.2,22:10:10 | ||
This file was deleted.
Oops, something went wrong.
Empty file.
This file was deleted.
Oops, something went wrong.
9 changes: 9 additions & 0 deletions
9
example/src/main/scala/com/vesoft/nebula/algorithm/ClusteringCoefficientExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License, | ||
* attached with Common Clause Condition 1.0, found in the LICENSES directory. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
|
||
/** Empty placeholder object; it declares no members. */
object ClusteringCoefficientExample
8 changes: 8 additions & 0 deletions
8
example/src/main/scala/com/vesoft/nebula/algorithm/DegreeStaticExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/* Copyright (c) 2021 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
|
||
/** Empty placeholder object; it declares no members. */
object DegreeStaticExample
8 changes: 8 additions & 0 deletions
8
example/src/main/scala/com/vesoft/nebula/algorithm/GraphTriangleCountExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
|
||
/** Empty placeholder object; it declares no members. */
object GraphTriangleCountExample
8 changes: 8 additions & 0 deletions
8
example/src/main/scala/com/vesoft/nebula/algorithm/LpaExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/* Copyright (c) 2021 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
|
||
/** Empty placeholder object; it declares no members. */
object LpaExample
8 changes: 8 additions & 0 deletions
8
example/src/main/scala/com/vesoft/nebula/algorithm/PageRankExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/* Copyright (c) 2021 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
|
||
/** Empty placeholder object; it declares no members. */
object PageRankExample
7 changes: 7 additions & 0 deletions
7
example/src/main/scala/com/vesoft/nebula/algorithm/ReadData.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
/** Empty placeholder object; it declares no members. */
object ReadData
7 changes: 7 additions & 0 deletions
7
example/src/main/scala/com/vesoft/nebula/algorithm/SCCExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
/** Empty placeholder object; it declares no members. */
object SCCExample
7 changes: 7 additions & 0 deletions
7
example/src/main/scala/com/vesoft/nebula/algorithm/ShortestPathExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Copyright (c) 2021 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
/** Empty placeholder object; it declares no members. */
object ShortestPathExample
7 changes: 7 additions & 0 deletions
7
example/src/main/scala/com/vesoft/nebula/algorithm/TriangleCountExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
/** Empty placeholder object; it declares no members. */
object TriangleCountExample
7 changes: 7 additions & 0 deletions
7
example/src/main/scala/com/vesoft/nebula/algorithm/WCCExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Copyright (c) 2020 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm | ||
/** Empty placeholder object; it declares no members. */
object WCCExample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
99 changes: 99 additions & 0 deletions
99
...-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/ClusteringCoefficientAlgo.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* Copyright (c) 2021 vesoft inc. All rights reserved. | ||
* | ||
* This source code is licensed under Apache 2.0 License. | ||
*/ | ||
|
||
package com.vesoft.nebula.algorithm.lib | ||
|
||
import com.vesoft.nebula.algorithm.config.{AlgoConstants, CoefficientConfig, KCoreConfig} | ||
import com.vesoft.nebula.algorithm.utils.NebulaUtil | ||
import org.apache.log4j.Logger | ||
import org.apache.spark.graphx.Graph | ||
import org.apache.spark.rdd.RDD | ||
import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StructField, StructType} | ||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} | ||
|
||
/**
  * Clustering coefficient computation on top of GraphX.
  *
  * Two modes are supported, selected by `CoefficientConfig.algoType`:
  *  - "local" (case-insensitive): one coefficient per vertex;
  *  - anything else: a single global coefficient for the whole graph.
  */
object ClusteringCoefficientAlgo {
  private val LOGGER = Logger.getLogger(this.getClass)

  val ALGORITHM: String = "ClusterCoefficientAlgo"

  /**
    * Run the clustering coefficient algorithm for nebula graph.
    *
    * @param spark             active Spark session, used to build the result DataFrame
    * @param dataset           input rows handed to `NebulaUtil.loadInitGraph` to build the graph
    * @param coefficientConfig configuration; `algoType` equal to "local" (ignoring case)
    *                          selects the per-vertex computation, any other value the global one
    * @return for "local": a DataFrame with columns (`AlgoConstants.ALGO_ID_COL`: Long,
    *         `AlgoConstants.CLUSTERCOEFFICIENT_RESULT_COL`: Double), one row per vertex;
    *         otherwise a single-row DataFrame with the column `globalClusterCoefficient`
    */
  def apply(spark: SparkSession,
            dataset: Dataset[Row],
            coefficientConfig: CoefficientConfig): DataFrame = {
    val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, false)

    if (coefficientConfig.algoType.equalsIgnoreCase("local")) {
      // compute local clustering coefficient
      val localClusterCoefficient = executeLocalCC(graph)
      val schema = StructType(
        List(
          StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false),
          StructField(AlgoConstants.CLUSTERCOEFFICIENT_RESULT_COL, DoubleType, nullable = true)
        ))
      val algoResult = spark.sqlContext.createDataFrame(localClusterCoefficient, schema)

      // Log the graph's average clustering coefficient. Sum and count are gathered in a
      // single pass instead of separate count/reduce/count actions, and the value is read
      // as a Double directly rather than through a String round-trip.
      val (coeffSum, vertexNum) = localClusterCoefficient
        .map(_.getDouble(1))
        .aggregate((0.0, 0L))(
          (acc, coeff) => (acc._1 + coeff, acc._2 + 1L),
          (left, right) => (left._1 + right._1, left._2 + right._2)
        )
      val averageCoeff: Double = if (vertexNum == 0L) 0.0 else coeffSum / vertexNum
      LOGGER.info(s"graph's average clustering coefficient is ${averageCoeff}")

      algoResult
    } else {
      // compute global clustering coefficient
      val globalClusterCoefficient: Double = executeGlobalCC(graph)
      val rdd = spark.sparkContext.parallelize(List(globalClusterCoefficient)).map(Row(_))

      val schema = StructType(
        List(
          StructField("globalClusterCoefficient", DoubleType, nullable = false)
        ))
      spark.sqlContext.createDataFrame(rdd, schema)
    }
  }

  /**
    * Execute the local clustering coefficient: for every vertex, the number of triangles
    * through it divided by the number of neighbour pairs C(degree, 2).
    *
    * Vertices missing from `graph.degrees` are dropped by the inner join.
    * NOTE(review): `triangleCount()` and `degrees` are both taken from the same input graph;
    * if the input can contain duplicate or self edges the two may not count edges the same
    * way -- confirm against `NebulaUtil.loadInitGraph`.
    *
    * @param graph the graph to analyse
    * @return rows of (vertex id: Long, coefficient: Double rounded to 6 decimals)
    */
  def executeLocalCC(graph: Graph[None.type, Double]): RDD[Row] = {
    // actual triangle count for each vertex
    val triangleNum = graph.triangleCount().vertices
    // number of neighbour pairs for each vertex; computed in Long so that
    // degree * (degree - 1) cannot overflow Int for high-degree vertices
    val idealTriangleNum =
      graph.degrees.mapValues(degree => degree.toLong * (degree - 1L) / 2L)

    triangleNum
      .innerJoin(idealTriangleNum) { (_, actualCount, idealCount) =>
        if (idealCount == 0L) 0.0
        // floating-point division: Int / Int would truncate every coefficient to 0 or 1
        else roundToSixDecimals(actualCount.toDouble / idealCount)
      }
      .map { case (vertexId, coefficient) => Row(vertexId, coefficient) }
  }

  /**
    * Execute the global clustering coefficient: the sum of per-vertex triangle counts
    * divided by the sum over vertices of C(degree, 2).
    *
    * @param graph the graph to analyse
    * @return the coefficient rounded to 6 decimals, or 0.0 when the denominator is zero
    *         (including an empty graph)
    */
  def executeGlobalCC(graph: Graph[None.type, Double]): Double = {
    // Sum of per-vertex triangle counts (each triangle is counted at each of its vertices).
    // fold with a zero element tolerates an empty RDD, where reduce would throw.
    val closedTriangleNum =
      graph.triangleCount().vertices.map(_._2.toLong).fold(0L)(_ + _)
    // number of open and closed triangles (according to C(n,2) = n*(n-1)/2),
    // multiplied in Double to avoid Int overflow
    val triangleNum = graph.degrees
      .map { case (_, degree) => degree.toDouble * (degree - 1) / 2.0 }
      .fold(0.0)(_ + _)

    if (triangleNum == 0) 0.0
    else roundToSixDecimals(closedTriangleNum / triangleNum)
  }

  /**
    * Round to 6 decimal places using the same "%.6f" formatting as before, but pinned to
    * Locale.ROOT: with the default locale a comma decimal separator would make `toDouble`
    * throw NumberFormatException.
    */
  private def roundToSixDecimals(value: Double): Double =
    "%.6f".formatLocal(java.util.Locale.ROOT, value).toDouble
}