-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconvert_cred_to_graph.py
101 lines (82 loc) · 3.64 KB
/
convert_cred_to_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/python
#
# Simple script to convert CRED graphs into other kinds of graphs using the python-igraph library
#
# The conversion traverses the cred.json file generated by SourceCred and extracts node and edge information from
# the weighted graph section of the file.
#
# Note that CRED graphs support dangling edges (i.e., edges with no source/target nodes). Dangling edges are not
# included in the resulting converted graph (a message reports the number of dangling edges found).
#
# Author: Javier Canovas (me@jlcanovas.es)
#
import getopt
import json
import sys
from igraph import Graph
"""
Usage of this script
Main options:
-i - The path of the CRED graph (cred.json)
-o - The path for the generated graph
-f - Format of the generated graph (gml, graphml, dot, svg...)
"""
# One-line usage summary that main() prints when help is requested or option parsing fails
USAGE = 'convert_cred_to_graph.py -i CRED_GRAPH_PATH -o OUTPUT_GRAPH -f OUTPUT_GRAPH_FORMAT'
def convert_graph(input_graph_path, output_path, output_format):
    """
    Builds a directed igraph graph from a CRED graph file and saves it in the requested format

    Nodes and edges are read from the weighted graph section of the JSON document. Edges for which
    igraph raises a ValueError when they are added are treated as dangling edges: they are left out
    of the resulting graph and only their count is printed.

    :param input_graph_path: The path to the CRED graph (cred.json) to convert
    :param output_path: The path where the resulting graph will be saved
    :param output_format: The format of the resulting graph (handed over to igraph as is)
    """
    with open(input_graph_path, encoding="utf8") as cred_file:
        cred = json.load(cred_file)

    # The weighted graph is a pair: position 0 carries the version, position 1 the actual content
    cred_weighted_graph = cred[1]['weightedGraphJSON'][1]['graphJSON']
    graph_content = cred_weighted_graph[1]
    # Node addresses, looked up by node index (used to derive the type and the label of each node)
    cred_node_addresses = graph_content['sortedNodeAddresses']

    print(f"Cred weighted graph JSON version {cred_weighted_graph[0]['version']}")

    g = Graph(directed=True)

    # One vertex per CRED node, named after the node index so that edges can refer to it
    for cred_node in graph_content['nodes']:
        node_index = cred_node['index']
        node_address = cred_node_addresses[node_index]
        node_type = node_address[2]
        # Label: node type plus the first 7 characters of the last address component
        node_label = node_type + '-' + node_address[-1][:7]
        node_description = cred_node['description']
        node_timestamp = cred_node['timestampMs']
        if node_timestamp is None:
            # Nodes without a timestamp are stored with 0 instead of None
            node_timestamp = 0
        g.add_vertex(
            name=str(node_index),
            label=node_label,
            type=node_type,
            description=node_description,
            timestampMs=node_timestamp,
            index=node_index,
        )

    # CRED graphs support dangling edges (i.e., edges with no source/target nodes); they are
    # collected apart instead of being added to the graph
    dangling_edges = []
    for cred_edge in graph_content['edges']:
        edge_atts = {'address': cred_edge['address'], 'timestampMs': cred_edge['timestampMs']}
        src_index = cred_edge['srcIndex']
        dst_index = cred_edge['dstIndex']
        try:
            # Endpoints are referenced by vertex name, which is the node index as a string
            g.add_edge(str(src_index), str(dst_index), **edge_atts)
        except ValueError:
            dangling_edges.append({"srcIndex": src_index, "dstIndex": dst_index})

    # Reporting the number of dangling edges found
    print(f"Dangling edges found: {len(dangling_edges)}")

    g.save(output_path, format=output_format)
def main(argv):
    """
    Parses the command-line options and launches the conversion

    Prints the usage message and exits when no arguments are given, when help is requested (-h or
    --help), when the options cannot be parsed, or when any of the required options (-i, -o, -f)
    is missing.

    :param argv: The command-line arguments, without the script name
    """
    if not argv:
        # Nothing to do; show how the script is meant to be called (exit status kept at 0)
        print(USAGE)
        sys.exit(0)

    try:
        # "help" has to be registered as a long option, otherwise getopt rejects --help
        opts, _ = getopt.getopt(argv, "hi:o:f:", ["help"])
    except getopt.GetoptError:
        print(USAGE)
        sys.exit(2)

    # Start from None so that a missing option can be detected after parsing
    input_graph_path = None
    output_path = None
    output_format = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(USAGE)
            sys.exit()
        elif opt == '-i':
            input_graph_path = arg
        elif opt == '-o':
            output_path = arg
        elif opt == '-f':
            output_format = arg

    # The three options are required: report the usage instead of failing on an unbound variable
    if input_graph_path is None or output_path is None or output_format is None:
        print(USAGE)
        sys.exit(2)

    convert_graph(input_graph_path, output_path, output_format)
if __name__ == "__main__":
    # Executed as a script: hand over the command-line arguments, leaving out the script name
    main(sys.argv[1:])