# convert_cred_to_graph.py

#!/usr/bin/python
#
# Simple script to convert CRED graphs into other kinds of graphs using the python-igraph library
#
# The conversion traverses the cred.json file generated by SourceCred and extracts node and edge information from
# the weighted graph section of the graph.
#
# Note that CRED graphs support dangling edges (i.e., edges with no source/target nodes). Dangling edges are not
# included in the resulting converted graph (a message reports the number of dangling edges found)
#
# Author: Javier Canovas (me@jlcanovas.es)
#

import getopt
import json
import sys

from igraph import Graph

"""
Usage of this script
Main options:
-i   - The path of the CRED graph (cred.json)
-o   - The path for the generated graph
-f   - Format of the generated graph (gml, graphml, dot, svg...)
"""
# One-line usage summary printed by main() for -h and when option parsing fails
USAGE = 'convert_cred_to_graph.py -i CRED_GRAPH_PATH -o OUTPUT_GRAPH -f OUTPUT_GRAPH_FORMAT'


def convert_graph(input_graph_path, output_path, output_format):
    """
    Converts a CRED-like graph into a graph format supported by the igraph library

    Nodes and edges are read from the weighted graph section of the CRED file. Edges whose source or target
    index does not match any node (dangling edges) are left out of the result; only their number is printed.

    :param input_graph_path: The path to the CRED graph to convert
    :param output_path: The path where the resulting graph will be saved
    :param output_format: The format of the resulting graph
    """

    with open(input_graph_path, encoding="utf8") as f:
        cred = json.load(f)

    # Important places in the CRED graph to extract information
    # The main weighted graph (element 0 holds the version, element 1 the graph itself)
    cred_weighted_graph = cred[1]['weightedGraphJSON'][1]['graphJSON']
    cred_graph_body = cred_weighted_graph[1]
    # The element addresses (to extract information such as the type and the name)
    cred_node_addresses = cred_graph_body['sortedNodeAddresses']

    # Printing CRED graph version
    print(f"Cred weighted graph JSON version {cred_weighted_graph[0]['version']}")

    g = Graph(directed=True)

    # Collecting nodes
    # Everything is gathered first and added in bulk afterwards: adding vertices/edges one at a time makes
    # igraph rebuild its internal structures on every call, which is very slow for large graphs
    node_names = []
    node_atts = {'label': [], 'type': [], 'description': [], 'timestampMs': [], 'index': []}
    for cred_node in cred_graph_body['nodes']:
        node_index = cred_node['index']
        node_address = cred_node_addresses[node_index]
        node_timestamp = cred_node['timestampMs']
        node_names.append(str(node_index))
        node_atts['label'].append(node_address[2] + '-' + node_address[-1][:7])
        node_atts['type'].append(node_address[2])
        node_atts['description'].append(cred_node['description'])
        # Nodes may come without timestamp, 0 is used in that case
        node_atts['timestampMs'].append(node_timestamp if node_timestamp is not None else 0)
        node_atts['index'].append(node_index)

    if node_names:
        g.add_vertices(len(node_names))
        for att_name, att_values in node_atts.items():
            g.vs[att_name] = att_values
        # The name is what edges use to refer to their source/target nodes
        g.vs['name'] = node_names

    # Collecting edges
    # Note that CRED graphs support dangling edges (i.e., edges with no source/target nodes)
    known_names = set(node_names)
    edge_endpoints = []
    edge_addresses = []
    edge_timestamps = []
    dangling_edges = []
    for cred_edge in cred_graph_body['edges']:
        edge_address = cred_edge['address']
        edge_timestamp = cred_edge['timestampMs']
        src_name = str(cred_edge['srcIndex'])
        dst_name = str(cred_edge['dstIndex'])
        if src_name in known_names and dst_name in known_names:
            edge_endpoints.append((src_name, dst_name))
            edge_addresses.append(edge_address)
            edge_timestamps.append(edge_timestamp)
        else:
            dangling_edges.append({"srcIndex": cred_edge['srcIndex'], "dstIndex": cred_edge['dstIndex']})

    if edge_endpoints:
        g.add_edges(edge_endpoints)
        g.es['address'] = edge_addresses
        g.es['timestampMs'] = edge_timestamps

    # Reporting the number of dangling edges found
    print(f"Dangling edges found: {len(dangling_edges)}")

    g.save(output_path, format=output_format)


def main(argv):
    """
    Command-line entry point: parses the options and launches the conversion

    Prints the usage message and exits when help is requested (-h/--help), when the options cannot be parsed
    or when any of the required options (-i, -o, -f) is missing.

    :param argv: The command-line arguments without the program name (i.e., sys.argv[1:])
    """
    if not argv:
        # Nothing to do; the usage is shown so the user knows how to call the script (exit status stays 0)
        print(USAGE)
        sys.exit(0)

    try:
        opts, _ = getopt.getopt(argv, "hi:o:f:", ["help"])
    except getopt.GetoptError:
        print(USAGE)
        sys.exit(2)

    input_graph_path = None
    output_path = None
    output_format = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(USAGE)
            sys.exit()
        elif opt == '-i':
            input_graph_path = arg
        elif opt == '-o':
            output_path = arg
        elif opt == '-f':
            output_format = arg

    # All three options are required; report it instead of failing later with an unbound variable
    if None in (input_graph_path, output_path, output_format):
        print(USAGE)
        sys.exit(2)

    convert_graph(input_graph_path, output_path, output_format)


# Run the conversion only when executed as a script; the program name is stripped from the arguments
if __name__ == "__main__":
    main(sys.argv[1:])