
Commit

Fix and update requirements
NoeCecillon authored Jul 27, 2023
1 parent 725e097 commit adc6e69
Showing 4 changed files with 94 additions and 42 deletions.
10 changes: 6 additions & 4 deletions SG2V.py
@@ -17,7 +17,7 @@ def relabel_graph(graph):
mapping = {}
cpt_nodes = 0
for node in graph.nodes():
mapping[n] = cpt_nodes
mapping[node] = cpt_nodes
cpt_nodes += 1
graph = nx.relabel_nodes(graph, mapping)
return graph
@@ -37,9 +37,8 @@ def get_graphs_features(graphs_path, model_type, wl_iterations):
graphs_features = []
for i in range(len(graph_files)):
graph_file = "%s/%s.graphml" % (graphs_path, i)
G = nx.read_grahml(graph_file)
G = nx.read_graphml(graph_file)
G = relabel_graph(G)
G = self._check_graph(G)
if model_type == "g2v":
#graph, wl_iterations, attributed, erase_base_features
wl_model = WeisfeilerLehmanHashing_g2v(G, wl_iterations, False, False)
@@ -78,11 +77,14 @@ def learn_embeddings(graph_features):
def write_embeddings(embeddings):
for i in range(len(embeddings)):
with open('out/SG2V/%s.pkl' % (i), 'wb') as outp:
pickle.dump(embeddings[i], outp, pickle.HIGHEST_PROTOCOL)
pkl.dump(embeddings[i], outp, pkl.HIGHEST_PROTOCOL)



if __name__ == '__main__':
graphs_path = "data/CCS"
model_type = "sg2vn"
wl_iterations = 2
graph_features = get_graphs_features(graphs_path, model_type, wl_iterations)
learned_embeddings = learn_embeddings(graph_features)
write_embeddings(learned_embeddings)
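
For context, a minimal sketch (not part of this commit) of reading back the per-graph embeddings that write_embeddings dumps to out/SG2V/; the function name and its arguments below are illustrative.

import pickle as pkl

def load_embeddings(n_graphs, out_dir="out/SG2V"):
    # Read back the per-graph embeddings dumped by write_embeddings above.
    embeddings = []
    for i in range(n_graphs):
        with open("%s/%s.pkl" % (out_dir, i), "rb") as inp:
            embeddings.append(pkl.load(inp))
    return embeddings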
45 changes: 45 additions & 0 deletions requirements.py
@@ -0,0 +1,45 @@
cached-property==1.5.2
certifi==2023.7.22
charset-normalizer==2.0.12
dataclasses==0.8
decorator==4.4.2
gensim==4.1.2
googledrivedownloader==0.4
h5py==3.1.0
idna==3.4
importlib-resources==5.4.0
isodate==0.6.1
joblib==1.1.1
karateclub==1.3.3
Levenshtein==0.21.1
networkx==2.5.1
numpy==1.19.5
pandas==1.1.5
Pillow==8.4.0
pkg_resources==0.0.0
plyfile==0.8
PyGSP==0.5.1
pyparsing==3.1.0
python-dateutil==2.8.2
python-Levenshtein==0.21.1
python-louvain==0.16
pytz==2023.3
rapidfuzz==2.11.1
rdflib==5.0.0
requests==2.27.1
scikit-learn==0.24.2
scipy==1.5.4
six==1.16.0
smart-open==6.3.0
texttable==1.5.0
threadpoolctl==3.1.0
torch==1.10.1
torch-cluster==1.5.9
torch-geometric==1.3.2
torch-scatter==2.0.9
torch-sparse==0.6.12
torchvision==0.11.2
tqdm==4.64.1
typing_extensions==4.1.1
urllib3==1.26.16
zipp==3.6.0
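
A small, hypothetical sanity check (not part of this commit) that a few of the pins above resolve to the versions actually installed; pkg_resources ships with setuptools and fits these Python 3.6-era pins.

import pkg_resources  # provided by setuptools

# Print the installed versions of a few key dependencies from the list above.
for pkg in ("networkx", "gensim", "karateclub", "scikit-learn", "torch"):
    print(pkg, pkg_resources.get_distribution(pkg).version)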
71 changes: 36 additions & 35 deletions run_sgcn.py
@@ -1,7 +1,8 @@
import subprocess
import glob
import os

import networkx as nx
import csv

"""
Transforms a graphml graph to the format required by the SGCN method. The generated edgelists are saved in the "edgelist" folder.
@@ -10,31 +11,31 @@
:return None
"""
def transform_to_edgelist(graphs_path):
os.makedirs("%s/edgelist" % (graphs_path))
for i in range(len(glob.glob("%s/*.graphml" % (graphs_path)))):
G = nx.read_graphml("%s/%s.graphml" % (graphs_path, i))
corresp_nodes = {}
nb_nodes = 0
for n in G.nodes:
corresp_nodes[n] = nb_nodes
nb_nodes += 1

edgelist = []
for u,v,d in G.edges(data=True):
u = corresp_nodes[u]
v = corresp_nodes[v]
if d["weight"] >= 0.0:
d = 1
else:
d = -1
edgelist.append([u,v,d])

header = ["Node id 1", "Node id 2", "Sign"]
#write file
with open("%s/edgelist/%s.csv" % (graphs_path, i), 'w') as f:
writer_pos = csv.writer(f)
writer_pos.writerow(header)
writer_pos.writerows(edgelist)
os.makedirs("%s/edgelist" % (graphs_path))
for i in range(len(glob.glob("%s/*.graphml" % (graphs_path)))):
G = nx.read_graphml("%s/%s.graphml" % (graphs_path, i))
corresp_nodes = {}
nb_nodes = 0
for n in G.nodes:
corresp_nodes[n] = nb_nodes
nb_nodes += 1

edgelist = []
for u,v,d in G.edges(data=True):
u = corresp_nodes[u]
v = corresp_nodes[v]
if d["weight"] >= 0.0:
d = 1
else:
d = -1
edgelist.append([u,v,d])

header = ["Node id 1", "Node id 2", "Sign"]
#write file
with open("%s/edgelist/%s.csv" % (graphs_path, i), 'w') as f:
writer_pos = csv.writer(f)
writer_pos.writerow(header)
writer_pos.writerows(edgelist)

"""
Learns the SGCN representations of all graphs by running the SGCN script. First, it transforms graphs to an edgelist to match the format required by SGCN script.
@@ -43,16 +44,16 @@ def transform_to_edgelist(graphs_path):
:return None
"""
def run_all_SGCN(graphs_path):
if not os.path.exists("%s/edgelist" % (graphs_path)):
transform_to_edgelist(graphs_path)
for i in range(len(glob.glob("%s/edgelist/*.csv" % (graphs_path)))):
command = "python SGCN-master/src/main.py --layers 32 --learning-rate 0.01 --reduction-dimensions 64 --epochs 10 --reduction-iterations 10 --edge-path %s/edgelist/%s.csv --embedding-path out/SGCN/%s.csv --regression-weights-path /output/SGCN/weights/%s.csv" % (graphs_path, i, i ,i)
process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
if not os.path.exists("%s/edgelist" % (graphs_path)):
transform_to_edgelist(graphs_path)
for i in range(len(glob.glob("%s/edgelist/*.csv" % (graphs_path)))):
command = "python SGCN-master/src/main.py --layers 32 --learning-rate 0.01 --reduction-dimensions 64 --epochs 10 --reduction-iterations 10 --edge-path %s/edgelist/%s.csv --embedding-path out/SGCN/%s.csv --regression-weights-path out/SGCN/weights/%s.csv" % (graphs_path, i, i ,i)
process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()


if __name__ == '__main__':
dataset = "SSO"
graphs_path = "data/%s/" % (dataset)
dataset = "SSO"
graphs_path = "data/%s/" % (dataset)

run_all_SGCN(graphs_path)
run_all_SGCN(graphs_path)
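
A hypothetical end-to-end sketch (not part of this commit) of the edgelist conversion: it writes a toy signed graph with the integer-indexed file name the script expects, then converts it; edges with weight >= 0 get Sign +1 and negative weights get -1 in the generated CSV. The toy paths are illustrative.

import os
import networkx as nx

from run_sgcn import transform_to_edgelist  # assumes run_sgcn.py is importable

# Toy example: one positive and one negative edge.
os.makedirs("data/toy", exist_ok=True)
G = nx.Graph()
G.add_edge("a", "b", weight=1.0)    # becomes Sign +1
G.add_edge("b", "c", weight=-2.0)   # becomes Sign -1
nx.write_graphml(G, "data/toy/0.graphml")

transform_to_edgelist("data/toy")   # writes data/toy/edgelist/0.csv
# The CSV has the header "Node id 1", "Node id 2", "Sign" and one row per edge.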
10 changes: 7 additions & 3 deletions signedWL.py
@@ -1,6 +1,7 @@
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing
from karateclub import Graph2Vec
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import hashlib

"""
Adapted from https://github.com/benedekrozemberczki/SGCN
@@ -11,7 +12,7 @@ class Signed_Graph2Vec(Graph2Vec):
Extends the Graph2Vec class to add a fit_documents method.
"""

def fit_documents(self, documents: List[gensim.models.doc2vec.TaggedDocument]):
def fit_documents(self, documents):
"""
Fit Doc2Vec model directly with extracted features.
"""
@@ -28,6 +29,8 @@ def fit_documents(self, documents: List[gensim.models.doc2vec.TaggedDocument]):
seed=self.seed,
)

self._embedding = [self.model.docvecs[str(i)] for i, _ in enumerate(documents)]


class signed_WeisfeilerLehmanHashing(WeisfeilerLehmanHashing):
"""
@@ -65,6 +68,7 @@ class WeisfeilerLehmanHashing_g2v(signed_WeisfeilerLehmanHashing):
"""
"g2v" model. Basic G2V model.
"""
pass

class WeisfeilerLehmanHashing_sg2vn(signed_WeisfeilerLehmanHashing):
"""
@@ -103,11 +107,11 @@ def _do_a_recursion(self):
}
return new_features

class WeisfeilerLehmanHashing_sg2vsb(signed_WeisfeilerLehmanHashing):
'''class WeisfeilerLehmanHashing_sg2vsb(signed_WeisfeilerLehmanHashing):
"""
"sg2vsb" model.
"""
#TODO
def _do_a_recursion(self):
def _do_a_recursion(self):'''

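A hypothetical sketch (not part of this commit) of how fit_documents might be driven: each graph's WL feature list is wrapped in a gensim TaggedDocument whose tag matches the str(i) lookup in fit_documents above. The toy feature lists and the constructor settings are assumptions, not taken from this repository.

from gensim.models.doc2vec import TaggedDocument
from signedWL import Signed_Graph2Vec  # assumes signedWL.py is importable

# Toy stand-in for per-graph WL feature lists (normally produced by the
# hashing classes above / get_graphs_features in SG2V.py).
all_graph_features = [
    ["h1", "h2", "h1"],
    ["h2", "h3", "h3"],
]

documents = [
    TaggedDocument(words=features, tags=[str(i)])
    for i, features in enumerate(all_graph_features)
]

model = Signed_Graph2Vec(wl_iterations=2, min_count=1)  # hypothetical settings
model.fit_documents(documents)
embeddings = model.get_embedding()  # one vector per graph (karateclub API)
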