
Commit

Fix and update requirements
NoeCecillon authored Jul 27, 2023
1 parent 725e097 commit adc6e69
Showing 4 changed files with 94 additions and 42 deletions.
10 changes: 6 additions & 4 deletions SG2V.py
@@ -17,7 +17,7 @@ def relabel_graph(graph):
mapping = {}
cpt_nodes = 0
for node in graph.nodes():
mapping[n] = cpt_nodes
mapping[node] = cpt_nodes
cpt_nodes += 1
graph = nx.relabel_nodes(graph, mapping)
return graph
@@ -37,9 +37,8 @@ def get_graphs_features(graphs_path, model_type, wl_iterations):
graphs_features = []
for i in range(len(graph_files)):
graph_file = "%s/%s.graphml" % (graphs_path, i)
G = nx.read_grahml(graph_file)
G = nx.read_graphml(graph_file)
G = relabel_graph(G)
G = self._check_graph(G)
if model_type == "g2v":
#graph, wl_iterations, attributed, erase_base_features
wl_model = WeisfeilerLehmanHashing_g2v(G, wl_iterations, False, False)
@@ -78,11 +77,14 @@ def learn_embeddings(graph_features):
def write_embeddings(embeddings):
for i in range(len(embeddings)):
with open('out/SG2V/%s.pkl' % (i), 'wb') as outp:
pickle.dump(embeddings[i], outp, pickle.HIGHEST_PROTOCOL)
pkl.dump(embeddings[i], outp, pkl.HIGHEST_PROTOCOL)



if __name__ == '__main__':
graphs_path = "data/CCS"
model_type = "sg2vn"
wl_iterations = 2
graph_features = get_graphs_features(graphs_path, model_type, wl_iterations)
learned_embeddings = learn_embeddings(graph_features)
write_embeddings(learned_embeddings)
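
For context, a minimal sketch (not part of this commit) of reading back the per-graph embeddings that write_embeddings dumps to out/SG2V/; the function name and its arguments below are illustrative.

import pickle as pkl

def load_embeddings(n_graphs, out_dir="out/SG2V"):
    # Read back the per-graph embeddings dumped by write_embeddings above.
    embeddings = []
    for i in range(n_graphs):
        with open("%s/%s.pkl" % (out_dir, i), "rb") as inp:
            embeddings.append(pkl.load(inp))
    return embeddings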
45 changes: 45 additions & 0 deletions requirements.py
@@ -0,0 +1,45 @@
cached-property==1.5.2
certifi==2023.7.22
charset-normalizer==2.0.12
dataclasses==0.8
decorator==4.4.2
gensim==4.1.2
googledrivedownloader==0.4
h5py==3.1.0
idna==3.4
importlib-resources==5.4.0
isodate==0.6.1
joblib==1.1.1
karateclub==1.3.3
Levenshtein==0.21.1
networkx==2.5.1
numpy==1.19.5
pandas==1.1.5
Pillow==8.4.0
pkg_resources==0.0.0
plyfile==0.8
PyGSP==0.5.1
pyparsing==3.1.0
python-dateutil==2.8.2
python-Levenshtein==0.21.1
python-louvain==0.16
pytz==2023.3
rapidfuzz==2.11.1
rdflib==5.0.0
requests==2.27.1
scikit-learn==0.24.2
scipy==1.5.4
six==1.16.0
smart-open==6.3.0
texttable==1.5.0
threadpoolctl==3.1.0
torch==1.10.1
torch-cluster==1.5.9
torch-geometric==1.3.2
torch-scatter==2.0.9
torch-sparse==0.6.12
torchvision==0.11.2
tqdm==4.64.1
typing_extensions==4.1.1
urllib3==1.26.16
zipp==3.6.0
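
A small, hypothetical sanity check (not part of this commit) that a few of the pins above resolve to the versions actually installed; pkg_resources ships with setuptools and fits these Python 3.6-era pins.

import pkg_resources  # provided by setuptools

# Print the installed versions of a few key dependencies from the list above.
for pkg in ("networkx", "gensim", "karateclub", "scikit-learn", "torch"):
    print(pkg, pkg_resources.get_distribution(pkg).version)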
71 changes: 36 additions & 35 deletions run_sgcn.py
@@ -1,7 +1,8 @@
import subprocess
import glob
import os

import networkx as nx
import csv

"""
Transforms a graphml graph to the format required by the SGCN method. The generated edgelists are saved in the "edgelist" folder.
@@ -10,31 +11,31 @@
:return None
"""
def transform_to_edgelist(graphs_path):
os.makedirs("%s/edgelist" % (graphs_path))
for i in range(len(glob.glob("%s/*.graphml" % (graphs_path)))):
G = nx.read_graphml("%s/%s.graphml" % (graphs_path, i))
corresp_nodes = {}
nb_nodes = 0
for n in G.nodes:
corresp_nodes[n] = nb_nodes
nb_nodes += 1

edgelist = []
for u,v,d in G.edges(data=True):
u = corresp_nodes[u]
v = corresp_nodes[v]
if d["weight"] >= 0.0:
d = 1
else:
d = -1
edgelist.append([u,v,d])

header = ["Node id 1", "Node id 2", "Sign"]
#write file
with open("%s/edgelist/%s.csv" % (graphs_path, i), 'w') as f:
writer_pos = csv.writer(f)
writer_pos.writerow(header)
writer_pos.writerows(edgelist)
os.makedirs("%s/edgelist" % (graphs_path))
for i in range(len(glob.glob("%s/*.graphml" % (graphs_path)))):
G = nx.read_graphml("%s/%s.graphml" % (graphs_path, i))
corresp_nodes = {}
nb_nodes = 0
for n in G.nodes:
corresp_nodes[n] = nb_nodes
nb_nodes += 1

edgelist = []
for u,v,d in G.edges(data=True):
u = corresp_nodes[u]
v = corresp_nodes[v]
if d["weight"] >= 0.0:
d = 1
else:
d = -1
edgelist.append([u,v,d])

header = ["Node id 1", "Node id 2", "Sign"]
#write file
with open("%s/edgelist/%s.csv" % (graphs_path, i), 'w') as f:
writer_pos = csv.writer(f)
writer_pos.writerow(header)
writer_pos.writerows(edgelist)

"""
Learns the SGCN representations of all graphs by running the SGCN script. First, it transforms graphs to an edgelist to match the format required by SGCN script.
@@ -43,16 +44,16 @@ def transform_to_edgelist(graphs_path):
:return None
"""
def run_all_SGCN(graphs_path):
if not os.path.exists("%s/edgelist" % (graphs_path)):
transform_to_edgelist(graphs_path)
for i in range(len(glob.glob("%s/edgelist/*.csv" % (graphs_path)))):
command = "python SGCN-master/src/main.py --layers 32 --learning-rate 0.01 --reduction-dimensions 64 --epochs 10 --reduction-iterations 10 --edge-path %s/edgelist/%s.csv --embedding-path out/SGCN/%s.csv --regression-weights-path /output/SGCN/weights/%s.csv" % (graphs_path, i, i ,i)
process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
if not os.path.exists("%s/edgelist" % (graphs_path)):
transform_to_edgelist(graphs_path)
for i in range(len(glob.glob("%s/edgelist/*.csv" % (graphs_path)))):
command = "python SGCN-master/src/main.py --layers 32 --learning-rate 0.01 --reduction-dimensions 64 --epochs 10 --reduction-iterations 10 --edge-path %s/edgelist/%s.csv --embedding-path out/SGCN/%s.csv --regression-weights-path out/SGCN/weights/%s.csv" % (graphs_path, i, i ,i)
process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
output, error = process.communicate()


if __name__ == '__main__':
dataset = "SSO"
graphs_path = "data/%s/" % (dataset)
dataset = "SSO"
graphs_path = "data/%s/" % (dataset)

run_all_SGCN(graphs_path)
run_all_SGCN(graphs_path)
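
A hypothetical end-to-end sketch (not part of this commit) of the edgelist conversion: it writes a toy signed graph with the integer-indexed file name the script expects, then converts it; edges with weight >= 0 get Sign +1 and negative weights get -1 in the generated CSV. The toy paths are illustrative.

import os
import networkx as nx

from run_sgcn import transform_to_edgelist  # assumes run_sgcn.py is importable

# Toy example: one positive and one negative edge.
os.makedirs("data/toy", exist_ok=True)
G = nx.Graph()
G.add_edge("a", "b", weight=1.0)    # becomes Sign +1
G.add_edge("b", "c", weight=-2.0)   # becomes Sign -1
nx.write_graphml(G, "data/toy/0.graphml")

transform_to_edgelist("data/toy")   # writes data/toy/edgelist/0.csv
# The CSV has the header "Node id 1", "Node id 2", "Sign" and one row per edge.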
10 changes: 7 additions & 3 deletions signedWL.py
@@ -1,6 +1,7 @@
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing
from karateclub import Graph2Vec
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import hashlib

"""
Adapted from https://github.com/benedekrozemberczki/SGCN
@@ -11,7 +12,7 @@ class Signed_Graph2Vec(Graph2Vec):
Extends the Graph2Vec class to add a fit_documents method.
"""

def fit_documents(self, documents: List[gensim.models.doc2vec.TaggedDocument]):
def fit_documents(self, documents):
"""
Fit Doc2Vec model directly with extracted features.
"""
@@ -28,6 +29,8 @@ def fit_documents(self, documents: List[gensim.models.doc2vec.TaggedDocument]):
seed=self.seed,
)

self._embedding = [self.model.docvecs[str(i)] for i, _ in enumerate(documents)]


class signed_WeisfeilerLehmanHashing(WeisfeilerLehmanHashing):
"""
@@ -65,6 +68,7 @@ class WeisfeilerLehmanHashing_g2v(signed_WeisfeilerLehmanHashing):
"""
"g2v" model. Basic G2V model.
"""
pass

class WeisfeilerLehmanHashing_sg2vn(signed_WeisfeilerLehmanHashing):
"""
@@ -103,11 +107,11 @@ def _do_a_recursion(self):
}
return new_features

class WeisfeilerLehmanHashing_sg2vsb(signed_WeisfeilerLehmanHashing):
'''class WeisfeilerLehmanHashing_sg2vsb(signed_WeisfeilerLehmanHashing):
"""
"sg2vsb" model.
"""
#TODO
def _do_a_recursion(self):
def _do_a_recursion(self):'''

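A hypothetical sketch (not part of this commit) of how fit_documents might be driven: each graph's WL feature list is wrapped in a gensim TaggedDocument whose tag matches the str(i) lookup in fit_documents above. The toy feature lists and the constructor settings are assumptions, not taken from this repository.

from gensim.models.doc2vec import TaggedDocument
from signedWL import Signed_Graph2Vec  # assumes signedWL.py is importable

# Toy stand-in for per-graph WL feature lists (normally produced by the
# hashing classes above / get_graphs_features in SG2V.py).
all_graph_features = [
    ["h1", "h2", "h1"],
    ["h2", "h3", "h3"],
]

documents = [
    TaggedDocument(words=features, tags=[str(i)])
    for i, features in enumerate(all_graph_features)
]

model = Signed_Graph2Vec(wl_iterations=2, min_count=1)  # hypothetical settings
model.fit_documents(documents)
embeddings = model.get_embedding()  # one vector per graph (karateclub API)
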