From 6a0a73312b99ec476a4d397159becf5e210ab48f Mon Sep 17 00:00:00 2001
From: Remy
Date: Sat, 2 Oct 2021 10:39:34 -0400
Subject: [PATCH] Add verbose option

Print time usage for random walk generation and skip-gram training
when set to True
---
 src/pecanpy/node2vec.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/pecanpy/node2vec.py b/src/pecanpy/node2vec.py
index 540d26f3..684f0b63 100755
--- a/src/pecanpy/node2vec.py
+++ b/src/pecanpy/node2vec.py
@@ -1,5 +1,7 @@
 """Different strategies for generating node2vec walks."""
 
+from time import time
+
 import numpy as np
 from gensim.models import Word2Vec
 from numba import get_num_threads, jit, prange
@@ -171,7 +173,8 @@ def preprocess_transition_probs(self):
         """Null default preprocess method."""
         pass
 
-    def embed(self, dim=128, num_walks=10, walk_length=80, window_size=10, epochs=1):
+    def embed(self, dim=128, num_walks=10, walk_length=80, window_size=10,
+              epochs=1, verbose=False):
         """Generate embeddings.
 
         This is a shortcut function that combines ``simulate_walks`` with
@@ -190,15 +193,24 @@ def embed(self, dim=128, num_walks=10, walk_length=80, window_size=10, epochs=1)
                 ``Word2Vec`` model, default is 10
             epochs (int): number of epochs for training ``Word2Vec``, default
                 is 1
+            verbose (bool): print time usage for random walk generation and
+                skip-gram training if set to True
 
         Return:
             numpy.ndarray: The embedding matrix, each row is a node embedding
                 vector. The index is the same as that for the graph.
 
         """
+        t = time()
         walks = self.simulate_walks(num_walks=num_walks, walk_length=walk_length)
+        if verbose:
+            print(f"Took {time() - t:.2f} sec to generate walks")
+
+        t = time()
         w2v = Word2Vec(walks, vector_size=dim, window=window_size, sg=1,
                        min_count=0, workers=self.workers, epochs=epochs)
+        if verbose:
+            print(f"Took {time() - t:.2f} sec to train")
 
         # index mapping back to node IDs
         idx_list = [w2v.wv.get_index(i) for i in self.IDlst]
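
Illustrative usage sketch (not part of the patch above): how the new verbose
flag could be exercised from user code. The graph class, constructor
arguments, edge-loading call, and file name below are assumptions about the
PecanPy API made for illustration only; the embed(..., verbose=True) call and
the printed messages come from this patch.

    # Hypothetical usage sketch of the verbose option added in this patch.
    # The SparseOTF constructor and read_edg call are assumed API usage,
    # not part of the change itself.
    from pecanpy.pecanpy import SparseOTF

    g = SparseOTF(p=1, q=1, workers=4, verbose=False)          # assumed setup
    g.read_edg("network.edg", weighted=True, directed=False)   # assumed loader

    # verbose=True makes embed() report wall-clock time for the two stages
    # timed in this patch: walk generation and Word2Vec (skip-gram) training.
    emb = g.embed(dim=128, num_walks=10, walk_length=80,
                  window_size=10, epochs=1, verbose=True)

    # Prints messages of the form:
    #   Took <seconds> sec to generate walks
    #   Took <seconds> sec to train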