
Commit

SiNE embeddings
NoeCecillon authored Aug 10, 2023
1 parent adc6e69 commit a959891
Showing 6 changed files with 676 additions and 0 deletions.
99 changes: 99 additions & 0 deletions SiNE/graph.py
@@ -0,0 +1,99 @@
import networkx as nx
import numpy as np
import csv

# From https://github.com/CompNet/SignedCentrality

class Vocabulary(object):
    def __init__(self, graph):
        self._id2node = {}
        self._node2id = {}
        self._curr_id = 1
        for node in graph.nodes():
            if node not in self._node2id:
                self._curr_id += 1
                self._node2id[node] = self._curr_id
                self._id2node[self._curr_id] = node

    def id2node(self, id):
        return self._id2node[id]

    def node2id(self, node):
        return self._node2id[node]

    def augment(self, graph):
        for node in graph.nodes():
            if node not in self._node2id:
                self._curr_id += 1
                self._node2id[node] = self._curr_id
                self._id2node[self._curr_id] = node

    def __len__(self):
        return self._curr_id


class Graph(object):
    def __init__(self, positive_graph, negative_graph):
        self.positive_graph = positive_graph
        self.negative_graph = negative_graph
        self.vocab = Vocabulary(positive_graph)
        self.vocab.augment(negative_graph)

    def get_positive_edges(self):
        return self.positive_graph.edges()

    def get_negative_edges(self):
        return self.negative_graph.edges()

    def __len__(self):
        return len(self.vocab)
        #return max(len(self.positive_graph), len(self.negative_graph))

    def get_triplets(self, p0=True, ids=True):
        triplets = []
        for xi in self.positive_graph.nodes():
            for xj in self.positive_graph[xi]:
                if xj in self.negative_graph:
                    for xk in self.negative_graph[xj]:
                        a, b, c = xi, xj, xk
                        if ids:
                            a = self.vocab.node2id(xi)
                            b = self.vocab.node2id(xj)
                            c = self.vocab.node2id(xk)
                        triplets.append([a, b, c])
                elif p0:
                    a, b = xi, xj
                    c = 0
                    if ids:
                        a = self.vocab.node2id(xi)
                        b = self.vocab.node2id(xj)
                    triplets.append([a, b, c])
        triplets = np.array(triplets)
        return triplets

    @staticmethod
    def read_from_file(filepath, delimiter=',', directed=False):
        positive_graph = nx.DiGraph() if directed else nx.Graph()
        negative_graph = nx.DiGraph() if directed else nx.Graph()
        file = open(filepath)
        # skip header line
        next(file)
        for line in file:
            line = line.strip()
            u, v, w = line.split(delimiter)
            w = float(w)
            if w > 0:
                positive_graph.add_edge(u, v, weight=w)
            if w < 0:
                negative_graph.add_edge(u, v, weight=w)
        file.close()
        graph = Graph(positive_graph, negative_graph)
        return graph
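
A minimal usage sketch for graph.py, added for reference. It assumes a hypothetical signed edge list "toy_signed.csv" (not part of this commit) with a header line and rows of the form u,v,weight, and that graph.py is importable from the working directory.

# Hypothetical example -- "toy_signed.csv" and the import path are assumptions.
from graph import Graph

g = Graph.read_from_file("toy_signed.csv", delimiter=',', directed=True)
print(len(g))                        # size of the node vocabulary
print(list(g.get_positive_edges()))  # edges with weight > 0
print(list(g.get_negative_edges()))  # edges with weight < 0

# Each row is (xi, xj, xk): (xi, xj) is a positive edge and (xj, xk) a negative
# one; when xj has no negative neighbour, the row is (xi, xj, 0) (virtual node 0).
triplets = g.get_triplets()
print(triplets.shape)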







47 changes: 47 additions & 0 deletions SiNE/learn_SiNE_emb.py
@@ -0,0 +1,47 @@
from SiNEmaster.graph import *
from SiNEmaster.stemmodels import SiNE, fit_sine_model as fit_model
import pickle

# pickled list of labels
labels_path = "labels.pickle"
graphs_path = "data/CCS"

embeddings = []
labels = []
with open(labels_path, "rb") as f:
    lb = pickle.load(f)


# graphs are expected at data/CCS/<i>.csv
for i in range(2545):
    try:
        graph = Graph.read_from_file("%s/%s.csv" % (graphs_path, i), delimiter=',', directed=True)
        if len(graph.get_positive_edges()) + len(graph.get_negative_edges()) > 1:

            model = fit_model(
                num_nodes=len(graph),
                dims_arr=[32, 32],
                triples=graph.get_triplets(),
                triples0=None,
                delta=1.0,
                delta0=0.5,
                batch_size=300,
                batch_size0=300,
                epochs=30,
                lr=0.01,
                lam=0.0001,
                lr_decay=0.0,
                p=2,
                print_loss=False,
                p0=False,
            )

            embedding = model.get_x()
            embedding = embedding.detach().numpy().tolist()[0]
            embeddings.append(embedding)
            labels.append(lb[i])
            print(i)
    except Exception as e:
        print("error on graph %s: %s" % (i, e))

with open("out/SiNE/sine_embeddings.pkl", "wb") as f:
    pickle.dump(embeddings, f)
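
For reference, a short sketch of how the pickled output above might be loaded downstream; the output path is the one written by this script, everything else is an illustrative assumption.

# Hypothetical downstream loading of the embeddings produced by this script.
import pickle
import numpy as np

with open("out/SiNE/sine_embeddings.pkl", "rb") as f:
    X = np.array(pickle.load(f))  # one embedding vector per processed graph

print(X.shape)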
134 changes: 134 additions & 0 deletions SiNE/model.py
@@ -0,0 +1,134 @@
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.autograd import Variable
import numpy as np
import torch.optim as optim

# From https://github.com/CompNet/SignedCentrality

def hadamard(x, y):
    return x * y


def average(x, y):
    return (x + y) / 2.0


def l1(x, y):
    return np.abs(x - y)


def l2(x, y):
    return np.power(x - y, 2)


def concat(x, y):
    return np.concatenate((x, y), axis=1)


FEATURE_FUNCS = {
    'l1': l1,
    'l2': l2,
    'concat': concat,
    'average': average,
    'hadamard': hadamard
}


class SiNE(nn.Module):
    def __init__(self, num_nodes, dim1, dim2):
        super(SiNE, self).__init__()
        self.tanh = nn.Tanh()
        self.embeddings = nn.Embedding(num_nodes + 1, dim1)
        self.layer11 = nn.Linear(dim1, dim2, bias=False)
        self.layer12 = nn.Linear(dim1, dim2, bias=False)
        self.bias1 = Parameter(torch.zeros(1))
        self.layer2 = nn.Linear(dim2, 1, bias=False)
        self.bias2 = Parameter(torch.zeros(1))
        self.register_parameter('bias1', self.bias1)
        self.register_parameter('bias2', self.bias2)

    def forward(self, xi, xj, xk, delta):
        i_emb = self.embeddings(xi)
        j_emb = self.embeddings(xj)
        k_emb = self.embeddings(xk)

        z11 = self.tanh(self.layer11(i_emb) + self.layer12(j_emb) + self.bias1)
        z12 = self.tanh(self.layer11(i_emb) + self.layer12(k_emb) + self.bias1)

        f_pos = self.tanh(self.layer2(z11) + self.bias2)
        f_neg = self.tanh(self.layer2(z12) + self.bias2)

        zeros = Variable(torch.zeros(1))

        loss = torch.max(zeros, f_pos + delta - f_neg)
        loss = torch.sum(loss)

        return loss

    def _regularizer(self, x):
        zeros = torch.zeros_like(x)
        normed = torch.norm(x - zeros, p=2)
        term = torch.pow(normed, 2)
        return term

    def regularize_weights(self):
        loss = 0
        for parameter in self.parameters():
            loss += self._regularizer(parameter)
        return loss

    def get_embedding(self, x):
        x = Variable(torch.LongTensor([x]))
        emb = self.embeddings(x)
        emb = emb.data.numpy()[0]
        return emb

    def get_edge_feature(self, x, y, operation='hadamard'):
        func = FEATURE_FUNCS[operation]
        x = self.get_embedding(x)
        y = self.get_embedding(y)
        return func(x, y)


def tensorfy_col(x, col_idx):
    col = x[:, col_idx]
    col = torch.LongTensor(col)
    col = Variable(col)
    return col


def get_training_batch(triples, batch_size):
    nrows = triples.shape[0]
    rows = np.random.choice(nrows, batch_size, replace=False)
    chosen = triples[rows, :]
    xi = tensorfy_col(chosen, 0)
    xj = tensorfy_col(chosen, 1)
    xk = tensorfy_col(chosen, 2)
    return xi, xj, xk


def fit_model(sine, triplets, delta, batch_size, epochs, alpha,
              lr=0.4, weight_decay=0.0, print_loss=True):
    optimizer = optim.Adagrad(sine.parameters(), lr=lr, weight_decay=weight_decay)
    for epoch in range(epochs):
        sine.zero_grad()
        xi, xj, xk = get_training_batch(triplets, batch_size)
        loss = sine(xi, xj, xk, delta)
        regularizer_loss = alpha * sine.regularize_weights()
        loss += regularizer_loss
        loss.backward()
        optimizer.step()
        if print_loss:
            print('Loss at epoch', epoch + 1, 'is', loss.item())
    return sine
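
A hedged end-to-end sketch combining graph.py with the SiNE class and fit_model defined in this file; the CSV path and all hyperparameter values below are illustrative assumptions, not values prescribed by this commit.

# Hypothetical training run -- file name and hyperparameters are assumptions.
from graph import Graph
from model import SiNE, fit_model

g = Graph.read_from_file("toy_signed.csv", delimiter=',', directed=True)
triplets = g.get_triplets()  # node-id triplets; 0 is the virtual node

sine = SiNE(num_nodes=len(g), dim1=32, dim2=32)
sine = fit_model(sine, triplets, delta=1.0,
                 batch_size=min(300, triplets.shape[0]),
                 epochs=30, alpha=0.0001, lr=0.01, print_loss=True)

# Node embedding and edge feature for downstream tasks
u, v = list(g.get_positive_edges())[0]
emb = sine.get_embedding(g.vocab.node2id(u))
feat = sine.get_edge_feature(g.vocab.node2id(u), g.vocab.node2id(v), operation='hadamard')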



42 changes: 42 additions & 0 deletions SiNE/stemgraph.py
@@ -0,0 +1,42 @@
import networkx as nx
import numpy as np

# From https://github.com/CompNet/SignedCentrality

def get_empty_graph(directed=True):
    if directed:
        return nx.DiGraph()
    return nx.Graph()


def from_edgelist_array_to_graph(X, y, directed=True):
    positive_graph = get_empty_graph(directed)
    negative_graph = get_empty_graph(directed)

    for edge, label in zip(X, y):
        u, v = edge
        if label == 0:
            negative_graph.add_edge(u, v)
        else:
            positive_graph.add_edge(u, v)
    return positive_graph, negative_graph


def get_triples(positive_graph, negative_graph, p0=True):
    triples = []
    triples0 = []
    for u, v in positive_graph.edges():
        if v in negative_graph:
            v_neighbors = negative_graph[v]
            for w in v_neighbors:
                triple = (u, v, w)
                triples.append(triple)
        elif p0:
            triple0 = (u, v, 0)
            triples0.append(triple0)
    triples = np.array(triples)
    triples0 = np.array(triples0)
    if p0:
        return triples, triples0
    return triples
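
A small hedged example of the helpers in this file; the edge list and labels below are illustrative assumptions (labels follow this file's convention: 0 = negative edge, anything else = positive).

# Hypothetical input: three labelled edges.
from stemgraph import from_edgelist_array_to_graph, get_triples

X = [("a", "b"), ("b", "c"), ("a", "d")]
y = [1, 0, 1]  # (a,b) and (a,d) positive, (b,c) negative

pos_g, neg_g = from_edgelist_array_to_graph(X, y, directed=True)
triples, triples0 = get_triples(pos_g, neg_g, p0=True)
print(triples)   # (u, v, w) with +(u,v) and -(v,w): here ('a', 'b', 'c')
print(triples0)  # positive edges with no negative continuation, paired with 0: here ('a', 'd', 0)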

