-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
adc6e69
commit a959891
Showing
6 changed files
with
676 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import networkx as nx | ||
import numpy as np | ||
import csv | ||
|
||
# From https://github.com/CompNet/SignedCentrality | ||
|
||
class Vocabulary(object):
    """Two-way mapping between graph nodes and integer ids.

    Ids are handed out in first-seen order. The internal counter starts at 1
    and is incremented *before* each assignment, so the first node receives
    id 2.
    """

    def __init__(self, graph):
        """Create the mapping and index every node returned by ``graph.nodes()``."""
        self._id2node = {}
        self._node2id = {}
        self._curr_id = 1
        self.augment(graph)

    def id2node(self, id):
        """Return the node stored under integer ``id`` (``KeyError`` if unknown)."""
        return self._id2node[id]

    def node2id(self, node):
        """Return the integer id stored for ``node`` (``KeyError`` if unknown)."""
        return self._node2id[node]

    def augment(self, graph):
        """Give a fresh id to each node of ``graph`` that is not indexed yet."""
        for candidate in graph.nodes():
            if candidate in self._node2id:
                continue
            self._curr_id += 1
            self._node2id[candidate] = self._curr_id
            self._id2node[self._curr_id] = candidate

    def __len__(self):
        """Return the current counter value, i.e. the largest id handed out."""
        return self._curr_id
|
||
|
||
class Graph(object):
    """Signed graph stored as one positive-edge graph and one negative-edge graph."""

    def __init__(self, positive_graph, negative_graph):
        """Keep both sub-graphs and index their nodes in one shared vocabulary.

        Nodes of ``positive_graph`` are indexed first; nodes that only occur
        in ``negative_graph`` receive the following ids.
        """
        self.positive_graph = positive_graph
        self.negative_graph = negative_graph
        self.vocab = Vocabulary(positive_graph)
        self.vocab.augment(negative_graph)

    def get_positive_edges(self):
        """Return the edges of the positive sub-graph."""
        return self.positive_graph.edges()

    def get_negative_edges(self):
        """Return the edges of the negative sub-graph."""
        return self.negative_graph.edges()

    def __len__(self):
        """Return the length reported by the node vocabulary."""
        return len(self.vocab)

    def get_triplets(self, p0=True, ids=True):
        """Build the ``[xi, xj, xk]`` training triplets of the signed graph.

        For every positive edge ``xi -> xj`` whose target ``xj`` is a node of
        the negative graph, one triplet is emitted per negative neighbour
        ``xk`` of ``xj``. When ``xj`` is not in the negative graph and ``p0``
        is true, a single triplet ``[xi, xj, 0]`` is emitted instead.

        Args:
            p0: Emit the ``[xi, xj, 0]`` fallback triplets.
            ids: Translate nodes to vocabulary ids; otherwise keep raw nodes.
                The fallback third entry is always the literal ``0``.

        Returns:
            A numpy array built from the list of triplets.
        """
        encode = self.vocab.node2id if ids else (lambda node: node)
        triplets = []
        for xi in self.positive_graph.nodes():
            for xj in self.positive_graph[xi]:
                if xj in self.negative_graph:
                    for xk in self.negative_graph[xj]:
                        triplets.append([encode(xi), encode(xj), encode(xk)])
                elif p0:
                    triplets.append([encode(xi), encode(xj), 0])
        return np.array(triplets)

    @staticmethod
    def read_from_file(filepath, delimiter=',', directed=False):
        """Load a signed graph from a delimited ``u, v, weight`` text file.

        The first line is treated as a header and skipped. Rows with a
        positive weight go to the positive graph, rows with a negative weight
        to the negative graph, and zero-weight rows are ignored.

        Args:
            filepath: Path of the edge-list file.
            delimiter: Column separator used in the file.
            directed: Build ``nx.DiGraph`` objects instead of ``nx.Graph``.

        Returns:
            A ``Graph`` wrapping the two sub-graphs.

        Raises:
            ValueError: If a non-blank row does not have exactly three
                columns or its weight is not a number.
        """
        positive_graph = nx.DiGraph() if directed else nx.Graph()
        negative_graph = nx.DiGraph() if directed else nx.Graph()
        # The context manager closes the file even when a row fails to parse.
        with open(filepath) as handle:
            # Skip the header line; the default keeps an empty file from
            # raising StopIteration.
            next(handle, None)
            for line in handle:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. a trailing newline at EOF.
                    continue
                u, v, w = line.split(delimiter)
                w = float(w)
                if w > 0:
                    positive_graph.add_edge(u, v, weight=w)
                elif w < 0:
                    negative_graph.add_edge(u, v, weight=w)
        return Graph(positive_graph, negative_graph)
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from SiNEmaster.graph import * | ||
from SiNEmaster.stemmodels import SiNE, fit_sine_model as fit_model | ||
import pickle | ||
|
||
# Pickled list of labels, looked up below by graph index.
labels_path = "labels.pickle"
graphs_path = "data/CCS"

embeddings = []
# NOTE(review): labels is filled in step with embeddings but never written
# out by this script -- confirm whether it should be saved as well.
labels = []
# NOTE: pickle.load can execute arbitrary code; only load a labels file that
# comes from a trusted source.
with open(labels_path, "rb") as f:
    lb = pickle.load(f)

for i in range(2545):
    try:
        graph = Graph.read_from_file("%s/%s.csv" % (graphs_path, i), delimiter=',', directed=True)
        # Only graphs with more than one signed edge are embedded.
        if len(graph.get_positive_edges()) + len(graph.get_negative_edges()) > 1:
            model = fit_model(
                num_nodes=len(graph),
                dims_arr=[32, 32],
                triples=graph.get_triplets(),
                triples0=None,
                delta=1.0,
                delta0=0.5,
                batch_size=300,
                batch_size0=300,
                epochs=30,
                lr=0.01,
                lam=0.0001,
                lr_decay=0.0,
                p=2,
                print_loss=False,
                p0=False,
            )

            embedding = model.get_x()
            embedding = embedding.detach().numpy().tolist()[0]
            embeddings.append(embedding)
            labels.append(lb[i])
            print(i)
    except Exception as exc:
        # Best effort: one unreadable or untrainable graph must not abort the
        # whole run. Catching Exception (not a bare except) lets Ctrl-C and
        # SystemExit through, and the message says which graph failed and why.
        print("error", i, repr(exc))

with open("out/SiNE/sine_embeddings.pkl", "wb") as f:
    pickle.dump(embeddings, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import torch | ||
import torch.nn as nn | ||
from torch.nn.parameter import Parameter | ||
from torch.autograd import Variable | ||
import numpy as np | ||
import torch.optim as optim | ||
|
||
# From https://github.com/CompNet/SignedCentrality | ||
|
||
def hadamard(x, y):
    """Return the product ``x * y`` (element-wise for arrays)."""
    product = x * y
    return product
|
||
|
||
def average(x, y):
    """Return the mean of ``x`` and ``y`` (element-wise for arrays)."""
    total = x + y
    return total / 2.0
|
||
|
||
def l1(x, y):
    """Return the absolute difference ``|x - y|`` (element-wise for arrays)."""
    difference = x - y
    return np.absolute(difference)
|
||
|
||
def l2(x, y):
    """Return the squared difference ``(x - y) ** 2`` (element-wise for arrays)."""
    difference = x - y
    return np.power(difference, 2)
|
||
|
||
def concat(x, y):
    """Concatenate two feature arrays along their feature axis.

    Inputs with two or more dimensions are joined along axis 1. One-dimensional
    vectors -- the shape ``SiNE.get_embedding`` returns -- have no axis 1, so
    they are joined end to end along axis 0 instead of raising.

    Args:
        x: First array.
        y: Second array, with the same number of dimensions as ``x``.

    Returns:
        The concatenated numpy array.
    """
    axis = 1 if np.ndim(x) > 1 else 0
    return np.concatenate((x, y), axis=axis)
|
||
|
||
# Edge-feature operators selectable by name, e.g. through the ``operation``
# argument of SiNE.get_edge_feature.
FEATURE_FUNCS = dict(
    l1=l1,
    l2=l2,
    concat=concat,
    average=average,
    hadamard=hadamard,
)
|
||
|
||
class SiNE(nn.Module):
    """Neural model that scores ``(xi, xj, xk)`` node triplets.

    Each node id is looked up in an embedding table. A two-layer tanh network
    scores the pair ``(xi, xj)`` and the pair ``(xi, xk)``, and ``forward``
    returns a hinge loss over the two scores.
    """

    def __init__(self, num_nodes, dim1, dim2):
        """Create the embedding table and the scoring layers.

        Args:
            num_nodes: Node count; the table gets ``num_nodes + 1`` rows.
            dim1: Embedding dimension.
            dim2: Width of the hidden layer.
        """
        super(SiNE, self).__init__()
        self.tanh = nn.Tanh()
        self.embeddings = nn.Embedding(num_nodes + 1, dim1)
        self.layer11 = nn.Linear(dim1, dim2, bias=False)
        self.layer12 = nn.Linear(dim1, dim2, bias=False)
        # Assigning a Parameter attribute already registers it on the module,
        # so no explicit register_parameter() call is needed.
        self.bias1 = Parameter(torch.zeros(1))
        self.layer2 = nn.Linear(dim2, 1, bias=False)
        self.bias2 = Parameter(torch.zeros(1))

    def forward(self, xi, xj, xk, delta):
        """Return the summed hinge loss ``max(0, f_pos + delta - f_neg)``.

        Args:
            xi, xj, xk: Integer index tensors into the embedding table.
            delta: Margin added to the score difference.

        Returns:
            A scalar tensor: the hinge loss summed over the batch.
        """
        i_emb = self.embeddings(xi)
        j_emb = self.embeddings(xj)
        k_emb = self.embeddings(xk)

        # The xi projection is shared by both pair scores.
        i_proj = self.layer11(i_emb)
        z11 = self.tanh(i_proj + self.layer12(j_emb) + self.bias1)
        z12 = self.tanh(i_proj + self.layer12(k_emb) + self.bias1)

        f_pos = self.tanh(self.layer2(z11) + self.bias2)
        f_neg = self.tanh(self.layer2(z12) + self.bias2)

        # NOTE(review): this penalises f_pos + delta > f_neg; confirm the sign
        # convention against the intended objective.
        margin = f_pos + delta - f_neg
        # zeros_like keeps the comparison on the same device and dtype as the
        # scores, so the model also works when it is moved off the CPU.
        loss = torch.max(torch.zeros_like(margin), margin)
        return torch.sum(loss)

    def _regularizer(self, x):
        """Return the squared L2 norm of tensor ``x``.

        The sum of squares is computed directly instead of squaring
        ``torch.norm``; this avoids differentiating through a square root,
        which matters because both biases start at exactly zero.
        """
        return torch.sum(x * x)

    def regularize_weights(self):
        """Return the sum of squared L2 norms over all model parameters."""
        loss = 0
        for parameter in self.parameters():
            loss += self._regularizer(parameter)
        return loss

    def get_embedding(self, x):
        """Return the embedding of node id ``x`` as a 1-D numpy array."""
        index = torch.tensor(
            [x], dtype=torch.long, device=self.embeddings.weight.device
        )
        # detach + cpu so the conversion works regardless of device/autograd.
        return self.embeddings(index).detach().cpu().numpy()[0]

    def get_edge_feature(self, x, y, operation='hadamard'):
        """Combine the embeddings of nodes ``x`` and ``y``.

        Args:
            x: Id of the first node.
            y: Id of the second node.
            operation: Key into ``FEATURE_FUNCS`` selecting the operator.

        Returns:
            Whatever the selected operator returns for the two embeddings.

        Raises:
            KeyError: If ``operation`` is not a key of ``FEATURE_FUNCS``.
        """
        func = FEATURE_FUNCS[operation]
        return func(self.get_embedding(x), self.get_embedding(y))
|
||
|
||
|
||
|
||
def tensorfy_col(x, col_idx):
    """Return column ``col_idx`` of the 2-D array ``x`` as an int64 tensor.

    ``torch.tensor(..., dtype=torch.long)`` copies the column and converts it
    to int64 whatever integer dtype the array has. The deprecated
    ``Variable`` wrapper is dropped; tensors are used directly.

    Args:
        x: Two-dimensional array of indices.
        col_idx: Index of the column to extract.

    Returns:
        A one-dimensional ``torch.int64`` tensor.
    """
    return torch.tensor(x[:, col_idx], dtype=torch.long)
|
||
|
||
def get_training_batch(triples, batch_size):
    """Sample a random batch of triplet rows, without replacement.

    Args:
        triples: Two-dimensional array with one ``(xi, xj, xk)`` row per
            triplet.
        batch_size: Requested number of rows. When fewer rows exist, all of
            them are returned (in random order) instead of failing.

    Returns:
        A tuple ``(xi, xj, xk)`` of index tensors, one per column.

    Raises:
        ValueError: If ``triples`` has no rows.
    """
    nrows = triples.shape[0]
    if nrows == 0:
        raise ValueError("cannot sample a training batch from an empty triple array")
    # Sampling without replacement cannot draw more rows than exist, so the
    # request is clamped; small graphs often have fewer triplets than the
    # configured batch size.
    sample_size = min(batch_size, nrows)
    rows = np.random.choice(nrows, sample_size, replace=False)
    chosen = triples[rows, :]
    return tensorfy_col(chosen, 0), tensorfy_col(chosen, 1), tensorfy_col(chosen, 2)
|
||
|
||
def fit_model(sine, triplets, delta, batch_size, epochs, alpha,
              lr=0.4, weight_decay=0.0, print_loss=True):
    """Train ``sine`` in place with Adagrad, one random batch per epoch.

    Args:
        sine: Model to train; called as ``sine(xi, xj, xk, delta)`` and asked
            for ``sine.regularize_weights()``.
        triplets: Array of triplets the batches are sampled from.
        delta: Margin passed to the model's forward pass.
        batch_size: Number of triplets requested per epoch.
        epochs: Number of optimisation steps.
        alpha: Weight of the regularisation term.
        lr: Adagrad learning rate.
        weight_decay: Adagrad weight decay.
        print_loss: Print the total loss after every epoch.

    Returns:
        The same ``sine`` object, after training.
    """
    optimizer = optim.Adagrad(sine.parameters(), lr=lr, weight_decay=weight_decay)
    for epoch in range(epochs):
        sine.zero_grad()
        xi, xj, xk = get_training_batch(triplets, batch_size)
        loss = sine(xi, xj, xk, delta)
        loss = loss + alpha * sine.regularize_weights()
        loss.backward()
        optimizer.step()
        if print_loss:
            # The loss is a 0-dim tensor: indexing it with [0] raises on
            # current PyTorch, so .item() extracts the Python float.
            print('Loss at epoch ', epoch + 1, ' is ', loss.item())
    return sine
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import networkx as nx | ||
import numpy as np | ||
|
||
# From https://github.com/CompNet/SignedCentrality | ||
|
||
def get_empty_graph(directed=True):
    """Return a new empty ``nx.DiGraph`` if ``directed`` is truthy, else ``nx.Graph``."""
    return nx.DiGraph() if directed else nx.Graph()
|
||
def from_edgelist_array_to_graph(X, y, directed=True):
    """Split labelled edges into a positive graph and a negative graph.

    Args:
        X: Iterable of ``(u, v)`` edges.
        y: Iterable of labels aligned with ``X``; a label equal to 0 marks a
            negative edge, any other label a positive edge.
        directed: Passed to ``get_empty_graph`` to select the graph type.

    Returns:
        The tuple ``(positive_graph, negative_graph)``.
    """
    positive_graph = get_empty_graph(directed)
    negative_graph = get_empty_graph(directed)

    for (u, v), label in zip(X, y):
        target = negative_graph if label == 0 else positive_graph
        target.add_edge(u, v)
    return positive_graph, negative_graph
|
||
|
||
def get_triples(positive_graph, negative_graph, p0=True):
    """Collect ``(u, v, w)`` triples from a positive and a negative graph.

    For each positive edge ``(u, v)`` whose target ``v`` is a node of the
    negative graph, one triple is produced per negative neighbour ``w`` of
    ``v``. When ``v`` is not in the negative graph and ``p0`` is true, the
    triple ``(u, v, 0)`` is recorded in a separate list instead.

    Returns:
        ``(triples, triples0)`` as numpy arrays when ``p0`` is true, otherwise
        only the ``triples`` array.
    """
    real_triples = []
    virtual_triples = []
    for source, target in positive_graph.edges():
        if target not in negative_graph:
            if p0:
                virtual_triples.append((source, target, 0))
            continue
        real_triples.extend(
            (source, target, foe) for foe in negative_graph[target]
        )
    triples = np.array(real_triples)
    triples0 = np.array(virtual_triples)
    return (triples, triples0) if p0 else triples
|
||
|
Oops, something went wrong.