-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess2.py
51 lines (44 loc) · 1.36 KB
/
preprocess2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#coding=gbk
import json
import csv
SOURCE_PATH = './HKUST_coauthor_graph.json'
OUTPUT_PATH = 'adjmatrix.csv'
DEPARTMENT = "CSE"


def select_nodes(nodes, dept=DEPARTMENT):
    """Keep the nodes of one department and number them in input order.

    Args:
        nodes: iterable of node dicts; each must carry the keys "id",
            "fullname" and "dept".
        dept: value of "dept" a node must have to be kept.

    Returns:
        A ``(nodelist, index_of)`` pair. ``nodelist`` holds one
        ``{"name": fullname, "radii": 0}`` dict per kept node, and
        ``index_of`` maps each kept node id to its position in ``nodelist``.

    Raises:
        KeyError: if a node lacks one of the required keys.
    """
    nodelist = []
    index_of = {}
    for node in nodes:
        if node["dept"] != dept:
            continue
        # Index by id rather than by fullname: two people sharing a name
        # must stay two separate rows/columns of the matrix.
        index_of[node["id"]] = len(nodelist)
        nodelist.append({"name": node["fullname"], "radii": 0})
    return nodelist, index_of


def link_nodes(edges, nodelist, index_of):
    """Build the edge list and the symmetric weight matrix of the kept nodes.

    An edge is used only when both of its endpoints are in ``index_of``; its
    weight is the number of entries in its "publications" list.

    Args:
        edges: iterable of edge dicts with "source", "target" (node ids) and
            "publications" (a sized collection).
        nodelist: list returned by ``select_nodes``. It is updated in place:
            the weight of every used edge is added to the "radii" of both
            endpoints.
        index_of: node id -> position mapping returned by ``select_nodes``.

    Returns:
        A ``(edgelist, adjmatrix)`` pair. ``edgelist`` holds
        ``{"source": i, "target": j}`` dicts of positions, and ``adjmatrix``
        is a ``len(nodelist)`` x ``len(nodelist)`` list of lists of weights.
    """
    size = len(nodelist)
    adjmatrix = [[0] * size for _ in range(size)]
    edgelist = []
    for edge in edges:
        if edge["source"] not in index_of or edge["target"] not in index_of:
            continue
        src = index_of[edge["source"]]
        dst = index_of[edge["target"]]
        weight = len(edge["publications"])
        edgelist.append({"source": src, "target": dst})
        nodelist[src]["radii"] += weight
        nodelist[dst]["radii"] += weight
        # NOTE(review): a repeated edge for the same pair overwrites the
        # matrix cell while "radii" keeps accumulating - confirm that the
        # input never lists a pair twice.
        adjmatrix[src][dst] = weight
        adjmatrix[dst][src] = weight
    return edgelist, adjmatrix


def flatten_rows(matrix):
    """Return all cells of ``matrix`` as one list, row after row."""
    return [cell for row in matrix for cell in row]


def write_column(path, header, values):
    """Write ``values`` to ``path`` as a one-column CSV under ``header``."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([header])
        writer.writerows([value] for value in values)


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH):
    """Convert the co-author graph JSON into a flattened adjacency CSV.

    Reads ``source_path``, keeps the nodes of ``DEPARTMENT``, builds their
    co-authorship weight matrix and writes it row-major as a single column
    named "Y" to ``output_path``. Intermediate results are printed.
    """
    # NOTE(review): the input is decoded as GBK, as in the original script;
    # confirm the JSON file really is GBK-encoded and not UTF-8.
    with open(source_path, 'r', encoding='gbk') as fp:
        json_data = json.load(fp)
    print(type(json_data))

    nodelist, index_of = select_nodes(json_data["nodes"])
    # Printed before the edges are processed, so every "radii" is still 0.
    print(nodelist)
    print(len(index_of))
    print(index_of.keys())

    edgelist, adjmatrix = link_nodes(json_data["edges"], nodelist, index_of)
    print(edgelist)

    headrow = ["Y"]
    print(headrow)
    write_column(output_path, headrow[0], flatten_rows(adjmatrix))


if __name__ == "__main__":
    main()