-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdisplacy_utils.py
78 lines (67 loc) · 2.24 KB
/
displacy_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from spacy import displacy
import pandas as pd
# Lookup from token-level IOB tags to the coarse entity category.
# The 'B-'/'I-' prefix is dropped, 'O' maps to itself, and 'B-pronoun'
# is folded into the 'person' category.
NER_dict = {
    'O': 'O',
    'I-problem': 'problem',
    'B-problem': 'problem',
    'I-person': 'person',
    'B-person': 'person',
    'B-pronoun': 'person',
    'B-treatment': 'treatment',
    'I-treatment': 'treatment',
    'B-test': 'test',
    'I-test': 'test',
}
def add_NER_tag(df):
    """Add a ``NERtag`` column derived from the ``IOBtag`` column.

    Each value of ``IOBtag`` that is a key of ``NER_dict`` is replaced by
    the corresponding category; values that are not keys are left as they
    are. The frame is modified in place.

    Args:
        df: DataFrame with an ``IOBtag`` column.

    Returns:
        The same DataFrame, with ``NERtag`` added or overwritten.
    """
    iob_tags = df.IOBtag
    ner_tags = iob_tags.replace(NER_dict)
    df['NERtag'] = ner_tags
    return df
def get_offset(lst):
    """Compute start/end character offsets for a sequence of items.

    The width of each item is ``len(item[0])``; consecutive items are
    assumed to be separated by exactly one character, so the next start is
    the previous end plus one.
    NOTE(review): presumably each item is a ``(token_text, ...)`` pair and
    the tokens are joined with single spaces - confirm against the caller.

    Args:
        lst: Iterable of indexable items whose first element has a length.

    Returns:
        A ``(start_chars, end_chars)`` tuple of two lists of ints, one
        entry per item; each end offset is exclusive.
    """
    start_chars, end_chars = [], []
    cursor = 0  # offset at which the next item begins
    for item in lst:
        width = len(item[0])
        start_chars.append(cursor)
        end_chars.append(cursor + width)
        # Step over the item plus the single separator character.
        cursor += width + 1
    return start_chars, end_chars
def make_ent(df):
    """Number consecutive runs of identical ``NERtag`` values.

    Adds an integer ``ents`` column to ``df`` in place. The first row gets
    0, and the counter goes up by one every time a row's ``NERtag`` differs
    from the tag of the row directly before it, so adjacent rows that share
    a tag share an id.

    Rows are compared by position, so the frame does not need a default
    0..n-1 index.

    Args:
        df: DataFrame with a ``NERtag`` column, rows in token order.

    Returns:
        The same DataFrame, with ``ents`` added or overwritten.
    """
    tags = df['NERtag']
    # shift() moves values by position, so each row is compared with its
    # positional predecessor regardless of the index labels.
    changed = tags.ne(tags.shift())
    # The first row has no predecessor and therefore always counts as a
    # change; subtracting 1 makes the numbering start at 0. to_numpy()
    # keeps the assignment positional instead of index-aligned.
    df['ents'] = changed.cumsum().to_numpy() - 1
    return df
def collapse(df):
    """Merge token rows into one row per entity id.

    Rows are grouped by the ``ents`` column and aggregated as follows:

    * ``token``  - list of the group's tokens, in row order
    * ``NERtag`` - the group's distinct tags, stringified and concatenated
      in order of first appearance (just the tag when all rows agree)
    * ``start``  - minimum of the group's ``start`` values
    * ``end``    - maximum of the group's ``end`` values
    * ``prob``   - mean of the group's ``prob`` values

    Args:
        df: DataFrame with ``ents``, ``token``, ``NERtag``, ``start``,
            ``end`` and ``prob`` columns.

    Returns:
        A new DataFrame indexed by ``ents`` with the columns ``token``,
        ``NERtag``, ``start``, ``end`` and ``prob``.
    """
    grouped = df.groupby('ents')  # build the grouping once and reuse it

    def _join_unique(tags):
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the result depend on string hash order when a group
        # holds more than one distinct tag.
        return "".join(map(str, dict.fromkeys(tags)))

    tokens = grouped['token'].apply(list)
    ner_tags = grouped['NERtag'].apply(_join_unique)
    starts = grouped['start'].min()
    ends = grouped['end'].max()
    prob = grouped['prob'].mean()
    return pd.concat([tokens, ner_tags, starts, ends, prob], axis=1)
def displacy_format(ent_df):
    """Convert an entity table into a list of entity dicts.

    Rows whose ``NERtag`` equals ``'O'`` are skipped. Every other row
    produces ``{'start': ..., 'end': ..., 'label': ...}``, where the label
    is the upper-cased ``NERtag``. This is the per-entity shape that
    displaCy's manual entity rendering takes in its ``"ents"`` list.

    Rows are read by position, so the frame's index labels are irrelevant.

    Args:
        ent_df: DataFrame with ``NERtag`` (str), ``start`` and ``end``
            columns, one row per entity span.

    Returns:
        A list of dicts in row order, with ``start``/``end`` as plain
        Python ints.
    """
    ent_lst = []
    spans = zip(ent_df['NERtag'], ent_df['start'], ent_df['end'])
    for tag, start, end in spans:
        if tag == 'O':
            # 'O' marks text outside any entity; nothing to highlight.
            continue
        ent_lst.append({
            # int() turns numpy integer scalars into builtin ints so the
            # result is plain, serialisable Python data.
            'start': int(start),
            'end': int(end),
            'label': tag.upper(),
        })
    return ent_lst
# CSS background gradients keyed by upper-cased entity label.
colors = {
    "PROBLEM": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
    "TEST": "linear-gradient(90deg, #ffafbd, #ffc3a0)",
    "TREATMENT": "linear-gradient(90deg, #02aab0, #00cdac)",
    "PERSON": "linear-gradient(90deg, #6593f5, #73C2FB)",
}

# Render options: the entity labels to show and the colour for each.
# The label list is taken from the colour table's keys, in insertion order.
options = {
    "ents": list(colors),
    "colors": colors,
}