-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathget_distractors__sense2vec.py
67 lines (49 loc) · 2.34 KB
/
get_distractors__sense2vec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-
"""get_Distractors _Sense2vec.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1vYuoJz6kFte5tcsPrIjXYFH8NafCRy16
"""
# NOTE: the two lines starting with `!` are IPython/Colab shell escapes, not
# Python statements; this file was exported from a Colaboratory notebook and
# these lines are a syntax error under a plain `python` interpreter.
# Download the sense2vec v1.0.0 release archive.
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
# Unpack the archive into the current working directory.
!tar -xvf s2v_reddit_2015_md.tar.gz
# load sense2vec vectors
from sense2vec import Sense2Vec
# Module-level side effect: the vectors are read from disk at import time.
# NOTE(review): presumably the archive above unpacks to a directory named
# 's2v_old' -- confirm against the extracted contents.
s2v = Sense2Vec().from_disk('s2v_old')
def edits(word):
    """Return every string that is exactly one edit away from ``word``.

    An edit is one of: deleting a single character, swapping two adjacent
    characters, replacing a single character, or inserting a single
    character. Replacements and insertions draw from the lowercase ASCII
    letters, the space character and ``string.punctuation``.

    Args:
        word: The string to perturb.

    Returns:
        A set of candidate strings. The set may contain ``word`` itself
        (for example, when a character is "replaced" by the same character).
    """
    # Imported locally because this file has no top-level ``import string``;
    # without it the lookup below raises NameError.
    import string

    letters = 'abcdefghijklmnopqrstuvwxyz ' + string.punctuation
    # Every way of cutting ``word`` into a (left, right) pair, including the
    # cuts with an empty left or an empty right part.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]

    deletes = {left + right[1:] for left, right in splits if right}
    transposes = {
        left + right[1] + right[0] + right[2:]
        for left, right in splits
        if len(right) > 1
    }
    replaces = {
        left + c + right[1:] for left, right in splits if right for c in letters
    }
    inserts = {left + c + right for left, right in splits for c in letters}
    return deletes | transposes | replaces | inserts
def sense2vec_get_words(word, s2v):
    """Collect distractor candidates for ``word`` from its sense2vec neighbours.

    The best sense for ``word`` is looked up, its 15 most similar entries are
    fetched, and each neighbour is kept unless its normalised form
    (lower-cased, punctuation removed):

    * was already seen (the normalised ``word`` counts as seen),
    * contains the normalised ``word`` as a substring, or
    * is within one edit of the normalised ``word`` (see ``edits``).

    Args:
        word: The answer phrase. Spaces are replaced by underscores before
            the sense lookup.
        s2v: Object providing ``get_best_sense(key)`` and
            ``most_similar(key, n=...)``; each result of ``most_similar`` is
            indexed with ``[0]`` to obtain a key whose text precedes the
            first ``|``.

    Returns:
        A list of title-cased candidate phrases, in neighbour order, with
        duplicates removed.

    NOTE(review): errors from the ``s2v`` calls are not handled here and
    propagate to the caller -- presumably this includes the case where no
    sense is found for ``word``; confirm against the sense2vec API.
    """
    # Imported locally because this file has no top-level imports for these
    # names; without them every call raises NameError.
    import string
    from collections import OrderedDict

    # Build the punctuation-stripping table once instead of per neighbour.
    strip_punctuation = str.maketrans("", "", string.punctuation)

    word_preprocessed = word.translate(strip_punctuation).lower()
    word_edits = edits(word_preprocessed)

    sense = s2v.get_best_sense(word.replace(" ", "_"))
    most_similar = s2v.most_similar(sense, n=15)

    output = []
    # Set membership keeps the duplicate check O(1) per neighbour.
    seen = {word_preprocessed}
    for each_word in most_similar:
        candidate = each_word[0].split("|")[0].replace("_", " ").strip()
        candidate_key = candidate.lower().translate(strip_punctuation)
        if (
            candidate_key in seen
            or word_preprocessed in candidate_key
            or candidate_key in word_edits
        ):
            continue
        output.append(candidate.title())
        seen.add(candidate_key)

    # Distinct normalised keys could still title-case to the same string, so
    # de-duplicate the final list while preserving order.
    return list(OrderedDict.fromkeys(output))
def get_options(answer, s2v):
    """Return sense2vec distractors for ``answer``, or ``[]`` if lookup fails.

    This is a best-effort wrapper around ``sense2vec_get_words``: any
    ordinary exception raised by the lookup is reported on stdout and turned
    into an empty result instead of propagating.

    Args:
        answer: The answer phrase to find distractors for.
        s2v: The sense2vec object forwarded to ``sense2vec_get_words``.

    Returns:
        The list of distractors; empty when none were found or the lookup
        raised.
    """
    distractors = []
    try:
        distractors = sense2vec_get_words(answer, s2v)
    except Exception:
        # Deliberately broad so any lookup failure degrades to "no
        # distractors", but unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        print (" Sense2vec_distractors failed for word : ",answer)
        return distractors

    if distractors:
        print(" Sense2vec_distractors successful for word : ", answer)
    return distractors