-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrollchecker.py
62 lines (43 loc) · 1.71 KB
/
trollchecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
from SentimentAnalysis import *
from keyphrases import *
from sentenceparser import *
import base64
# Set to True to print the intermediate scores computed by is_tweet_troll.
DEBUG = False

# Load the blacklist. The file holds a single base64 blob that decodes to a
# newline-separated list of phrases. Everything is lowercased here because
# tweets are lowercased before matching. The `with` block guarantees the
# file handle is closed even if decoding fails.
with open("badwordlist_b64", "r") as bad_word_file:
    bad_phrases = base64.b64decode(bad_word_file.read()).lower()
bad_phrases = bad_phrases.split('\n')

# Lookup table keyed by phrase; the value 1 is only a placeholder.
# NOTE: a trailing newline in the decoded list produces an empty-string key.
bad_phrase_table = dict.fromkeys(bad_phrases, 1)
#determines if a tweet should be considered a troll, based
#on the sentiment of the tweet, and its vulgarity
def is_tweet_troll(tweet):
    """Return True if the tweet should be considered trolling, else False.

    Two independent rules can flag a tweet:

    1. The parsed subject of the sentence is "you" (or contains "your")
       and the troll sentiment (1 - getSentiment(tweet)) is above 0.5.
    2. The share of the tweet's characters covered by blacklisted phrases
       exceeds the vulgarity threshold (0.35) and that share multiplied by
       the troll sentiment is above 0.2.

    tweet -- the tweet text; it is lowercased before any matching.
             An empty (or None) tweet is never a troll.
    """
    # Nothing to score, and the ratio below would divide by len(tweet) == 0.
    if not tweet:
        return False

    tweet = tweet.lower()
    vulgarity_threshold = 0.35

    # presumably getSentiment returns a score in [0, 1] where higher means
    # more positive -- TODO confirm against SentimentAnalysis.
    sentiment = getSentiment(tweet)
    troll_sentiment = 1 - sentiment

    #think of something to do with the key phrases

    # Despite the name, this is the total LENGTH of the distinct blacklisted
    # phrases found in the tweet. Each phrase counts once, however many times
    # it occurs.
    bad_phrase_count = sum(
        len(phrase) for phrase in bad_phrase_table if phrase in tweet
    )

    #metrics: frequency of vulgar/blacklisted phrases
    #         sentiment level
    bad_phrase_ratio = float(bad_phrase_count) / len(tweet)

    sentence_contents = parse_sentence(tweet)

    if DEBUG:
        # Single-argument print(...) produces the same output as the old
        # print statements and is valid under both Python 2 and Python 3.
        print("tweet: %s" % (tweet,))
        print("troll_sentiment: %s" % (troll_sentiment,))
        print("bad_phrase_ratio %s" % (bad_phrase_ratio,))
        print("ratio * troll_sentiment %s" % (bad_phrase_ratio * troll_sentiment,))
        print("sentence contents %s" % (sentence_contents,))

    # parse_sentence may return None; the subject then stays empty and the
    # second-person rule below cannot fire.
    subject = ""
    if sentence_contents is not None:
        # assumes the parse result always has ['subject']['text'] -- TODO
        # confirm against sentenceparser.
        subject = sentence_contents['subject']['text'].encode('utf8').lower()

    # Rule 1: negative tweet addressed at the reader.
    if subject == "you" or "your" in subject:
        if troll_sentiment > 0.5:
            return True

    # Rule 2: vulgar and negative.
    #make sure that there's at least some blacklisted words
    #otherwise low sentiment could cause a false positive.
    if bad_phrase_ratio > vulgarity_threshold:
        if bad_phrase_ratio * troll_sentiment > 0.2:
            return True

    return False