-
Notifications
You must be signed in to change notification settings - Fork 2
/
ScrapeUrbanDictionary.py
52 lines (40 loc) · 1.36 KB
/
ScrapeUrbanDictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 26 15:40:38 2017
@author: Gretel_MacAir
"""
# %% Import Libs
import urllib2
import string
import pickle
import DonutLibrary as dl
# %% Load words already pickled
# Directory for the 'MillenialTalk' data, as returned by DonutLibrary.
# NOTE(review): the file name is appended with plain string concatenation, so
# this presumably ends with a path separator - confirm against dl.get_path.
path = dl.get_path('MillenialTalk')
millenial_pickle_file = 'Millenial.p'
# Open through a context manager so the file handle is closed as soon as the
# list has been read, instead of being left for the garbage collector.
# NOTE(review): unpickling executes whatever the file contains; this is only
# safe while the pickle is produced by this script itself.
with open(path + millenial_pickle_file, "rb") as pickle_in:
    millenial = pickle.load(pickle_in)
# Words discovered during this run that were not in the pickled list.
new_words = []
# %% Scrape popular words by letter of the alphabet from Urban Dictionary
# For every letter A-Z, fetch that letter's "popular" page, extract the link
# text of each define.php entry, and record every word whose lowercase form
# is not already in `millenial`.
# New words are added to the list
# Lowercase words seen so far, kept as a set so each membership test is a
# hash lookup rather than a scan of the whole list. `millenial` itself stays
# a list because the script appends to it and pickles it afterwards.
known_words = set(millenial)
for letter in string.ascii_uppercase:
    url = 'http://www.urbandictionary.com/popular.php?character=%s' % letter
    req = urllib2.Request(url)
    response = urllib2.urlopen(req)
    try:
        the_page = response.read()
    finally:
        # Release the HTTP connection even if reading the body fails.
        response.close()
    # Keep only the markup between the "All popular words" text and the
    # following "copyright" text.
    # NOTE(review): this raises IndexError if "All popular words" is absent
    # from the page (e.g. after a site layout change).
    word_section = the_page.split("All popular words")[1].split("copyright")[0]
    # Every chunk after the first begins right after a define.php href
    # prefix; the link text sits between the next '>' and the following '<'.
    link_chunks = word_section.split('a href="/define.php?term=')
    for chunk in link_chunks[1:]:
        urban_word = chunk.split(">")[1].split("<")[0]
        urban_word_lower = urban_word.lower()
        if urban_word_lower not in known_words:
            # Report the word with its original casing, but store the
            # lowercase form so later comparisons are case-insensitive.
            new_words.append(urban_word)
            millenial.append(urban_word_lower)
            known_words.add(urban_word_lower)
# Pickle new enhanced list
# Overwrite the pickle file with the list that now includes this run's words.
# The context manager guarantees the data is flushed and the handle closed
# before the script continues, rather than whenever the file object happens
# to be garbage-collected.
with open(path + millenial_pickle_file, "wb") as pickle_out:
    pickle.dump(millenial, pickle_out)
# %% Send mail
# Report the outcome of this run: a comma-separated list of the newly found
# words, or a fixed placeholder message when nothing new turned up.
subject = 'New words from Urban Dictionary'
body = ', '.join(new_words) if new_words else 'No new words'
dl.send_mail_general(subject, body, path)