-
Notifications
You must be signed in to change notification settings - Fork 8
/
startup.py
47 lines (39 loc) · 1.14 KB
/
startup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/python
print
print "checking for nltk"
try:
import nltk
except ImportError:
print "you should install nltk before continuing"
print "checking for numpy"
try:
import numpy
except ImportError:
print "you should install numpy before continuing"
print "checking for scipy"
try:
import scipy
except:
print "you should install scipy before continuing"
print "checking for sklearn"
try:
import sklearn
except:
print "you should install sklearn before continuing"
print
print "downloading the Enron dataset (this may take a while)"
print "to check on progress, you can cd up one level, then execute <ls -lthr>"
print "Enron dataset should be last item on the list, along with its current size"
print "download will complete at about 423 MB"
import urllib
url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
urllib.urlretrieve(url, filename="../enron_mail_20150507.tar.gz")
print "download complete!"
print
print "unzipping Enron dataset (this may take a while)"
import tarfile
import os
os.chdir("..")
tfile = tarfile.open("enron_mail_20150507.tar.gz", "r:gz")
tfile.extractall(".")
print "you're ready to go!"