This repository has been archived by the owner on Oct 10, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlink_to_search.py
102 lines (84 loc) · 3.3 KB
/
link_to_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
__author__ = 'sn'
import praw
import json
import smtplib
from smtplib import SMTPException
import traceback
import sys
import exceptions
import urllib2
import sys
# Python 2 only: ``site`` removes ``sys.setdefaultencoding`` at interpreter
# start-up, so the module is reloaded to get it back. Switching the default
# to UTF-8 lets the ``str()`` calls in this script accept non-ASCII text
# without raising UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding("utf-8")

# Public IP address of this machine; it is appended to the error e-mails so
# the recipients can tell which host produced a failure. The value is purely
# diagnostic, so a slow or unreachable lookup service must not stop the crawl
# from starting: use a timeout and fall back to a placeholder.
try:
    _ip_response = urllib2.urlopen('http://ip.42.pl/raw', timeout=10)
    try:
        my_ip = str(_ip_response.read())
    finally:
        _ip_response.close()
except IOError:  # urllib2.URLError and socket errors are IOError subclasses
    my_ip = 'unknown'
def send_email(exception):
    """Mail an error report to the project members via the local SMTP server.

    Delivery is best-effort: if the message cannot be sent, a notice is
    printed and the function returns normally so the caller can carry on.

    Args:
        exception: The error (or a pre-formatted report string); it is
            converted with ``str()`` and used as the message body.
    """
    print("exception:  " + str(exception))
    sender = 'from@fromdomain.com'
    receivers = ['suren.k.n@icloud.com', 'khare.ashwini@gmail.com', 'prp3@gatech.edu', 'revant@gatech.edu']
    # The empty line after the headers is required: without it the report
    # text would be parsed as part of the header section.
    message = (
        "From: Social computing <redditer@reddit>\n"
        "To: Les Redditorians\n"
        "Subject: error occured in the process\n"
        "\n"
        + str(exception)
    )
    try:
        smtp_obj = smtplib.SMTP('localhost')
        try:
            smtp_obj.sendmail(sender, receivers, message)
        finally:
            # Always end the session so the connection is not leaked.
            smtp_obj.quit()
    except (SMTPException, IOError):
        # IOError covers socket-level failures (e.g. no mail server listening
        # on localhost), which are not SMTPException subclasses.
        print("Error: unable to send email")
try:
    _TEXT_TYPES = (str, unicode)  # Python 2: byte strings and unicode strings
except NameError:
    _TEXT_TYPES = (str,)  # Python 3: ``str`` is the only text type


def tuple_to_string(t):
    """Render *t* as one comma-separated line of text.

    Args:
        t: Either a string, which is returned unchanged, or an iterable whose
            items are each converted with ``str()``.

    Returns:
        The string itself, or the items joined by ``','`` followed by a
        newline. No quoting or escaping is applied to the items.
    """
    # Treat every text type as "already formatted"; otherwise a unicode
    # string would be iterated and come back as comma-separated characters.
    if isinstance(t, _TEXT_TYPES):
        return t
    return ','.join(str(x) for x in t) + '\n'
# Progress counters for the crawl.
index_to_start = 0       # 1-based number of the links.csv line being handled
records_to_skip = 28909  # leading lines of links.csv to skip (presumably done
                         # in an earlier run; the output is opened in append mode)
records_done = 0         # rows written successfully during this run


def _find_link_posts(reddit, link):
    """Return ``(user, subreddit, created, score)`` for each link post found.

    Runs ``reddit.search(link)`` and skips self posts. The values are taken
    from each search hit itself, so they never overwrite the metadata of the
    link being looked up.
    """
    posts = []
    for item in reddit.search(link):
        if item.is_self:
            continue
        poster = item.author
        # A missing author (None) would otherwise raise AttributeError and
        # throw away every other hit for this link.
        user = poster.name if poster is not None else '[deleted]'
        # Store the subreddit's name: the Subreddit object itself cannot be
        # serialised by json.dumps.
        posts.append((user, item.subreddit.display_name, item.created, item.score))
    return posts


with open('data_revant.csv', 'a') as x:
    with open('links.csv', 'r') as f:
        r = praw.Reddit('A school project bot to study distribution of links amongst subreddits')
        # SECURITY: the OAuth secret and the account password are committed
        # in plain text. They should be rotated and loaded from outside the
        # source tree; they are left here only to keep the script's behaviour.
        r.set_oauth_app_info(
            client_id='rjDZ0vL2lOb9Zg',
            client_secret='ryC8UlrOlTOmJrCwNF1mY0GqErk',
            redirect_uri='http://127.0.0.1:5000/')
        r.login(username="bad_guy_1991", password="qweasd")
        for index_to_start, line in enumerate(f, 1):
            if index_to_start <= records_to_skip:
                continue
            try:
                # split() without arguments already ignores surrounding
                # whitespace, so no separate strip() is needed.
                fields = line.split()
                if len(fields) != 5:
                    print("MESSED UP LIN:  " + str(fields))
                    continue
                link, author, subreddit, time, score = fields
                # NOTE: columns are joined with bare commas and no quoting;
                # the JSON column contains commas itself, so it stays last.
                row = (index_to_start, link, author, subreddit, time, score)
                try:
                    all_items = _find_link_posts(r, link)
                except AttributeError as e:
                    # A search hit lacked an expected attribute: still record
                    # the link, with an empty result list, and move on.
                    x.write(tuple_to_string(row + (json.dumps([]),)))
                    x.flush()
                    print(link)
                    print(e)
                    print(index_to_start)
                    continue
                x.write(tuple_to_string(row + (json.dumps(all_items),)))
                records_done += 1
                if records_done % 100 == 0:
                    x.flush()
                    print("records done:  %s  idx:  %s" % (records_done, index_to_start))
            except Exception:
                # Deliberately not a bare ``except:`` so that Ctrl-C
                # (KeyboardInterrupt) and SystemExit still stop the script.
                exc_type, exc_value = sys.exc_info()[:2]
                exception = (
                    ''.join(traceback.format_tb(sys.exc_info()[2])) + '\n'
                    + str(exc_type) + '\n line: ' + str(index_to_start)
                    + ' ' + '\n' + str(exc_value) + '\n' + my_ip
                )
                send_email(exception)
                print("records done:  " + str(records_done))
                print(exception)
                print(str(exc_type))
                print(str(exc_value))