-
Notifications
You must be signed in to change notification settings - Fork 2
/
RedditStream.py
75 lines (63 loc) · 2.6 KB
/
RedditStream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
Created on Nov 24, 2018
@author: ishank
'''
from __future__ import print_function
import praw, json
# Submission count at which RedditStream.stream_data stops reading the stream.
NUM_REDDITS = 200
# When true, progress lines and the final "Collected" summary are printed.
DEBUG = True
class RedditStream(object):
    '''
    Collects self-text submissions from a subreddit's live submission stream.

    Attributes:
        reddit: the praw.Reddit client, or None until authenticate() has
            managed to create one.
        subreddit: name of the subreddit whose stream is read.
        count: running number of submissions collected by stream_data().
    '''

    def __init__(self, subreddit='all'):
        '''Remember the target subreddit and log in immediately.'''
        self.reddit = None
        self.subreddit = subreddit
        self.count = 0
        self.authenticate()

    def authenticate(self):
        '''
        Create the praw client from the credentials stored in app.json.

        app.json must hold a JSON object with the keys client_id,
        client_secret, password, user_agent and username. A missing or
        unparsable file raises; any failure while building the client or
        checking the login is printed instead of raised.
        '''
        # Use a context manager so the credentials file is always closed.
        with open('app.json') as handle:
            config = json.load(handle)
        try:
            self.reddit = praw.Reddit(client_id=config['client_id'],
                                      client_secret=config['client_secret'],
                                      password=config['password'],
                                      user_agent=config['user_agent'],
                                      username=config['username'])
            print('Logged in: '+ str(self.reddit.user.me()))
        except Exception as e:
            # One handler is enough: praw's own exceptions derive from
            # Exception and were reported in exactly the same way.
            print(e)

    def stream_data(self, limit=None, nsfw_only=True):
        '''
        Read the submission stream until enough records have been collected.

        Stickied submissions and submissions without self-text are ignored.

        Args:
            limit: stop once self.count reaches this value; defaults to the
                module constant NUM_REDDITS.
            nsfw_only: when true (the default) only submissions flagged
                over_18 are collected; when false every self-text submission
                is collected and labelled through its "NSFW" field.

        Returns:
            A list of dicts with the keys "title", "text" and "NSFW".
        '''
        if limit is None:
            limit = NUM_REDDITS
        results = []
        for submission in self.reddit.subreddit(self.subreddit).stream.submissions():
            wanted = (not submission.stickied
                      and submission.selftext
                      and (submission.over_18 or not nsfw_only))
            if wanted:
                try:
                    results.append(self._to_record(submission))
                    # Advance the counter before printing so that a console
                    # which cannot encode the title never leaves count out of
                    # step with the collected records.
                    position = self.count
                    self.count += 1
                    if DEBUG and position % 10 == 0:
                        title = ("\t(NSFW) " if submission.over_18 else "\t(SFW) ") + submission.title
                        print(position, " " , title[:40] + ("..." if len(title) > 40 else ""))
                except Exception as err:
                    # Best effort: one bad submission must not end the run,
                    # but it is reported instead of silently dropped, and
                    # KeyboardInterrupt / SystemExit still propagate.
                    print('Error while processing submission: ' + type(err).__name__)
            if self.count >= limit:
                break
        return results

    def _to_record(self, submission):
        '''Build the dict stored in the results list for one submission.'''
        return {
            "title": self.cleanse(submission.title),
            "text": self.cleanse(submission.selftext),
            "NSFW": str(submission.over_18),
        }

    def cleanse(self, text):
        '''
        Normalise a piece of submission text.

        The text is stripped, underscores and newlines (real or written as a
        literal backslash-n) become spaces, doubled backslashes are collapsed,
        everything is lower-cased and any remaining backslash escapes such as
        a literal backslash-t are interpreted. Byte strings are decoded as
        UTF-8 first. Text whose escapes are malformed (for example a trailing
        backslash) is returned lower-cased but otherwise un-interpreted.
        '''
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        text = text.strip().replace('_', ' ').replace("\\\\", "\\").replace('\n', ' ').replace('\\n', ' ')
        text = text.lower()
        try:
            # unicode-escape only decodes bytes. Going through latin-1 with
            # backslashreplace keeps every non-ASCII character intact: code
            # points up to U+00FF map one-to-one and higher ones become
            # escapes that the decode step turns back into the same character.
            return text.encode('latin-1', 'backslashreplace').decode('unicode-escape')
        except UnicodeDecodeError:
            return text

    @staticmethod
    def jsonify(item):
        '''
        Return str(item) followed by a comma.

        Despite the name this relies on str(), so a dict is rendered as a
        Python literal rather than as JSON.
        '''
        return str(item) + ","
def main():
    '''
    Collect submissions with a default RedditStream and save them as JSON.

    The collected records are written to results.txt in the current working
    directory; when DEBUG is set the number of collected submissions is
    printed afterwards.
    '''
    redstr = RedditStream()
    # Collect before touching the output file: opening it in "w" mode
    # truncates it, so a failure or interrupt during the long-running stream
    # would otherwise wipe earlier results and leave an empty file behind.
    results = redstr.stream_data()
    with open("results.txt", "w") as res:
        res.write(json.dumps(results))
    if DEBUG:
        print("\nCollected: ", redstr.count)


if __name__ == "__main__":
    main()