-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmongo_read.py
127 lines (97 loc) · 3.07 KB
/
mongo_read.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
from pprint import pprint

import pymongo
import tweepy
from pymongo import MongoClient

import app
# --- Twitter / MongoDB configuration ---------------------------------------
# Every credential can be supplied through the environment variable named in
# the corresponding os.environ.get() call. The embedded value is only a
# fallback so that existing setups keep working unchanged.
# NOTE(review): the fallback secrets are committed to source control. Rotate
# them and drop the defaults once every deployment sets the variables.
CONSUMER_KEY = os.environ.get(
    "TWITTER_CONSUMER_KEY", "Gu73BTDiz3grPM1aCfxqLPAP9")
CONSUMER_SECRET = os.environ.get(
    "TWITTER_CONSUMER_SECRET",
    "oYyN16dKgPp5YHZ2USJPqWJpBNCFcl8Fsrc7QynMVcG5kbc1q9")
OAUTH_TOKEN = os.environ.get(
    "TWITTER_OAUTH_TOKEN",
    "1228614243219582976-qRcxliWcrChAWAV0Atg4XTvyqeBzNa")
OAUTH_TOKEN_SECRET = os.environ.get(
    "TWITTER_OAUTH_TOKEN_SECRET",
    "wvcd5MQz074Q9ONho1FRejwFLYtXqJBYaDAFFF5GtWiiQ")
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://test:Test123@cluster0-6occj.gcp.mongodb.net/test?retryWrites=true&w=majority")

# Create the authentication object from the application (consumer) keys
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
# Attach the user access token and secret
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
# API object used for every Twitter request made by this script
api = tweepy.API(auth)

# MongoDB client, database and collection holding the stored tweets
client = MongoClient(MONGODB_URI)
Users = client.get_database('Twitter_Users')
feeds = Users.Twitter_Feeds
# Not needed, but the aim would be to sort in ascending order:
# feeds.create_index([("id",pymongo.ASCENDING)],unique=True)
# Load the account name and the optional Twitter handle from their text
# files. Surrounding whitespace is stripped so that a trailing newline cannot
# defeat the "None" comparison below or leak into the API/database lookups.
with open("name.txt", "r") as f:
    name = f.read().strip()
with open("handle.txt", "r") as t:
    handle = t.read().strip()

# The Twitter handle is more accurate than the name, so it replaces the name
# whenever the handle file holds something other than the literal text
# "None". An empty handle is ignored as well, so it cannot blank the name.
if handle and handle != "None":
    name = handle
print(name)
# Document currently stored for this account (None when there is none)
tweets = feeds.find_one({"name": name})

# Total number of tweets reported by Twitter for the account; the user
# object is fetched once and reused instead of issuing a second request.
user = api.get_user(name)
total_tweets = user.statuses_count

# Number of stored documents for this name; 0 means nothing to merge
fail_safe = feeds.count_documents({"name": name})

# Tweets already saved for the account. The lookup tolerates a missing
# document or a document without the per-account list instead of raising.
if tweets is not None and fail_safe != 0:
    stored_tweets = list(tweets.get(name, []))
else:
    stored_tweets = []
curr_tweets = len(stored_tweets)

# Number of tweets that are not in the database yet
tweetCount = total_tweets - curr_tweets

# Only query the timeline when there is something new to fetch; a zero or
# negative count (e.g. tweets deleted since the last run) is skipped.
# NOTE(review): a single user_timeline call returns a limited page of
# tweets, so a large backlog would need pagination - confirm against the
# Tweepy/Twitter documentation.
if tweetCount > 0:
    results = api.user_timeline(id=name, count=tweetCount)
else:
    results = []

# New tweet texts first (duplicates dropped, order kept), followed by the
# tweets that were already stored. The list always exists under the account
# key, even when no tweet was fetched, so later lookups cannot fail.
search_results = {
    'name': name,
    name: list(dict.fromkeys(tweet.text for tweet in results)),
}
search_results[name].extend(stored_tweets)
#print(search_results)
def _clean_tweet_text(texts):
    """Return the tweets as one space-separated string of plain words.

    For every tweet, only characters from ' ' through 'z' (in code-point
    order) and newlines are kept. The remainder is split on whitespace and
    any word longer than eight characters that starts with 'https://' is
    dropped. Each surviving word is followed by a single space, so the
    result is empty when no word survives.

    Tweets are processed one at a time, so the last word of a tweet can
    never fuse with the first word of the next one.
    """
    words = []
    for text in texts:
        visible = "".join(
            ch for ch in text if " " <= ch <= "z" or ch == "\n")
        words.extend(
            word for word in visible.split()
            if not (len(word) > 8 and word.startswith("https://")))
    return "".join(word + " " for word in words)


# Build the cleaned text in memory and write it once. The earlier version
# wrote an intermediate "tweetsList.txt" and read back "tweetslist.txt",
# which are different files on case-sensitive filesystems.
final_string = _clean_tweet_text(search_results.get(name, []))
with open("tweetslist.txt", "w") as z:
    z.write(final_string)
# Persist the merged document for this account. A single upsert replaces the
# stored document (or inserts it when the account is new), so a failed write
# can no longer leave the account with its old document already deleted.
# This script keeps at most one document per name, so one replacement is
# enough.
try:
    feeds.replace_one({"name": name}, search_results, upsert=True)
except Exception as exc:
    # Saving stays best-effort, as before, but the failure is now reported
    # instead of being silently discarded.
    print("Could not save tweets for {}: {}".format(name, exc))

# Debugging aid (disabled): print every stored tweet for the account.
# tweet_cursor = feeds.find()
# for document in tweet_cursor:
#     for i in range(len(search_results[name])):
#         if name in document:
#             print('-----')
#             print('name:-', name)
#             print('text:-', document[name][i])