getTwitterData.py
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
import time
import csv
from nsetools import Nse  # used only by the commented-out NSE stock-code lookup in __main__
import Constants as con
import credentials as cr
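
# The two local modules above are not shown in this file; a minimal sketch of
# what this script assumes they contain (the attribute names come from how they
# are used below, the dict keys are illustrative):
#
#   credentials.py
#       CONSUMER_KEY = "..."
#       CONSUMER_SECRET = "..."
#       ACCESS_TOKEN = "..."
#       ACCESS_TOKEN_SECRET = "..."
#
#   Constants.py
#       hash_tag_list = {
#           "indices": ["nifty", "sensex"],
#           "stocks": ["HDFC", "BHEL", "SBIN"],
#       }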

# # # # TWITTER STREAMER # # # #
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        # Handles Twitter authentication and the connection to the Twitter Streaming API.
        listener = StdOutListener(fetched_tweets_filename)
        auth = OAuthHandler(cr.CONSUMER_KEY, cr.CONSUMER_SECRET)
        auth.set_access_token(cr.ACCESS_TOKEN, cr.ACCESS_TOKEN_SECRET)
        # tweet_mode='extended' is a REST-API option; the streaming API instead
        # delivers long tweets under 'extended_tweet', handled by the listener.
        stream = Stream(auth, listener)
        # Filter the stream to English tweets matching the keywords;
        # is_async=True runs the stream on a background thread.
        stream.filter(track=hash_tag_list, languages=['en'], is_async=True, encoding='utf-8')
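
# Because filter(..., is_async=True) returns immediately, a caller that wants a
# bounded collection run could keep a reference to the Stream and disconnect it
# later. A minimal sketch (not part of the original flow):
#
#   stream = Stream(auth, listener)
#   stream.filter(track=hash_tag_list, languages=['en'], is_async=True)
#   time.sleep(60)        # collect tweets for one minute
#   stream.disconnect()   # stops the background streaming thread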

# # # # TWITTER STREAM LISTENER # # # #
class StdOutListener(StreamListener):
    """
    A basic listener that saves received tweets to a CSV file.
    """

    def __init__(self, fetched_tweets_filename):
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        try:
            jsonData = json.loads(data)
            # Tweets over 140 characters arrive truncated in 'text'; the full
            # text is nested under 'extended_tweet'.
            if "extended_tweet" in jsonData:
                text = jsonData['extended_tweet']['full_text']
            else:
                text = jsonData['text']
            createdAt = jsonData['created_at']
            # One CSV row per tweet: timestamp, then the tweet text. The file is
            # opened in text mode with UTF-8 encoding, so the text is written as
            # str rather than encoded to bytes (bytes would be saved as "b'...'").
            saveThis = [createdAt, text]
            # Append mode so restarts don't erase previous work; newline=''
            # keeps csv.writer from inserting blank lines between rows.
            with open(self.fetched_tweets_filename, 'a', newline='', encoding='utf-8') as tf:
                writer = csv.writer(tf)
                writer.writerow(saveThis)
            return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            time.sleep(5)
            return True

    def on_error(self, status):
        print(status)
        if status == 420:
            # Rate limited: returning False disconnects the stream.
            return False
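
# For reference, the fields consumed in on_data sit in the incoming status JSON
# roughly like this (a trimmed, illustrative payload; real payloads carry many
# more fields):
#
#   {
#       "created_at": "Mon Jan 01 10:00:00 +0000 2018",
#       "text": "Nifty opens higher... (truncated)",
#       "extended_tweet": {"full_text": "Nifty opens higher ... (full text)"}
#   }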

if __name__ == '__main__':
    # Optional: derive keywords from NSE stock codes via nsetools instead.
    # nse = Nse()
    # all_stock_codes = nse.get_stock_codes(cached=True)
    # stock_codes = list(all_stock_codes.keys())
    # stock_names = list(all_stock_codes.values())
    # print(len(stock_names))
    # print("\n\ncompany codes\n")
    # print(stock_codes)
    # print("\n\ncompany names\n")
    # print(stock_names)

    # Hash tag list of financial Twitter feeds (kept for reference; the live
    # list is built from Constants below).
    # hash_tag_list = [
    #     "bse",
    #     "nse",
    #     "nifty",
    #     "sensex",
    #     "Livemint",
    #     "ReutersIndia",
    #     "EconomicTimes",
    #     "Moneycontrol",
    #     "NDTVProfit",
    #     "SafalNiveshak",
    #     "BasantMaheshwari",
    #     "ForbesIndia",
    #     "BusinessStandard",
    #     "ETMarkets",
    #     "Investopedia",
    #     "CNBCTV18",
    #     "IndiaInfoline News",
    #     "NSEIndia",
    #     "ZeeBusiness",
    #     "investing",
    #     "stocks",
    #     "trading",
    #     "stockmarket",
    #     "finance",
    #     "HDFC",
    #     "BHEL",
    #     "SBIN"
    # ]

    # Flatten the keyword lists in Constants.hash_tag_list into one flat list.
    hash_tag_list = [tag for tags in con.hash_tag_list.values() for tag in tags]
    print(hash_tag_list)

    fetched_tweets_filename = "new_tweets.csv"
    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)
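
# Usage sketch (assumes valid keys in credentials.py and that Constants.py
# defines hash_tag_list as a dict of keyword lists):
#
#   $ python getTwitterData.py
#
# Each appended row of new_tweets.csv then looks like (illustrative):
#
#   Mon Jan 01 10:00:00 +0000 2018,Nifty opens higher ...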