-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis2.py
144 lines (120 loc) · 4.74 KB
/
analysis2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Import necessary libraries
import tweepy
import pandas as pd
import csv
import time
import random
from datetime import date, timedelta, datetime
import datetime
from pandas import DataFrame
pd.options.mode.chained_assignment = None # default='warn' # to disable SettingWithCopyWarning
# Read the Twitter API credentials from the local 'credentials' file.
# The file is read line by line and the first four lines are used, in order:
# API key, API key secret, access token, access token secret.
with open('credentials', 'r') as credentials_file:
    all_keys = credentials_file.read().splitlines()
if len(all_keys) < 4:
    raise SystemExit("The 'credentials' file must contain at least 4 lines")
api_key = all_keys[0]
api_key_secret = all_keys[1]
access_token = all_keys[2]
access_token_secret = all_keys[3]

# Authenticate to Twitter
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Verify the credentials once and keep the returned user object, instead of
# calling the endpoint a second time. The rest of the script cannot work
# without a valid session, so stop here with a clear message on failure.
try:
    user = api.verify_credentials()
except Exception as auth_error:
    print("Authentication Error")
    raise SystemExit("Could not verify the Twitter credentials: " + str(auth_error)) from auth_error
if not user:
    # NOTE(review): some tweepy releases appear to return a falsy value
    # rather than raising on rejected credentials - confirm for the
    # installed version.
    print("Authentication Error")
    raise SystemExit("Twitter rejected the supplied credentials")
print("Authentication Successful")

# Show who we are authenticated as
print("The authenticated user's name is : " + user.name)
print("The authenticated user's location is : " + str(user.location))
print("\n")
# the username of the targeted user
# username = "ChirpNews"
# store tweets in csv files
# 3 different csv files for 3 different accounts
# for i in range(1, 10):
# api.update_status(i)
# Reference dates for this run, all derived from the local calendar date.
presentDay = date.today()
print("Today's date is : " + str(presentDay))
tomorrow = presentDay + timedelta(days=1)
print("Tomorrow's date is : " + str(tomorrow))
# One day *before* today (the previous code added timedelta(0), which
# produced today's date again).
yesterday = presentDay - timedelta(days=1)
print("Yesterday's date is : " + str(yesterday))
# Screen name of the account whose timeline is retrieved below
username = "PTI_News"

# Look the account up and report how many followers it currently has
user = api.get_user(screen_name=username)
followerCount = user.followers_count
print("{} has {} followers on Twitter \n".format(username, followerCount))
# NOTE(review): start_date / end_date are only referenced by the disabled
# `since` / `until` filters below and are otherwise unused; start_date is
# also later than end_date. Confirm whether they can be removed.
start_date = datetime.datetime(2022, 11, 27, 00, 00, 00)
end_date = datetime.datetime(2022, 11, 26, 00, 00, 00)

# (Re)create pti.csv for this run:
#   * mode 'w' discards any previous contents (replaces append + truncate),
#   * newline='' is what the csv module expects so it controls line endings,
#   * encoding='utf-8' lets tweet text be written as real text.
# The `with` block guarantees the file is closed even if the download fails.
with open('pti.csv', 'w', newline='', encoding='utf-8') as csvFile:
    csvWriter = csv.writer(csvFile)
    for tweet in tweepy.Cursor(api.user_timeline,
                               screen_name=username,
                               # since=start_date,
                               # until=end_date,
                               # lang="en",
                               ).items(750):
        # NOTE(review): this divides by a fixed 100, not by followerCount -
        # confirm that this is the intended engagement formula.
        engagementRate = (tweet.retweet_count + tweet.favorite_count) / 100
        tweetLink = "https://twitter.com/" + username + "/status/" + str(tweet.id)
        # Write the tweet text itself; encoding it to bytes first would store
        # the "b'...'" representation in the CSV instead of the text.
        csvWriter.writerow([tweet.created_at.date(), tweet.text, tweetLink,
                            tweet.retweet_count, tweet.favorite_count, engagementRate])
        print(tweet.created_at.date(), tweet.text, tweetLink,
              tweet.retweet_count, tweet.favorite_count, engagementRate)
# Column labels for the downloaded data (the tweet link is included).
# The same labels are used both when reading the header-less file and when
# writing it back with a visible header row.
headerList = ["Created At", "Tweet", "TweetLink", "RetweetCount", "LikeCount", "EngagementRate"]
colNames = list(headerList)

# Load the raw rows, naming the columns since the file has no header yet
file = pd.read_csv("pti.csv", header=None, names=colNames)

# Write the same data back, this time with the header row and no index column
file.to_csv("pti.csv", index=False, header=headerList)

print("\n")
# List the column names, one per line
for column_name in file.columns:
    print(column_name)
# Select the tweets of a one-day window: the current day of this run.
# The "Created At" column holds the date text written to the CSV, so it is
# compared against the ISO form of presentDay (previously a hard-coded
# "2022-12-19", which raised KeyError on any other day).
# NOTE(review): presentDay is the local date; confirm it matches the time
# zone of the tweets' created_at dates.
dateDf = file.groupby("Created At")
targetDay = str(presentDay)
if targetDay in dateDf.groups:
    # Highest engagement first; sort_values returns a new frame, so the
    # group slice is not modified in place.
    todayDf = dateDf.get_group(targetDay).sort_values(by='EngagementRate',
                                                      ascending=False)
else:
    # No tweets for that day: keep an empty frame with the same columns
    todayDf = file.iloc[0:0]
    print("No tweets found for " + targetDay)
# file.sort_values('EngagementRate',
# axis=0,
# ascending=False,
# inplace=True,
# na_position='first')
# file.sort_values(file.columns[5])
# file.sort_values(by='EngagementRate',
# # axis=1,
# ascending=False,
# inplace=True)
# kind="mergesort")
# print(file)
# for items in file:
# for i in range(5):
# # api.update_status(file['TweetLink'])
# print(file['TweetLink'])
# print(file.head(5))
# print(file['TweetLink'].head(5))
# api.update_status(file['TweetLink'].head(5))
# Collect the tweet links of the selected day and post the first five.
# todayDf is expected to be sorted by engagement already, so these are the
# top entries.
tweetLinks = todayDf['TweetLink'].tolist()
# print(tweetLinks)
# Slicing stops early when fewer than five links are available, where
# indexing with range(5) would raise IndexError.
for tweetLink in tweetLinks[:5]:
    print(tweetLink)
    api.update_status(tweetLink)