app.py
import streamlit as st
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import tweepy
import re
import configparser  # kept for the optional config-file credential loading sketched below
import detector

# Hide Streamlit deprecation warnings in the UI. These options were removed
# in newer Streamlit releases, so guard the calls for compatibility.
for _option in ('deprecation.showfileUploaderEncoding', 'deprecation.showPyplotGlobalUse'):
    try:
        st.set_option(_option, False)
    except Exception:
        pass
STYLE = """
<style>
img {
max-width: 100%;
}
</style> """
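# Note: STYLE above is never rendered as written; to apply it, one could call
# st.markdown(STYLE, unsafe_allow_html=True) (a suggestion, not a call the
# script currently makes).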
def main():
    html_temp = """
    <link href='https://fonts.googleapis.com/css?family=Open Sans' rel='stylesheet'>
    <div style="background-color:rgb(143, 38, 255);">
    <p style="color:white;font-size:50px;padding:9px;text-transform:uppercase;font-family:Open Sans">
    <b>Phorch</b>
    </p>
    </div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)
    st.subheader("Don't be the victim!")
    from PIL import Image
    image = Image.open('phish.gif')
    st.image(image, use_column_width=True)
    ################# Twitter API Connection #######################
    # Fill in your own Twitter API credentials below (left blank here).
    consumer_key = ""
    consumer_secret = ""
    access_token = ""
    access_token_secret = ""
    # Use the above credentials to authenticate with the API.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)  # pause automatically when rate-limited
    ################################################################
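    # Optionally, the credentials above could be loaded from a config file via
    # the configparser import. A minimal sketch, assuming a 'config.ini' file
    # with a [twitter] section (both names are assumptions):
    #   config = configparser.ConfigParser()
    #   config.read('config.ini')
    #   consumer_key = config['twitter']['consumer_key']
    #   consumer_secret = config['twitter']['consumer_secret']
    #   access_token = config['twitter']['access_token']
    #   access_token_secret = config['twitter']['access_token_secret']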
    def prediction(url):
        dt = detector.Detector()
        result = dt.detect(url)
        return result
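    # Note: detector.Detector comes from this project's own detector module;
    # detect(url) is assumed to return 1 for a malicious URL and 0 for a
    # benign one, matching the labels described in the sidebar.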
    def check_url():
        # Collect input from the user:
        url = str(st.text_input("Enter the URL you wish to identify (Press Enter once done)"))
        result = prediction(url)
        return result
    def get_tweets(Topic, Count):
        i = 0
        df = pd.DataFrame(columns=["Date", "User", "IsVerified", "Tweet", "Likes", "RT", "User_location"])
        my_bar = st.progress(0)  # Track progress of the tweet extraction
        for tweet in tweepy.Cursor(api.search_tweets, q=Topic, count=50, lang="en", exclude='retweets').items():
            df.loc[i, "Date"] = tweet.created_at
            df.loc[i, "User"] = tweet.user.name
            df.loc[i, "IsVerified"] = tweet.user.verified
            df.loc[i, "Tweet"] = tweet.text
            df.loc[i, "Likes"] = tweet.favorite_count
            df.loc[i, "RT"] = tweet.retweet_count
            df.loc[i, "User_location"] = tweet.user.location
            i += 1
            my_bar.progress(min(int(i / Count * 100), 100))
            if i >= Count:  # Stop once Count tweets have been collected
                break
        return df
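    # For example, get_tweets("phishing", Count=10) returns a DataFrame of up
    # to 10 English-language, non-retweet tweets matching the topic.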
    # Extract the first URL (or IP-based address) found in a tweet's text.
    def clean_tweet(tweet):
        regex = r"\b((?:https?://)?(?:(?:www\.)?(?:[\da-z\.-]+)\.(?:[a-z]{2,6})|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])))(?::[0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])?(?:/[\w\.-]*)*/?)\b"
        matches = re.findall(regex, tweet)
        if len(matches) == 0:
            return 0
        # Keep only the first URL found, as a plain string, so it can be
        # passed directly to prediction().
        return matches[0]
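    # For example, clean_tweet("Check out http://example.com/login now") is
    # expected to return 'http://example.com/login', while a tweet with no
    # URL returns 0 (fetch_tweet drops those rows).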
    # Extract tweets for a topic entered by the user and flag their URLs.
    def fetch_tweet(api):
        # Collect input from the user:
        Topic = str(st.text_input("Enter the topic you are interested in (Press Enter once done)"))
        if len(Topic) > 0:
            # Extract the data for the given topic.
            with st.spinner("Please wait, Tweets are being extracted"):
                df = get_tweets(Topic, Count=10)
            st.success('Tweets have been extracted!')
            # Pull the URLs out of the tweets and drop rows without one.
            df['url'] = df['Tweet'].apply(clean_tweet)
            df.drop(df.loc[df['url'] == 0].index, inplace=True)
            # See the extracted data:
            if st.button("See the Extracted Data"):
                st.success("Below is the Extracted Data:")
                st.write(df[['Tweet', 'url']])
            if st.button("Predict Results"):
                st.write("Wait while we process...")
                df['phishing'] = df['url'].apply(prediction)
                st.write(df[['url', 'phishing']])
    process_name = st.selectbox(
        'Select Operation',
        ('Select', 'Enter URL', 'Fetch Tweets')
    )
    if process_name == 'Enter URL':
        result = check_url()
        if st.button("Predict Results"):
            st.write(result)
    if process_name == 'Fetch Tweets':
        fetch_tweet(api)
    # Sidebar
    st.sidebar.header("About App")
    st.sidebar.info("Classifies a target URL as malicious (1) or benign (0). Supports checking an individual URL and also scans Twitter posts to identify malicious redirections.")
    st.sidebar.info("[Source Code](https://github.com/vidhigupta9/pytorch)")
    st.sidebar.header("Made by:")
    st.sidebar.info("[Vidhi Gupta](https://github.com/vidhigupta9)")
    st.sidebar.info("[Deep Rodge](https://github.com/deeprodge)")
    if st.button("Exit"):
        st.balloons()
if __name__ == '__main__':
    main()
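# Run the app locally with:
#   streamlit run app.py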