-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
120 lines (89 loc) · 3.75 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS, cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import logging
import joblib
# Load the sentiment classifier once at import time; the /review handler
# calls text_clf.predict() on each scraped comment. The relative path is
# resolved against the process's current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only ship a model_S.pkl that comes from a trusted source.
text_clf = joblib.load('model_S.pkl')
# Write INFO-and-above records from the root logger to scrapper.log.
logging.basicConfig(filename="scrapper.log", level=logging.INFO)
# Flask application object that the route decorators below register against.
app = Flask(__name__)
@app.route("/", methods=['GET'])
def homepage():
    """Handle GET requests for the site root by rendering ``main.html``."""
    landing_page = render_template("main.html")
    return landing_page
# Exceptions raised when a chained BeautifulSoup lookup hits a missing tag
# (None.attr -> AttributeError, None[...] -> TypeError), a missing HTML
# attribute (KeyError) or an empty find_all() result (IndexError).
_SCRAPE_ERRORS = (AttributeError, TypeError, KeyError, IndexError)

# Seconds to wait on each outbound HTTP request before giving up, so a
# stalled upstream cannot hang the request handler indefinitely.
_HTTP_TIMEOUT = 15


def _first_product_link(search_html):
    """Return the absolute URL of the product picked from a search-results page.

    Args:
        search_html: Parsed BeautifulSoup document of the Flipkart search page.

    Returns:
        ``"https://www.flipkart.com"`` followed by the product anchor's href.

    Raises:
        IndexError: Fewer than four result containers were found.
        AttributeError, TypeError, KeyError: Neither known card layout matched.
    """
    # Class names are hard-coded and have changed before (an earlier revision
    # of this scraper matched "_1AtVbE col-12-12"), so expect to update them.
    bigboxes = search_html.find_all("div", {"class": "cPHDOP col-12-12"})
    # The first three matching containers are skipped, as the scraper has
    # always done -- presumably they are not product cards (TODO confirm).
    box = bigboxes[3]
    try:
        href = box.div.div.div.a['href']
    except _SCRAPE_ERRORS:
        # Fallback layout: the anchor sits one <div> deeper.
        href = box.div.div.div.div.a['href']
    return "https://www.flipkart.com" + href


def _parse_review(commentbox, product):
    """Build the review dict for a single comment container.

    Every field is extracted best-effort: if the expected markup is missing,
    a placeholder value is used so one malformed box does not abort the page.

    Args:
        commentbox: BeautifulSoup tag for one review container.
        product: Product name to attach to the review.

    Returns:
        dict with keys Product, Name, Rating, CommentHead, Comment, Sentiment.
    """
    try:
        name = commentbox.div.div.find_all(
            "p", {"class": "_2NsDsF AwS1CA"})[0].text
    except _SCRAPE_ERRORS:
        logging.info("name")
        name = "no name found"

    try:
        rating = commentbox.div.div.div.div.text
    except _SCRAPE_ERRORS:
        rating = 'No Rating'
        logging.info("rating")

    try:
        commentHead = commentbox.div.div.div.p.text
    except _SCRAPE_ERRORS:
        commentHead = 'No Comment Heading'
        logging.info(commentHead)

    try:
        # The comment body lives in the first <div> that has an empty class.
        comtag = commentbox.div.div.find_all('div', {'class': ''})
        custComment = comtag[0].div.text
    except _SCRAPE_ERRORS as e:
        logging.info(e)
        custComment = "N/A"

    try:
        sentiment = text_clf.predict([custComment])[0]
    except Exception:
        # The model is an opaque pickled object, so any failure is possible;
        # degrade to a placeholder instead of failing the whole request.
        sentiment = "Not avalible"

    return {"Product": product, "Name": name, "Rating": rating,
            "CommentHead": commentHead, "Comment": custComment,
            "Sentiment": sentiment}


@app.route("/review", methods=['POST', 'GET'])
def main():
    """Scrape Flipkart reviews for the submitted search term and render them.

    GET renders ``ma.html``. POST reads the ``content`` form field, searches
    Flipkart for it, follows the selected product link, extracts the review
    boxes from the product page, runs each comment through ``text_clf`` and
    renders ``maincss.html`` with the resulting list of review dicts.

    Any failure while searching or scraping is logged with its traceback and
    reported to the user by rendering ``ma.html`` with an ``error_message``.
    """
    if request.method != 'POST':
        return render_template('ma.html')

    # Imported locally so this handler does not rely on the module's
    # top-of-file import list.
    from urllib.parse import quote

    try:
        searchString = request.form['content'].replace(" ", "")
        # Percent-encode the user-supplied term so characters such as '&' or
        # '#' cannot alter the query string and non-ASCII text is accepted.
        flipkart_url = ("https://www.flipkart.com/search?q="
                        + quote(searchString, safe=""))
        # The context manager closes the connection even if read() fails.
        with uReq(flipkart_url, timeout=_HTTP_TIMEOUT) as uClient:
            flipkartPage = uClient.read()
        flipkart_html = bs(flipkartPage, "html.parser")

        productLink = _first_product_link(flipkart_html)
        prodRes = requests.get(productLink, timeout=_HTTP_TIMEOUT)
        prodRes.encoding = 'utf-8'
        prod_html = bs(prodRes.text, "html.parser")
        commentboxes = prod_html.find_all('div', {'class': "RcXBOT"})

        # The product name comes from the search page and is identical for
        # every review, so it is looked up once rather than per comment.
        try:
            product = flipkart_html.find_all(
                "div", {"class": "KzDlHZ"})[0].text
        except _SCRAPE_ERRORS:
            product = "na"
            logging.info("product")

        reviews = [_parse_review(commentbox, product)
                   for commentbox in commentboxes]
        logging.info("log my final maincss {}".format(reviews))
        # The last matched box is dropped, as before -- presumably it is not
        # an actual review (TODO confirm against the live page markup).
        return render_template('maincss.html', reviews=reviews[:-1])
    except Exception:
        # Top-level boundary for the request: record the traceback and show
        # the user a friendly message instead of a 500 page.
        logging.exception("flipkart review scrape failed")
        error_message = 'Something is wrong with your flipkart product search. Please search any other product.'
        return render_template('ma.html', error_message=error_message)
if __name__ == "__main__":
    # Only start Flask's built-in server when this file is run as a script.
    # "0.0.0.0" binds every network interface, not just localhost.
    bind_address = "0.0.0.0"
    app.run(host=bind_address)