-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
69 lines (50 loc) · 1.75 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import boto3
import sys
import json
import string
import requests
import datetime
from urllib.parse import quote
loc = 'Manhattan, NY'
CUISINES = ["indian", "italian", "ethiopian", "american", "mexican", "japanese", "french","spanish","chinese"]
limit = 50
API_KEY = '2GzY81wdyHH2o4lc8Ts8mImXTQwpJzsuR8VQldAg0t11bZhtXSZJkTnnZ7uG7UQ2rxKCcN-zbij9HKlhB8yqEpfl9YS1PASViZpgsp9DYeIHrZfEuO2k4Jl6xRT5Y3Yx'
API_HOST = 'https://api.yelp.com'
searchaddr = '/v3/businesses/search'
def getresponse(cuisine, offset):
urlparams = {
'term': "{} restaurant".format(cuisine).replace(' ', '+'),
'location': loc.replace(' ', '+'),
'limit': limit,
'offset': offset
}
urlparams = urlparams or {}
url = '{0}{1}'.format(API_HOST, quote(searchaddr.encode('utf8')))
headers = {
'Authorization': 'Bearer %s' % API_KEY,
}
response = requests.request('GET', url, headers=headers, params=urlparams)
return response.json()
def getrestos(cuisine):
offset = 0
restolist = []
tot = 1000
while len(restolist)<=1000:
response = getresponse(cuisine,offset)
if tot > response.get('total',1000):
tot = response.get('total',1000)
result = response.get('businesses',None)
if result is None:
break
if len(result)==0:
break
restolist = restolist + result
offset = offset + limit
print("Got {0}/{1} restuarants for cuisine {2}".format(len(restolist),tot,cuisine))
return restolist
if __name__ == '__main__':
for c in CUISINES:
resto = getrestos(c)
with open("./restaurants/{}_data.json".format(c), "w") as f:
json.dump(resto, f)
print("{0}:{1} entries".format(c, len(resto)))