-
Notifications
You must be signed in to change notification settings - Fork 5
/
inat_api.py
110 lines (93 loc) · 4.03 KB
/
inat_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import json, os, pickle, requests, shelve, sys, time
#############################################################################
# #
# API calls to obtain taxonomic information. Used in case of name changes. #
# #
# See documentation at https://api.inaturalist.org/v1/docs/#/Taxa #
# #
# We throttle the number of calls to at most 60 per minute. We also #
# implement a cache to avoid repeated lookups of the same taxa across runs. #
# Cache entries include time stamps and they expire after two weeks. #
# #
#############################################################################
# Base URL of the iNaturalist API; request paths such as '/taxa' are appended.
API_HOST = "https://api.inaturalist.org/v1"
# Maximum age of a cache entry, in seconds, before it is fetched again.
CACHE_EXPIRATION = 14 * 24 * 3600 # cache expires after 2 weeks
# Initial sleep, in seconds, after an HTTP 429 reply; doubled on each repeat.
TOO_MANY_API_CALLS_DELAY = 60 # wait this long after error 429
# The cache stores the json responses.
# Per-user cache directory: ~/AppData/Local/inat_api on Windows,
# ~/.cache/inat_api on every other platform.
if sys.platform == 'win32':
    DATA_DIR = os.path.join(os.path.expanduser('~'),
                            'AppData', 'Local', 'inat_api')
else:
    DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache', 'inat_api')

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test (two processes
# starting at once could both pass the test and one would then fail).
os.makedirs(DATA_DIR, exist_ok=True)

# Persistent on-disk mapping; entries are (timestamp, json response) tuples.
cache = shelve.open(os.path.join(DATA_DIR, 'api.cache'))
# API call throttling.
class Throttle:
    """Sliding-window limiter for API calls.

    At most API_MAX_CALLS calls to wait() may return within any span of
    API_INTERVAL seconds; further calls block until the oldest recorded
    call has aged out of that window.
    """

    API_MAX_CALLS = 60  # max 60 calls per minute
    API_INTERVAL = 60   # 1 minute

    def __init__(self):
        # Monotonic-clock timestamps of the recorded calls, oldest first.
        self.callTimes = []

    # wait if necessary to avoid more than API_MAX_CALLS in API_INTERVAL
    def wait(self):
        """Block until another API call is permitted, then record it.

        While the window already holds API_MAX_CALLS entries, the oldest
        one is either discarded (if it is at least API_INTERVAL seconds
        old) or waited out with a sleep, announced on stdout.
        """
        # time.monotonic() is used instead of time.time() because the
        # wall clock can be adjusted; a backward jump would otherwise make
        # the oldest entry look far in the future and trigger a huge sleep.
        while len(self.callTimes) >= self.API_MAX_CALLS:
            waitTime = (self.callTimes[0] + self.API_INTERVAL
                        - time.monotonic())
            if waitTime > 0:
                print('Throttling API calls, '
                      f'sleeping for {waitTime:.1f} seconds.')
                time.sleep(waitTime)
                continue  # re-check after sleeping
            # The oldest call has left the window; drop it in place.
            del self.callTimes[0]
        self.callTimes.append(time.monotonic())
# Single module-level throttle shared by get_taxa_by_id() and get_taxa().
api_call_throttle = Throttle()
# argument is an id or a list of id's
def get_taxa_by_id(id):
    """Fetch the '/taxa/<id>' API response for one id or several ids.

    Args:
        id: A single taxon id, or a list or tuple of ids. Several ids are
            joined with '%2C' (a URL-encoded comma) into one request.

    Returns:
        The parsed JSON response, taken from the cache when an entry
        younger than CACHE_EXPIRATION exists and otherwise freshly
        downloaded and cached. None if the final response status is not
        OK; in that case the response text is printed.

    A 429 reply is retried without limit, sleeping first
    TOO_MANY_API_CALLS_DELAY seconds and twice as long after each repeat.
    """
    # isinstance() also admits tuples, which the former exact-type check
    # formatted verbatim into the URL (e.g. '/taxa/(1, 2)').
    if isinstance(id, (list, tuple)):
        url = API_HOST + '/taxa/' + '%2C'.join(str(i) for i in id)
    else:
        url = API_HOST + f'/taxa/{id}'

    tim = time.time()

    # Each subscript on the shelf reads and unpickles the entry from disk,
    # so fetch it once and reuse the result.
    entry = cache.get(url)
    if entry is not None and entry[0] >= tim - CACHE_EXPIRATION:
        return entry[1]

    delay = TOO_MANY_API_CALLS_DELAY
    headers = {'Content-type' : 'application/json' }
    # NOTE(review): requests.get() is called without a timeout, so a
    # stalled connection blocks indefinitely - consider adding one.
    while True:
        api_call_throttle.wait()
        response = requests.get(url, headers=headers)
        if response.status_code != requests.codes.too_many:
            break
        time.sleep(delay)
        delay *= 2

    if response.status_code != requests.codes.ok:
        print(response.text)
        return None

    data = response.json()
    # The entry is stamped with the time taken before the request was sent.
    cache[url] = (tim, data)
    return data
# returns taxa by name
def get_taxa(params):
    """Fetch the '/taxa' API response for the given query parameters.

    Args:
        params: Mapping of query parameter names to values. Boolean
            values are sent as the strings 'true' / 'false'. The mapping
            itself is not modified.

    Returns:
        The parsed JSON response, taken from the cache when an entry
        younger than CACHE_EXPIRATION exists and otherwise freshly
        downloaded and cached. None if the final response status is not
        OK; in that case the response text is printed.

    A 429 reply is retried without limit, sleeping first
    TOO_MANY_API_CALLS_DELAY seconds and twice as long after each repeat.
    """
    url = API_HOST + '/taxa'

    # Convert booleans on a copy: rewriting the values in the caller's
    # dict (as was done before) leaks a side effect out of a lookup.
    query = {
        name: ('true' if val else 'false') if isinstance(val, bool) else val
        for name, val in params.items()
    }

    # For a plain dict argument this key is byte-identical to the one
    # computed before, so existing cache entries remain valid.
    # NOTE(review): the pickle depends on dict insertion order, so the same
    # parameters passed in a different order get a separate cache entry.
    key = pickle.dumps((url, query)).hex()
    tim = time.time()

    # Each subscript on the shelf reads and unpickles the entry from disk,
    # so fetch it once and reuse the result.
    entry = cache.get(key)
    if entry is not None and entry[0] >= tim - CACHE_EXPIRATION:
        return entry[1]

    delay = TOO_MANY_API_CALLS_DELAY
    headers = {'Content-type' : 'application/json' }
    while True:
        api_call_throttle.wait()
        response = requests.get(url, headers=headers, params=query)
        if response.status_code != requests.codes.too_many:
            break
        time.sleep(delay)
        delay *= 2

    if response.status_code != requests.codes.ok:
        print(response.text)
        return None

    data = response.json()
    # The entry is stamped with the time taken before the request was sent.
    cache[key] = (tim, data)
    return data
if __name__ == '__main__':
    # This file is meant to be imported, not executed. sys.exit() with a
    # string prints the message to stderr and exits with status 1; the
    # former `assert not '...'` was removed entirely under `python -O` and
    # never displayed its message as such.
    sys.exit('Not a top-level Python module!')