-
Notifications
You must be signed in to change notification settings - Fork 62
/
iem_scraper_example.py
130 lines (103 loc) · 3.86 KB
/
iem_scraper_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
Example script that scrapes data from the IEM ASOS download service.
Requires: Python 3
"""
import datetime
import json
import os
import sys
import time
from urllib.request import urlopen
# Upper bound on how many times a single download is tried before giving up
MAX_ATTEMPTS = 6
# Plain-HTTP endpoint: HTTPS here can be problematic for installs that do not
# have the Let's Encrypt CA certificates available
SERVICE = (
    "http://mesonet.agron.iastate.edu"
    "/cgi-bin/request/asos.py?"
)
def download_data(uri):
    """Fetch the data from the IEM.

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function retries a failed request up
    to ``MAX_ATTEMPTS`` times, waiting with an exponential backoff
    (5, 10, 20, ... seconds) between attempts to keep individual downloads
    from erroring.

    Args:
        uri (string): URL to fetch

    Returns:
        string data, or an empty string when every attempt failed
    """
    for attempt in range(MAX_ATTEMPTS):
        try:
            # The context manager guarantees the HTTP response is closed,
            # even when reading or decoding raises.
            with urlopen(uri, timeout=300) as response:
                data = response.read().decode("utf-8")
            # The service reports problems in-band with an "ERROR" prefix
            if not data.startswith("ERROR"):
                return data
            print(f"download_data({uri}) returned an error response")
        except Exception as exp:
            # Deliberately broad: this is a best-effort retry loop and any
            # network/decoding failure should lead to another attempt.
            print(f"download_data({uri}) failed with {exp}")
        # Back off before retrying; sleeping after the final attempt would
        # only delay the caller.
        if attempt < MAX_ATTEMPTS - 1:
            time.sleep(5 * 2**attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""
def get_stations_from_filelist(filename):
    """Build a listing of stations from a simple file listing the stations.

    The file should simply have one station per line.  Surrounding whitespace
    is stripped and blank lines are ignored, so they never turn into empty
    station identifiers.

    Args:
        filename (string): path of the ASCII text file to read

    Returns:
        list of station identifiers in file order

    Raises:
        SystemExit: with a non-zero status when ``filename`` is not a file
    """
    if not os.path.isfile(filename):
        print(f"Filename {filename} does not exist, aborting!")
        # Non-zero status so shells/schedulers can see that the run failed
        sys.exit(1)
    with open(filename, encoding="ascii") as fh:
        stripped = (line.strip() for line in fh)
        return [station for station in stripped if station]
def get_stations_from_networks():
    """Build a station list by using a bunch of IEM networks.

    For every listed state the ``<STATE>_ASOS`` network GeoJSON is fetched
    from the IEM and the ``sid`` property of each feature is collected.

    Returns:
        list of station identifiers, ordered by network and then by the
        order of the features in each network's GeoJSON
    """
    stations = []
    states = (
        "AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN "
        "MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT "
        "WA WI WV WY"
    )
    for state in states.split():
        # Get metadata
        uri = (
            "https://mesonet.agron.iastate.edu/"
            f"geojson/network/{state}_ASOS.geojson"
        )
        # Close each HTTP response once parsed instead of leaking one
        # open connection per network.
        with urlopen(uri) as response:
            jdict = json.load(response)
        stations.extend(
            site["properties"]["sid"] for site in jdict["features"]
        )
    return stations
def download_alldata():
    """An alternative method that fetches all available data.

    Service supports up to 24 hours worth of data at a time, so the period
    is walked in 24 hour steps and each step is saved to its own
    ``YYYYMMDD.txt`` file in the current working directory.
    """
    # timestamps in UTC to request data for
    startts = datetime.datetime(2012, 8, 1)
    endts = datetime.datetime(2012, 9, 1)
    interval = datetime.timedelta(hours=24)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"

    now = startts
    while now < endts:
        thisurl = service
        thisurl += now.strftime("year1=%Y&month1=%m&day1=%d&")
        thisurl += (now + interval).strftime("year2=%Y&month2=%m&day2=%d&")
        print(f"Downloading: {now}")
        data = download_data(thisurl)
        outfn = f"{now:%Y%m%d}.txt"
        # The payload was decoded as UTF-8, so write it back as UTF-8; an
        # ASCII writer raises UnicodeEncodeError on any non-ASCII character.
        with open(outfn, "w", encoding="utf-8") as fh:
            fh.write(data)
        now += interval
def main():
    """Our main method.

    Downloads the whole requested period once per station and writes each
    response to ``<station>_<start>_<end>.txt`` in the current working
    directory.
    """
    # timestamps in UTC to request data for
    startts = datetime.datetime(2012, 8, 1)
    endts = datetime.datetime(2012, 9, 1)

    service = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"
    service += startts.strftime("year1=%Y&month1=%m&day1=%d&")
    service += endts.strftime("year2=%Y&month2=%m&day2=%d&")

    for station in get_stations_from_networks():
        # `service` already ends with "&", so no extra separator is needed
        uri = f"{service}station={station}"
        print(f"Downloading: {station}")
        data = download_data(uri)
        outfn = f"{station}_{startts:%Y%m%d%H%M}_{endts:%Y%m%d%H%M}.txt"
        # The payload was decoded as UTF-8, so write it back as UTF-8; an
        # ASCII writer raises UnicodeEncodeError on any non-ASCII character.
        with open(outfn, "w", encoding="utf-8") as fh:
            fh.write(data)
# Script entry point: when executed directly (not imported), run the
# day-by-day bulk download.
if __name__ == "__main__":
    download_alldata()