-
Notifications
You must be signed in to change notification settings - Fork 0
/
queryOsm.py
126 lines (99 loc) · 4.48 KB
/
queryOsm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pandas as pd
import shapefile
import configparser
import psycopg2
import regex
import overpass
from collections import OrderedDict
class QueryOsm(object):
    """Query Overpass for tagged OSM features inside each CMA boundary.

    For every (CMA, tag/NAICS row) pair the class runs an Overpass query,
    records the returned feature ids in the ``public.businesses`` table and,
    when at least one feature was found, saves the recursed OSM XML under
    ``data/osm/``.
    """

    # Matches one balanced "( ... )" group; group 1 is the text between the
    # parentheses.  The pattern uses an atomic group and (?R) recursion, which
    # is why the third-party ``regex`` module is used rather than ``re``.
    _PAREN_GROUP = r"\(((?>[^()]|(?R))*)\)"

    def __init__(self):
        """Load boundaries, the tag table, the Overpass client and the DB connection.

        Raises:
            FileNotFoundError: if ``config/config.ini`` cannot be read.
        """
        # Set first so that __del__ is safe even if anything below raises.
        self.conn = None

        cmasSf = shapefile.Reader("resources/boundaries/cmas4326/cmas4326.shp")
        # Overpass "poly" filters take "lat lon lat lon ..."; shapefile points
        # are stored as (x, y), hence point[1] before point[0].
        # NOTE(review): shape.points concatenates every part of a multi-part
        # shape into one ring -- confirm all boundaries are single-part.
        polygons = {
            shapeRecord.record.CMAUID: " ".join(
                "{} {}".format(point[1], point[0])
                for point in shapeRecord.shape.points
            )
            for shapeRecord in cmasSf.shapeRecords()
        }
        self.cmas = OrderedDict(sorted(polygons.items(), key=lambda t: t[0]))

        self.tagNaics = pd.read_csv("resources/tagNaics.tsv", sep='\t')
        self.tagNaics.dropna(subset=["OSM tags"], inplace=True)

        self.api = overpass.API(endpoint="https://lz4.overpass-api.de/api/interpreter", timeout=3600)
        # Monotonic counter used to give every Overpass QL set a unique name.
        self.setId = 0

        config = configparser.ConfigParser()
        # ConfigParser.read() silently skips missing files; fail loudly instead
        # of hitting an unexplained KeyError on "credentials" below.
        if not config.read("config/config.ini"):
            raise FileNotFoundError("Could not read config/config.ini")
        credentials = config["credentials"]
        self.conn = psycopg2.connect(
            host=credentials["host"],
            database=credentials["database"],
            user=credentials["user"],
            password=credentials["password"],
        )

    def __del__(self):
        """Close the database connection if one was ever opened."""
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()

    def queryOsm(self):
        """Run the export for every CMA, in ascending CMA id order."""
        for cma in self.cmas:
            self.queryCma(cma)

    def queryCma(self, cma):
        """Run the export for every row of the tag/NAICS table within ``cma``."""
        # An explicit loop instead of DataFrame.apply: the call is made purely
        # for its side effects, so it must run exactly once per row.
        for _, row in self.tagNaics.iterrows():
            self.queryNaics(row, cma)

    def queryNaics(self, row, cma):
        """Query one tag expression inside one CMA and persist the results.

        Args:
            row: a row of the tag table; reads "OSM tags" and
                "2012 NAICS US Code".
            cma: key into ``self.cmas`` selecting the boundary polygon.

        Failed Overpass queries are reported on stdout and skipped so that
        the remaining (CMA, NAICS) pairs are still processed.
        """
        naics = row["2012 NAICS US Code"]
        poly = self.cmas[cma]

        query = self.getQl(row["OSM tags"], poly=poly, recurse=False)[0]
        try:
            responseGeojson = self.api.get(query)
        except Exception as exc:
            print("Failed geojson query for cma: {0} and naics: {1}: {2}".format(cma, naics, exc))
            return

        features = responseGeojson["features"]
        if not features:
            # Nothing to insert and no file to write, so skip the (slow)
            # recursive XML query entirely.
            return
        self._insertBusinesses(features, cma, naics)

        queryRecurse = self.getQl(row["OSM tags"], poly=poly, recurse=True)[0]
        try:
            responseXml = self.api.get(queryRecurse, responseformat="xml")
        except Exception as exc:
            print("Failed xml query for cma: {0} and naics: {1}: {2}".format(cma, naics, exc))
            return
        with open("data/osm/{0}_{1}.osm".format(str(cma), naics), mode="w", encoding="utf-8") as f:
            f.write(responseXml)

    def _insertBusinesses(self, features, cma, naics):
        """Insert one ``public.businesses`` row per feature, then commit once."""
        sql = """INSERT INTO public.businesses(id, cma, naics)
            VALUES(%s, %s, %s);"""
        with self.conn.cursor() as cur:
            for feature in features:
                featureId = feature['id']
                # A failed statement aborts the whole PostgreSQL transaction;
                # the savepoint confines the damage to this single row so the
                # other rows of the batch are still committed.
                cur.execute("SAVEPOINT business_insert;")
                try:
                    cur.execute(sql, (featureId, str(cma), naics))
                except psycopg2.Error as exc:
                    cur.execute("ROLLBACK TO SAVEPOINT business_insert;")
                    print("Failed INSERT for id: {0}, cma: {1}, and naics: {2}: {3}".format(featureId, cma, naics, exc))
                else:
                    cur.execute("RELEASE SAVEPOINT business_insert;")
        self.conn.commit()

    @staticmethod
    def _tagToQl(turboTag, area):
        """Translate one ``key=value`` style token into node+way QL statements.

        A purely numeric token refers to the result set of an already
        compiled parenthesised group.
        """
        if turboTag.isdigit():
            return "\n\tnode{0};\n\tway{0};\n".format(".i" + turboTag)

        operator = "!=" if "!=" in turboTag else "="
        key, separator, value = turboTag.partition(operator)
        if not separator:
            raise ValueError("Malformed tag expression: {!r}".format(turboTag))

        if value != "*":
            tagFilter = '["{0}"{1}"{2}"]'.format(key, operator, value)
        elif operator == "=":
            tagFilter = '["{0}"]'.format(key)
        else:
            # "key!=*" asks for the key to be absent, not present.
            tagFilter = '[!"{0}"]'.format(key)
        return "\n\tnode{0}({1});\n\tway{0}({1});\n".format(tagFilter, area)

    def getQl(self, turbo, poly, recurse, out=True):
        """Compile a tag expression into Overpass QL.

        In ``turbo``, " OR " separates alternatives, any other space means
        "and", and parentheses group sub-expressions.

        Args:
            turbo: the tag expression.
            poly: "lat lon lat lon ..." polygon used as the spatial filter.
            recurse: when true (and ``out`` is true) the result set is
                extended with ``>`` before being output.
            out: whether to append the output statement; false when
                compiling a nested group.

        Returns:
            ``(ql, setId)`` where ``setId`` is the integer such that the
            final result lives in the QL set ``.i<setId>``.

        Raises:
            ValueError: on unbalanced parentheses or a token that is neither
                a group reference nor contains ``=`` / ``!=``.
        """
        parts = []
        if '(' in turbo:
            def compileGroup(match):
                # Every top-level group is compiled on its own and replaced by
                # the numeric id of its result set.
                groupQl, groupSetId = self.getQl(match.group(1), poly=poly, recurse=recurse, out=False)
                parts.append(groupQl)
                return str(groupSetId)

            turbo = regex.sub(self._PAREN_GROUP, compileGroup, turbo)
            if '(' in turbo:
                raise ValueError("Unbalanced parentheses in tag expression: {!r}".format(turbo))

        # Collapse stray whitespace so it does not produce empty tokens.
        turbo = " ".join(turbo.split())
        turbo = turbo.replace(" OR ", "||").replace(" ", "&&")
        area = 'poly:"' + poly + '"'

        setIds = []
        for turboSet in turbo.split("&&"):
            # union of the alternatives
            unionName = ".u" + str(self.setId)
            self.setId += 1
            members = "".join(self._tagToQl(turboTag, area) for turboTag in turboSet.split("||"))
            parts.append("({0})->{1};\n".format(members, unionName))
            setIds.append(unionName)

        # intersect: "node.a.b" selects the nodes present in both sets
        resultSetId = self.setId
        self.setId += 1
        resultName = ".i" + str(resultSetId)
        parts.append("(node{0};\nway{0};\n)->{1};\n".format("".join(setIds), resultName))

        if out:
            if recurse:
                parts.append("({0};>;)->{0};\n".format(resultName))
            parts.append("{} out;\n".format(resultName))
        return "".join(parts), resultSetId
def main():
    """Create a QueryOsm instance and run its export over every loaded CMA."""
    QueryOsm().queryOsm()
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()