-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlplm_geoprocessing.py
207 lines (156 loc) · 7.19 KB
/
lplm_geoprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
from typing import Callable
import geopandas
import numpy
import pandas
import rasterio
from shapely.geometry import shape
from skimage.segmentation import slic
import xarray
## Segmentation
## ------------
def apply_segmentation(xds_grd: xarray.Dataset,
date: numpy.datetime64,
n_segments = 5000,
compactness = 0.1):
"""Performs SLIC0 segmentation on the image for the given date, adding a 'segment_id' variable to the dataset"""
# Segments an image according to:
# https://docs.dea.ga.gov.au/notebooks/Frequently_used_code/Image_segmentation.html
#
# For an explanation of slic params, see:
# https://scikit-image.org/docs/dev/api/skimage.segmentation.html#skimage.segmentation.slic
# skimage-friendly format
np_image = xds_grd.sel(date=date).to_numpy()
# This seems to give OK-ish results
np_segments = slic(np_image, n_segments=n_segments, slic_zero=True, compactness=compactness)
xa_segments = xarray.DataArray(np_segments,
coords=xds_grd.drop("date").coords,
dims=['y', 'x'],
attrs=xds_grd.attrs)
xds_grd["segment_id"] = xa_segments
# xarray has been modified in-place, nothing to return.
return None
def vectorize_segments(xds_grd):
"""Create a vector layer (GeoDataFrame) from the segment raster layer"""
# Borrowed heavily from:
# https://github.com/digitalearthafrica/deafrica-sandbox-notebooks/blob/main/Tools/deafrica_tools/spatial.py#L28
xa_segments = xds_grd.segment_id.astype("int16")
shapes = list(
rasterio.features.shapes(xa_segments.to_numpy(), transform = xds_grd.rio.transform())
)
polygons = [shape(polygon) for polygon, _ in shapes]
segment_ids = [int(segment_id) for _, segment_id in shapes]
gdf = geopandas.GeoDataFrame(
data={"segment_id": segment_ids},
geometry=polygons,
index=segment_ids,
crs=xds_grd.rio.crs )
# We have to sort here, otherwise subsequent bulk field additions get jumbled.
gdf.sort_index(axis=0, inplace=True)
return gdf
def get_segment_id(gdf, x, y, delta = 0.1):
"""
Get segment id for the given point.
Note:
* Lookup is performed with a very small bounding box, and it's possible multiple
segments intersect. The first match is selected.
* An optional `delta` argument can be set if operating at a different scale or CRS
"""
segment_id = gdf.cx[
x:x + delta,
y:y + delta
].iloc[0]["segment_id"]
return segment_id
## Lava analysis and region growing
## --------------------------------
def make_similarity_func(field: str, reference_sample: geopandas.GeoDataFrame) -> Callable[[geopandas.GeoSeries], float]:
low = reference_sample[field].quantile(0.0)
q1 = reference_sample[field].quantile(0.25)
q3 = reference_sample[field].quantile(0.75)
high = reference_sample[field].quantile(1.0)
f = lambda seg_data: numpy.interp(
x = seg_data[field],
xp = [low, q1, q3, high],
fp = [0.25, 1.0, 1.0, 0.25],
left = 0,
right = 0 )
return f
# Calculate lava-likeness index. Input should be a GeoSeries representing a single object
def local_lava_likeness(segment_data: geopandas.GeoSeries) -> float:
ll = (segment_data.lava_mean_similarity + segment_data.lava_std_similarity) / 2
return ll
def get_neighbours(gdf, segment_id):
return gdf[gdf.touches(gdf.loc[segment_id].geometry)]
def get_neighbours_of_neighbours(gdf, segment_id):
neighs = get_neighbours(gdf, segment_id)
n_of_n = []
for neighbor_segment_id, _ in neighs.iterrows():
n_of_n.append(get_neighbours(gdf, neighbor_segment_id))
to_exclude = [segment_id, *list(neighs.segment_id)]
n_of_n = pandas.concat(n_of_n).drop(to_exclude)
return geopandas.GeoDataFrame(n_of_n)
def neighbourhood_lava_likeness(gdf, segment_id) -> float:
neighbors = get_neighbours(gdf, segment_id)
# COULDDO: Consider neighbours of neighbours.
# neighbors_of_neighbours = get_neighbours_of_neighbours(gdf, segment_id)
return (
0.5 * gdf.loc[segment_id]["local_lava_likeness"] +
0.5 * neighbors["local_lava_likeness"].sum() / len(neighbors) )
def get_unvisited_neighbors(gdf, segment_id, group_id):
all_neighbors = gdf[gdf.touches(gdf.loc[segment_id].geometry)]
new_neighbors = all_neighbors[gdf.group != group_id]
return new_neighbors
# COULDDO: Parameterize this. But, have to also consider the weighting of the components.
LL_THRESHOLD = 0.5
def lava_likeness_overall(gdf, start_segment_id):
"""
Final lava-likeness calculation. Grows a region using the starting segment as a seed,
taking into account the local and neighbourhood lava-likenesses, and whether combined
they exceed a preset threshold.
"""
gdf["neighbourhood_lava_likeness"] = gdf["local_lava_likeness"]
gdf["group"] = gdf.segment_id
gdf.at[start_segment_id, "neighbourhood_lava_likeness"] = 1.0
to_visit = [start_segment_id]
while len(to_visit) > 0:
current_segment_id = to_visit.pop()
neighs = get_unvisited_neighbors(gdf, current_segment_id, start_segment_id)
for neigh_seg_id, _ in neighs.iterrows():
ll = neighbourhood_lava_likeness(gdf, neigh_seg_id)
if ll > LL_THRESHOLD:
gdf.at[neigh_seg_id, "neighbourhood_lava_likeness"] = 1.0
gdf.at[neigh_seg_id, "group"] = start_segment_id
to_visit.append(neigh_seg_id)
def extract_lava_region(gdf, start_segment_id):
return gdf[gdf["group"] == start_segment_id].dissolve()
def segment_stats(xds_grd):
"""
Calculate segment pixel statistics: mean; standard deviation.
Note:
* These are calculated for all dates, effectively using a single segmentation result as a mask layer,
i.e., same segment, different day.
* This would be impractical for larger datasets, where subsetting would be appropriate.
"""
# Available stat/aggregate functions are listed here:
# https://xarray.pydata.org/en/v0.11.3/generated/xarray.core.groupby.DataArrayGroupBy.html
segments = xds_grd.groupby('segment_id')
means = segments.mean()
stds = segments.std()
return xarray.Dataset({"mean": means, "std":stds})
def enrich_stats(gdf, xds_segstats, date):
"""Enrich the GeoDataFrame with the segment stats"""
gdf["mean"] = xds_segstats.sel(date=date)["mean"]
gdf["std"] = xds_segstats.sel(date=date)["std"]
# gdf was modified in-place, so nothing to return.
return None
def enrich_lava_likeness(gdf, reference_sample):
"""
Enrich the GeoDataFrame with the derived lavalikeness metrics, based on the given reference sample.
The sample should be from known lava flow.
"""
lava_mean_similarity = make_similarity_func("mean", reference_sample)
lava_std_similarity = make_similarity_func("std", reference_sample)
gdf["lava_mean_similarity"] = gdf.apply(lambda gds: lava_mean_similarity(gds), axis=1)
gdf["lava_std_similarity"] = gdf.apply(lambda gds: lava_std_similarity(gds), axis=1)
gdf["local_lava_likeness"] = gdf.apply(local_lava_likeness, axis=1)
# gdf was modified in-place, so nothing to return.
return None