Skip to content

Commit

Permalink
Merge pull request #20 from stac-utils/self_link
Browse files Browse the repository at this point in the history
Add an option to add a self link column #15
  • Loading branch information
m-mohr authored Jul 19, 2023
2 parents 39f13b1 + 84e6bf7 commit 35aa16e
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 110 deletions.
39 changes: 35 additions & 4 deletions stac_geoparquet/stac_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@
import numpy as np
import shapely.geometry

from urllib.parse import urlparse

from stac_geoparquet.utils import fix_empty_multipolygon

STAC_ITEM_TYPES = ["application/json", "application/geo+json"]

SELF_LINK_COLUMN = "self_link"


def _fix_array(v):
if isinstance(v, np.ndarray):
Expand All @@ -24,7 +30,9 @@ def _fix_array(v):
return v


def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:
def to_geodataframe(
items: Sequence[dict[str, Any]], add_self_link: bool = False
) -> geopandas.GeoDataFrame:
"""
Convert a sequence of STAC items to a :class:`geopandas.GeoDataFrame`.
Expand All @@ -34,6 +42,7 @@ def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:
Parameters
----------
items: A sequence of STAC items.
add_self_link: Add the absolute link (if available) to the source STAC Item as a separate column named "self_link"
Returns
-------
Expand All @@ -46,6 +55,17 @@ def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:
if k in item2:
raise ValueError("k", k)
item2[k] = v
if add_self_link:
self_href = None
for link in item["links"]:
if (
link["rel"] == "self"
and (not link["type"] or link["type"] in STAC_ITEM_TYPES)
and urlparse(link["href"]).netloc
):
self_href = link["href"]
break
item2[SELF_LINK_COLUMN] = self_href
items2.append(item2)

# Filter out missing geoms in MultiPolygons
Expand All @@ -61,7 +81,16 @@ def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:

gdf = geopandas.GeoDataFrame(items2, geometry=geometry, crs="WGS84")

for column in ["datetime", "start_datetime", "end_datetime"]:
for column in [
"datetime", # common metadata
"start_datetime",
"end_datetime",
"created",
"updated",
"expires", # timestamps extension
"published",
"unpublished",
]:
if column in gdf.columns:
gdf[column] = pd.to_datetime(gdf[column], format="ISO8601")

Expand All @@ -82,7 +111,7 @@ def to_geodataframe(items: Sequence[dict[str, Any]]) -> geopandas.GeoDataFrame:
columns.remove(col)

gdf = pd.concat([gdf[columns], gdf.drop(columns=columns)], axis="columns")
for k in ["type", "stac_version", "id", "collection"]:
for k in ["type", "stac_version", "id", "collection", SELF_LINK_COLUMN]:
if k in gdf:
gdf[k] = gdf[k].astype("string")

Expand Down Expand Up @@ -113,7 +142,9 @@ def to_dict(record: dict) -> dict:
for k, v in record.items():
v = _fix_array(v)

if k in top_level_keys:
if k == SELF_LINK_COLUMN:
continue
elif k in top_level_keys:
item[k] = v
else:
properties[k] = v
Expand Down
224 changes: 118 additions & 106 deletions tests/test_stac_geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_assert_equal():
assert_equal(a, b)


ITEM_SELF_HREF = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/ia_m_4209150_sw_15_060_20190828_20191105" # noqa: E501
ITEM = {
"id": "ia_m_4209150_sw_15_060_20190828_20191105",
"bbox": [-91.879788, 42.121621, -91.807132, 42.191372],
Expand All @@ -47,11 +48,7 @@ def test_assert_equal():
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/",
},
{
"rel": "self",
"type": "application/geo+json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/ia_m_4209150_sw_15_060_20190828_20191105", # noqa: E501
},
{"rel": "self", "type": "application/geo+json", "href": ITEM_SELF_HREF},
{
"rel": "preview",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105", # noqa: E501
Expand Down Expand Up @@ -128,110 +125,108 @@ def test_assert_equal():
"stac_version": "1.0.0",
}


def test_to_geodataframe():
result = stac_geoparquet.to_geodataframe([ITEM])
expected = geopandas.GeoDataFrame(
{
"type": {0: "Feature"},
"stac_version": {0: "1.0.0"},
"stac_extensions": {
0: [
"https://stac-extensions.github.io/eo/v1.0.0/schema.json",
"https://stac-extensions.github.io/projection/v1.0.0/schema.json",
]
EXPECTED_GDF = {
"type": {0: "Feature"},
"stac_version": {0: "1.0.0"},
"stac_extensions": {
0: [
"https://stac-extensions.github.io/eo/v1.0.0/schema.json",
"https://stac-extensions.github.io/projection/v1.0.0/schema.json",
]
},
"id": {0: "ia_m_4209150_sw_15_060_20190828_20191105"},
"geometry": {0: shapely.geometry.shape(ITEM["geometry"])},
"bbox": {0: [-91.879788, 42.121621, -91.807132, 42.191372]},
"links": {
0: [
{
"rel": "collection",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
},
"id": {0: "ia_m_4209150_sw_15_060_20190828_20191105"},
"geometry": {0: shapely.geometry.shape(ITEM["geometry"])},
"bbox": {0: [-91.879788, 42.121621, -91.807132, 42.191372]},
"links": {
0: [
{
"rel": "collection",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
},
{
"rel": "parent",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
},
{
"rel": "root",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/",
},
{
"rel": "self",
"type": "application/geo+json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/ia_m_4209150_sw_15_060_20190828_20191105", # noqa: E501
},
{
"rel": "parent",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip",
},
{
"rel": "root",
"type": "application/json",
"href": "https://planetarycomputer.microsoft.com/api/stac/v1/",
},
{
"rel": "self",
"type": "application/geo+json",
"href": ITEM_SELF_HREF,
},
{
"rel": "preview",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105", # noqa: E501
"title": "Map of item",
"type": "text/html",
},
]
},
"assets": {
0: {
"image": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_60cm_2019/42091/m_4209150_sw_15_060_20190828.tif", # noqa: E501
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"roles": ["data"],
"title": "RGBIR COG tile",
"eo:bands": [
{"name": "Red", "common_name": "red"},
{"name": "Green", "common_name": "green"},
{"name": "Blue", "common_name": "blue"},
{
"rel": "preview",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105", # noqa: E501
"title": "Map of item",
"type": "text/html",
"name": "NIR",
"common_name": "nir",
"description": "near-infrared",
},
]
],
},
"assets": {
0: {
"image": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_60cm_2019/42091/m_4209150_sw_15_060_20190828.tif", # noqa: E501
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"roles": ["data"],
"title": "RGBIR COG tile",
"eo:bands": [
{"name": "Red", "common_name": "red"},
{"name": "Green", "common_name": "green"},
{"name": "Blue", "common_name": "blue"},
{
"name": "NIR",
"common_name": "nir",
"description": "near-infrared",
},
],
},
"metadata": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_fgdc_2019/42091/m_4209150_sw_15_060_20190828.txt", # noqa: E501
"type": "text/plain",
"roles": ["metadata"],
"title": "FGDC Metdata",
},
"thumbnail": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_60cm_2019/42091/m_4209150_sw_15_060_20190828.200.jpg", # noqa: E501
"type": "image/jpeg",
"roles": ["thumbnail"],
"title": "Thumbnail",
},
"tilejson": {
"title": "TileJSON with default rendering",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105&assets=image&asset_bidx=image%7C1%2C2%2C3", # noqa: E501
"type": "application/json",
"roles": ["tiles"],
},
"rendered_preview": {
"title": "Rendered preview",
"rel": "preview",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105&assets=image&asset_bidx=image%7C1%2C2%2C3", # noqa: E501
"roles": ["overview"],
"type": "image/png",
},
}
"metadata": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_fgdc_2019/42091/m_4209150_sw_15_060_20190828.txt", # noqa: E501
"type": "text/plain",
"roles": ["metadata"],
"title": "FGDC Metdata",
},
"collection": {0: "naip"},
"gsd": {0: 0.6},
"datetime": {0: pd.Timestamp("2019-08-28 00:00:00+0000", tz="UTC")},
"naip:year": {0: "2019"},
"proj:bbox": {0: [592596.0, 4663966.8, 598495.8, 4671633.0]},
"proj:epsg": {0: 26915},
"naip:state": {0: "ia"},
"proj:shape": {0: [12777, 9833]},
"proj:transform": {
0: [0.6, 0.0, 592596.0, 0.0, -0.6, 4671633.0, 0.0, 0.0, 1.0]
"thumbnail": {
"href": "https://naipeuwest.blob.core.windows.net/naip/v002/ia/2019/ia_60cm_2019/42091/m_4209150_sw_15_060_20190828.200.jpg", # noqa: E501
"type": "image/jpeg",
"roles": ["thumbnail"],
"title": "Thumbnail",
},
"tilejson": {
"title": "TileJSON with default rendering",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105&assets=image&asset_bidx=image%7C1%2C2%2C3", # noqa: E501
"type": "application/json",
"roles": ["tiles"],
},
"rendered_preview": {
"title": "Rendered preview",
"rel": "preview",
"href": "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?collection=naip&item=ia_m_4209150_sw_15_060_20190828_20191105&assets=image&asset_bidx=image%7C1%2C2%2C3", # noqa: E501
"roles": ["overview"],
"type": "image/png",
},
}
)
},
"collection": {0: "naip"},
"gsd": {0: 0.6},
"datetime": {0: pd.Timestamp("2019-08-28 00:00:00+0000", tz="UTC")},
"naip:year": {0: "2019"},
"proj:bbox": {0: [592596.0, 4663966.8, 598495.8, 4671633.0]},
"proj:epsg": {0: 26915},
"naip:state": {0: "ia"},
"proj:shape": {0: [12777, 9833]},
"proj:transform": {0: [0.6, 0.0, 592596.0, 0.0, -0.6, 4671633.0, 0.0, 0.0, 1.0]},
}


def test_to_geodataframe():
result = stac_geoparquet.to_geodataframe([ITEM])
expected = geopandas.GeoDataFrame(EXPECTED_GDF)
for k in ["type", "stac_version", "id", "collection"]:
if k in expected:
expected[k] = expected[k].astype("string")
Expand All @@ -243,18 +238,35 @@ def test_to_geodataframe():
assert_equal(ic1, ic2)


def test_to_geodataframe_with_self_link():
    """Check ``to_geodataframe(..., add_self_link=True)`` against the fixture.

    The expected frame is the shared ``EXPECTED_GDF`` fixture plus one extra
    ``self_link`` column holding ``ITEM_SELF_HREF``. The frame is then
    converted back to an item collection and compared with the source item.
    """
    actual = stac_geoparquet.to_geodataframe([ITEM], add_self_link=True)

    # Build a new mapping so the module-level fixture is not mutated; the
    # extra key goes last, matching a copy-then-assign on the fixture.
    columns = {**EXPECTED_GDF, "self_link": {0: ITEM_SELF_HREF}}
    expected = geopandas.GeoDataFrame(columns)
    for name in ("type", "stac_version", "id", "collection", "self_link"):
        if name in expected:
            expected[name] = expected[name].astype("string")

    pandas.testing.assert_frame_equal(actual, expected)

    # Round trip: the frame converted back must equal the source item.
    assert_equal(to_item_collection(actual), pystac.ItemCollection([ITEM]))


def test_s1_grd():
# item = requests.get("https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_EW_GRDM_1SSH_20150129T081916_20150129T081938_004383_005598").json() # noqa: E501
item = requests.get(
"https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-1-grd/items/S1A_EW_GRDM_1SSH_20150129T081916_20150129T081938_004383_005598" # noqa: E501
).json()

EO_V10 = "https://stac-extensions.github.io/eo/v1.0.0/schema.json"
EO_V11 = "https://stac-extensions.github.io/eo/v1.1.0/schema.json"

# pystac migrates EO extension to latest version, but PC is still on 1.0.0
for i, ext in enumerate(item["stac_extensions"]):
if ext == "https://stac-extensions.github.io/eo/v1.0.0/schema.json":
item["stac_extensions"][
i
] = "https://stac-extensions.github.io/eo/v1.1.0/schema.json"
if ext == EO_V10:
item["stac_extensions"][i] = EO_V11

item["geometry"] = fix_empty_multipolygon(item["geometry"]).__geo_interface__
df = stac_geoparquet.to_geodataframe([item])
Expand Down

0 comments on commit 35aa16e

Please sign in to comment.