Skip to content

Commit

Permalink
Ensure special keys are not in content when loaded (#112)
Browse files Browse the repository at this point in the history
* Ensure special keys not in content when loaded

The loader popped keys like collection off the item, then dehydrated the
item to be used as the 'content'. With collection removed prior to
dehydration, it was flagged with the "do-not-merge" marker because the
key is on the base_item. Instead, ensure that id, collection, and
geometry are not in content, as they are stored on the table row and
shouldn't participate in hydration.

Additionally, bbox was previously a derived value at search runtime but
was recently changed to a returned value if it existed on the item.
However, during loading, the bbox was dropped, so it would never exist on
the persisted item.

* Simplify geometry check
  • Loading branch information
mmcfarland authored May 13, 2022
1 parent 3c61e03 commit b06cdc6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 6 deletions.
15 changes: 10 additions & 5 deletions pypgstac/pypgstac/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict:

base_item, key, partition_trunc = self.collection_json(item["collection"])

out["id"] = item.pop("id")
out["collection"] = item.pop("collection")
out["id"] = item.get("id")
out["collection"] = item.get("collection")
properties: dict = item.get("properties", {})

dt = properties.get("datetime")
Expand Down Expand Up @@ -544,16 +544,21 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict:

out["partition"] = partition

bbox = item.pop("bbox")
geojson = item.pop("geometry")
if geojson is None and bbox is not None:
geojson = item.get("geometry")
if geojson is None:
geometry = None
else:
geometry = str(Geometry.from_geojson(geojson).wkb)
out["geometry"] = geometry

content = dehydrate(base_item, item)

# Remove keys from the dehydrated item content which are stored directly
# on the table row.
content.pop("id", None)
content.pop("collection", None)
content.pop("geometry", None)

out["content"] = orjson.dumps(content).decode()

return out
Expand Down
30 changes: 29 additions & 1 deletion pypgstac/tests/test_load.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for pypgstac."""
import json
from pathlib import Path
from pypgstac.load import Methods, Loader
from pypgstac.load import Methods, Loader, read_json
from psycopg.errors import UniqueViolation
import pytest

Expand Down Expand Up @@ -239,3 +240,30 @@ def test_load_items_dehydrated_ignore_succeeds(loader: Loader) -> None:
loader.load_items(
str(TEST_DEHYDRATED_ITEMS), insert_mode=Methods.ignore, dehydrated=True
)


def test_format_items_keys(loader: Loader) -> None:
    """Check which keys format_item puts on the row versus in the content.

    Loads the test collections, formats the first item read from
    TEST_ITEMS, and verifies that id, collection, geometry and content are
    top-level keys of the result, that id, collection and geometry are
    absent from the serialized content, and that bbox is present in it.
    """
    loader.load_collections(
        str(TEST_COLLECTIONS_JSON),
        insert_mode=Methods.ignore,
    )

    # Only the first item of the test file is needed for the key checks.
    first_item = next(iter(read_json(str(TEST_ITEMS))))
    formatted = loader.format_item(first_item)

    # Top level keys expected after format
    for row_key in ("id", "collection", "geometry", "content"):
        assert row_key in formatted

    # Special keys expected not to be in the item content
    item_content = json.loads(formatted["content"])
    for row_key in ("id", "collection", "geometry"):
        assert row_key not in item_content

    # Ensure bbox is included in content
    assert "bbox" in item_content

0 comments on commit b06cdc6

Please sign in to comment.