Skip to content

Commit

Permalink
Include pgstac migrations in pypgstac package (#108)
Browse files Browse the repository at this point in the history
* Include pgstac migrations in pypgstac package

* Add tests for specific invalid asset regression

We've had both db and api regressions where assets would be included in
the hydrated item without an href causing validation errors. Adding
specific tests for this case to make sure the general do-no-merge key
solution address this problem, and the api-fix can be removed from
stac-fastapi backend.

* Clarify comments and variable names

* On the recursive portion of the dehydrate function, base_item and
full_item are not actual items, but sub-keys from root. Move to a
private inner function with clearer names, similar to hydrate.
* Update comments to be more precise with respect to leaving items on
the item or not.
  • Loading branch information
mmcfarland authored May 5, 2022
1 parent 5263ac0 commit c1e227b
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 48 deletions.
90 changes: 47 additions & 43 deletions pypgstac/pypgstac/hydration.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,51 +57,55 @@ def dehydrate(base_item: Dict[str, Any], full_item: Dict[str, Any]) -> Dict[str,
key should not be rehydrated with the corresponding base item value. This will allow
collection item-assets to contain keys that may not be present on individual items.
"""
out: dict = {}
for key, value in full_item.items():
if base_item is None or key not in base_item:
# Nothing to dehyrate from, preserve item value
out[key] = value
continue

if base_item[key] == value:
# Equal values, no need to dehydrate
continue

if isinstance(base_item[key], list) and isinstance(value, list):
if len(base_item[key]) == len(value):
# Equal length lists dehydrate dicts at each matching index
# and use incoming item values for other types
out[key] = []
for bv, v in zip(base_item[key], value):
if isinstance(bv, dict) and isinstance(v, dict):
dehydrated = dehydrate(bv, v)
apply_marked_keys(bv, v, dehydrated)
out[key].append(dehydrated)
else:
out[key].append(v)

def strip(base_value: Dict[str, Any], item_value: Dict[str, Any]) -> Dict[str, Any]:
out: dict = {}
for key, value in item_value.items():
if base_value is None or key not in base_value:
# Nothing on base; preserve item value in the dehydrated item
out[key] = value
continue

if base_value[key] == value:
# Equal values; do not include in the dehydrated item
continue

if isinstance(base_value[key], list) and isinstance(value, list):
if len(base_value[key]) == len(value):
# Equal length lists dehydrate dicts at each matching index
# and use incoming item values for other types
out[key] = []
for bv, v in zip(base_value[key], value):
if isinstance(bv, dict) and isinstance(v, dict):
dehydrated = strip(bv, v)
apply_marked_keys(bv, v, dehydrated)
out[key].append(dehydrated)
else:
out[key].append(v)
else:
# Unequal length lists are not dehydrated and just use the
# incoming item value
out[key] = value
continue

if value is None or value == []:
# Don't keep empty values
continue

if isinstance(value, dict):
# After dehdrating a dict, mark any keys that are present on the
# base item but not in the incoming item as `do-not-merge` during
# rehydration
dehydrated = strip(base_value[key], value)
apply_marked_keys(base_value[key], value, dehydrated)
out[key] = dehydrated
continue
else:
# Unequal length lists are not dehydrated and just use the
# incoming item value
# Unequal non-dict values are copied over from the incoming item
out[key] = value
continue

if value is None or value == []:
# Don't keep empty values
continue

if isinstance(value, dict):
# After dehdrating a dict, mark any keys that are present on the
# base item but not in the incoming item as `do-not-merge` during
# rehydration
dehydrated = dehydrate(base_item[key], value)
apply_marked_keys(base_item[key], value, dehydrated)
out[key] = dehydrated
continue
else:
# Unequal non-dict values are copied over from the incoming item
out[key] = value
return out
return out

return strip(base_item, full_item)


def apply_marked_keys(
Expand Down
8 changes: 4 additions & 4 deletions pypgstac/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
"types-orjson==0.1.1",
],
"psycopg": [
"psycopg[binary]==3.0.*",
"psycopg-pool==3.1.*",
]
"psycopg[binary]==3.0.*",
"psycopg-pool==3.1.*",
],
}


Expand All @@ -49,7 +49,7 @@
url="https://github.com/stac-utils/pgstac",
license="MIT",
packages=find_namespace_packages(exclude=["tests", "scripts"]),
package_data={"": ["py.typed"], "migrations": ["pypgstac/migrations/pgstac*.sql"]},
package_data={"": ["migrations/pgstac*.sql", "py.typed"]},
zip_safe=False,
install_requires=install_requires,
tests_require=[extra_reqs["dev"], extra_reqs["psycopg"]],
Expand Down
19 changes: 19 additions & 0 deletions pypgstac/tests/hydration/test_dehydrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,22 @@ def test_equal_list_of_non_dicts() -> None:

dehydrated = hydration.dehydrate(base_item, item)
assert dehydrated == {"assets": {"thumbnail": {"href": "http://foo.com"}}}


def test_invalid_assets_marked() -> None:
"""
Assets can be included on item-assets that are not uniformly included on
individual items. Ensure that base item asset keys without a matching item
key are marked do-no-merge after dehydration.
"""
base_item = {
"type": "Feature",
"assets": {"asset1": {"name": "Asset one"}, "asset2": {"name": "Asset two"}},
}
hydrated = {"assets": {"asset1": {"name": "Asset one", "href": "http://foo.com"}}}

dehydrated = hydration.dehydrate(base_item, hydrated)

assert dehydrated == {
"assets": {"asset1": {"href": "http://foo.com"}, "asset2": DO_NOT_MERGE_MARKER},
}
25 changes: 24 additions & 1 deletion pypgstac/tests/hydration/test_hydrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,34 @@ def test_deeply_nested_dict() -> None:


def test_equal_list_of_non_dicts() -> None:
"""Values of lists that match base_item should be dehydrated off"""
"""Values of lists that match base_item should be hydrated back on"""
base_item = {"assets": {"thumbnail": {"roles": ["thumbnail"]}}}
dehydrated = {"assets": {"thumbnail": {"href": "http://foo.com"}}}

hydrated = hydration.hydrate(base_item, dehydrated)
assert hydrated == {
"assets": {"thumbnail": {"roles": ["thumbnail"], "href": "http://foo.com"}}
}


def test_invalid_assets_removed() -> None:
"""
Assets can be included on item-assets that are not uniformly included on
individual items. Ensure that item asset keys from base_item aren't included
after hydration
"""
base_item = {
"type": "Feature",
"assets": {"asset1": {"name": "Asset one"}, "asset2": {"name": "Asset two"}},
}

dehydrated = {
"assets": {"asset1": {"href": "http://foo.com"}, "asset2": DO_NOT_MERGE_MARKER},
}

hydrated = hydration.hydrate(base_item, dehydrated)

assert hydrated == {
"type": "Feature",
"assets": {"asset1": {"name": "Asset one", "href": "http://foo.com"}},
}

0 comments on commit c1e227b

Please sign in to comment.