Include pgstac migrations in pypgstac package (#108)

* Include pgstac migrations in pypgstac package * Add tests for specific invalid asset regression We've had both db and api regressions where assets would be included in the hydrated item without an href causing validation errors. Adding specific tests for this case to make sure the general do-no-merge key solution address this problem, and the api-fix can be removed from stac-fastapi backend. * Clarify comments and variable names * On the recursive portion of the dehydrate function, base_item and full_item are not actual items, but sub-keys from root. Move to a private inner function with clearer names, similar to hydrate. * Update comments to be more precise with respect to leaving items on the item or not.
stac-utils · May 5, 2022 · c1e227b · c1e227b
1 parent 5263ac0
commit c1e227b
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 48 deletions.
diff --git a/pypgstac/pypgstac/hydration.py b/pypgstac/pypgstac/hydration.py
@@ -57,51 +57,55 @@ def dehydrate(base_item: Dict[str, Any], full_item: Dict[str, Any]) -> Dict[str,
     key should not be rehydrated with the corresponding base item value. This will allow
     collection item-assets to contain keys that may not be present on individual items.
     """
-    out: dict = {}
-    for key, value in full_item.items():
-        if base_item is None or key not in base_item:
-            # Nothing to dehyrate from, preserve item value
-            out[key] = value
-            continue
-
-        if base_item[key] == value:
-            # Equal values, no need to dehydrate
-            continue
-
-        if isinstance(base_item[key], list) and isinstance(value, list):
-            if len(base_item[key]) == len(value):
-                # Equal length lists dehydrate dicts at each matching index
-                # and use incoming item values for other types
-                out[key] = []
-                for bv, v in zip(base_item[key], value):
-                    if isinstance(bv, dict) and isinstance(v, dict):
-                        dehydrated = dehydrate(bv, v)
-                        apply_marked_keys(bv, v, dehydrated)
-                        out[key].append(dehydrated)
-                    else:
-                        out[key].append(v)
+
+    def strip(base_value: Dict[str, Any], item_value: Dict[str, Any]) -> Dict[str, Any]:
+        out: dict = {}
+        for key, value in item_value.items():
+            if base_value is None or key not in base_value:
+                # Nothing on base; preserve item value in the dehydrated item
+                out[key] = value
+                continue
+
+            if base_value[key] == value:
+                # Equal values; do not include in the dehydrated item
+                continue
+
+            if isinstance(base_value[key], list) and isinstance(value, list):
+                if len(base_value[key]) == len(value):
+                    # Equal length lists dehydrate dicts at each matching index
+                    # and use incoming item values for other types
+                    out[key] = []
+                    for bv, v in zip(base_value[key], value):
+                        if isinstance(bv, dict) and isinstance(v, dict):
+                            dehydrated = strip(bv, v)
+                            apply_marked_keys(bv, v, dehydrated)
+                            out[key].append(dehydrated)
+                        else:
+                            out[key].append(v)
+                else:
+                    # Unequal length lists are not dehydrated and just use the
+                    # incoming item value
+                    out[key] = value
+                continue
+
+            if value is None or value == []:
+                # Don't keep empty values
+                continue
+
+            if isinstance(value, dict):
+                # After dehdrating a dict, mark any keys that are present on the
+                # base item but not in the incoming item as `do-not-merge` during
+                # rehydration
+                dehydrated = strip(base_value[key], value)
+                apply_marked_keys(base_value[key], value, dehydrated)
+                out[key] = dehydrated
+                continue
             else:
-                # Unequal length lists are not dehydrated and just use the
-                # incoming item value
+                # Unequal non-dict values are copied over from the incoming item
                 out[key] = value
-            continue
-
-        if value is None or value == []:
-            # Don't keep empty values
-            continue
-
-        if isinstance(value, dict):
-            # After dehdrating a dict, mark any keys that are present on the
-            # base item but not in the incoming item as `do-not-merge` during
-            # rehydration
-            dehydrated = dehydrate(base_item[key], value)
-            apply_marked_keys(base_item[key], value, dehydrated)
-            out[key] = dehydrated
-            continue
-        else:
-            # Unequal non-dict values are copied over from the incoming item
-            out[key] = value
-    return out
+        return out
+
+    return strip(base_item, full_item)
 
 
 def apply_marked_keys(

diff --git a/pypgstac/setup.py b/pypgstac/setup.py
@@ -24,9 +24,9 @@
         "types-orjson==0.1.1",
     ],
     "psycopg": [
-     "psycopg[binary]==3.0.*",
-     "psycopg-pool==3.1.*",
-    ]
+        "psycopg[binary]==3.0.*",
+        "psycopg-pool==3.1.*",
+    ],
 }
 
 
@@ -49,7 +49,7 @@
     url="https://github.com/stac-utils/pgstac",
     license="MIT",
     packages=find_namespace_packages(exclude=["tests", "scripts"]),
-    package_data={"": ["py.typed"], "migrations": ["pypgstac/migrations/pgstac*.sql"]},
+    package_data={"": ["migrations/pgstac*.sql", "py.typed"]},
     zip_safe=False,
     install_requires=install_requires,
     tests_require=[extra_reqs["dev"], extra_reqs["psycopg"]],

diff --git a/pypgstac/tests/hydration/test_dehydrate.py b/pypgstac/tests/hydration/test_dehydrate.py
@@ -186,3 +186,22 @@ def test_equal_list_of_non_dicts() -> None:
 
     dehydrated = hydration.dehydrate(base_item, item)
     assert dehydrated == {"assets": {"thumbnail": {"href": "http://foo.com"}}}
+
+
+def test_invalid_assets_marked() -> None:
+    """
+    Assets can be included on item-assets that are not uniformly included on
+    individual items. Ensure that base item asset keys without a matching item
+    key are marked do-no-merge after dehydration.
+    """
+    base_item = {
+        "type": "Feature",
+        "assets": {"asset1": {"name": "Asset one"}, "asset2": {"name": "Asset two"}},
+    }
+    hydrated = {"assets": {"asset1": {"name": "Asset one", "href": "http://foo.com"}}}
+
+    dehydrated = hydration.dehydrate(base_item, hydrated)
+
+    assert dehydrated == {
+        "assets": {"asset1": {"href": "http://foo.com"}, "asset2": DO_NOT_MERGE_MARKER},
+    }
diff --git a/pypgstac/tests/hydration/test_hydrate.py b/pypgstac/tests/hydration/test_hydrate.py
@@ -158,11 +158,34 @@ def test_deeply_nested_dict() -> None:
 
 
 def test_equal_list_of_non_dicts() -> None:
-    """Values of lists that match base_item should be dehydrated off"""
+    """Values of lists that match base_item should be hydrated back on"""
     base_item = {"assets": {"thumbnail": {"roles": ["thumbnail"]}}}
     dehydrated = {"assets": {"thumbnail": {"href": "http://foo.com"}}}
 
     hydrated = hydration.hydrate(base_item, dehydrated)
     assert hydrated == {
         "assets": {"thumbnail": {"roles": ["thumbnail"], "href": "http://foo.com"}}
     }
+
+
+def test_invalid_assets_removed() -> None:
+    """
+    Assets can be included on item-assets that are not uniformly included on
+    individual items. Ensure that item asset keys from base_item aren't included
+    after hydration
+    """
+    base_item = {
+        "type": "Feature",
+        "assets": {"asset1": {"name": "Asset one"}, "asset2": {"name": "Asset two"}},
+    }
+
+    dehydrated = {
+        "assets": {"asset1": {"href": "http://foo.com"}, "asset2": DO_NOT_MERGE_MARKER},
+    }
+
+    hydrated = hydration.hydrate(base_item, dehydrated)
+
+    assert hydrated == {
+        "type": "Feature",
+        "assets": {"asset1": {"name": "Asset one", "href": "http://foo.com"}},
+    }