From c4539218ba814540850063ea99dc27cc6f1c7047 Mon Sep 17 00:00:00 2001
From: BinamB
Date: Wed, 5 Feb 2025 08:38:51 -0600
Subject: [PATCH] Fix urls_metadata

---
 indexd/index/drivers/single_table_alchemy.py | 24 ++++++++++-
 tests/test_client.py                         | 42 ++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/indexd/index/drivers/single_table_alchemy.py b/indexd/index/drivers/single_table_alchemy.py
index 2bbc8448..c2eab392 100644
--- a/indexd/index/drivers/single_table_alchemy.py
+++ b/indexd/index/drivers/single_table_alchemy.py
@@ -84,6 +84,7 @@ def to_document_dict(self):
             if self.content_updated_date is not None
             else None
         )
+        urls_metadata = generate_url_metadata(self.url_metadata, self.urls)
 
         return {
             "did": self.guid,
@@ -94,7 +95,7 @@ def to_document_dict(self):
             "version": self.version,
             "uploader": self.uploader,
             "urls": self.urls,
-            "urls_metadata": self.url_metadata,
+            "urls_metadata": urls_metadata,
             "acl": acl,
             "authz": authz,
             "hashes": self.hashes,
@@ -1529,6 +1530,27 @@ def check_url_metadata(url_metadata, record):
             raise UserError("url {} in url_metadata does not exist".format(url))
 
 
+def generate_url_metadata(record_url_metadata, urls):
+    """
+    Generates the urls_metadata for an indexd record.
+
+    Every url in `urls` gets a key in the result; a url with no stored
+    metadata maps to an empty dict. Neither argument is modified.
+
+    Args:
+        record_url_metadata (dict): urls metadata for an indexd record, may be None
+        urls (list): list of urls of an indexd record, may be None
+
+    Returns:
+        dict: a new dict holding the stored metadata plus `{}` for each missing url
+    """
+    # Work on a copy so building the response never alters the record's own dict.
+    urls_metadata = dict(record_url_metadata or {})
+    for url in urls or []:
+        urls_metadata.setdefault(url, {})
+    return urls_metadata
+
+
 def get_record_if_exists(did, session):
     """
     Searches for a record with this did and returns it.
diff --git a/tests/test_client.py b/tests/test_client.py
index 0b900328..7f464e68 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -2846,3 +2846,45 @@ def test_timestamps_no_updated_without_created(
     data["content_updated_date"] = "2022-03-14T17:02:54"
     create_obj_resp = client.post("/index/", json=data, headers=user)
     assert create_obj_resp.status_code == 400
+
+
+def test_check_urls_metadata(client, user, combined_default_and_single_table_settings):
+    """
+    Checks that the urls_metadata field has the same url keys as the urls
+    """
+    data = get_doc()
+    res = client.post("/index/", json=data, headers=user)
+    assert res.status_code == 200
+    did = res.json["did"]
+
+    res = client.get("/index/" + did, headers=user)
+    assert res.status_code == 200
+    rec = res.json
+
+    # Same length and same key set: one metadata entry per url, no extras.
+    assert len(rec["urls_metadata"]) == len(rec["urls"])
+    assert set(rec["urls_metadata"]) == set(rec["urls"])
+
+
+def test_check_urls_metadata_partially_missing_metadata(
+    client, user, combined_default_and_single_table_settings
+):
+    """
+    Checks that a url posted without metadata still gets a urls_metadata key
+    """
+    new_url = "s3://new-data/location.txt"
+    data = get_doc(has_urls_metadata=True)
+    data["urls"].append(new_url)
+    res = client.post("/index/", json=data, headers=user)
+    assert res.status_code == 200
+    did = res.json["did"]
+
+    res = client.get("/index/" + did, headers=user)
+    assert res.status_code == 200
+    rec = res.json
+
+    # Same length and same key set: one metadata entry per url, no extras.
+    assert len(rec["urls_metadata"]) == len(rec["urls"])
+    assert set(rec["urls_metadata"]) == set(rec["urls"])
+    # The url that was posted without metadata must still be listed.
+    assert new_url in rec["urls_metadata"]