From 008d26434023afecd00052c7766b81cb8544d063 Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Sun, 28 Aug 2022 22:56:19 -0700 Subject: [PATCH 1/5] style: remove unused regex and consolidate curator tag logic --- backend/corpora/common/utils/regex.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/backend/corpora/common/utils/regex.py b/backend/corpora/common/utils/regex.py index 724262e737ab5..ee4da0985fe00 100644 --- a/backend/corpora/common/utils/regex.py +++ b/backend/corpora/common/utils/regex.py @@ -4,23 +4,20 @@ ID_REGEX = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" DATASET_ID_REGEX = f"(?P<dataset_id>{ID_REGEX})" COLLECTION_ID_REGEX = f"(?P<collection_id>{ID_REGEX})" -CURATOR_TAG_PREFIX_REGEX = r"(?P<tag_prefix>.*)" CONTROL_CHARS = r"[\x00-\x1f\x7f-\xa0]" CURATOR_TAG_REGEX = r"(?P<tag>.*)" def validate_curator_tag(curator_tag: str) -> bool: """ - Verify the correct curator tag format is obeyed. + Verify the correct curator tag format is obeyed (i.e., it is not a UUID) :param curator_tag: the tag name to validate. - :return: True if CURATOR_TAG_PREFIX_REGEX is matched. + :return: True if CURATOR_TAG_REGEX is matched. """ regex = f"^({DATASET_ID_REGEX}|{CURATOR_TAG_REGEX})$" matched = re.match(regex, curator_tag) - if matched and (tag := matched.groupdict().get("tag")): - if not re.search(ID_REGEX, tag): - return - else: - raise ValueError("Curator tag cannot contain the same shape as a UUID.") - raise ValueError("Invalid curator tag.") + if matched and matched.groupdict().get("tag"): + return True + else: + raise ValueError("Curator tag cannot assume UUID format.") From 1e1f5a558ac93efcdda0464b28f94eb0b6336dfd Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Sun, 28 Aug 2022 23:08:38 -0700 Subject: [PATCH 2/5] feat: defensively code against empty curator tag Technically our Connexion API def is doing this for us, but it is best to be accurate in our code. 
--- backend/corpora/common/utils/regex.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/corpora/common/utils/regex.py b/backend/corpora/common/utils/regex.py index ee4da0985fe00..b0afe79673e2e 100644 --- a/backend/corpora/common/utils/regex.py +++ b/backend/corpora/common/utils/regex.py @@ -5,7 +5,7 @@ DATASET_ID_REGEX = f"(?P<dataset_id>{ID_REGEX})" COLLECTION_ID_REGEX = f"(?P<collection_id>{ID_REGEX})" CONTROL_CHARS = r"[\x00-\x1f\x7f-\xa0]" -CURATOR_TAG_REGEX = r"(?P<tag>.*)" +CURATOR_TAG_REGEX = r"(?P<tag>.+)" def validate_curator_tag(curator_tag: str) -> bool: @@ -17,7 +17,10 @@ def validate_curator_tag(curator_tag: str) -> bool: """ regex = f"^({DATASET_ID_REGEX}|{CURATOR_TAG_REGEX})$" matched = re.match(regex, curator_tag) - if matched and matched.groupdict().get("tag"): - return True - else: - raise ValueError("Curator tag cannot assume UUID format.") + if matched: + matches = matched.groupdict() + if matches.get("tag"): + return True + elif matches.get("dataset_id"): + raise ValueError("Curator tag cannot assume UUID format.") + raise ValueError("Curator tag cannot be empty.") From 0eddd3ce161a220077802aca564b14fd0b92ac72 Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Sun, 28 Aug 2022 23:17:08 -0700 Subject: [PATCH 3/5] minor update to swagger --- backend/config/curation-api.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/config/curation-api.yml b/backend/config/curation-api.yml index 6837b60a1ad82..36fa2f4bc40da 100644 --- a/backend/config/curation-api.yml +++ b/backend/config/curation-api.yml @@ -336,9 +336,7 @@ paths: type: object additionalProperties: false properties: - curator_tag: - type: string - description: curator-provided tag + $ref: "#/components/schemas/curator_tag" example: curator_tag: "new/curator_tag" responses: @@ -376,7 +374,7 @@ paths: additionalProperties: false properties: curator_tag: - type: string + $ref: "#/components/schemas/curator_tag" id: $ref: "#/components/schemas/dataset_id" 
link: @@ -518,6 +516,7 @@ components: type: array curator_tag: "$ref": "#/components/schemas/curator_tag" + nullable: true dataset_id: "$ref": "#/components/schemas/dataset_id" type: object From 9ec6908d41e25fb03912483ea176082f2c6be6d1 Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Sun, 28 Aug 2022 23:28:21 -0700 Subject: [PATCH 4/5] fix yml syntax mistake --- backend/config/curation-api.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/config/curation-api.yml b/backend/config/curation-api.yml index 36fa2f4bc40da..9b2918e8a1f49 100644 --- a/backend/config/curation-api.yml +++ b/backend/config/curation-api.yml @@ -336,7 +336,8 @@ paths: type: object additionalProperties: false properties: - $ref: "#/components/schemas/curator_tag" + curator_tag: + $ref: "#/components/schemas/curator_tag" example: curator_tag: "new/curator_tag" responses: From 99fbba8f7e62b7ed6e58d5d2440ff944f0e475ff Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Sun, 28 Aug 2022 23:31:19 -0700 Subject: [PATCH 5/5] fix tests --- .../corpora/api_server/curator/collection/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py b/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py index b281e56365ca7..25941958d20b2 100644 --- a/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py +++ b/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py @@ -88,7 +88,7 @@ def _test(_tag, _dataset): self.assertIsNone(_dataset.curator_tag) dataset = self.generate_dataset(self.session, collection=collection) - tests = [dataset.id, "prefix" + dataset.id, dataset.id + "suffix", "prefix" + dataset.id + "suffix"] + tests = [dataset.id, ""] for tag_name in tests: with self.subTest(tag_name): _test(tag_name, dataset)