diff --git a/backend/config/curation-api.yml b/backend/config/curation-api.yml index 6837b60a1ad82..9b2918e8a1f49 100644 --- a/backend/config/curation-api.yml +++ b/backend/config/curation-api.yml @@ -337,8 +337,7 @@ paths: additionalProperties: false properties: curator_tag: - type: string - description: curator-provided tag + $ref: "#/components/schemas/curator_tag" example: curator_tag: "new/curator_tag" responses: @@ -376,7 +375,7 @@ paths: additionalProperties: false properties: curator_tag: - type: string + $ref: "#/components/schemas/curator_tag" id: $ref: "#/components/schemas/dataset_id" link: @@ -518,6 +517,7 @@ components: type: array curator_tag: "$ref": "#/components/schemas/curator_tag" + nullable: true dataset_id: "$ref": "#/components/schemas/dataset_id" type: object diff --git a/backend/corpora/common/utils/regex.py b/backend/corpora/common/utils/regex.py index 724262e737ab5..b0afe79673e2e 100644 --- a/backend/corpora/common/utils/regex.py +++ b/backend/corpora/common/utils/regex.py @@ -4,23 +4,23 @@ ID_REGEX = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" DATASET_ID_REGEX = f"(?P{ID_REGEX})" COLLECTION_ID_REGEX = f"(?P{ID_REGEX})" -CURATOR_TAG_PREFIX_REGEX = r"(?P.*)" CONTROL_CHARS = r"[\x00-\x1f\x7f-\xa0]" -CURATOR_TAG_REGEX = r"(?P.*)" +CURATOR_TAG_REGEX = r"(?P.+)" def validate_curator_tag(curator_tag: str) -> bool: """ - Verify the correct curator tag format is obeyed. + Verify the correct curator tag format is obeyed (i.e., it is not a UUID) :param curator_tag: the tag name to validate. - :return: True if CURATOR_TAG_PREFIX_REGEX is matched. + :return: True if CURATOR_TAG_REGEX is matched. """ regex = f"^({DATASET_ID_REGEX}|{CURATOR_TAG_REGEX})$" matched = re.match(regex, curator_tag) - if matched and (tag := matched.groupdict().get("tag")): - if not re.search(ID_REGEX, tag): - return - else: - raise ValueError("Curator tag cannot contain the same shape as a UUID.") - raise ValueError("Invalid curator tag.") + if matched: + matches = matched.groupdict() + if matches.get("tag"): + return True + elif matches.get("dataset_id"): + raise ValueError("Curator tag cannot assume UUID format.") + raise ValueError("Curator tag cannot be empty.") diff --git a/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py b/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py index b281e56365ca7..25941958d20b2 100644 --- a/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py +++ b/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py @@ -88,7 +88,7 @@ def _test(_tag, _dataset): self.assertIsNone(_dataset.curator_tag) dataset = self.generate_dataset(self.session, collection=collection) - tests = [dataset.id, "prefix" + dataset.id, dataset.id + "suffix", "prefix" + dataset.id + "suffix"] + tests = [dataset.id, ""] for tag_name in tests: with self.subTest(tag_name): _test(tag_name, dataset)