chanzuckerberg · danieljhegeman · Aug 29, 2022 · Aug 29, 2022 · Aug 29, 2022 · Aug 29, 2022
diff --git a/backend/config/curation-api.yml b/backend/config/curation-api.yml
@@ -337,8 +337,7 @@ paths:
               additionalProperties: false
               properties:
                 curator_tag:
-                  type: string
-                  description: curator-provided tag
+                  $ref: "#/components/schemas/curator_tag"
             example:
               curator_tag: "new/curator_tag"
       responses:
@@ -376,7 +375,7 @@ paths:
               additionalProperties: false
               properties:
                 curator_tag:
-                  type: string
+                  $ref: "#/components/schemas/curator_tag"
                 id:
                   $ref: "#/components/schemas/dataset_id"
                 link:
@@ -518,6 +517,7 @@ components:
           type: array
         curator_tag:
           "$ref": "#/components/schemas/curator_tag"
+          nullable: true
         dataset_id:
           "$ref": "#/components/schemas/dataset_id"
       type: object

diff --git a/backend/corpora/common/utils/regex.py b/backend/corpora/common/utils/regex.py
@@ -4,23 +4,23 @@
 ID_REGEX = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
 DATASET_ID_REGEX = f"(?P<dataset_id>{ID_REGEX})"
 COLLECTION_ID_REGEX = f"(?P<collection_id>{ID_REGEX})"
-CURATOR_TAG_PREFIX_REGEX = r"(?P<tag_prefix>.*)"
 CONTROL_CHARS = r"[\x00-\x1f\x7f-\xa0]"
-CURATOR_TAG_REGEX = r"(?P<tag>.*)"
+CURATOR_TAG_REGEX = r"(?P<tag>.+)"
 
 
 def validate_curator_tag(curator_tag: str) -> bool:
     """
-    Verify the correct curator tag format is obeyed.
+    Verify the correct curator tag format is obeyed (i.e., it is non-empty and is not a UUID).
 
     :param curator_tag: the tag name to validate.
-    :return: True if CURATOR_TAG_PREFIX_REGEX is matched.
+    :return: True if CURATOR_TAG_REGEX is matched; otherwise a ValueError is raised.
     """
     regex = f"^({DATASET_ID_REGEX}|{CURATOR_TAG_REGEX})$"
     matched = re.match(regex, curator_tag)
-    if matched and (tag := matched.groupdict().get("tag")):
-        if not re.search(ID_REGEX, tag):
-            return
-        else:
-            raise ValueError("Curator tag cannot contain the same shape as a UUID.")
-    raise ValueError("Invalid curator tag.")
+    if matched:
+        matches = matched.groupdict()
+        if matches.get("tag"):
+            return True
+        elif matches.get("dataset_id"):
+            raise ValueError("Curator tag cannot assume UUID format.")
+    raise ValueError("Curator tag cannot be empty.")
diff --git a/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py b/tests/unit/backend/corpora/api_server/curator/collection/test_dataset.py
@@ -88,7 +88,7 @@ def _test(_tag, _dataset):
             self.assertIsNone(_dataset.curator_tag)
 
         dataset = self.generate_dataset(self.session, collection=collection)
-        tests = [dataset.id, "prefix" + dataset.id, dataset.id + "suffix", "prefix" + dataset.id + "suffix"]
+        tests = [dataset.id, ""]
         for tag_name in tests:
             with self.subTest(tag_name):
                 _test(tag_name, dataset)