Merge pull request caikit#665 from swith004/input_token_count_136

Updated the TokenizationResults data model to add an optional token_count field
opendatahub-io · Feb 22, 2024 · 855e630 · 855e630
2 parents 0c5c08e + 15a3221
commit 855e630
Showing 1 changed file with 5 additions and 2 deletions.
diff --git a/caikit/interfaces/nlp/data_model/text.py b/caikit/interfaces/nlp/data_model/text.py
@@ -14,7 +14,7 @@
 """Data structures for text representations"""
 
 # Standard
-from typing import List
+from typing import List, Optional
 
 # First Party
 from py_to_proto.dataclass_to_proto import Annotated, FieldNumber
@@ -43,7 +43,10 @@ class Token(DataObjectBase):
 class TokenizationResults(DataObjectBase):
     """Tokenization result generated from a text."""
 
-    results: Annotated[List[Token], FieldNumber(1)]
+    results: Annotated[Optional[List[Token]], FieldNumber(1)]
+    # The number of tokens
+    # Note: Field number 4 was chosen because field numbers 2 and 3 are used below
+    token_count: Annotated[Optional[int], FieldNumber(4)]
 
 
 @dataobject(package=NLP_PACKAGE)