diff --git a/caikit/interfaces/nlp/data_model/text.py b/caikit/interfaces/nlp/data_model/text.py index b80d4bedf..16787c05b 100644 --- a/caikit/interfaces/nlp/data_model/text.py +++ b/caikit/interfaces/nlp/data_model/text.py @@ -14,7 +14,7 @@ """Data structures for text representations""" # Standard -from typing import List +from typing import List, Optional # First Party from py_to_proto.dataclass_to_proto import Annotated, FieldNumber @@ -43,7 +43,10 @@ class Token(DataObjectBase): class TokenizationResults(DataObjectBase): """Tokenization result generated from a text.""" - results: Annotated[List[Token], FieldNumber(1)] + results: Annotated[Optional[List[Token]], FieldNumber(1)] + # The number of tokens + # Note: Field number 4 chosen due to Fields 2 and 3 used below + token_count: Annotated[Optional[int], FieldNumber(4)] @dataobject(package=NLP_PACKAGE)