Skip to content

Commit

Permalink
feat(tokenize): add rate-limit awareness to model.tokenize (#237)
Browse files: browse the repository at this point in the history
Co-authored-by: David Kristek <David.Kristek@ibm.com>
  • Loading branch information
David-Kristek and David Kristek committed Nov 16, 2023
1 parent 92f49d5 commit f238e21
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions src/genai/model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -345,25 +345,36 @@ def tokenize_as_completed(
if len(prompts) > 0 and isinstance(prompts[0], PromptPattern):
prompts = PromptPattern.list_str(prompts)

try:
params = TokenParams(return_tokens=return_tokens)
params = TokenParams(return_tokens=return_tokens)

def execute(attempt=0):
for i in range(0, len(prompts), Metadata.DEFAULT_MAX_PROMPTS):
tokenize_response = self.service.tokenize(
response = self.service.tokenize(
model=self.model,
inputs=prompts[i : min(i + Metadata.DEFAULT_MAX_PROMPTS, len(prompts))],
params=params,
options=options,
)

if tokenize_response.is_success:
response_json = tokenize_response.json()
if response.is_success:
response_json = response.json()
for y, result in enumerate(response_json["results"]):
result["input_text"] = prompts[i + y]
responses = TokenizeResponse(**response_json)
for token in responses.results:
yield token
return responses
elif (
response.status_code == httpx.codes.TOO_MANY_REQUESTS
and attempt < ConnectionManager.MAX_RETRIES_TOKENIZE
):
time.sleep(2 ** (attempt + 1))
return execute(attempt + 1)
else:
raise GenAiException(tokenize_response)
raise GenAiException(response)

try:
response = execute()
for token in response.results:
yield token
except Exception as ex:
raise to_genai_error(ex)

Expand Down

0 comments on commit f238e21

Please sign in to comment.