diff --git a/adala/runtimes/__init__.py b/adala/runtimes/__init__.py index df9aae16..dbcb33af 100644 --- a/adala/runtimes/__init__.py +++ b/adala/runtimes/__init__.py @@ -1,3 +1,7 @@ from .base import Runtime, AsyncRuntime -from ._openai import OpenAIChatRuntime, OpenAIVisionRuntime, AsyncOpenAIChatRuntime -from ._litellm import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime +from ._openai import OpenAIChatRuntime, AsyncOpenAIChatRuntime, AsyncOpenAIVisionRuntime +from ._litellm import ( + LiteLLMChatRuntime, + AsyncLiteLLMChatRuntime, + AsyncLiteLLMVisionRuntime, +) diff --git a/adala/runtimes/_litellm.py b/adala/runtimes/_litellm.py index 0cb49ae8..7c654cfd 100644 --- a/adala/runtimes/_litellm.py +++ b/adala/runtimes/_litellm.py @@ -1,7 +1,9 @@ import asyncio import logging -from typing import Any, Dict, List, Optional, Type +from collections import defaultdict +from typing import Any, Dict, List, Optional, Type, Union, Literal, TypedDict, Iterable, Generator from functools import cached_property +from enum import Enum import litellm from litellm.exceptions import ( AuthenticationError, @@ -19,6 +21,7 @@ from adala.utils.parse import ( parse_template, partial_str_format, + TemplateChunks, ) from pydantic import ConfigDict, field_validator, BaseModel from pydantic_core import to_jsonable_python @@ -56,8 +59,25 @@ ) +# TODO: consolidate these data models and unify our preprocessing for LLM input into one step RawInputModel -> PreparedInputModel +class TextMessageChunk(TypedDict): + type: Literal["text"] + text: str + + +class ImageMessageChunk(TypedDict): + type: Literal["image"] + image_url: Dict[str, str] + + +MessageChunk = Union[TextMessageChunk, ImageMessageChunk] + +Message = Union[str, List[MessageChunk]] + + def get_messages( - user_prompt: str, + # user prompt can be a string or a list of multimodal message chunks + user_prompt: Message, system_prompt: Optional[str] = None, instruction_first: bool = True, ): @@ -143,6 +163,54 @@ def _from_litellm(self, 
**kwargs): return instructor.from_litellm(litellm.acompletion, **kwargs) +def handle_llm_exception( + e: Exception, messages: List[Dict[str, str]], model: str, retries +) -> tuple[Dict, Usage]: + """Handle exceptions from LLM calls and return standardized error dict and usage stats. + + Args: + e: The caught exception + messages: The messages that were sent to the LLM + model: The model name + retries: The retry policy object + + Returns: + Tuple of (error_dict, usage_stats) + """ + + if isinstance(e, IncompleteOutputException): + usage = e.total_usage + elif isinstance(e, InstructorRetryException): + usage = e.total_usage + # get root cause error from retries + e = e.__cause__.last_attempt.exception() + else: + # Approximate usage for other errors + # usage = e.total_usage + # not available here, so have to approximate by hand, assuming the same error occurred each time + n_attempts = retries.stop.max_attempt_number + prompt_tokens = n_attempts * litellm.token_counter( + model=model, messages=messages[:-1] + ) # response is appended as the last message + # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case + usage = Usage( + prompt_tokens=prompt_tokens, + completion_tokens=0, + total_tokens=prompt_tokens, + ) + # Catch case where the model does not return a properly formatted output + # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103 + # > AttributeError: 'NoneType' object has no attribute '_raw_response' + if type(e).__name__ in {"ValidationError", "AttributeError"}: + logger.error(f"Converting error to ConstrainedGenerationError: {str(e)}") + logger.debug(f"Traceback:\n{traceback.format_exc()}") + e = ConstrainedGenerationError() + + # the only other instructor error that would be thrown is IncompleteOutputException due to max_tokens reached + + return _log_llm_exception(e), usage + + class LiteLLMChatRuntime(InstructorClientMixin, Runtime): """ Runtime 
that uses [LiteLLM API](https://litellm.vercel.app/docs) and chat @@ -275,43 +343,8 @@ def record_to_record( ) usage = completion.usage dct = to_jsonable_python(response) - except IncompleteOutputException as e: - logger.error(f"Incomplete output error: {str(e)}") - logger.error(f"Traceback:\n{traceback.format_exc()}") - usage = e.total_usage - dct = _log_llm_exception(e) - except InstructorRetryException as e: - logger.error(f"Instructor retry error: {str(e)}") - logger.error(f"Traceback:\n{traceback.format_exc()}") - usage = e.total_usage - # get root cause error from retries - n_attempts = e.n_attempts - e = e.__cause__.last_attempt.exception() - dct = _log_llm_exception(e) except Exception as e: - logger.error(f"Other error: {str(e)}") - logger.error(f"Traceback:\n{traceback.format_exc()}") - # usage = e.total_usage - # not available here, so have to approximate by hand, assuming the same error occurred each time - n_attempts = retries.stop.max_attempt_number - prompt_tokens = n_attempts * litellm.token_counter( - model=self.model, messages=messages[:-1] - ) # response is appended as the last message - # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case - completion_tokens = 0 - usage = Usage( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=(prompt_tokens + completion_tokens), - ) - - # Catch case where the model does not return a properly formatted output - # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103 - # > AttributeError: 'NoneType' object has no attribute '_raw_response' - if type(e).__name__ in {"ValidationError", "AttributeError"}: - e = ConstrainedGenerationError() - # there are no other known errors to catch - dct = _log_llm_exception(e) + dct, usage = handle_llm_exception(e, messages, self.model, retries) # Add usage data to the response (e.g. 
token counts, cost) dct.update(_get_usage_dict(usage, model=self.model)) @@ -437,45 +470,11 @@ async def batch_to_batch( # convert list of LLMResponse objects to the dataframe records df_data = [] for response in responses: - if isinstance(response, IncompleteOutputException): - e = response - usage = e.total_usage - dct = _log_llm_exception(e) - elif isinstance(response, InstructorRetryException): - e = response - usage = e.total_usage - # get root cause error from retries - n_attempts = e.n_attempts - e = e.__cause__.last_attempt.exception() - dct = _log_llm_exception(e) - elif isinstance(response, Exception): - e = response - # usage = e.total_usage - # not available here, so have to approximate by hand, assuming the same error occurred each time - n_attempts = retries.stop.max_attempt_number + if isinstance(response, Exception): messages = [] # TODO how to get these? - prompt_tokens = n_attempts * litellm.token_counter( - model=self.model, messages=messages[:-1] - ) # response is appended as the last message - # TODO a pydantic validation error may be appended as the last message, don't know how to get the raw response in this case - completion_tokens = 0 - usage = Usage( - prompt_tokens, - completion_tokens, - total_tokens=(prompt_tokens + completion_tokens), + dct, usage = handle_llm_exception( + response, messages, self.model, retries ) - - # Catch case where the model does not return a properly formatted output - # AttributeError is an instructor bug: https://github.com/instructor-ai/instructor/pull/1103 - # > AttributeError: 'NoneType' object has no attribute '_raw_response' - if type(e).__name__ in {"ValidationError", "AttributeError"}: - logger.error( - f"Converting error to ConstrainedGenerationError: {str(e)}" - ) - logger.debug(f"Traceback:\n{traceback.format_exc()}") - e = ConstrainedGenerationError() - # the only other instructor error that would be thrown is IncompleteOutputException due to max_tokens reached - dct = _log_llm_exception(e) else: 
resp, completion = response usage = completion.usage @@ -603,109 +602,203 @@ def get_cost_estimate( ) -class LiteLLMVisionRuntime(LiteLLMChatRuntime): +class MessageChunkType(Enum): + TEXT = "text" + IMAGE_URL = "image_url" + + +def split_message_into_chunks( + input_template: str, input_field_types: Dict[str, MessageChunkType], **input_fields +) -> List[MessageChunk]: + """Split a template string with field types into a list of message chunks. + + Takes a template string with placeholders and splits it into chunks based on the field types, + preserving the text between placeholders. + + Args: + input_template (str): Template string with placeholders, e.g. '{a} is a {b} is an {a}' + input_field_types (Dict[str, MessageChunkType]): Dict mapping field names to their types + **input_fields: Field values to substitute into template + + Returns: + List[Dict[str, str]]: List of message chunks with appropriate type and content. + Text chunks have format: {'type': 'text', 'text': str} + Image chunks have format: {'type': 'image_url', 'image_url': {'url': str}} + + Example: + >>> split_message_into_chunks( + ... '{a} is a {b} is an {a}', + ... {'a': MessageChunkType.TEXT, 'b': MessageChunkType.IMAGE_URL}, + ... a='the letter a', + ... b='http://example.com/b.jpg' + ... 
) + [ + {'type': 'text', 'text': 'the letter a is a '}, + {'type': 'image_url', 'image_url': {'url': 'http://example.com/b.jpg'}}, + {'type': 'text', 'text': ' is an the letter a'} + ] + """ + # Parse template to get field positions and surrounding text + parsed = parse_template(input_template) + + def add_to_current_chunk( + current_chunk: Optional[MessageChunk], chunk: MessageChunk + ) -> MessageChunk: + if current_chunk: + current_chunk["text"] += chunk["text"] + return current_chunk + else: + return chunk + + # Build chunks by iterating through parsed template parts + def build_chunks(parsed: Iterable[TemplateChunks]) -> Generator[MessageChunk, None, None]: + current_chunk: Optional[MessageChunk] = None + + for part in parsed: + if part["type"] == "text": + current_chunk = add_to_current_chunk( + current_chunk, {"type": "text", "text": part["text"]} + ) + elif part["type"] == "var": + field_value = part["text"] + try: + field_type = input_field_types[field_value] + except KeyError: + raise ValueError( + f"Field {field_value} not found in input_field_types" + ) + if field_type == MessageChunkType.TEXT: + # try to substitute in variable and add to current chunk + substituted_text = partial_str_format( + f"{{{field_value}}}", **input_fields + ) + if substituted_text != field_value: + current_chunk = add_to_current_chunk( + current_chunk, {"type": "text", "text": substituted_text} + ) + else: + # be permissive for unfound variables + current_chunk = add_to_current_chunk( + current_chunk, + {"type": "text", "text": f"{{{field_value}}}"}, + ) + elif field_type == MessageChunkType.IMAGE_URL: + substituted_text = partial_str_format( + f"{{{field_value}}}", **input_fields + ) + if substituted_text != field_value: + # push current chunk, push image chunk, and start new chunk + if current_chunk: + yield current_chunk + current_chunk = None + yield { + "type": "image_url", + "image_url": {"url": input_fields[field_value]}, + } + else: + # be permissive for unfound 
variables + current_chunk = add_to_current_chunk( + current_chunk, + {"type": "text", "text": f"{{{field_value}}}"}, + ) + + if current_chunk: + yield current_chunk + + return list(build_chunks(parsed)) + + +class AsyncLiteLLMVisionRuntime(AsyncLiteLLMChatRuntime): """ Runtime that uses [LiteLLM API](https://litellm.vercel.app/docs) and vision models to perform the skill. """ - def record_to_record( + def init_runtime(self) -> "Runtime": + super().init_runtime() + if not litellm.supports_vision(self.model): + raise ValueError(f"Model {self.model} does not support vision") + return self + + async def batch_to_batch( self, - record: Dict[str, str], + batch: InternalDataFrame, input_template: str, instructions_template: str, - output_template: str, + response_model: Type[BaseModel], + output_template: Optional[ + str + ] = None, # TODO: deprecated in favor of response_model, can be removed extra_fields: Optional[Dict[str, str]] = None, field_schema: Optional[Dict] = None, - instructions_first: bool = False, - ) -> Dict[str, str]: - """ - Execute LiteLLM request given record and templates for input, - instructions and output. + instructions_first: bool = True, + input_field_types: Optional[Dict[str, MessageChunkType]] = None, + ) -> InternalDataFrame: + """Execute batch of requests with async calls to OpenAI API""" - Args: - record: Record to be used for input, instructions and output templates. - input_template: Template for input message. - instructions_template: Template for instructions message. - output_template: Template for output message. - extra_fields: Extra fields to be used in templates. - field_schema: Field jsonschema to be used for parsing templates. - Field schema must contain "format": "uri" for image fields. - For example: - ```json - { - "image": { - "type": "string", - "format": "uri" - } - } - ``` - instructions_first: If True, instructions will be sent before input. 
- """ + if not response_model: + raise ValueError( + "You must explicitly specify the `response_model` in runtime." + ) + + input_field_types = input_field_types or defaultdict( + lambda: MessageChunkType.TEXT + ) extra_fields = extra_fields or {} - field_schema = field_schema or {} + user_prompts = batch.apply( + # TODO: remove "extra_fields" to avoid name collisions + lambda row: split_message_into_chunks( + input_template, input_field_types, **row, **extra_fields + ), + axis=1, + ).tolist() - output_fields = parse_template( - partial_str_format(output_template, **extra_fields), - include_texts=False, - ) + # rest of this function is the same as AsyncLiteLLMChatRuntime.batch_to_batch - if len(output_fields) > 1: - raise NotImplementedError( - f"{self.__class__.__name__} does not support multiple output fields. " - f"Found: {output_fields}" + retries = AsyncRetrying(**RETRY_POLICY) + + tasks = [ + asyncio.ensure_future( + self.client.chat.completions.create_with_completion( + messages=get_messages( + user_prompt, + instructions_template, + instructions_first, + ), + response_model=response_model, + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + seed=self.seed, + max_retries=retries, + # extra inference params passed to this runtime + **self.model_extra, + ) ) - output_field = output_fields[0] - output_field_name = output_field["text"] - - input_fields = parse_template(input_template) - - # split input template into text and image parts - input_text = "" - content = [ - { - "type": "text", - "text": instructions_template, - } + for user_prompt in user_prompts ] - for field in input_fields: - if field["type"] == "text": - input_text += field["text"] - elif field["type"] == "var": - if field["text"] not in field_schema: - input_text += record[field["text"]] - elif field_schema[field["text"]]["type"] == "string": - if field_schema[field["text"]].get("format") == "uri": - if input_text: - content.append({"type": "text", "text": 
input_text}) - input_text = "" - content.append( - { - "type": "image_url", - "image_url": record[field["text"]], - } - ) - else: - input_text += record[field["text"]] - else: - raise ValueError( - f'Unsupported field type: {field_schema[field["text"]]["type"]}' - ) - if input_text: - content.append({"type": "text", "text": input_text}) + responses = await asyncio.gather(*tasks, return_exceptions=True) - if self.verbose: - print(f"**Prompt content**:\n{content}") + # convert list of LLMResponse objects to the dataframe records + df_data = [] + for response in responses: + if isinstance(response, Exception): + messages = [] # TODO how to get these? + dct, usage = handle_llm_exception( + response, messages, self.model, retries + ) + else: + resp, completion = response + usage = completion.usage + dct = to_jsonable_python(resp) - completion = litellm.completion( - messages=[{"role": "user", "content": content}], - max_tokens=self.max_tokens, - temperature=self.temperature, - seed=self.seed, - # extra inference params passed to this runtime - **self.model_extra, - ) + # Add usage data to the response (e.g. 
token counts, cost) + dct.update(_get_usage_dict(usage, model=self.model)) - completion_text = completion.choices[0].message.content - return {output_field_name: completion_text} + df_data.append(dct) + + output_df = InternalDataFrame(df_data) + return output_df.set_index(batch.index) + + # TODO: cost estimate diff --git a/adala/runtimes/_openai.py b/adala/runtimes/_openai.py index 009d1980..c3441699 100644 --- a/adala/runtimes/_openai.py +++ b/adala/runtimes/_openai.py @@ -1,7 +1,11 @@ -from ._litellm import AsyncLiteLLMChatRuntime, LiteLLMChatRuntime, LiteLLMVisionRuntime +from ._litellm import ( + AsyncLiteLLMChatRuntime, + LiteLLMChatRuntime, + AsyncLiteLLMVisionRuntime, +) # litellm already reads the OPENAI_API_KEY env var, which was the reason for this class OpenAIChatRuntime = LiteLLMChatRuntime AsyncOpenAIChatRuntime = AsyncLiteLLMChatRuntime -OpenAIVisionRuntime = LiteLLMVisionRuntime +AsyncOpenAIVisionRuntime = AsyncLiteLLMVisionRuntime diff --git a/adala/skills/collection/label_studio.py b/adala/skills/collection/label_studio.py index edf3a0d0..8a1582b5 100644 --- a/adala/skills/collection/label_studio.py +++ b/adala/skills/collection/label_studio.py @@ -2,14 +2,17 @@ import pandas as pd from typing import Type, Iterator, Optional from functools import cached_property +from collections import defaultdict from adala.skills._base import TransformSkill +from adala.runtimes import AsyncLiteLLMVisionRuntime +from adala.runtimes._litellm import MessageChunkType from pydantic import BaseModel, Field, model_validator from adala.runtimes import Runtime, AsyncRuntime from adala.utils.internal_data import InternalDataFrame from label_studio_sdk.label_interface import LabelInterface -from label_studio_sdk.label_interface.control_tags import ControlTag +from label_studio_sdk.label_interface.control_tags import ControlTag, ObjectTag from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import ( json_schema_to_pydantic, ) @@ -35,39 +38,60 @@ class 
LabelStudioSkill(TransformSkill): # TODO: implement postprocessing to verify Taxonomy + @cached_property + def label_interface(self) -> LabelInterface: + return LabelInterface(self.label_config) + + @cached_property def ner_tags(self) -> Iterator[ControlTag]: # check if the input config has NER tag ( + ), and return its `from_name` and `to_name` - interface = LabelInterface(self.label_config) - for tag in interface.controls: - # NOTE: don't need to check object tag because at this point, unusable control tags should have been stripped out of the label config - if tag.tag.lower() == "labels": + control_tag_names = self.allowed_control_tags or list( + self.label_interface._controls.keys() + ) + for tag_name in control_tag_names: + tag = self.label_interface.get_control(tag_name) + if tag.tag.lower() in {"labels", "hypertextlabels"}: + yield tag + + @cached_property + def image_tags(self) -> Iterator[ObjectTag]: + # check if any image tags are used as input variables + object_tag_names = self.allowed_object_tags or list( + self.label_interface._objects.keys() + ) + for tag_name in object_tag_names: + tag = self.label_interface.get_object(tag_name) + if tag.tag.lower() == "image": yield tag @model_validator(mode="after") def validate_response_model(self): - interface = LabelInterface(self.label_config) logger.debug(f"Read labeling config {self.label_config}") if self.allowed_control_tags or self.allowed_object_tags: if self.allowed_control_tags: control_tags = { - tag: interface._controls[tag] for tag in self.allowed_control_tags + tag: self.label_interface._controls[tag] + for tag in self.allowed_control_tags } else: - control_tags = interface._controls + control_tags = self.label_interface._controls if self.allowed_object_tags: object_tags = { - tag: interface._objects[tag] for tag in self.allowed_object_tags + tag: self.label_interface._objects[tag] + for tag in self.allowed_object_tags } else: - object_tags = interface._objects + object_tags = 
self.label_interface._objects interface = LabelInterface.create_instance( tags={**control_tags, **object_tags} ) logger.debug( f"Filtered labeling config based on allowed tags {self.allowed_control_tags=} and {self.allowed_object_tags=} to {interface.config}" ) + else: + interface = self.label_interface # NOTE: filtered label config is used for the response model, but full label config is used for the prompt, so that the model has as much context as possible. self.field_schema = interface.to_json_schema() @@ -100,14 +124,28 @@ async def aapply( ) -> InternalDataFrame: with json_schema_to_pydantic(self.field_schema) as ResponseModel: - output = await runtime.batch_to_batch( - input, - input_template=self.input_template, - output_template="", - instructions_template=self.instructions, - response_model=ResponseModel, - ) - for ner_tag in self.ner_tags(): + # special handling to flag image inputs if they exist + if isinstance(runtime, AsyncLiteLLMVisionRuntime): + input_field_types = defaultdict(lambda: MessageChunkType.TEXT) + for tag in self.image_tags: + input_field_types[tag.name] = MessageChunkType.IMAGE_URL + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + input_field_types=input_field_types, + ) + else: + output = await runtime.batch_to_batch( + input, + input_template=self.input_template, + output_template="", + instructions_template=self.instructions, + response_model=ResponseModel, + ) + for ner_tag in self.ner_tags: input_field_name = ner_tag.objects[0].value.lstrip("$") output_field_name = ner_tag.name quote_string_field_name = "text" diff --git a/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml b/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml new file mode 100644 index 00000000..6e57a70d --- /dev/null +++ 
b/tests/cassettes/test_label_studio_skill/test_label_studio_skill_image_input.yaml @@ -0,0 +1,215 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSy27bMBC86yu2vPRiFbbs1o9LUPTS9NBDgz7QIBBociWxobgsuYLjBgb6G/29 + fklB2bEUNAV6IcCZncHMkvcZgDBabECoRrJqvc1ff9FXTf3+zXfeVl/ryw/x46d3P1q7M/X+6k5M + koK231Dxg+qFotZbZEPuSKuAkjG5zpbz4uVyvVque6IljTbJas/5gvLWOJMX02KRT5f5bHVSN2QU + RrGB6wwA4L4/U06n8U5sYDp5QFqMUdYoNuchABHIJkTIGE1k6VhMBlKRY3R99MvnLWgyroYdWjsB + bqS7hT11z+At7UBuqeN0vYDPjeTfP39FIJeAAK1xGpi03F+MzQNWXZSpoOusPeGHc1pLtQ+0jSf+ + jFfGmdiUAWUkl5JFJi969pAB3PRb6R4VFT5Q67lkukWXDGeLo50Y3mJErk4kE0s74PNi8oRbqZGl + sXG0VaGkalAPyuEJZKcNjYhs1PnvME95H3sbV/+P/UAohZ5Rlz6gNupx4WEsYPqp/xo777gPLOI+ + MrZlZVyNwQdz/CeVL+caZ8VqNX21Ftkh+wMAAP//AwDs57wINQMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85a915891d6208-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 00:11:19 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=f2eAWUmcSjgkraa7rJvzhr53.Kz3y7EZniQwAmrWmHg-1732579879-1.0.1.1-FcTG.L1LC0IYeDrJNsA3S_9CqAeK8RVmE9li1oKj8OrrEOFELgjJ.wfKOQqQi8SWUsocl.oe2kGwriII9BVQ5Q; + path=/; expires=Tue, 26-Nov-24 00:41:19 GMT; 
domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5M11WH7821NNRxCf3t86tF5_JSGA0RXiNMeAxl1Pa4A-1732579879834-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '488' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c89ae189bd037c2fdf4605f19a3115f5 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "\n Given + the title of a museum painting:\nIt''s definitely not the Mona Lisa\n and the + image of the painting:\nhttps://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg\n,\n classify + the painting as either \"Mona Lisa\" or \"Not Mona Lisa\".\n They + may or may not agree with each other. 
If the title and image disagree, believe + the image.\n "}], "model": "gpt-4o-mini", "max_tokens": 1000, + "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "MyModel"}}, "tools": [{"type": "function", "function": {"name": "MyModel", + "description": "Correctly extracted `MyModel` with all the required parameters + with correct types", "parameters": {"properties": {"classification": {"description": + "Choices for image", "enum": ["Mona Lisa", "Not Mona Lisa"], "title": "Classification", + "type": "string"}}, "required": ["classification"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1124' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xTUW/TMBB+z6+w7rlBSelIyNsmgRhqERpioFEUuc4l9ebYnu1IlKr/HdnpkrQU + iTxY1n33fXf3+bKPCAFeQUGAbaljrRbx9ffqC79/Xr3p3n+6edfJ5cOH34+39c0i+5YgzDxDbR6R + uRfWK6ZaLdBxJXuYGaQOvWqavZ5fZW/zPAlAqyoUntZoFy9U3HLJ43kyX8RJFqf5kb1VnKGFgvyI + CCFkH07fp6zwFxQkaIVIi9bSBqEYkggBo4SPALWWW0elg9kIMiUdSt+67ISYAE4pUTIqxFi4//aT + +2gWFaLcpJpf7+rlXf7ViXZ7e/f8RD9/5GZSr5fe6dBQ3Uk2mDTBh3hxVowQkLQN3NVuFbybnSdQ + 03QtSufbhv0amPBz15xRL7mGYg0rJSlZckvXcIAT/iG6dP85scVg3Vkqjn4d44fhAYRqtFEbe+Yn + 1Fxyuy0NUhvmAuuU7mv7OqECdCdvB9qoVrvSqSeUXnCepr0ejPs1otkRc8pRMSXlswtyZYWO8vC2 + wzoxyrZYjdRxrWhXcTUBosnQfzdzSbsfnMvmf+RHgDHUDqtSG6w4Ox14TDPo/75/pQ0mh4bB7qzD + tqy5bNBow8PuQ63LJEuuNnWesQSiQ/QHAAD//wMAq681QQkEAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 
8e85a91a5a7622f3-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 00:11:20 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=mR.lQGByVqO3YXPOJhOAYfQSCaSh.GGUAiqvmTKYeF4-1732579880-1.0.1.1-kjoNgd4tNmz.8ile246dtkSjbL3C9pTtBxM35zH_sQENgFJuN91lWEVTAYebM_Au.qq8D_Sr1S1_DegpYxCo7A; + path=/; expires=Tue, 26-Nov-24 00:41:20 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vJrrlSUKQKX62ERSv.300oGbNMFud1yC5ztTRDPBooA-1732579880316-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '189' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998865' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_5d437fcbab69225ff907cf1da14e1bb7 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_llm/test_vision_runtime.yaml b/tests/cassettes/test_llm/test_vision_runtime.yaml new file mode 100644 index 00000000..3c500876 --- /dev/null +++ b/tests/cassettes/test_llm/test_vision_runtime.yaml @@ -0,0 +1,515 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - 
_cfuvid=l7iL4O7hW2C3VXg3EHHmHGMfG6h9GaDpM3R43nhxJAw-1730733247411-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSwYrbMBS8+ytedeklLk6cYDeXpaWU5tDelpYtxSjSs62NrKdaz03Dkn8vcrJx + lt2FXgSaeTPMPOkhARBGizUI1UpWnbfphx+6yDe36q5c7PKPd5803f5pPpd4//tr+U3MooK296j4 + UfVOUectsiF3olWPkjG6zot8sSrKVV6MREcabZQ1ntMlpZ1xJl1ki2WaFem8PKtbMgqDWMPPBADg + YTxjTqfxr1hDNntEOgxBNijWlyEA0ZONiJAhmMDSsZhNpCLH6Mbom7cdaDKugT1aOwNupdvBgYY3 + 8IX2ILc0cLzewPdWMijpYAMtWh9B2BtugUnLw821f4/1EGTs6AZrz/jxEthS43vahjN/wWvjTGir + HmUgF8MFJi9G9pgA/BoXMzzpKnxPneeKaYcuGs6XJzsxPccV+f5MMrG0E57nsxfcKo0sjQ1XixVK + qhb1pJxeQQ7a0BWRXHV+HuYl71Nv45r/sZ8IpdAz6sr3qI16Wnga6zF+1tfGLjseA4twCIxdVRvX + YO97c/oqta+yIltt67JQmUiOyT8AAAD//wMA/n3XgzgDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85885378e086f8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eLsPnt6Pzes.k01.NQ1J2SgKRKHWAdtCiIuLlunhlDc-1732578537-1.0.1.1-4M9T24vvtlNrVZCB3bdC7q5cky0ytrfuAc6SSSWPX_bTfFo6Y2dRnkyN.fFwqAwbjETBDmiCE4pFQ_HP6XtwiA; + path=/; expires=Tue, 26-Nov-24 00:18:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=H3e75EZARspIKAJIbdmcPONXX2bd4dBJ_iVCJBZKCPk-1732578537967-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - 
heartex + openai-processing-ms: + - '367' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_494b776ae0ac7090d4d435fe4bb434ab + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "My + name is Carla and I am 25 years old."}]}], "model": "gpt-4o-mini", "max_tokens": + 1000, "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "Output"}}, "tools": [{"type": "function", "function": {"name": "Output", + "description": "Correctly extracted `Output` with all the required parameters + with correct types", "parameters": {"properties": {"name": {"description": "name:", + "title": "Name", "type": "string"}, "age": {"description": "age:", "title": + "Age", "type": "string"}}, "required": ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '637' + content-type: + - application/json + cookie: + - _cfuvid=sklnqaX5NEz5UfcuJZB8jNRawvW.wjbD8N5YSJ_rUh0-1723578083597-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xT32+bMBB+56+w7jlMJA0l5S2Nqkn70eZhU6uOCRlzEKfG9myztYvyv09AAiRt + 
pfFgWffd931352PnEQI8h5gA21DHKi385UMezT9+vnkKbx++F6s/65uvj9f6223269PvGiYNQ2Vb + ZO7I+sBUpQU6rmQHM4PUYaM6jS5mYbQILxYtUKkcRUMrtfPnyq+45P4smM39IPKniwN7ozhDCzH5 + 4RFCyK49mzpljs8Qk2ByjFRoLS0R4j6JEDBKNBGg1nLrqHQwGUCmpEPZlC5rIUaAU0qkjAoxGHff + bnQfhkWFSC/1/ePV3y1dZl+e18vtOsCVvqvur0d+nfSLbgsqasn6IY3wPh6fmRECklYt9652unZn + TEKAmrKuULqmatglbX4CcQIragRNYJIALbvILExgDycCe++t+8/RWAwWtaXiMK9DfN8/gFClNiqz + Z/OEgktuN6lBatu+wDqlO+/Gp3WA+uTtQBtVaZc69YSyEVxcdnIwrNcATg9LAE45Kob41ZF0opbm + 6Chvn7bfJkbZBvOBOWwVrXOuRoA36vl1MW9pd31zWf6P/AAwhtphnmqDOWenDQ9pBpuf7720fsZt + wWBfrMMqLbgs0WjD29WHQqdBFIRZsYhYAN7e+wcAAP//AwAydy+KCAQAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8588573ce4eb05-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:58 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=nUr9DAa1xx46rsdQ8jLylug4EQEIjBqOh_DrnLQ4iCs-1732578538-1.0.1.1-K1.z3.kxwK.y2bN.csBbWfD2WXgmPSqNwl3533sF5oKfc02lw0BHcl2bPvy3DNpFzGmut5iaAIX7XUGTSGX0Mw; + path=/; expires=Tue, 26-Nov-24 00:18:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=mZO7PQIQWPp7HYEYjCqejvrcPsMCoqvpZ8K9wj9EcwQ-1732578538510-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '321' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998989' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_16f953166c00a551ee4fa5e931a38a40 + status: + code: 200 + message: OK +- request: + body: 
'{"messages": [{"role": "user", "content": [{"type": "text", "text": "My + name is Carla and I am 25 years old."}]}], "model": "gpt-4o-mini", "max_tokens": + 1000, "seed": 47, "temperature": 0.0, "tool_choice": {"type": "function", "function": + {"name": "Output"}}, "tools": [{"type": "function", "function": {"name": "Output", + "description": "Correctly extracted `Output` with all the required parameters + with correct types", "parameters": {"properties": {"name": {"description": "name:", + "title": "Name", "type": "string"}, "age": {"description": "age:", "title": + "Age", "type": "string"}}, "required": ["age", "name"], "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '637' + content-type: + - application/json + cookie: + - _cfuvid=W8DNnoWXBapN39sZpvMK7iASgmsq_OnqmTKqTPq2CBI-1723578083808-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"error\": {\n \"message\": \"Incorrect API key provided: + fake_api_key. 
You can find your API key at https://platform.openai.com/account/api-keys.\",\n + \ \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\": + \"invalid_api_key\"\n }\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85885b2f0010cd-ORD + Connection: + - keep-alive + Content-Length: + - '262' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 25 Nov 2024 23:48:58 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=gnCsyRGFtGleLdDkTxhpjTtYilNQ2QnK83QhNMjlUXc-1732578538-1.0.1.1-0nLNbKcFb6VJlx3KAZyc0Qmtnd8O3b4b3ZNZiYdnhRY2zbO93GO4Jc5QaTnmZybTfoQwXuC.lQeLZIChL536ew; + path=/; expires=Tue, 26-Nov-24 00:18:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=5yrza0CipJMkqg1mo3_JtMjx7oALVzAVravbx3z3rVw-1732578538840-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Origin + x-request-id: + - req_ea1af632f81971bec43817a1fdccbf82 + status: + code: 401 + message: Unauthorized +- request: + body: '{"messages": [{"role": "user", "content": "Hey, how''s it going?"}], "model": + "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + cookie: + - _cfuvid=H3e75EZARspIKAJIbdmcPONXX2bd4dBJ_iVCJBZKCPk-1732578537967-0.0.1.1-604800000; + __cf_bm=eLsPnt6Pzes.k01.NQ1J2SgKRKHWAdtCiIuLlunhlDc-1732578537-1.0.1.1-4M9T24vvtlNrVZCB3bdC7q5cky0ytrfuAc6SSSWPX_bTfFo6Y2dRnkyN.fFwqAwbjETBDmiCE4pFQ_HP6XtwiA + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + 
x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTY/TMBS851c8fOGSoPRLKb2sOMHCBQkhKhCKXPslMXX8LPuFpVpV4m/w9/gl + yGm36YpF4mLJM29GM8++zwCE0WIDQnWSVe9t8Wqrq9XWfGQOfbX/vLj93r3X796+/qDKbSPypKDd + N1T8oHqhqPcW2ZA70SqgZEyus2oxX1Xr1eLlSPSk0SZZ67lYUtEbZ4p5OV8WZVXM1md1R0ZhFBv4 + kgEA3I9nyuk0/hAbKPMHpMcYZYticxkCEIFsQoSM0USWjkU+kYocoxuj3z7vQZNxLdyhtTlwJ90e + DjQ8gzd0B3JHA6frDXzqJP/++SsCuQQE6I3TwKTl4ebaPGAzRJkKusHaM368pLXU+kC7eOYveGOc + iV0dUEZyKVlk8mJkjxnA13Erw6OiwgfqPddMe3TJcLY82YnpLa7I9ZlkYmknfDHPn3CrNbI0Nl5t + VSipOtSTcnoCOWhDV0R21fnvME95n3ob1/6P/UQohZ5R1z6gNupx4WksYPqp/xq77HgMLOIhMvZ1 + Y1yLwQdz+ieNr8uqXO2adaVKkR2zPwAAAP//AwApqZnyNQMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e85885c1af786f8-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:48:59 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '472' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998994' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_dfa7c9dcf2072b06ff6bf2eace034012 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "Describe what you see in the + image."}, {"role": "user", "content": [{"type": "text", "text": "What''s in + this image? 
"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"}}]}], + "model": "gpt-4o-mini", "max_tokens": 1000, "seed": 47, "temperature": 0.0, + "tool_choice": {"type": "function", "function": {"name": "VisionOutput"}}, "tools": + [{"type": "function", "function": {"name": "VisionOutput", "description": "Correctly + extracted `VisionOutput` with all the required parameters with correct types", + "parameters": {"properties": {"description": {"description": "Description of + the image", "title": "Description", "type": "string"}}, "required": ["description"], + "type": "object"}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '911' + content-type: + - application/json + cookie: + - _cfuvid=mZO7PQIQWPp7HYEYjCqejvrcPsMCoqvpZ8K9wj9EcwQ-1732578538510-0.0.1.1-604800000; + __cf_bm=nUr9DAa1xx46rsdQ8jLylug4EQEIjBqOh_DrnLQ4iCs-1732578538-1.0.1.1-K1.z3.kxwK.y2bN.csBbWfD2WXgmPSqNwl3533sF5oKfc02lw0BHcl2bPvy3DNpFzGmut5iaAIX7XUGTSGX0Mw + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.47.1 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.47.1 + x-stainless-raw-response: + - 'true' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.5 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xUUW/bNhB+96848Fk2EjuxXb8V67AOyFYMS4NhTWGcyZN0NUUS5KmqG+S/F5Qc + y8kyoHoQiPt43/fdkbyHCYBiozagdI2im2Cnb/8xq9Xd7V/ffbq7qd52tzeH9T5E6X69Mr+pImf4 + 3RfS8pQ1074JloS9G2AdCYUy6+VqMb9era+vLnug8YZsTquCTK/8tGHH0/nF/Gp6sZpero/ZtWdN + SW3g0wQA4KH/Z5/O0De1gYviKdJQSliR2pw2AajobY4oTImToBNVjKD2Tshl66619gwQ7+1Wo7Wj + 
8PA9nK3HZqG12/Dv4pePH3Qpy4/0znXm9s+l48X37kxvoD6E3lDZOn1q0hl+im9eiAEoh02fe8eJ + vfvQSmjlRT6Awli1DTnJ3tXDvTKUdOSQSe/V5l7d1gTcYEVgKLCWBFITlNj4NkFAdsKugr3znQMc + wD+8Q7jhhAUcjxN2B7gh7zAaDwbhjp3mGfwuUBJKGykBQucbdNCx1ICQKJIjoG8hUsr+C0gsvRZW + yC4JIJjhkMCiM0ljoBn8XRPUmMB6VxVgMO6hRo6AzgAn6AhjzzFAJpM/SVquaplqb30kA60zFCuM + uTczeE8R6qwCGAl09CmRAXZQRu8EfAk1xaJXSblh2YDfZyXDkbTYA6D0zfnK1FE8ldnuxBKkhi3N + IPd6h3pfRd9mv07b1lCC6K3NXDVbm3oRhI6dybHIX7NypOx38JR8KQU0bW58X06a3atH9ezkHyev + rT+f3epIZZvQHq/7Mf54ej/WVyH6XXrxHFTJjlO9jYSpv5YqiQ+DdtbpFVT77OmpEH0TZCt+Ty4T + zq+v16uBUY0DYsTfLI+geEF7nrdcL4pXOLeGBLl/n6eRoFHXZMbkcTRga9ifAZOzyv/r5zXuoXp2 + 1c/Qj4DWFITMNkQyrJ/XPG6LlCfo/207dbo3rNIhCTXbkl1FMUTu55cqw3Zh6HK+Xl8s36jJ4+QH + AAAA//8DADoKaVzNBQAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8588625b2ce814-ORD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 23:49:03 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - heartex + openai-processing-ms: + - '3140' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-input-images: + - '50000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-input-images: + - '49999' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149998217' + x-ratelimit-reset-input-images: + - 1ms + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_1970fe17cb190fdb98d07b28d85fbe01 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_label_studio_skill.py b/tests/test_label_studio_skill.py index ca3279a1..c04b1a9b 100644 --- a/tests/test_label_studio_skill.py +++ b/tests/test_label_studio_skill.py @@ 
-1,3 +1,4 @@ +import asyncio import pytest import os import pandas as pd @@ -490,3 +491,51 @@ async def test_label_studio_skill_valid_predictions(): assert len(failed_configs) == 0, f"Failed configs: {failed_configs}" assert len(errored_configs) == 0, f"Errored configs: {errored_configs}" + + +@pytest.mark.vcr +def test_label_studio_skill_image_input(): + df = pd.DataFrame( + [ + { + "title": "It's definitely not the Mona Lisa", + "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg", + } + ] + ) + + agent_payload = { + "runtimes": { + "default": { + "type": "AsyncLiteLLMChatRuntime", + "model": "gpt-4o-mini", + } + }, + "skills": [ + { + "type": "LabelStudioSkill", + "name": "SneakyMuseumLabel", + "input_template": """ + Given the title of a museum painting:\n{title}\n and the image of the painting:\n{image}\n, + classify the painting as either "Mona Lisa" or "Not Mona Lisa". + They may or may not agree with each other. If the title and image disagree, believe the image. + """, + "label_config": """ + +
+ + + + + + + + """, + } + ], + } + + agent = Agent(**agent_payload) + predictions = asyncio.run(agent.arun(df)) + + assert predictions.classification.tolist() == ["Mona Lisa"] diff --git a/tests/test_llm.py b/tests/test_llm.py index 10ad8c2b..ea3b9809 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -2,7 +2,12 @@ import asyncio import pandas as pd from pydantic import BaseModel, Field -from adala.runtimes import LiteLLMChatRuntime, AsyncLiteLLMChatRuntime +from adala.runtimes import ( + LiteLLMChatRuntime, + AsyncLiteLLMChatRuntime, + AsyncLiteLLMVisionRuntime, +) +from adala.runtimes._litellm import split_message_into_chunks, MessageChunkType @pytest.mark.vcr @@ -141,3 +146,173 @@ class Output(BaseModel): pd.testing.assert_frame_equal(result, expected_result) # TODO test batch with successes and failures, figure out how to inject a particular error into LiteLLM + + +def test_split_message_into_chunks(): + # Test basic text-only template + result = split_message_into_chunks( + "Hello {name}!", {"name": MessageChunkType.TEXT}, name="Alice" + ) + assert result == [{"type": "text", "text": "Hello Alice!"}] + + # Test template with image URL + result = split_message_into_chunks( + "Look at this {image}", + {"image": MessageChunkType.IMAGE_URL}, + image="http://example.com/img.jpg", + ) + assert result == [ + {"type": "text", "text": "Look at this "}, + {"type": "image_url", "image_url": {"url": "http://example.com/img.jpg"}}, + ] + + # Test mixed text and image template + result = split_message_into_chunks( + "User {name} shared {image} yesterday", + {"name": MessageChunkType.TEXT, "image": MessageChunkType.IMAGE_URL}, + name="Bob", + image="http://example.com/photo.jpg", + ) + assert result == [ + {"type": "text", "text": "User Bob shared "}, + {"type": "image_url", "image_url": {"url": "http://example.com/photo.jpg"}}, + {"type": "text", "text": " yesterday"}, + ] + + # Test multiple occurrences of same field + result = split_message_into_chunks( + "{name} 
is here. Hi {name}!", {"name": MessageChunkType.TEXT}, name="Dave" + ) + assert result == [{"type": "text", "text": "Dave is here. Hi Dave!"}] + + +@pytest.mark.vcr +def test_vision_runtime(): + + # test success + + runtime = AsyncLiteLLMVisionRuntime() + + batch = pd.DataFrame.from_records([{"input_name": "Carla", "input_age": 25}]) + + class Output(BaseModel): + name: str = Field(..., description="name:") + age: str = Field(..., description="age:") + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + response_model=Output, + ) + ) + + # note age coerced to string + expected_result = pd.DataFrame.from_records( + [ + { + "name": "Carla", + "age": "25", + } + ] + ) + pd.testing.assert_frame_equal(result[["name", "age"]], expected_result) + + # assert all other columns (costs) are nonzero + assert ( + ( + result[ + [ + "_prompt_tokens", + "_completion_tokens", + "_prompt_cost_usd", + "_completion_cost_usd", + "_total_cost_usd", + ] + ] + > 0 + ) + .all() + .all() + ) + + # test failure + + runtime.api_key = "fake_api_key" + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="My name is {input_name} and I am {input_age} years old.", + instructions_template="", + response_model=Output, + ) + ) + + expected_result = pd.DataFrame.from_records( + [ + { + "_adala_error": True, + "_adala_message": "AuthenticationError", + "_adala_details": "litellm.AuthenticationError: AuthenticationError: OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: fake_api_key. 
You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + } + ] + ) + pd.testing.assert_frame_equal( + result[["_adala_error", "_adala_message", "_adala_details"]], expected_result + ) + # assert only prompt costs are nonzero + assert ( + (result[["_prompt_tokens", "_prompt_cost_usd", "_total_cost_usd"]] > 0) + .all() + .all() + ) + assert (result[["_completion_tokens", "_completion_cost_usd"]] == 0).all().all() + + # test with image input + + runtime = AsyncLiteLLMVisionRuntime(model="gpt-4o-mini") + + batch = pd.DataFrame.from_records( + [ + { + "text": "What's in this image?", + "image": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/687px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg", + } + ] + ) + + class VisionOutput(BaseModel): + description: str = Field(..., description="Description of the image") + + result = asyncio.run( + runtime.batch_to_batch( + batch, + input_template="{text} {image}", + instructions_template="Describe what you see in the image.", + response_model=VisionOutput, + input_field_types={ + "text": MessageChunkType.TEXT, + "image": MessageChunkType.IMAGE_URL, + }, + ) + ) + + assert "mona lisa" in result["description"].iloc[0].lower() + assert ( + ( + result[ + [ + "_prompt_tokens", + "_completion_tokens", + "_prompt_cost_usd", + "_completion_cost_usd", + "_total_cost_usd", + ] + ] + > 0 + ) + .all() + .all() + )