diff --git a/newrelic/config.py b/newrelic/config.py
index 6816c43b5..142d4e80b 100644
--- a/newrelic/config.py
+++ b/newrelic/config.py
@@ -2037,6 +2037,16 @@ def _process_trace_cache_import_hooks():
 
 
 def _process_module_builtin_defaults():
+    _process_module_definition(
+        "openai.api_resources.chat_completion",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_api_resources_chat_completion",
+    )
+    _process_module_definition(
+        "openai.util",
+        "newrelic.hooks.mlmodel_openai",
+        "instrument_openai_util",
+    )
     _process_module_definition(
         "asyncio.base_events",
         "newrelic.hooks.coroutines_asyncio",
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
new file mode 100644
index 000000000..c7afed04f
--- /dev/null
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -0,0 +1,138 @@
+# Copyright 2010 New Relic, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+import openai
+
+from newrelic.api.function_trace import FunctionTrace
+from newrelic.api.time_trace import get_trace_linking_metadata
+from newrelic.api.transaction import current_transaction
+from newrelic.common.object_names import callable_name
+from newrelic.common.object_wrapper import wrap_function_wrapper
+from newrelic.core.attribute import MAX_LOG_MESSAGE_LENGTH
+from newrelic.core.config import global_settings
+
+
+def wrap_chat_completion_create(wrapped, instance, args, kwargs):
+    transaction = current_transaction()
+
+    # Outside of a transaction, call through without recording anything.
+    if not transaction:
+        return wrapped(*args, **kwargs)
+
+    ft_name = callable_name(wrapped)
+    with FunctionTrace(ft_name) as ft:
+        response = wrapped(*args, **kwargs)
+
+    if not response:
+        return response
+
+    custom_attrs_dict = transaction._custom_params
+    conversation_id = custom_attrs_dict.get("conversation_id", str(uuid.uuid4()))
+
+    chat_completion_id = str(uuid.uuid4())
+    available_metadata = get_trace_linking_metadata()
+    span_id = available_metadata.get("span.id", "")
+    trace_id = available_metadata.get("trace.id", "")
+
+    # Response headers are attached to the response by wrap_convert_to_openai_object below.
+    response_headers = getattr(response, "_nr_response_headers", {})
+    response_model = response.model
+    settings = transaction.settings if transaction.settings is not None else global_settings()
+
+    chat_completion_summary_dict = {
+        "id": chat_completion_id,
+        "appName": settings.app_name,
+        "conversation_id": conversation_id,
+        "span_id": span_id,
+        "trace_id": trace_id,
+        "transaction_id": transaction._transaction_id,
+        "api_key_last_four_digits": f"sk-{response.api_key[-4:]}",
+        "response_time": ft.duration,
+        "request.model": kwargs.get("model") or kwargs.get("engine"),
+        "response.model": response_model,
+        "response.organization": response.organization,
+        "response.usage.completion_tokens": response.usage.completion_tokens,
+        "response.usage.total_tokens": response.usage.total_tokens,
+        "response.usage.prompt_tokens": response.usage.prompt_tokens,
+        "request.temperature": kwargs.get("temperature"),
+        "request.max_tokens": kwargs.get("max_tokens"),
+        "response.choices.finish_reason": response.choices[0].finish_reason,
+        "response.api_type": response.api_type,
+        "response.headers.llmVersion": response_headers.get("openai-version"),
+        "response.headers.ratelimitLimitRequests": check_rate_limit_header(response_headers, "x-ratelimit-limit-requests", True),
+        "response.headers.ratelimitLimitTokens": check_rate_limit_header(response_headers, "x-ratelimit-limit-tokens", True),
+        "response.headers.ratelimitResetTokens": check_rate_limit_header(response_headers, "x-ratelimit-reset-tokens", False),
+        "response.headers.ratelimitResetRequests": check_rate_limit_header(response_headers, "x-ratelimit-reset-requests", False),
+        "response.headers.ratelimitRemainingTokens": check_rate_limit_header(response_headers, "x-ratelimit-remaining-tokens", True),
+        "response.headers.ratelimitRemainingRequests": check_rate_limit_header(response_headers, "x-ratelimit-remaining-requests", True),
+        "vendor": "openAI",
+        "ingest_source": "Python",
+        "number_of_messages": len(kwargs.get("messages", [])) + len(response.choices),
+        "api_version": response_headers.get("openai-version"),
+    }
+
+    transaction.record_ml_event("LlmChatCompletionSummary", chat_completion_summary_dict)
+    # One message event per request message, plus the first response choice.
+    message_list = list(kwargs.get("messages", [])) + [response.choices[0].message]
+
+    create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model)
+
+    return response
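+
+
+# A usage sketch for the helper below (header values here are illustrative, not
+# taken from the test fixtures). OpenAI reports rate limits as header strings,
+# so integer-valued headers are coerced to int while duration-style strings
+# pass through unchanged:
+#     check_rate_limit_header({"x-ratelimit-limit-requests": "200"}, "x-ratelimit-limit-requests", True)  # -> 200
+#     check_rate_limit_header({"x-ratelimit-reset-tokens": "90ms"}, "x-ratelimit-reset-tokens", False)    # -> "90ms"
+#     check_rate_limit_header({}, "x-ratelimit-limit-tokens", True)                                       # -> None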
+def check_rate_limit_header(response_headers, header_name, is_int):
+    if header_name not in response_headers:
+        return None
+
+    header_value = response_headers.get(header_name)
+    if is_int:
+        try:
+            header_value = int(header_value)
+        except ValueError:
+            # Leave the raw string in place if the header is not a clean integer.
+            pass
+    return header_value
+
+
+def create_chat_completion_message_event(transaction, message_list, chat_completion_id, span_id, trace_id, response_model):
+    if not transaction:
+        return
+
+    for index, message in enumerate(message_list):
+        chat_completion_message_dict = {
+            "id": str(uuid.uuid4()),
+            "span_id": span_id,
+            "trace_id": trace_id,
+            "transaction_id": transaction._transaction_id,
+            # Truncate content to the maximum attribute length accepted by the collector.
+            "content": message.get("content", "")[:MAX_LOG_MESSAGE_LENGTH],
+            "role": message.get("role"),
+            "completion_id": chat_completion_id,
+            "sequence": index,
+            "model": response_model,
+            "vendor": "openAI",
+            "ingest_source": "Python",
+        }
+        transaction.record_ml_event("LlmChatCompletionMessage", chat_completion_message_dict)
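+
+
+# The openai client funnels every raw OpenAIResponse through convert_to_openai_object
+# before handing it back to the caller, and ChatCompletion.create never exposes the
+# HTTP headers itself. The wrapper below therefore stashes the headers on the
+# converted object as _nr_response_headers, where wrap_chat_completion_create
+# (above) picks them up.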
+def wrap_convert_to_openai_object(wrapped, instance, args, kwargs):
+    resp = args[0] if args else None
+    returned_response = wrapped(*args, **kwargs)
+
+    if isinstance(resp, openai.openai_response.OpenAIResponse):
+        setattr(returned_response, "_nr_response_headers", getattr(resp, "_headers", {}))
+
+    return returned_response
+
+
+def instrument_openai_api_resources_chat_completion(module):
+    if hasattr(module.ChatCompletion, "create"):
+        wrap_function_wrapper(module, "ChatCompletion.create", wrap_chat_completion_create)
+
+
+def instrument_openai_util(module):
+    wrap_function_wrapper(module, "convert_to_openai_object", wrap_convert_to_openai_object)
diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py
index 900c43a81..e84f6482a 100644
--- a/tests/mlmodel_openai/conftest.py
+++ b/tests/mlmodel_openai/conftest.py
@@ -36,8 +36,11 @@
     "transaction_tracer.stack_trace_threshold": 0.0,
     "debug.log_data_collector_payloads": True,
     "debug.record_transaction_failure": True,
-    "ml_insights_event.enabled": True,
+    "machine_learning.enabled": True,
+    "ml_insights_events.enabled": True,
 }
+
 collector_agent_registration = collector_agent_registration_fixture(
     app_name="Python Agent Test (mlmodel_openai)",
     default_settings=_default_settings,
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index b428f329f..599dd9275 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -13,19 +13,182 @@
 # limitations under the License.
 
 import openai
+from testing_support.fixtures import (
+    override_application_settings,
+    reset_core_stats_engine,
+)
+from testing_support.validators.validate_ml_event_count import validate_ml_event_count
+from testing_support.validators.validate_ml_events import validate_ml_events
+
+from newrelic.api.background_task import background_task
+from newrelic.api.time_trace import current_trace
+from newrelic.api.transaction import add_custom_attribute, current_transaction
 
 _test_openai_chat_completion_sync_messages = (
     {"role": "system", "content": "You are a scientist."},
-    {"role": "user", "content": "What is the boiling point of water?"},
-    {"role": "assistant", "content": "The boiling point of water is 212 degrees Fahrenheit."},
     {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"},
 )
 
 
-def test_openai_chat_completion_sync():
+def set_trace_info():
+    txn = current_transaction()
+    if txn:
+        txn._trace_id = "trace-id"
+    trace = current_trace()
+    if trace:
+        trace.guid = "span-id"
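+
+
+# Expected events, expressed as (intrinsics, attributes) pairs. Attributes set
+# to None below (id, transaction_id, response_time, completion_id) vary from
+# run to run; the assumption is that the validator does not compare None
+# values exactly.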
+sync_chat_completion_recorded_events = [
+    (
+        {"type": "LlmChatCompletionSummary"},
+        {
+            "id": None,  # UUID that varies with each run
+            "appName": "Python Agent Test (mlmodel_openai)",
+            "conversation_id": "my-awesome-id",
+            "transaction_id": None,
+            "span_id": "span-id",
+            "trace_id": "trace-id",
+            "api_key_last_four_digits": "sk-CRET",
+            "response_time": None,  # Response time varies each test run
+            "request.model": "gpt-3.5-turbo",
+            "response.model": "gpt-3.5-turbo-0613",
+            "response.organization": "new-relic-nkmd8b",
+            "response.usage.completion_tokens": 11,
+            "response.usage.total_tokens": 64,
+            "response.usage.prompt_tokens": 53,
+            "request.temperature": 0.7,
+            "request.max_tokens": 100,
+            "response.choices.finish_reason": "stop",
+            "response.api_type": "None",
+            "response.headers.llmVersion": "2020-10-01",
+            "response.headers.ratelimitLimitRequests": 200,
+            "response.headers.ratelimitLimitTokens": 40000,
+            "response.headers.ratelimitResetTokens": "90ms",
+            "response.headers.ratelimitResetRequests": "7m12s",
+            "response.headers.ratelimitRemainingTokens": 39940,
+            "response.headers.ratelimitRemainingRequests": 199,
+            "vendor": "openAI",
+            "ingest_source": "Python",
+            "number_of_messages": 3,
+            "api_version": "2020-10-01",
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": None,
+            "span_id": "span-id",
+            "trace_id": "trace-id",
+            "transaction_id": None,
+            "content": "You are a scientist.",
+            "role": "system",
+            "completion_id": None,
+            "sequence": 0,
+            "model": "gpt-3.5-turbo-0613",
+            "vendor": "openAI",
+            "ingest_source": "Python",
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": None,
+            "span_id": "span-id",
+            "trace_id": "trace-id",
+            "transaction_id": None,
+            "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+            "role": "user",
+            "completion_id": None,
+            "sequence": 1,
+            "model": "gpt-3.5-turbo-0613",
+            "vendor": "openAI",
+            "ingest_source": "Python",
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": None,
+            "span_id": "span-id",
+            "trace_id": "trace-id",
+            "transaction_id": None,
+            "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+            "role": "assistant",
+            "completion_id": None,
+            "sequence": 2,
+            "model": "gpt-3.5-turbo-0613",
+            "vendor": "openAI",
+            "ingest_source": "Python",
+        },
+    ),
+]
+
+
+@reset_core_stats_engine()
+@validate_ml_events(sync_chat_completion_recorded_events)
+# One summary event, one system message, one user message, and one response message from the assistant
+@validate_ml_event_count(count=4)
+@background_task()
+def test_openai_chat_completion_sync_in_txn():
+    set_trace_info()
+    add_custom_attribute("conversation_id", "my-awesome-id")
+    openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
+    )
+
+
+@reset_core_stats_engine()
+# No events are recorded outside of an active transaction
+@validate_ml_event_count(count=0)
+def test_openai_chat_completion_sync_outside_txn():
+    set_trace_info()
+    add_custom_attribute("conversation_id", "my-awesome-id")
+    openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
+    )
+
+
+disabled_ml_settings = {
+    "machine_learning.enabled": False,
+    "machine_learning.inference_events_value.enabled": False,
+    "ml_insights_events.enabled": False,
+}
+
+
+@override_application_settings(disabled_ml_settings)
+@reset_core_stats_engine()
+# No events are recorded when machine learning settings are disabled, even inside a transaction
+@validate_ml_event_count(count=0)
+@background_task()
+def test_openai_chat_completion_sync_disabled_settings():
+    set_trace_info()
     openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=_test_openai_chat_completion_sync_messages,
+        temperature=0.7,
+        max_tokens=100
     )