feat(llmobs): submit spans for streamed calls #10908

Open · wants to merge 3 commits into base: main
29 changes: 21 additions & 8 deletions ddtrace/contrib/internal/langchain/patch.py
@@ -954,8 +954,6 @@ def _on_span_started(span: Span):
span.set_tag_str("langchain.request.inputs.%d.%s" % (idx, k), integration.trunc(str(v)))

def _on_span_finished(span: Span, streamed_chunks):
if span.error or not integration.is_pc_sampled_span(span):
return
if (
streamed_chunks
and langchain_core
@@ -970,6 +968,9 @@ def _on_span_finished(span: Span, streamed_chunks):
else:
# best effort to join chunks together
content = "".join([str(chunk) for chunk in streamed_chunks])
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=content, operation="chain")
if span.error or not integration.is_pc_sampled_span(span):
Contributor: Do we want to set the LLM Obs tags before we do the integration.is_pc_sampled_span check?

Contributor (author): Good question. It looks like we recently changed llmobs_set_tags to run independently of is_pc_sampled_span (example). I followed the same logic here, but I think even before that we did something like is_pc_sampled_llmobs, which I believe always returned True.

Contributor: Another clarification question about this: do we move the span.error check to after setting tags because the span may contain an error that happened mid-stream, but we still want to capture the outputs that were returned?

Contributor (author): We re-check span.error in llmobs_set_tags for llm, chat, and chain; I kept that check in place before adding the output messages/value properties.
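
To illustrate the ordering discussed in this thread, here is a minimal, self-contained sketch (with hypothetical _Span and _Integration stand-ins and an illustrative "output.value" key, not the actual ddtrace code): the LLM Observability tags are set on every finished streamed span, the APM langchain.* tags stay gated on span error and prompt-completion sampling, and span.error is re-checked inside the LLM Observability tagging so an errored stream still reports an empty output.

class _Span:
    """Hypothetical stand-in for ddtrace's Span; only what the sketch needs."""

    def __init__(self, error=False):
        self.error = error
        self.tags = {}

    def set_tag_str(self, key, value):
        self.tags[key] = value


class _Integration:
    """Hypothetical stand-in for the LangChain integration object."""

    def is_pc_sampled_span(self, span):
        return True  # prompt-completion sampling decision for APM tags

    def trunc(self, text):
        return text[:128]

    def llmobs_set_tags(self, span, response, operation):
        # span.error is re-checked here: an errored stream still submits an
        # LLM Observability event, just with empty output content.
        output = "" if span.error else response
        span.set_tag_str("output.value", output)  # illustrative key, not the real constant


def on_span_finished(integration, span, streamed_chunks):
    content = "".join(str(chunk) for chunk in streamed_chunks)
    # 1. Always set LLM Observability tags, independent of error/sampling.
    integration.llmobs_set_tags(span, response=content, operation="llm")
    # 2. Gate only the APM langchain.* tags on error and sampling.
    if span.error or not integration.is_pc_sampled_span(span):
        return
    span.set_tag_str("langchain.response.content", integration.trunc(content))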

return
span.set_tag_str("langchain.response.outputs", integration.trunc(content))

return shared_stream(
@@ -989,6 +990,7 @@ def _on_span_finished(span: Span, streamed_chunks):
def traced_chat_stream(langchain, pin, func, instance, args, kwargs):
integration: LangChainIntegration = langchain._datadog_integration
llm_provider = instance._llm_type
model = _extract_model_name(instance)

def _on_span_started(span: Span):
if not integration.is_pc_sampled_span(span):
@@ -1004,12 +1006,19 @@ def _on_span_started(span: Span):
span.set_tag_str("langchain.request.%s.parameters.%s.%s" % (llm_provider, param, k), str(v))

def _on_span_finished(span: Span, streamed_chunks):
if span.error or not integration.is_pc_sampled_span(span):
joined_chunks = streamed_chunks[0]
for chunk in streamed_chunks[1:]:
joined_chunks += chunk # base message types support __add__ for concatenation
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=joined_chunks, operation="chat")
if (
span.error
or not integration.is_pc_sampled_span(span)
or streamed_chunks is None
or len(streamed_chunks) == 0
):
return
content = "".join([str(getattr(chunk, "content", chunk)) for chunk in streamed_chunks])
role = (
streamed_chunks[0].__class__.__name__.replace("Chunk", "") if streamed_chunks else None
) # AIMessageChunk --> AIMessage
content = str(getattr(joined_chunks, "content", joined_chunks))
role = joined_chunks.__class__.__name__.replace("Chunk", "") # AIMessageChunk --> AIMessage
span.set_tag_str("langchain.response.content", integration.trunc(content))
if role:
span.set_tag_str("langchain.response.message_type", role)
@@ -1032,13 +1041,15 @@ def _on_span_finished(span: Span, streamed_chunks):
on_span_finished=_on_span_finished,
api_key=_extract_api_key(instance),
provider=llm_provider,
model=model,
)


@with_traced_module
def traced_llm_stream(langchain, pin, func, instance, args, kwargs):
integration: LangChainIntegration = langchain._datadog_integration
llm_provider = instance._llm_type
model = _extract_model_name(instance)

def _on_span_start(span: Span):
if not integration.is_pc_sampled_span(span):
@@ -1053,9 +1064,10 @@ def _on_span_start(span: Span):
span.set_tag_str("langchain.request.%s.parameters.%s.%s" % (llm_provider, param, k), str(v))

def _on_span_finished(span: Span, streamed_chunks):
content = "".join([str(chunk) for chunk in streamed_chunks])
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=content, operation="llm")
if span.error or not integration.is_pc_sampled_span(span):
return
content = "".join([str(chunk) for chunk in streamed_chunks])
span.set_tag_str("langchain.response.content", integration.trunc(content))

return shared_stream(
@@ -1070,6 +1082,7 @@ def _on_span_finished(span: Span, streamed_chunks):
on_span_finished=_on_span_finished,
api_key=_extract_api_key(instance),
provider=llm_provider,
model=model,
)


3 changes: 3 additions & 0 deletions ddtrace/contrib/internal/langchain/utils.py
@@ -34,6 +34,7 @@ def __next__(self):
except Exception:
self._dd_span.set_exc_info(*sys.exc_info())
self._dd_integration.metric(self._dd_span, "incr", "request.error", 1)
self._dd_span.finish()
raise


@@ -60,6 +61,7 @@ async def __anext__(self):
except Exception:
self._dd_span.set_exc_info(*sys.exc_info())
self._dd_integration.metric(self._dd_span, "incr", "request.error", 1)
self._dd_span.finish()
raise


@@ -79,6 +81,7 @@ def shared_stream(
"pin": pin,
"operation_id": f"{instance.__module__}.{instance.__class__.__name__}",
"interface_type": interface_type,
"submit_to_llmobs": True,
}

options.update(extra_options)
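For context on the utils.py change above (the added self._dd_span.finish() in the exception handlers): finishing the span when the underlying stream raises ensures an error mid-iteration still yields a finished, errored span instead of a leaked open one. A minimal sketch of that wrapper pattern, with hypothetical names (TracedStream, on_finished) rather than the actual ddtrace stream-response classes:

import sys


class TracedStream:
    """Illustrative wrapper around a streamed response (hypothetical names)."""

    def __init__(self, generator, span, on_finished):
        self._generator = generator
        self._span = span
        self._on_finished = on_finished
        self._chunks = []

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self._generator)
            self._chunks.append(chunk)
            return chunk
        except StopIteration:
            # Normal end of stream: tag and finish the span.
            self._on_finished(self._span, self._chunks)
            self._span.finish()
            raise
        except Exception:
            # Error mid-stream: record the exception and still finish the span
            # so the errored streamed call is submitted rather than left open.
            self._span.set_exc_info(*sys.exc_info())
            self._span.finish()
            raise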
78 changes: 66 additions & 12 deletions ddtrace/llmobs/_integrations/langchain.py
@@ -89,7 +89,7 @@ def _llmobs_set_tags(
elif operation == "chat":
self._llmobs_set_meta_tags_from_chat_model(span, args, kwargs, response, is_workflow=is_workflow)
elif operation == "chain":
self._llmobs_set_meta_tags_from_chain(span, inputs=kwargs, outputs=response)
self._llmobs_set_meta_tags_from_chain(span, args, kwargs, outputs=response)
elif operation == "embedding":
self._llmobs_set_meta_tags_from_embedding(span, args, kwargs, response, is_workflow=is_workflow)
elif operation == "retrieval":
@@ -129,16 +129,25 @@ def _llmobs_set_meta_tags_from_llm(

input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES
output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES
stream = span.get_tag("langchain.request.stream")

prompts = get_argument_value(args, kwargs, 0, "prompts")
prompts = get_argument_value(args, kwargs, 0, "input" if stream else "prompts")
if isinstance(prompts, str) or not isinstance(prompts, list):
prompts = [prompts]

span.set_tag_str(input_tag_key, safe_json([{"content": str(prompt)} for prompt in prompts]))
if stream:
# chat and llm take the same input types for streamed calls
span.set_tag_str(input_tag_key, safe_json(self._handle_stream_input_messages(prompts)))
else:
span.set_tag_str(input_tag_key, safe_json([{"content": str(prompt)} for prompt in prompts]))

if span.error:
span.set_tag_str(output_tag_key, safe_json([{"content": ""}]))
return
message_content = [{"content": completion[0].text} for completion in completions.generations]
if stream:
message_content = [{"content": completions}] # single completion for streams
else:
message_content = [{"content": completion[0].text} for completion in completions.generations]
span.set_tag_str(output_tag_key, safe_json(message_content))

def _llmobs_set_meta_tags_from_chat_model(
@@ -155,20 +164,36 @@ def _llmobs_set_meta_tags_from_chat_model(

input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES
output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES
stream = span.get_tag("langchain.request.stream")

input_messages = []
chat_messages = get_argument_value(args, kwargs, 0, "messages", optional=True) or []
for message_set in chat_messages:
for message in message_set:
content = message.get("content", "") if isinstance(message, dict) else getattr(message, "content", "")
role = getattr(message, "role", ROLE_MAPPING.get(message.type, ""))
input_messages.append({"content": str(content), "role": str(role)})
if stream:
chat_messages = get_argument_value(args, kwargs, 0, "input")
input_messages = self._handle_stream_input_messages(chat_messages)
else:
chat_messages = get_argument_value(args, kwargs, 0, "messages", optional=True) or []
if not isinstance(chat_messages, list):
chat_messages = [chat_messages]
for message_set in chat_messages:
for message in message_set:
content = (
message.get("content", "") if isinstance(message, dict) else getattr(message, "content", "")
)
role = getattr(message, "role", ROLE_MAPPING.get(message.type, ""))
input_messages.append({"content": str(content), "role": str(role)})
span.set_tag_str(input_tag_key, safe_json(input_messages))

if span.error:
span.set_tag_str(output_tag_key, json.dumps([{"content": ""}]))
return

output_messages = []
if stream:
content = chat_completions.content
role = chat_completions.__class__.__name__.replace("MessageChunk", "").lower() # AIMessageChunk --> ai
span.set_tag_str(output_tag_key, safe_json([{"content": content, "role": ROLE_MAPPING.get(role, "")}]))
return

for message_set in chat_completions.generations:
for chat_completion in message_set:
chat_completion_msg = chat_completion.message
@@ -196,9 +221,38 @@ def _extract_tool_calls(self, chat_completion_msg: Any) -> List[Dict[str, Any]]:
tool_calls_info.append(tool_call_info)
return tool_calls_info

def _llmobs_set_meta_tags_from_chain(self, span: Span, outputs: Any, inputs: Optional[Any] = None) -> None:
span.set_tag_str(SPAN_KIND, "workflow")
def _handle_stream_input_messages(self, inputs):
input_messages = []
if hasattr(inputs, "to_messages"): # isinstance(inputs, langchain_core.prompt_values.PromptValue)
inputs = inputs.to_messages()
elif not isinstance(inputs, list):
inputs = [inputs]
for inp in inputs:
inp_message = {}
content, role = None, None
if isinstance(inp, dict):
content = str(inp.get("content", ""))
role = inp.get("role")
elif hasattr(inp, "content"): # isinstance(inp, langchain_core.messages.BaseMessage)
content = str(inp.content)
role = inp.__class__.__name__
else:
content = str(inp)

inp_message["content"] = content
if role is not None:
inp_message["role"] = role
input_messages.append(inp_message)

return input_messages

def _llmobs_set_meta_tags_from_chain(self, span: Span, args, kwargs, outputs: Any) -> None:
span.set_tag_str(SPAN_KIND, "workflow")
stream = span.get_tag("langchain.request.stream")
if stream:
inputs = get_argument_value(args, kwargs, 0, "input")
else:
inputs = kwargs
if inputs is not None:
formatted_inputs = self.format_io(inputs)
span.set_tag_str(INPUT_VALUE, safe_json(formatted_inputs))
@@ -0,0 +1,4 @@
---
features:
- |
LLM Observability: LangChain streamed calls (``llm.stream``, ``chat_model.stream``, and ``chain.stream``) now submit spans to LLM Observability.
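
As a usage sketch of what the release note describes (assuming ddtrace and langchain-openai are installed, an OpenAI API key is configured, and the app/model names are illustrative):

from ddtrace.llmobs import LLMObs
from langchain_openai import ChatOpenAI

# Enable LLM Observability (alternatively set DD_LLMOBS_ENABLED=1 and DD_LLMOBS_ML_APP).
LLMObs.enable(ml_app="my-app")

chat = ChatOpenAI(model="gpt-4o-mini")
for chunk in chat.stream("Write a haiku about tracing"):
    print(chunk.content, end="", flush=True)
# When the stream completes (or errors), a chat span is submitted to LLM Observability.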