Skip to content

Commit

Permalink
[MLOB-1556] LLM Observability tagger (#4718)
Browse files Browse the repository at this point in the history
LLM Observability tagger
  • Loading branch information
sabrenner authored Sep 26, 2024
1 parent 5b215f6 commit feeeb89
Show file tree
Hide file tree
Showing 3 changed files with 733 additions and 0 deletions.
24 changes: 24 additions & 0 deletions packages/dd-trace/src/llmobs/constants.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
'use strict'

module.exports = {
SPAN_KIND: '_ml_obs.meta.span.kind',
SESSION_ID: '_ml_obs.session_id',
METADATA: '_ml_obs.meta.metadata',
METRICS: '_ml_obs.metrics',
ML_APP: '_ml_obs.meta.ml_app',
PROPAGATED_PARENT_ID_KEY: '_dd.p.llmobs_parent_id',
PARENT_ID_KEY: '_ml_obs.llmobs_parent_id',
TAGS: '_ml_obs.tags',
NAME: '_ml_obs.name',
TRACE_ID: '_ml_obs.trace_id',
PROPAGATED_TRACE_ID_KEY: '_dd.p.llmobs_trace_id',
ROOT_PARENT_ID: 'undefined',

MODEL_NAME: '_ml_obs.meta.model_name',
MODEL_PROVIDER: '_ml_obs.meta.model_provider',

INPUT_DOCUMENTS: '_ml_obs.meta.input.documents',
INPUT_MESSAGES: '_ml_obs.meta.input.messages',
INPUT_VALUE: '_ml_obs.meta.input.value',

OUTPUT_DOCUMENTS: '_ml_obs.meta.output.documents',
OUTPUT_MESSAGES: '_ml_obs.meta.output.messages',
OUTPUT_VALUE: '_ml_obs.meta.output.value',

EVP_PROXY_AGENT_BASE_PATH: 'evp_proxy/v2',
EVP_PROXY_AGENT_ENDPOINT: 'evp_proxy/v2/api/v2/llmobs',
EVP_SUBDOMAIN_HEADER_NAME: 'X-Datadog-EVP-Subdomain',
Expand Down
287 changes: 287 additions & 0 deletions packages/dd-trace/src/llmobs/tagger.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
'use strict'

const logger = require('../log')
const {
SPAN_TYPE
} = require('../../../../ext/tags')
const {
MODEL_NAME,
MODEL_PROVIDER,
SESSION_ID,
ML_APP,
SPAN_KIND,
INPUT_VALUE,
OUTPUT_DOCUMENTS,
INPUT_DOCUMENTS,
OUTPUT_VALUE,
METADATA,
METRICS,
PARENT_ID_KEY,
INPUT_MESSAGES,
OUTPUT_MESSAGES,
TAGS,
NAME,
PROPAGATED_PARENT_ID_KEY,
ROOT_PARENT_ID
} = require('./constants')

class LLMObsTagger {
constructor (config) {
this._config = config
}

setLLMObsSpanTags (
span,
kind,
{ modelName, modelProvider, sessionId, mlApp, parentLLMObsSpan } = {},
name
) {
if (!this._config.llmobs.enabled) return
if (kind) span.setTag(SPAN_TYPE, 'llm') // only mark it as an llm span if it was a valid kind
if (name) span.setTag(NAME, name)

span.setTag(SPAN_KIND, kind)
if (modelName) span.setTag(MODEL_NAME, modelName)
if (modelProvider) span.setTag(MODEL_PROVIDER, modelProvider)

sessionId = sessionId || parentLLMObsSpan?.context()._tags[SESSION_ID]
if (sessionId) span.setTag(SESSION_ID, sessionId)

if (!mlApp) mlApp = parentLLMObsSpan?.context()._tags[ML_APP] || this._config.llmobs.mlApp
span.setTag(ML_APP, mlApp)

const parentId =
parentLLMObsSpan?.context().toSpanId() ||
span.context()._trace.tags[PROPAGATED_PARENT_ID_KEY] ||
ROOT_PARENT_ID
span.setTag(PARENT_ID_KEY, parentId)
}

tagLLMIO (span, inputData, outputData) {
this._tagMessages(span, inputData, INPUT_MESSAGES)
this._tagMessages(span, outputData, OUTPUT_MESSAGES)
}

tagEmbeddingIO (span, inputData, outputData) {
this._tagDocuments(span, inputData, INPUT_DOCUMENTS)
this._tagText(span, outputData, OUTPUT_VALUE)
}

tagRetrievalIO (span, inputData, outputData) {
this._tagText(span, inputData, INPUT_VALUE)
this._tagDocuments(span, outputData, OUTPUT_DOCUMENTS)
}

tagTextIO (span, inputData, outputData) {
this._tagText(span, inputData, INPUT_VALUE)
this._tagText(span, outputData, OUTPUT_VALUE)
}

tagMetadata (span, metadata) {
try {
span.setTag(METADATA, JSON.stringify(metadata))
} catch {
logger.warn('Failed to parse span metadata. Metadata key-value pairs must be JSON serializable.')
}
}

tagMetrics (span, metrics) {
try {
span.setTag(METRICS, JSON.stringify(metrics))
} catch {
logger.warn('Failed to parse span metrics. Metrics key-value pairs must be JSON serializable.')
}
}

tagSpanTags (span, tags) {
// new tags will be merged with existing tags
try {
const currentTags = span.context()._tags[TAGS]
if (currentTags) {
Object.assign(tags, JSON.parse(currentTags))
}
span.setTag(TAGS, JSON.stringify(tags))
} catch {
logger.warn('Failed to parse span tags. Tag key-value pairs must be JSON serializable.')
}
}

_tagText (span, data, key) {
if (data) {
if (typeof data === 'string') {
span.setTag(key, data)
} else {
try {
span.setTag(key, JSON.stringify(data))
} catch {
const type = key === INPUT_VALUE ? 'input' : 'output'
logger.warn(`Failed to parse ${type} value, must be JSON serializable.`)
}
}
}
}

_tagDocuments (span, data, key) {
if (data) {
if (!Array.isArray(data)) {
data = [data]
}

try {
const documents = data.map(document => {
if (typeof document === 'string') {
return { text: document }
}

if (document == null || typeof document !== 'object') {
logger.warn('Documents must be a string, object, or list of objects.')
return undefined
}

const { text, name, id, score } = document

if (typeof text !== 'string') {
logger.warn('Document text must be a string.')
return undefined
}

const documentObj = { text }

if (name) {
if (typeof name !== 'string') {
logger.warn('Document name must be a string.')
return undefined
}
documentObj.name = name
}

if (id) {
if (typeof id !== 'string') {
logger.warn('Document ID must be a string.')
return undefined
}
documentObj.id = id
}

if (score) {
if (typeof score !== 'number') {
logger.warn('Document score must be a number.')
return undefined
}
documentObj.score = score
}

return documentObj
}).filter(doc => !!doc)

span.setTag(key, JSON.stringify(documents))
} catch {
const type = key === INPUT_DOCUMENTS ? 'input' : 'output'
logger.warn(`Failed to parse ${type} documents.`)
}
}
}

_tagMessages (span, data, key) {
if (data) {
if (!Array.isArray(data)) {
data = [data]
}

try {
const messages = data.map(message => {
if (typeof message === 'string') {
return { content: message }
}

if (message == null || typeof message !== 'object') {
logger.warn('Messages must be a string, object, or list of objects')
return undefined
}

const { content = '', role } = message
let toolCalls = message.toolCalls
const messageObj = { content }

if (typeof content !== 'string') {
logger.warn('Message content must be a string.')
return undefined
}

if (role) {
if (typeof role !== 'string') {
logger.warn('Message role must be a string.')
return undefined
}
messageObj.role = role
}

if (toolCalls) {
if (!Array.isArray(toolCalls)) {
toolCalls = [toolCalls]
}

const filteredToolCalls = toolCalls.map(toolCall => {
if (typeof toolCall !== 'object') {
logger.warn('Tool call must be an object.')
return undefined
}

const { name, arguments: args, toolId, type } = toolCall
const toolCallObj = {}

if (name) {
if (typeof name !== 'string') {
logger.warn('Tool name must be a string.')
return undefined
}
toolCallObj.name = name
}

if (args) {
if (typeof args !== 'object') {
logger.warn('Tool arguments must be an object.')
return undefined
}
toolCallObj.arguments = args
}

if (toolId) {
if (typeof toolId !== 'string') {
logger.warn('Tool ID must be a string.')
return undefined
}
toolCallObj.toolId = toolId
}

if (type) {
if (typeof type !== 'string') {
logger.warn('Tool type must be a string.')
return undefined
}
toolCallObj.type = type
}

return toolCallObj
}).filter(toolCall => !!toolCall)

if (filteredToolCalls.length) {
messageObj.tool_calls = filteredToolCalls
}
}

return messageObj
}).filter(msg => !!msg)

if (messages.length) {
span.setTag(key, JSON.stringify(messages))
}
} catch {
const type = key === INPUT_MESSAGES ? 'input' : 'output'
logger.warn(`Failed to parse ${type} messages.`)
}
}
}
}

module.exports = LLMObsTagger
Loading

0 comments on commit feeeb89

Please sign in to comment.