Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MLOB-1556] LLM Observability tagger #4718

Merged
merged 7 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
"test:core:ci": "npm run test:core -- --coverage --nyc-arg=--include=\"packages/datadog-core/src/**/*.js\"",
"test:lambda": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/lambda/**/*.spec.js\"",
"test:lambda:ci": "nyc --no-clean --include \"packages/dd-trace/src/lambda/**/*.js\" -- npm run test:lambda",
"test:llmobs": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/llmobs/**/*.spec.js\"",
"test:llmobs:ci": "nyc --no-clean --include \"packages/dd-trace/src/llmobs/**/*.js\" -- npm run test:llmobs",
"test:plugins": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/datadog-instrumentations/test/@($(echo $PLUGINS)).spec.js\" \"packages/datadog-plugin-@($(echo $PLUGINS))/test/**/*.spec.js\"",
"test:plugins:ci": "yarn services && nyc --no-clean --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS)).js\" --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS))/**/*.js\" --include \"packages/datadog-plugin-@($(echo $PLUGINS))/src/**/*.js\" -- npm run test:plugins",
"test:plugins:upstream": "node ./packages/dd-trace/test/plugins/suite.js",
Expand Down
27 changes: 27 additions & 0 deletions packages/dd-trace/src/llmobs/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
'use strict'

module.exports = {
SPAN_KIND: '_ml_obs.meta.span.kind',
SESSION_ID: '_ml_obs.session_id',
METADATA: '_ml_obs.meta.metadata',
METRICS: '_ml_obs.metrics',
ML_APP: '_ml_obs.meta.ml_app',
PROPAGATED_PARENT_ID_KEY: '_dd.p.llmobs_parent_id',
PARENT_ID_KEY: '_ml_obs.llmobs_parent_id',
TAGS: '_ml_obs.tags',
NAME: '_ml_obs.name',
TRACE_ID: '_ml_obs.trace_id',
PROPAGATED_TRACE_ID_KEY: '_dd.p.llmobs_trace_id',
ROOT_PARENT_ID: 'undefined',

MODEL_NAME: '_ml_obs.meta.model_name',
MODEL_PROVIDER: '_ml_obs.meta.model_provider',

INPUT_DOCUMENTS: '_ml_obs.meta.input.documents',
INPUT_MESSAGES: '_ml_obs.meta.input.messages',
INPUT_VALUE: '_ml_obs.meta.input.value',

OUTPUT_DOCUMENTS: '_ml_obs.meta.output.documents',
OUTPUT_MESSAGES: '_ml_obs.meta.output.messages',
OUTPUT_VALUE: '_ml_obs.meta.output.value'
}
207 changes: 207 additions & 0 deletions packages/dd-trace/src/llmobs/tagger.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
'use strict'

const logger = require('../log')
const {
SPAN_TYPE
} = require('../../../../ext/tags')
const {
MODEL_NAME,
MODEL_PROVIDER,
SESSION_ID,
ML_APP,
SPAN_KIND,
INPUT_VALUE,
OUTPUT_DOCUMENTS,
INPUT_DOCUMENTS,
OUTPUT_VALUE,
METADATA,
METRICS,
PARENT_ID_KEY,
INPUT_MESSAGES,
OUTPUT_MESSAGES,
TAGS,
NAME,
PROPAGATED_PARENT_ID_KEY,
ROOT_PARENT_ID
} = require('./constants')

/**
 * Attaches LLM Observability data to spans by setting tags whose keys come from `./constants`.
 *
 * Structured values (messages, documents, metadata, metrics, tags) are stored as JSON strings.
 * JSON serialization failures are caught and reported through `logger.warn`.
 */
class LLMObsTagger {
  /**
   * @param {object} config - configuration object; `config.llmobs.mlApp` is read as the
   *   fallback ML app name in `setLLMObsSpanTags`.
   */
  constructor (config) {
    this._config = config
  }

  /**
   * Sets the base LLM Observability tags on a span.
   *
   * @param {object} span - span to tag (must expose `setTag()` and `context()`).
   * @param {string} kind - span kind; the span type is set to 'llm' only when this is truthy,
   *   but the kind tag itself is always written.
   * @param {object} [options]
   * @param {string} [options.modelName]
   * @param {string} [options.modelProvider]
   * @param {string} [options.sessionId] - falls back to the parent span's session id tag.
   * @param {string} [options.mlApp] - falls back to the parent span's ML app tag, then to
   *   `config.llmobs.mlApp`.
   * @param {object} [options.parentLLMObsSpan] - parent span whose tags and span id are inherited.
   * @param {string} [name] - optional name tag for the span.
   */
  setLLMObsSpanTags (
    span,
    kind,
    { modelName, modelProvider, sessionId, mlApp, parentLLMObsSpan } = {},
    name
  ) {
    if (kind) span.setTag(SPAN_TYPE, 'llm') // only mark it as an llm span if it was a valid kind
    if (name) span.setTag(NAME, name)

    span.setTag(SPAN_KIND, kind)
    if (modelName) span.setTag(MODEL_NAME, modelName)
    if (modelProvider) span.setTag(MODEL_PROVIDER, modelProvider)

    sessionId = sessionId || parentLLMObsSpan?.context()._tags[SESSION_ID]
    if (sessionId) span.setTag(SESSION_ID, sessionId)

    if (!mlApp) mlApp = parentLLMObsSpan?.context()._tags[ML_APP] || this._config.llmobs.mlApp
    span.setTag(ML_APP, mlApp)

    // Parent id precedence: explicit parent span, then the id stored in the trace tags,
    // then the root sentinel.
    const parentId =
      parentLLMObsSpan?.context().toSpanId() ||
      span.context()._trace.tags[PROPAGATED_PARENT_ID_KEY] ||
      ROOT_PARENT_ID
    span.setTag(PARENT_ID_KEY, parentId)
  }

  /** Tags input and output as message lists. */
  tagLLMIO (span, inputData, outputData) {
    this._tagMessages(span, inputData, INPUT_MESSAGES)
    this._tagMessages(span, outputData, OUTPUT_MESSAGES)
  }

  /** Tags input as documents and output as a text value. */
  tagEmbeddingIO (span, inputData, outputData) {
    this._tagDocuments(span, inputData, INPUT_DOCUMENTS)
    this._tagText(span, outputData, OUTPUT_VALUE)
  }

  /** Tags input as a text value and output as documents. */
  tagRetrievalIO (span, inputData, outputData) {
    this._tagText(span, inputData, INPUT_VALUE)
    this._tagDocuments(span, outputData, OUTPUT_DOCUMENTS)
  }

  /** Tags both input and output as text values. */
  tagTextIO (span, inputData, outputData) {
    this._tagText(span, inputData, INPUT_VALUE)
    this._tagText(span, outputData, OUTPUT_VALUE)
  }

  /** Stores `metadata` on the span as a JSON string; warns if it cannot be serialized. */
  tagMetadata (span, metadata) {
    try {
      span.setTag(METADATA, JSON.stringify(metadata))
    } catch {
      logger.warn('Failed to parse span metadata. Metadata key-value pairs must be JSON serializable.')
    }
  }

  /** Stores `metrics` on the span as a JSON string; warns if it cannot be serialized. */
  tagMetrics (span, metrics) {
    try {
      span.setTag(METRICS, JSON.stringify(metrics))
    } catch {
      logger.warn('Failed to parse span metrics. Metrics key-value pairs must be JSON serializable.')
    }
  }

  /**
   * Merges `tags` with the tags already stored on the span and writes the result back as a
   * JSON string. On key conflicts the value already on the span wins.
   *
   * @param {object} span
   * @param {object} tags - key-value pairs to add; this object is not modified.
   */
  tagSpanTags (span, tags) {
    try {
      const currentTags = span.context()._tags[TAGS]
      // The stored value is the JSON string written by a previous call, so it has to be decoded
      // before merging; merging the raw string would copy its characters in as numeric keys.
      const existingTags = typeof currentTags === 'string' ? JSON.parse(currentTags) : currentTags
      span.setTag(TAGS, JSON.stringify({ ...tags, ...existingTags }))
    } catch {
      logger.warn('Failed to parse span tags. Tag key-value pairs must be JSON serializable.')
    }
  }

  /**
   * Stores a text value under `key`. Strings are stored as-is, any other truthy value is
   * JSON-stringified, and falsy values are ignored.
   */
  _tagText (span, data, key) {
    if (!data) return

    if (typeof data === 'string') {
      span.setTag(key, data)
      return
    }

    try {
      // this will help showcase unfinished promises being passed in as values
      span.setTag(key, JSON.stringify(data))
    } catch {
      const type = key === INPUT_VALUE ? 'input' : 'output'
      logger.warn(`Failed to parse ${type} value, must be JSON serializable.`)
    }
  }

  /**
   * Stores a list of documents under `key` as a JSON string. A single document is wrapped in an
   * array. Each entry must be a string or an object whose `text`, `name` and `id` (when truthy)
   * are strings and whose `score` (when truthy) is a number; invalid entries are skipped with a
   * warning and the remaining entries are still stored.
   */
  _tagDocuments (span, data, key) {
    if (!data) return

    if (!Array.isArray(data)) {
      data = [data]
    }

    try {
      const documents = data.map(document => {
        if (typeof document === 'string') {
          return document
        }

        // Skip null/primitive entries individually so they cannot abort the whole list.
        if (document === null || typeof document !== 'object') {
          logger.warn(`Invalid document ${String(document)}: documents must be strings or objects.`)
          return undefined
        }

        const { text, name, id, score } = document

        if (text && typeof text !== 'string') {
          logger.warn(`Invalid property found in ${document}: text must be a string.`)
          return undefined
        }

        if (name && typeof name !== 'string') {
          logger.warn(`Invalid property found in ${document}: name must be a string.`)
          return undefined
        }

        if (id && typeof id !== 'string') {
          logger.warn(`Invalid property found in ${document}: id must be a string.`)
          return undefined
        }

        if (score && typeof score !== 'number') {
          logger.warn(`Invalid property found in ${document}: score must be a number.`)
          return undefined
        }

        return document
      }).filter(doc => !!doc) // filter out bad documents?

      span.setTag(key, JSON.stringify(documents))
    } catch {
      const type = key === INPUT_DOCUMENTS ? 'input' : 'output'
      logger.warn(`Failed to parse ${type} documents.`)
    }
  }

  /**
   * Stores a list of messages under `key` as a JSON string. A single message is wrapped in an
   * array. Each entry must be a string or an object with a string `content` (a falsy `content`
   * becomes '') and, when truthy, a string `role`; invalid entries are skipped with a warning.
   * Message objects passed in by the caller are not modified.
   */
  _tagMessages (span, data, key) {
    if (!data) return

    if (!Array.isArray(data)) {
      data = [data]
    }

    try {
      const messages = data.map(message => {
        if (typeof message === 'string') {
          return message
        }

        // Skip null/primitive entries individually so they cannot abort the whole list.
        if (message === null || typeof message !== 'object') {
          logger.warn(`Invalid message ${String(message)}: messages must be strings or objects.`)
          return undefined
        }

        const content = message.content || ''
        const role = message.role

        if (typeof content !== 'string') {
          logger.warn(`Invalid property found in ${message}: content must be a string.`)
          return undefined
        }

        if (role && typeof role !== 'string') {
          logger.warn(`Invalid property found in ${message}: role must be a string.`)
          return undefined
        }

        // Serialize a shallow copy carrying the defaulted content instead of writing to the
        // caller's object.
        return { ...message, content }
      }).filter(msg => !!msg) // filter out bad messages?

      span.setTag(key, JSON.stringify(messages))
    } catch {
      const type = key === INPUT_MESSAGES ? 'input' : 'output'
      logger.warn(`Failed to parse ${type} messages.`)
    }
  }
}

// The tagger class is this module's sole export.
module.exports = LLMObsTagger
Loading
Loading