Skip to content

Commit

Permalink
[DI] Implement PII redaction (#5053)
Browse files Browse the repository at this point in the history
The algorithm will look for:
- names of variables
- names of object properties
- names of keys in maps

The names will be matched against a disallow-list and if a match is
found, its value will be redacted.

The list is hardcoded and can be found here:

    packages/dd-trace/src/debugger/devtools_client/snapshot/redaction.js

It's possible to add names to the list using the following environment
variable:

    DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS

Or it's possible to remove names from the list using the following
environment variable:

    DD_DYNAMIC_INSTRUMENTATION_REDACTION_EXCLUDED_IDENTIFIERS

Each environment variable takes a list of names separated by commas.

Support for redacting instances of specific classes is not included in
this commit.
  • Loading branch information
watson authored Jan 8, 2025
1 parent e2bee27 commit b36ce05
Show file tree
Hide file tree
Showing 14 changed files with 449 additions and 24 deletions.
49 changes: 49 additions & 0 deletions integration-tests/debugger/redact.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
'use strict'

const { assert } = require('chai')
const { setup } = require('./utils')

// Default settings is tested in unit tests, so we only need to test the env vars here
describe('Dynamic Instrumentation snapshot PII redaction', function () {
describe('DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS=foo,bar', function () {
const t = setup({ env: { DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS: 'foo,bar' } })

it('should respect DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS', function (done) {
t.triggerBreakpoint()

t.agent.on('debugger-input', ({ payload: [{ 'debugger.snapshot': { captures } }] }) => {
const { locals } = captures.lines[t.breakpoint.line]

assert.deepPropertyVal(locals, 'foo', { type: 'string', notCapturedReason: 'redactedIdent' })
assert.deepPropertyVal(locals, 'bar', { type: 'string', notCapturedReason: 'redactedIdent' })
assert.deepPropertyVal(locals, 'baz', { type: 'string', value: 'c' })

// existing redaction should not be impacted
assert.deepPropertyVal(locals, 'secret', { type: 'string', notCapturedReason: 'redactedIdent' })

done()
})

t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true }))
})
})

describe('DD_DYNAMIC_INSTRUMENTATION_REDACTION_EXCLUDED_IDENTIFIERS=secret', function () {
const t = setup({ env: { DD_DYNAMIC_INSTRUMENTATION_REDACTION_EXCLUDED_IDENTIFIERS: 'secret' } })

it('should respect DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS', function (done) {
t.triggerBreakpoint()

t.agent.on('debugger-input', ({ payload: [{ 'debugger.snapshot': { captures } }] }) => {
const { locals } = captures.lines[t.breakpoint.line]

assert.deepPropertyVal(locals, 'secret', { type: 'string', value: 'shh!' })
assert.deepPropertyVal(locals, 'password', { type: 'string', notCapturedReason: 'redactedIdent' })

done()
})

t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true }))
})
})
})
26 changes: 26 additions & 0 deletions integration-tests/debugger/target-app/redact.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
'use strict'

require('dd-trace/init')
const Fastify = require('fastify')

const fastify = Fastify()

fastify.get('/', function () {
/* eslint-disable no-unused-vars */
const foo = 'a'
const bar = 'b'
const baz = 'c'
const secret = 'shh!'
const password = 'shh!'
/* eslint-enable no-unused-vars */

return { hello: 'world' } // BREAKPOINT: /
})

fastify.listen({ port: process.env.APP_PORT }, (err) => {
if (err) {
fastify.log.error(err)
process.exit(1)
}
process.send({ port: process.env.APP_PORT })
})
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict'

const { join } = require('path')
const { Worker } = require('worker_threads')
const { Worker, threadId: parentThreadId } = require('worker_threads')
const { randomUUID } = require('crypto')
const log = require('../../log')

Expand Down Expand Up @@ -46,29 +46,47 @@ class TestVisDynamicInstrumentation {
return this._readyPromise
}

start () {
start (config) {
if (this.worker) return

const { NODE_OPTIONS, ...envWithoutNodeOptions } = process.env

log.debug('Starting Test Visibility - Dynamic Instrumentation client...')

const rcChannel = new MessageChannel() // mock channel
const configChannel = new MessageChannel() // mock channel

this.worker = new Worker(
join(__dirname, 'worker', 'index.js'),
{
execArgv: [],
env: envWithoutNodeOptions,
workerData: {
config: config.serialize(),
parentThreadId,
rcPort: rcChannel.port1,
configPort: configChannel.port1,
breakpointSetChannel: this.breakpointSetChannel.port1,
breakpointHitChannel: this.breakpointHitChannel.port1
},
transferList: [this.breakpointSetChannel.port1, this.breakpointHitChannel.port1]
transferList: [
rcChannel.port1,
configChannel.port1,
this.breakpointSetChannel.port1,
this.breakpointHitChannel.port1
]
}
)
this.worker.on('online', () => {
log.debug('Test Visibility - Dynamic Instrumentation client is ready')
this._onReady()
})
this.worker.on('error', (err) => {
log.error('Test Visibility - Dynamic Instrumentation worker error', err)
})
this.worker.on('messageerror', (err) => {
log.error('Test Visibility - Dynamic Instrumentation worker messageerror', err)
})

// Allow the parent to exit even if the worker is still running
this.worker.unref()
Expand Down
36 changes: 36 additions & 0 deletions packages/dd-trace/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ class Config {
this._setValue(defaults, 'dogstatsd.port', '8125')
this._setValue(defaults, 'dsmEnabled', false)
this._setValue(defaults, 'dynamicInstrumentationEnabled', false)
this._setValue(defaults, 'dynamicInstrumentationRedactedIdentifiers', [])
this._setValue(defaults, 'dynamicInstrumentationRedactionExcludedIdentifiers', [])
this._setValue(defaults, 'env', undefined)
this._setValue(defaults, 'experimental.enableGetRumData', false)
this._setValue(defaults, 'experimental.exporter', undefined)
Expand Down Expand Up @@ -600,6 +602,8 @@ class Config {
DD_DOGSTATSD_HOST,
DD_DOGSTATSD_PORT,
DD_DYNAMIC_INSTRUMENTATION_ENABLED,
DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS,
DD_DYNAMIC_INSTRUMENTATION_REDACTION_EXCLUDED_IDENTIFIERS,
DD_ENV,
DD_EXPERIMENTAL_API_SECURITY_ENABLED,
DD_EXPERIMENTAL_APPSEC_STANDALONE_ENABLED,
Expand Down Expand Up @@ -747,6 +751,12 @@ class Config {
this._setString(env, 'dogstatsd.port', DD_DOGSTATSD_PORT)
this._setBoolean(env, 'dsmEnabled', DD_DATA_STREAMS_ENABLED)
this._setBoolean(env, 'dynamicInstrumentationEnabled', DD_DYNAMIC_INSTRUMENTATION_ENABLED)
this._setArray(env, 'dynamicInstrumentationRedactedIdentifiers', DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS)
this._setArray(
env,
'dynamicInstrumentationRedactionExcludedIdentifiers',
DD_DYNAMIC_INSTRUMENTATION_REDACTION_EXCLUDED_IDENTIFIERS
)
this._setString(env, 'env', DD_ENV || tags.env)
this._setBoolean(env, 'traceEnabled', DD_TRACE_ENABLED)
this._setBoolean(env, 'experimental.enableGetRumData', DD_TRACE_EXPERIMENTAL_GET_RUM_DATA_ENABLED)
Expand Down Expand Up @@ -927,6 +937,16 @@ class Config {
}
this._setBoolean(opts, 'dsmEnabled', options.dsmEnabled)
this._setBoolean(opts, 'dynamicInstrumentationEnabled', options.experimental?.dynamicInstrumentationEnabled)
this._setArray(
opts,
'dynamicInstrumentationRedactedIdentifiers',
options.experimental?.dynamicInstrumentationRedactedIdentifiers
)
this._setArray(
opts,
'dynamicInstrumentationRedactionExcludedIdentifiers',
options.experimental?.dynamicInstrumentationRedactionExcludedIdentifiers
)
this._setString(opts, 'env', options.env || tags.env)
this._setBoolean(opts, 'experimental.enableGetRumData', options.experimental?.enableGetRumData)
this._setString(opts, 'experimental.exporter', options.experimental?.exporter)
Expand Down Expand Up @@ -1312,6 +1332,22 @@ class Config {
this.sampler.sampleRate = this.sampleRate
updateConfig(changes, this)
}

// TODO: Refactor the Config class so it never produces any config objects that are incompatible with MessageChannel
/**
* Serializes the config object so it can be passed over a Worker Thread MessageChannel.
* @returns {Object} The serialized config object.
*/
serialize () {
// URL objects cannot be serialized over the MessageChannel, so we need to convert them to strings first
if (this.url instanceof URL) {
const config = { ...this }
config.url = this.url.toString()
return config
}

return this
}
}

function maybeInt (number) {
Expand Down
2 changes: 2 additions & 0 deletions packages/dd-trace/src/debugger/devtools_client/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ const { format } = require('node:url')
const log = require('../../log')

const config = module.exports = {
dynamicInstrumentationRedactedIdentifiers: parentConfig.dynamicInstrumentationRedactedIdentifiers,
dynamicInstrumentationRedactionExcludedIdentifiers: parentConfig.dynamicInstrumentationRedactionExcludedIdentifiers,
runtimeId: parentConfig.tags['runtime-id'],
service: parentConfig.service,
commitSHA: parentConfig.commitSHA,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict'

const { collectionSizeSym, fieldCountSym } = require('./symbols')
const { normalizeName, REDACTED_IDENTIFIERS } = require('./redaction')

module.exports = {
processRawState: processProperties
Expand All @@ -24,7 +25,14 @@ function processProperties (props, maxLength) {
return result
}

// TODO: Improve performance of redaction algorithm.
// This algorithm is probably slower than if we embedded the redaction logic inside the functions below.
// That way we didn't have to traverse objects that will just be redacted anyway.
function getPropertyValue (prop, maxLength) {
return redact(prop, getPropertyValueRaw(prop, maxLength))
}

function getPropertyValueRaw (prop, maxLength) {
// Special case for getters and setters which does not have a value property
if ('get' in prop) {
const hasGet = prop.get.type !== 'undefined'
Expand Down Expand Up @@ -185,8 +193,11 @@ function toMap (type, pairs, maxLength) {
// `pair.value` is a special wrapper-object with subtype `internal#entry`. This can be skipped and we can go
// directly to its children, of which there will always be exactly two, the first containing the key, and the
// second containing the value of this entry of the Map.
const shouldRedact = shouldRedactMapValue(pair.value.properties[0])
const key = getPropertyValue(pair.value.properties[0], maxLength)
const val = getPropertyValue(pair.value.properties[1], maxLength)
const val = shouldRedact
? notCapturedRedacted(pair.value.properties[1].value.type)
: getPropertyValue(pair.value.properties[1], maxLength)
result.entries[i++] = [key, val]
}

Expand Down Expand Up @@ -240,6 +251,25 @@ function arrayBufferToString (bytes, size) {
return buf.toString()
}

function redact (prop, obj) {
const name = getNormalizedNameFromProp(prop)
return REDACTED_IDENTIFIERS.has(name) ? notCapturedRedacted(obj.type) : obj
}

function shouldRedactMapValue (key) {
const isSymbol = key.value.type === 'symbol'
if (!isSymbol && key.value.type !== 'string') return false // WeakMaps uses objects as keys
const name = normalizeName(
isSymbol ? key.value.description : key.value.value,
isSymbol
)
return REDACTED_IDENTIFIERS.has(name)
}

function getNormalizedNameFromProp (prop) {
return normalizeName(prop.name, 'symbol' in prop)
}

function setNotCaptureReasonOnCollection (result, collection) {
if (collectionSizeSym in collection) {
result.notCapturedReason = 'collectionSize'
Expand All @@ -250,3 +280,7 @@ function setNotCaptureReasonOnCollection (result, collection) {
function notCapturedDepth (type) {
return { type, notCapturedReason: 'depth' }
}

function notCapturedRedacted (type) {
return { type, notCapturedReason: 'redactedIdent' }
}
Loading

0 comments on commit b36ce05

Please sign in to comment.