Skip to content

Commit

Permalink
[DI] Adhere to maxCollectionSize limit in snapshots (#4780)
Browse files Browse the repository at this point in the history
The maxCollectionSize limit affects the following types:
- Array
- Map / WeakMap
- Set / WeakSet
- All TypedArray types

This limit contols the maximum about of elements collected for those
types. The default is 100.
  • Loading branch information
watson authored and rochdev committed Oct 31, 2024
1 parent 4b5d0a5 commit bc2f06a
Show file tree
Hide file tree
Showing 9 changed files with 273 additions and 51 deletions.
31 changes: 27 additions & 4 deletions integration-tests/debugger/index.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,9 @@ describe('Dynamic Instrumentation', function () {
elements: [
{ type: 'number', value: '1' },
{ type: 'number', value: '2' },
{ type: 'number', value: '3' }
{ type: 'number', value: '3' },
{ type: 'number', value: '4' },
{ type: 'number', value: '5' }
]
},
obj: {
Expand Down Expand Up @@ -556,6 +558,27 @@ describe('Dynamic Instrumentation', function () {

agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true, capture: { maxLength: 10 } }))
})

it('should respect maxCollectionSize', (done) => {
agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => {
const { locals } = captures.lines[probeLineNo]

assert.deepEqual(locals.arr, {
type: 'Array',
elements: [
{ type: 'number', value: '1' },
{ type: 'number', value: '2' },
{ type: 'number', value: '3' }
],
notCapturedReason: 'collectionSize',
size: 5
})

done()
})

agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true, capture: { maxCollectionSize: 3 } }))
})
})
})

Expand Down Expand Up @@ -612,7 +635,9 @@ function generateRemoteConfig (overrides = {}) {
}
}

function generateProbeConfig (overrides) {
function generateProbeConfig (overrides = {}) {
overrides.capture = { maxReferenceDepth: 3, ...overrides.capture }
overrides.sampling = { snapshotsPerSecond: 5000, ...overrides.sampling }
return {
id: randomUUID(),
version: 0,
Expand All @@ -623,8 +648,6 @@ function generateProbeConfig (overrides) {
template: 'Hello World!',
segments: [{ str: 'Hello World!' }],
captureSnapshot: false,
capture: { maxReferenceDepth: 3 },
sampling: { snapshotsPerSecond: 5000 },
evaluateAt: 'EXIT',
...overrides
}
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/debugger/target-app/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ function getSomeData () {
lstr: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.',
sym: Symbol('foo'),
regex: /bar/i,
arr: [1, 2, 3],
arr: [1, 2, 3, 4, 5],
obj: {
foo: {
baz: 42,
Expand Down
8 changes: 6 additions & 2 deletions packages/dd-trace/src/debugger/devtools_client/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ session.on('Debugger.paused', async ({ params }) => {
const timestamp = Date.now()

let captureSnapshotForProbe = null
let maxReferenceDepth, maxLength
let maxReferenceDepth, maxCollectionSize, maxLength
const probes = params.hitBreakpoints.map((id) => {
const probe = breakpoints.get(id)
if (probe.captureSnapshot) {
captureSnapshotForProbe = probe
maxReferenceDepth = highestOrUndefined(probe.capture.maxReferenceDepth, maxReferenceDepth)
maxCollectionSize = highestOrUndefined(probe.capture.maxCollectionSize, maxCollectionSize)
maxLength = highestOrUndefined(probe.capture.maxLength, maxLength)
}
return probe
Expand All @@ -38,7 +39,10 @@ session.on('Debugger.paused', async ({ params }) => {
if (captureSnapshotForProbe !== null) {
try {
// TODO: Create unique states for each affected probe based on that probes unique `capture` settings (DEBUG-2863)
processLocalState = await getLocalStateForCallFrame(params.callFrames[0], { maxReferenceDepth, maxLength })
processLocalState = await getLocalStateForCallFrame(
params.callFrames[0],
{ maxReferenceDepth, maxCollectionSize, maxLength }
)
} catch (err) {
// TODO: This error is not tied to a specific probe, but to all probes with `captureSnapshot: true`.
// However, in 99,99% of cases, there will be just a single probe, so I guess this simplification is ok?
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict'

const { collectionSizeSym } = require('./symbols')
const session = require('../session')

const LEAF_SUBTYPES = new Set(['date', 'regexp'])
Expand All @@ -14,56 +15,67 @@ module.exports = {
// each lookup will just finish in its own time and traverse the child nodes when the event loop allows it.
// Alternatively, use `Promise.all` or something like that, but the code would probably be more complex.

async function getObject (objectId, maxDepth, depth = 0) {
async function getObject (objectId, opts, depth = 0, collection = false) {
const { result, privateProperties } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
})

if (privateProperties) result.push(...privateProperties)
if (collection) {
// Trim the collection if it's too large.
// Collections doesn't contain private properties, so the code in this block doesn't have to deal with it.
removeNonEnumerableProperties(result) // remove the `length` property
const size = result.length
if (size > opts.maxCollectionSize) {
result.splice(opts.maxCollectionSize)
result[collectionSizeSym] = size
}
} else if (privateProperties) {
result.push(...privateProperties)
}

return traverseGetPropertiesResult(result, maxDepth, depth)
return traverseGetPropertiesResult(result, opts, depth)
}

async function traverseGetPropertiesResult (props, maxDepth, depth) {
async function traverseGetPropertiesResult (props, opts, depth) {
// TODO: Decide if we should filter out non-enumerable properties or not:
// props = props.filter((e) => e.enumerable)

if (depth >= maxDepth) return props
if (depth >= opts.maxReferenceDepth) return props

for (const prop of props) {
if (prop.value === undefined) continue
const { value: { type, objectId, subtype } } = prop
if (type === 'object') {
if (objectId === undefined) continue // if `subtype` is "null"
if (LEAF_SUBTYPES.has(subtype)) continue // don't waste time with these subtypes
prop.value.properties = await getObjectProperties(subtype, objectId, maxDepth, depth)
prop.value.properties = await getObjectProperties(subtype, objectId, opts, depth)
} else if (type === 'function') {
prop.value.properties = await getFunctionProperties(objectId, maxDepth, depth + 1)
prop.value.properties = await getFunctionProperties(objectId, opts, depth + 1)
}
}

return props
}

async function getObjectProperties (subtype, objectId, maxDepth, depth) {
async function getObjectProperties (subtype, objectId, opts, depth) {
if (ITERABLE_SUBTYPES.has(subtype)) {
return getIterable(objectId, maxDepth, depth)
return getIterable(objectId, opts, depth)
} else if (subtype === 'promise') {
return getInternalProperties(objectId, maxDepth, depth)
return getInternalProperties(objectId, opts, depth)
} else if (subtype === 'proxy') {
return getProxy(objectId, maxDepth, depth)
return getProxy(objectId, opts, depth)
} else if (subtype === 'arraybuffer') {
return getArrayBuffer(objectId, maxDepth, depth)
return getArrayBuffer(objectId, opts, depth)
} else {
return getObject(objectId, maxDepth, depth + 1)
return getObject(objectId, opts, depth + 1, subtype === 'array' || subtype === 'typedarray')
}
}

// TODO: The following extra information from `internalProperties` might be relevant to include for functions:
// - Bound function: `[[TargetFunction]]`, `[[BoundThis]]` and `[[BoundArgs]]`
// - Non-bound function: `[[FunctionLocation]]`, and `[[Scopes]]`
async function getFunctionProperties (objectId, maxDepth, depth) {
async function getFunctionProperties (objectId, opts, depth) {
let { result } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
Expand All @@ -72,10 +84,12 @@ async function getFunctionProperties (objectId, maxDepth, depth) {
// For legacy reasons (I assume) functions has a `prototype` property besides the internal `[[Prototype]]`
result = result.filter(({ name }) => name !== 'prototype')

return traverseGetPropertiesResult(result, maxDepth, depth)
return traverseGetPropertiesResult(result, opts, depth)
}

async function getIterable (objectId, maxDepth, depth) {
async function getIterable (objectId, opts, depth) {
// TODO: If the iterable has any properties defined on the object directly, instead of in its collection, they will
// exist in the return value below in the `result` property. We currently do not collect these.
const { internalProperties } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
Expand All @@ -93,10 +107,17 @@ async function getIterable (objectId, maxDepth, depth) {
ownProperties: true // exclude inherited properties
})

return traverseGetPropertiesResult(result, maxDepth, depth)
removeNonEnumerableProperties(result) // remove the `length` property
const size = result.length
if (size > opts.maxCollectionSize) {
result.splice(opts.maxCollectionSize)
result[collectionSizeSym] = size
}

return traverseGetPropertiesResult(result, opts, depth)
}

async function getInternalProperties (objectId, maxDepth, depth) {
async function getInternalProperties (objectId, opts, depth) {
const { internalProperties } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
Expand All @@ -105,10 +126,10 @@ async function getInternalProperties (objectId, maxDepth, depth) {
// We want all internal properties except the prototype
const props = internalProperties.filter(({ name }) => name !== '[[Prototype]]')

return traverseGetPropertiesResult(props, maxDepth, depth)
return traverseGetPropertiesResult(props, opts, depth)
}

async function getProxy (objectId, maxDepth, depth) {
async function getProxy (objectId, opts, depth) {
const { internalProperties } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
Expand All @@ -127,14 +148,14 @@ async function getProxy (objectId, maxDepth, depth) {
ownProperties: true // exclude inherited properties
})

return traverseGetPropertiesResult(result, maxDepth, depth)
return traverseGetPropertiesResult(result, opts, depth)
}

// Support for ArrayBuffer is a bit trickly because the internal structure stored in `internalProperties` is not
// documented and is not straight forward. E.g. ArrayBuffer(3) will internally contain both Int8Array(3) and
// UInt8Array(3), whereas ArrayBuffer(8) internally contains both Int8Array(8), Uint8Array(8), Int16Array(4), and
// Int32Array(2) - all representing the same data in different ways.
async function getArrayBuffer (objectId, maxDepth, depth) {
async function getArrayBuffer (objectId, opts, depth) {
const { internalProperties } = await session.post('Runtime.getProperties', {
objectId,
ownProperties: true // exclude inherited properties
Expand All @@ -149,5 +170,13 @@ async function getArrayBuffer (objectId, maxDepth, depth) {
ownProperties: true // exclude inherited properties
})

return traverseGetPropertiesResult(result, maxDepth, depth)
return traverseGetPropertiesResult(result, opts, depth)
}

function removeNonEnumerableProperties (props) {
for (let i = 0; i < props.length; i++) {
if (props[i].enumerable === false) {
props.splice(i--, 1)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const { getRuntimeObject } = require('./collector')
const { processRawState } = require('./processor')

const DEFAULT_MAX_REFERENCE_DEPTH = 3
const DEFAULT_MAX_COLLECTION_SIZE = 100
const DEFAULT_MAX_LENGTH = 255

module.exports = {
Expand All @@ -12,14 +13,18 @@ module.exports = {

async function getLocalStateForCallFrame (
callFrame,
{ maxReferenceDepth = DEFAULT_MAX_REFERENCE_DEPTH, maxLength = DEFAULT_MAX_LENGTH } = {}
{
maxReferenceDepth = DEFAULT_MAX_REFERENCE_DEPTH,
maxCollectionSize = DEFAULT_MAX_COLLECTION_SIZE,
maxLength = DEFAULT_MAX_LENGTH
} = {}
) {
const rawState = []
let processedState = null

for (const scope of callFrame.scopeChain) {
if (scope.type === 'global') continue // The global scope is too noisy
rawState.push(...await getRuntimeObject(scope.object.objectId, maxReferenceDepth))
rawState.push(...await getRuntimeObject(scope.object.objectId, { maxReferenceDepth, maxCollectionSize }))
}

// Deplay calling `processRawState` so the caller gets a chance to resume the main thread before processing `rawState`
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict'

const { collectionSizeSym } = require('./symbols')

module.exports = {
processRawState: processProperties
}
Expand Down Expand Up @@ -144,31 +146,28 @@ function toArray (type, elements, maxLength) {
if (elements === undefined) return notCapturedDepth(type)

// Perf: Create array of expected size in advance (expect that it contains only one non-enumrable element)
const expectedLength = elements.length - 1
const result = { type, elements: new Array(expectedLength) }
const result = { type, elements: new Array(elements.length) }

setNotCaptureReasonOnCollection(result, elements)

let i = 0
for (const elm of elements) {
if (elm.enumerable === false) continue // the value of the `length` property should not be part of the array
result.elements[i++] = getPropertyValue(elm, maxLength)
}

// Safe-guard in case there were more than one non-enumerable element
if (i < expectedLength) result.elements.length = i

return result
}

function toMap (type, pairs, maxLength) {
if (pairs === undefined) return notCapturedDepth(type)

// Perf: Create array of expected size in advance (expect that it contains only one non-enumrable element)
const expectedLength = pairs.length - 1
const result = { type, entries: new Array(expectedLength) }
// Perf: Create array of expected size in advance
const result = { type, entries: new Array(pairs.length) }

setNotCaptureReasonOnCollection(result, pairs)

let i = 0
for (const pair of pairs) {
if (pair.enumerable === false) continue // the value of the `length` property should not be part of the map
// The following code is based on assumptions made when researching the output of the Chrome DevTools Protocol.
// There doesn't seem to be any documentation to back it up:
//
Expand All @@ -180,22 +179,19 @@ function toMap (type, pairs, maxLength) {
result.entries[i++] = [key, val]
}

// Safe-guard in case there were more than one non-enumerable element
if (i < expectedLength) result.entries.length = i

return result
}

function toSet (type, values, maxLength) {
if (values === undefined) return notCapturedDepth(type)

// Perf: Create array of expected size in advance (expect that it contains only one non-enumrable element)
const expectedLength = values.length - 1
const result = { type, elements: new Array(expectedLength) }
const result = { type, elements: new Array(values.length) }

setNotCaptureReasonOnCollection(result, values)

let i = 0
for (const value of values) {
if (value.enumerable === false) continue // the value of the `length` property should not be part of the set
// The following code is based on assumptions made when researching the output of the Chrome DevTools Protocol.
// There doesn't seem to be any documentation to back it up:
//
Expand All @@ -205,9 +201,6 @@ function toSet (type, values, maxLength) {
result.elements[i++] = getPropertyValue(value.value.properties[0], maxLength)
}

// Safe-guard in case there were more than one non-enumerable element
if (i < expectedLength) result.elements.length = i

return result
}

Expand Down Expand Up @@ -236,6 +229,13 @@ function arrayBufferToString (bytes, size) {
return buf.toString()
}

function setNotCaptureReasonOnCollection (result, collection) {
if (collectionSizeSym in collection) {
result.notCapturedReason = 'collectionSize'
result.size = collection[collectionSizeSym]
}
}

function notCapturedDepth (type) {
return { type, notCapturedReason: 'depth' }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
'use stict'

module.exports = {
collectionSizeSym: Symbol('datadog.collectionSize')
}
Loading

0 comments on commit bc2f06a

Please sign in to comment.