Skip to content

Commit

Permalink
Merge pull request #1485 from matewilk/obfuscate_url_paths
Browse files Browse the repository at this point in the history
Obfuscate url paths for distributed tracing and transactions
  • Loading branch information
bizob2828 authored Feb 3, 2023
2 parents c8e9cad + ab325ed commit e53517f
Show file tree
Hide file tree
Showing 14 changed files with 367 additions and 12 deletions.
43 changes: 42 additions & 1 deletion lib/config/default.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
'use strict'

const defaultConfig = module.exports
const { array, int, float, boolean, object, objectList, allowList } = require('./formatters')
const { array, int, float, boolean, object, objectList, allowList, regex } = require('./formatters')

/**
* A function that returns the definition of the agent configuration
Expand Down Expand Up @@ -1215,6 +1215,47 @@ defaultConfig.definition = () => ({
formatter: boolean,
default: false
}
},

/**
* Obfuscates URL parameters
* for outgoing and incoming requests
* for distrubuted tracing attributes - both transaction and span attributes
* for transaction trace transaction details
*/
url_obfuscation: {
/**
* Toggles whether to obfuscate URL parameters
*/
enabled: {
formatter: boolean,
default: false
},

regex: {
/**
* A regular expression to match URL parameters to obfuscate
*/
pattern: {
formatter: regex,
default: null
},

/**
* A string containing RegEx flags to use when matching URL parameters
*/
flags: {
default: ''
},

/**
* A string containing a replacement value for URL parameters
* can contain refferences to capture groups in the pattern
*/
replacement: {
default: ''
}
}
}
})

Expand Down
16 changes: 16 additions & 0 deletions lib/config/formatters.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,19 @@ formatters.objectList = function objectList(val, logger) {
formatters.allowList = function allowList(list, val) {
return list.includes(val) ? val : list[0]
}

/**
* Parse a config setting as a regex
*
* @param {string} val valid regex
* @param {logger} logger agent logger instance
* @returns {RegExp} regex
*/
formatters.regex = function regex(val, logger) {
try {
return new RegExp(val)
} catch (error) {
logger.error(`New Relic configurator could not validate regex: ${val}`)
logger.error(error.stack)
}
}
10 changes: 8 additions & 2 deletions lib/config/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ Config.prototype._fromPassed = function _fromPassed(external, internal, arbitrar
return
}

if (typeof node === 'object' && !Array.isArray(node)) {
if (typeof node === 'object' && !Array.isArray(node) && !(node instanceof RegExp)) {
// is top level and can have arbitrary keys
const allowArbitrary = internal === this || HAS_ARBITRARY_KEYS.has(key)
this._fromPassed(node, internal[key], allowArbitrary)
Expand Down Expand Up @@ -1095,7 +1095,13 @@ Config.prototype._fromEnvironment = function _fromEnvironment(
setFromEnv({ config, key, envVar, paths })
} else if (type === 'object') {
if (value.hasOwnProperty('env')) {
setFromEnv({ config, key, envVar: value.env, paths, formatter: value.formatter })
setFromEnv({
config,
key,
envVar: value.env,
paths,
formatter: value.formatter
})
} else if (value.hasOwnProperty('default')) {
const envVar = deriveEnvVar(key, paths)
setFromEnv({ config, key, envVar, formatter: value.formatter, paths })
Expand Down
1 change: 1 addition & 0 deletions lib/instrumentation/core/http-outbound.js
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ function applySegment(opts, makeRequest, hostname, segment) {
segment.start()
const request = makeRequest(opts)
const parsed = urltils.scrubAndParseParameters(request.path)
parsed.path = urltils.obfuscatePath(segment.transaction.agent.config, parsed.path)
const proto = parsed.protocol || opts.protocol || 'http:'
segment.name += parsed.path
request[symbols.segment] = segment
Expand Down
2 changes: 1 addition & 1 deletion lib/instrumentation/core/http.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ function wrapEmitWithTransaction(agent, emit, isHTTPS) {

// the error tracer needs a URL for tracing, even though naming overwrites
transaction.parsedUrl = url.parse(request.url, true)
transaction.url = transaction.parsedUrl.pathname
transaction.url = urltils.obfuscatePath(agent.config, transaction.parsedUrl.path)
transaction.verb = request.method

// URL is sent as an agent attribute with transaction events
Expand Down
5 changes: 4 additions & 1 deletion lib/transaction/trace/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
/* eslint sonarjs/cognitive-complexity: ["error", 19] -- TODO: https://issues.newrelic.com/browse/NEWRELIC-5252 */

const codec = require('../../util/codec')
const urltils = require('../../util/urltils')
const Segment = require('./segment')
const { Attributes, MAXIMUM_CUSTOM_ATTRIBUTES } = require('../../attributes')
const logger = require('../../logger').child({ component: 'trace' })
Expand Down Expand Up @@ -329,7 +330,9 @@ Trace.prototype._getRequestUri = function _getRequestUri() {
const canAddUri = this.attributes.hasValidDestination(DESTINATIONS.TRANS_TRACE, REQUEST_URI_KEY)
let requestUri = null // must be null if excluded
if (canAddUri) {
requestUri = this.transaction.url || UNKNOWN_URI_PLACEHOLDER
// obfuscate the path if config is set
const url = urltils.obfuscatePath(this.transaction.agent.config, this.transaction.url)
requestUri = url || UNKNOWN_URI_PLACEHOLDER
}

return requestUri
Expand Down
24 changes: 24 additions & 0 deletions lib/util/urltils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
'use strict'

const url = require('url')
const logger = require('../logger').child({ component: 'urltils' })

const LOCALHOST_NAMES = {
'localhost': true,
Expand Down Expand Up @@ -162,6 +163,29 @@ module.exports = {
}
},

/**
* Obfuscates path parameters with regex from config
*
* @param {Config} config The configuration containing the regex
* @param {string} path The path to be obfuscated
* @returns {string} The obfuscated path or the original path
*/
obfuscatePath: function obfuscatePath(config, path) {
const { enabled, regex } = config.url_obfuscation
if (typeof path !== 'string' || !enabled || !regex) {
return path
}

const { pattern, flags = '', replacement = '' } = regex
try {
const regexPattern = new RegExp(pattern, flags)
return path.replace(regexPattern, replacement)
} catch (e) {
logger.warn('Invalid regular expression for url_obfuscation.regex.pattern', pattern)
return path
}
},

/**
* Copy a set of request parameters from one object to another,
* but do not overwrite any existing parameters in destination,
Expand Down
12 changes: 12 additions & 0 deletions test/unit/config/config-defaults.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -270,4 +270,16 @@ tap.test('with default properties', (t) => {
t.equal(configuration.code_level_metrics.enabled, false)
t.end()
})

t.test('should default `url_obfuscation` accordingly', (t) => {
t.same(configuration.url_obfuscation, {
enabled: false,
regex: {
pattern: null,
flags: '',
replacement: ''
}
})
t.end()
})
})
54 changes: 49 additions & 5 deletions test/unit/config/config-env.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -396,10 +396,15 @@ tap.test('when overriding configuration values via environment variables', (t) =
})

t.test('should pick up which status codes are expectedd when using a range', (t) => {
idempotentEnv({ NEW_RELIC_ERROR_COLLECTOR_EXPECTED_ERROR_CODES: '401, 420-421, 502' }, (tc) => {
t.same(tc.error_collector.expected_status_codes, [401, 420, 421, 502])
t.end()
})
idempotentEnv(
{
NEW_RELIC_ERROR_COLLECTOR_EXPECTED_ERROR_CODES: '401, 420-421, 502'
},
(tc) => {
t.same(tc.error_collector.expected_status_codes, [401, 420, 421, 502])
t.end()
}
)
})

t.test('should not add codes given with invalid range', (t) => {
Expand Down Expand Up @@ -652,7 +657,9 @@ tap.test('when overriding configuration values via environment variables', (t) =
t.test('should pick up custom esm entrypoint configuration', (t) => {
const customEntryPoint = '/patht/to/custom-instrumentation.mjs'
idempotentEnv(
{ NEW_RELIC_API_ESM_CUSTOM_INSTRUMENTATION_ENTRYPOINT: customEntryPoint },
{
NEW_RELIC_API_ESM_CUSTOM_INSTRUMENTATION_ENTRYPOINT: customEntryPoint
},
function (tc) {
t.equal(tc.api.esm.custom_instrumentation_entrypoint, customEntryPoint)
t.end()
Expand Down Expand Up @@ -688,4 +695,41 @@ tap.test('when overriding configuration values via environment variables', (t) =
t.end()
})
})

t.test('should pick up url_obfuscation.enabled', (t) => {
const env = {
NEW_RELIC_URL_OBFUSCATION_ENABLED: 'true'
}

idempotentEnv(env, (config) => {
t.equal(config.url_obfuscation.enabled, true)
t.end()
})
})

t.test('should pick up url_obfuscation.regex parameters', (t) => {
const env = {
NEW_RELIC_URL_OBFUSCATION_REGEX_PATTERN: 'regex',
NEW_RELIC_URL_OBFUSCATION_REGEX_FLAGS: 'g',
NEW_RELIC_URL_OBFUSCATION_REGEX_REPLACEMENT: 'replacement'
}

idempotentEnv(env, (config) => {
t.same(config.url_obfuscation.regex.pattern, /regex/)
t.equal(config.url_obfuscation.regex.flags, 'g')
t.equal(config.url_obfuscation.regex.replacement, 'replacement')
t.end()
})
})

t.test('should set regex to undefined if invalid regex', (t) => {
const env = {
NEW_RELIC_URL_OBFUSCATION_REGEX_PATTERN: '['
}

idempotentEnv(env, (config) => {
t.notOk(config.url_obfuscation.regex.pattern)
t.end()
})
})
})
20 changes: 20 additions & 0 deletions test/unit/config/config-formatters.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,24 @@ tap.test('config formatters', (t) => {
t.end()
})
})

tap.test('regex', (t) => {
t.autoend()

t.test('should return regex if valid', (t) => {
const val = '/hello/'
const result = formatters.regex(val)
t.same(result, /\/hello\//)
t.end()
})

t.test('should log error and return null if regex is invalid', (t) => {
const loggerMock = { error: sinon.stub() }
const val = '[a-z'
t.notOk(formatters.regex(val, loggerMock))
t.equal(loggerMock.error.args[0][0], `New Relic configurator could not validate regex: [a-z`)
t.match(loggerMock.error.args[1][0], /SyntaxError: Invalid regular expression/)
t.end()
})
})
})
31 changes: 30 additions & 1 deletion test/unit/instrumentation/http/http.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,35 @@ test('built-in http module instrumentation', (t) => {
}
)

t.test('when url_obfuscation regex pattern is set, obfuscate segment url attributes', (t) => {
agent.config.url_obfuscation = {
enabled: true,
regex: {
pattern: '.*',
replacement: '/***'
}
}
transaction = null
makeRequest(
{
port: 8123,
host: 'localhost',
path: '/foo4/bar4',
method: 'GET'
},
finish
)

function finish() {
const segment = transaction.baseSegment
const spanAttributes = segment.attributes.get(DESTINATIONS.SPAN_EVENT)

t.equal(spanAttributes['request.uri'], '/***')

t.end()
}
})

t.test('successful request', (t) => {
transaction = null
const refererUrl = 'https://www.google.com/search/cats?scrubbed=false'
Expand Down Expand Up @@ -687,7 +716,7 @@ test('built-in http module instrumentation', (t) => {
const traceparent = '00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-00'
const priority = 0.789
// eslint-disable-next-line
const tracestate = `190@nr=0-0-709288-8599547-f85f42fd82a4cf1d-164d3b4b0d09cb05-1-${priority}-1563574856827`
const tracestate = `190@nr=0-0-709288-8599547-f85f42fd82a4cf1d-164d3b4b0d09cb05-1-${priority}-1563574856827`;
http = require('http')
agent.config.trusted_account_key = 190

Expand Down
27 changes: 27 additions & 0 deletions test/unit/instrumentation/http/outbound.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,33 @@ tap.test('instrumentOutbound', (t) => {
t.end()
})

t.test('should obfuscate url path if url_obfuscation regex pattern is set', (t) => {
helper.unloadAgent(agent)
agent = helper.loadMockedAgent({
url_obfuscation: {
enabled: true,
regex: {
pattern: '.*',
replacement: '/***'
}
}
})
const req = new events.EventEmitter()
helper.runInTransaction(agent, function (transaction) {
instrumentOutbound(agent, { host: HOSTNAME, port: PORT }, makeFakeRequest)
t.same(transaction.trace.root.children[0].getAttributes(), {
procedure: 'GET',
url: `http://${HOSTNAME}:${PORT}/***`
})

function makeFakeRequest() {
req.path = '/asdf/foo/bar/baz?test=123&test2=456'
return req
}
})
t.end()
})

t.test('should strip query parameters from path in transaction trace segment', (t) => {
const req = new events.EventEmitter()
helper.runInTransaction(agent, function (transaction) {
Expand Down
Loading

0 comments on commit e53517f

Please sign in to comment.