Skip to content

Commit

Permalink
feat: Added New Relic Control health check (#2841)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsumners-nr authored Feb 4, 2025
1 parent ed89f38 commit 4c8bf13
Show file tree
Hide file tree
Showing 14 changed files with 1,024 additions and 40 deletions.
4 changes: 4 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

'use strict'

const HealthReporter = require('./lib/health-reporter')

// Record opening times before loading any other files.
const preAgentTime = process.uptime()
const agentStart = Date.now()
Expand Down Expand Up @@ -154,6 +156,7 @@ function createAgent(config) {
'New Relic requires that you name this application!\n' +
'Set app_name in your newrelic.js or newrelic.cjs file or set environment variable\n' +
'NEW_RELIC_APP_NAME. Not starting!'
agent.healthReporter.setStatus(HealthReporter.STATUS_MISSING_APP_NAME)
throw new Error(message)
}

Expand All @@ -167,6 +170,7 @@ function createAgent(config) {

agent.start(function afterStart(error) {
if (error) {
agent.healthReporter.setStatus(HealthReporter.STATUS_INTERNAL_UNEXPECTED_ERROR)
const errorMessage = 'New Relic for Node.js halted startup due to an error:'
logger.error(error, errorMessage)

Expand Down
45 changes: 28 additions & 17 deletions lib/agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const {
const synthetics = require('./synthetics')
const Harvester = require('./harvester')
const { createFeatureUsageMetrics } = require('./util/application-logging')
const HealthReporter = require('./health-reporter')

// Map of valid states to whether or not data collection is valid
const STATES = {
Expand Down Expand Up @@ -162,6 +163,8 @@ function Agent(config) {
throw new Error('Agent must be created with a configuration!')
}

this.healthReporter = new HealthReporter({ agentConfig: config })

// The agent base attributes which last throughout its lifetime.
this._state = 'stopped'
this.config = config
Expand Down Expand Up @@ -326,6 +329,7 @@ Agent.prototype.start = function start(callback) {
if (this.config.agent_enabled !== true) {
logger.warn('The New Relic Node.js agent is disabled by its configuration. ' + 'Not starting!')

this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_DISABLED)
this.setState('stopped')
return process.nextTick(callback)
}
Expand All @@ -342,17 +346,21 @@ Agent.prototype.start = function start(callback) {
'Has a license key been specified in the agent configuration ' +
'file or via the NEW_RELIC_LICENSE_KEY environment variable?'
)
this.healthReporter.setStatus(HealthReporter.STATUS_LICENSE_KEY_MISSING)

this.setState('errored')
sampler.stop()
return process.nextTick(function onNextTick() {
callback(new Error('Not starting without license key!'))
agent.healthReporter.stop(() => {
callback(new Error('Not starting without license key!'))
})
})
}
logger.info('Starting New Relic for Node.js connection process.')

this.collector.connect(function onStartConnect(error, response) {
if (error || response.shouldShutdownRun()) {
agent.healthReporter.setStatus(HealthReporter.STATUS_CONNECT_ERROR)
agent.setState('errored')
sampler.stop()
callback(error || new Error('Failed to connect to collector'), response && response.payload)
Expand Down Expand Up @@ -476,23 +484,26 @@ Agent.prototype.stop = function stop(callback) {

sampler.stop()

if (this.collector.isConnected()) {
this.collector.shutdown(function onShutdown(error) {
if (error) {
agent.setState('errored')
logger.warn(error, 'Got error shutting down connection to New Relic:')
} else {
agent.setState('stopped')
logger.info('Stopped New Relic for Node.js.')
}

callback(error)
})
} else {
logger.trace('Collector was not connected, invoking callback.')
this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_SHUTDOWN)
this.healthReporter.stop(() => {
if (agent.collector.isConnected()) {
agent.collector.shutdown(function onShutdown(error) {
if (error) {
agent.setState('errored')
logger.warn(error, 'Got error shutting down connection to New Relic:')
} else {
agent.setState('stopped')
logger.info('Stopped New Relic for Node.js.')
}

callback(error)
})
} else {
logger.trace('Collector was not connected, invoking callback.')

process.nextTick(callback)
}
process.nextTick(callback)
}
})
}

/**
Expand Down
15 changes: 15 additions & 0 deletions lib/collector/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const CollectorResponse = require('./response')
const facts = require('./facts')
const logger = require('../logger').child({ component: 'collector_api' })
const RemoteMethod = require('./remote-method')
const HealthReporter = require('../health-reporter')

const NAMES = require('../metrics/names')

Expand Down Expand Up @@ -221,6 +222,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
ctx.errors.push(error)
} else if (response && SUCCESS.has(response.status)) {
dumpErrors(ctx.errors, 'connect')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HEALTHY)
ctx.callback(null, CollectorResponse.success(response.payload))
return
}
Expand All @@ -231,6 +233,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
// Retry everything except for an explicit Disconnect response code.
if (response.status === 410 || response.agentRun === AGENT_RUN_BEHAVIOR.SHUTDOWN) {
logger.error('The New Relic collector rejected this agent.')
this._agent.healthReporter.setStatus(HealthReporter.STATUS_FORCED_DISCONNECT)
return ctx.callback(null, CollectorResponse.fatal(response.payload))
} else if (response.status === 401) {
logger.warn(
Expand All @@ -240,6 +243,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
' (status code %s)',
response.status
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_INVALID_LICENSE_KEY)
} else if (this._isProxyMisconfigured(error)) {
logger.warn(
error,
Expand All @@ -248,6 +252,17 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
'SSL(https). If your proxy is configured to accept connections over http, try ' +
'setting `proxy` to a fully qualified URL(e.g http://proxy-host:8080).'
)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HTTP_PROXY_MISCONFIGURED)
} else {
// Sometimes we get a `CollectorResponse` instance instead of an
// `http.ServerResponse`. In such cases, we do not have access to the
// status code.
let msg = 'Unexpected error communicating with New Relic backend.'
if (response.status) {
msg = `Received error status code from New Relic backend: ${response.status}.`
}
logger.warn(error, msg)
this._agent.healthReporter.setStatus(HealthReporter.STATUS_BACKEND_ERROR)
}

const backoff = BACKOFFS[Math.min(ctx.attempts, ctx.max) - 1]
Expand Down
39 changes: 39 additions & 0 deletions lib/config/default.js
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,45 @@ defaultConfig.definition = () => ({
formatter: boolean,
default: true
},

/**
* Collects configuration related to New Relic Agent Control, i.e. centralized
* agent management in container-based environments.
*/
agent_control: {
/**
* Indicates that the agent is being managed by Agent Control. Must be set
* to true to enable health monitoring. Defaults to false.
*/
enabled: {
formatter: boolean,
default: false
},

/**
* Settings specific to the health monitoring aspect of Agent Control.
*/
health: {
/**
* The location of the directory that the agent is expected to write
* health status files to. Must be set for health monitoring to be
* enabled. Note that the default value is written as a `file://` URI
* rather than a bare file system path.
*/
delivery_location: {
default: 'file:///newrelic/apm/health'
},

/**
* The time, in seconds, that the agent should wait between writing
* updates to its health status. The default interval is 5 seconds.
*/
frequency: {
formatter: int,
default: 5
}
}
},

/**
* The default Apdex tolerating / threshold value for applications, in
* seconds. The default for Node is apdexT to 100 milliseconds, which is
Expand Down
Loading

0 comments on commit 4c8bf13

Please sign in to comment.