Skip to content

Commit 4c8bf13

Browse files
authored
feat: Added New Relic Control health check (#2841)
1 parent ed89f38 commit 4c8bf13

File tree

14 files changed

+1024
-40
lines changed

14 files changed

+1024
-40
lines changed

index.js

+4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
'use strict'
77

8+
const HealthReporter = require('./lib/health-reporter')
9+
810
// Record opening times before loading any other files.
911
const preAgentTime = process.uptime()
1012
const agentStart = Date.now()
@@ -154,6 +156,7 @@ function createAgent(config) {
154156
'New Relic requires that you name this application!\n' +
155157
'Set app_name in your newrelic.js or newrelic.cjs file or set environment variable\n' +
156158
'NEW_RELIC_APP_NAME. Not starting!'
159+
agent.healthReporter.setStatus(HealthReporter.STATUS_MISSING_APP_NAME)
157160
throw new Error(message)
158161
}
159162

@@ -167,6 +170,7 @@ function createAgent(config) {
167170

168171
agent.start(function afterStart(error) {
169172
if (error) {
173+
agent.healthReporter.setStatus(HealthReporter.STATUS_INTERNAL_UNEXPECTED_ERROR)
170174
const errorMessage = 'New Relic for Node.js halted startup due to an error:'
171175
logger.error(error, errorMessage)
172176

lib/agent.js

+28-17
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const {
4040
const synthetics = require('./synthetics')
4141
const Harvester = require('./harvester')
4242
const { createFeatureUsageMetrics } = require('./util/application-logging')
43+
const HealthReporter = require('./health-reporter')
4344

4445
// Map of valid states to whether or not data collection is valid
4546
const STATES = {
@@ -162,6 +163,8 @@ function Agent(config) {
162163
throw new Error('Agent must be created with a configuration!')
163164
}
164165

166+
this.healthReporter = new HealthReporter({ agentConfig: config })
167+
165168
// The agent base attributes which last throughout its lifetime.
166169
this._state = 'stopped'
167170
this.config = config
@@ -326,6 +329,7 @@ Agent.prototype.start = function start(callback) {
326329
if (this.config.agent_enabled !== true) {
327330
logger.warn('The New Relic Node.js agent is disabled by its configuration. ' + 'Not starting!')
328331

332+
this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_DISABLED)
329333
this.setState('stopped')
330334
return process.nextTick(callback)
331335
}
@@ -342,17 +346,21 @@ Agent.prototype.start = function start(callback) {
342346
'Has a license key been specified in the agent configuration ' +
343347
'file or via the NEW_RELIC_LICENSE_KEY environment variable?'
344348
)
349+
this.healthReporter.setStatus(HealthReporter.STATUS_LICENSE_KEY_MISSING)
345350

346351
this.setState('errored')
347352
sampler.stop()
348353
return process.nextTick(function onNextTick() {
349-
callback(new Error('Not starting without license key!'))
354+
agent.healthReporter.stop(() => {
355+
callback(new Error('Not starting without license key!'))
356+
})
350357
})
351358
}
352359
logger.info('Starting New Relic for Node.js connection process.')
353360

354361
this.collector.connect(function onStartConnect(error, response) {
355362
if (error || response.shouldShutdownRun()) {
363+
agent.healthReporter.setStatus(HealthReporter.STATUS_CONNECT_ERROR)
356364
agent.setState('errored')
357365
sampler.stop()
358366
callback(error || new Error('Failed to connect to collector'), response && response.payload)
@@ -476,23 +484,26 @@ Agent.prototype.stop = function stop(callback) {
476484

477485
sampler.stop()
478486

479-
if (this.collector.isConnected()) {
480-
this.collector.shutdown(function onShutdown(error) {
481-
if (error) {
482-
agent.setState('errored')
483-
logger.warn(error, 'Got error shutting down connection to New Relic:')
484-
} else {
485-
agent.setState('stopped')
486-
logger.info('Stopped New Relic for Node.js.')
487-
}
488-
489-
callback(error)
490-
})
491-
} else {
492-
logger.trace('Collector was not connected, invoking callback.')
487+
this.healthReporter.setStatus(HealthReporter.STATUS_AGENT_SHUTDOWN)
488+
this.healthReporter.stop(() => {
489+
if (agent.collector.isConnected()) {
490+
agent.collector.shutdown(function onShutdown(error) {
491+
if (error) {
492+
agent.setState('errored')
493+
logger.warn(error, 'Got error shutting down connection to New Relic:')
494+
} else {
495+
agent.setState('stopped')
496+
logger.info('Stopped New Relic for Node.js.')
497+
}
498+
499+
callback(error)
500+
})
501+
} else {
502+
logger.trace('Collector was not connected, invoking callback.')
493503

494-
process.nextTick(callback)
495-
}
504+
process.nextTick(callback)
505+
}
506+
})
496507
}
497508

498509
/**

lib/collector/api.js

+15
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const CollectorResponse = require('./response')
99
const facts = require('./facts')
1010
const logger = require('../logger').child({ component: 'collector_api' })
1111
const RemoteMethod = require('./remote-method')
12+
const HealthReporter = require('../health-reporter')
1213

1314
const NAMES = require('../metrics/names')
1415

@@ -221,6 +222,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
221222
ctx.errors.push(error)
222223
} else if (response && SUCCESS.has(response.status)) {
223224
dumpErrors(ctx.errors, 'connect')
225+
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HEALTHY)
224226
ctx.callback(null, CollectorResponse.success(response.payload))
225227
return
226228
}
@@ -231,6 +233,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
231233
// Retry everything except for an explicit Disconnect response code.
232234
if (response.status === 410 || response.agentRun === AGENT_RUN_BEHAVIOR.SHUTDOWN) {
233235
logger.error('The New Relic collector rejected this agent.')
236+
this._agent.healthReporter.setStatus(HealthReporter.STATUS_FORCED_DISCONNECT)
234237
return ctx.callback(null, CollectorResponse.fatal(response.payload))
235238
} else if (response.status === 401) {
236239
logger.warn(
@@ -240,6 +243,7 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
240243
' (status code %s)',
241244
response.status
242245
)
246+
this._agent.healthReporter.setStatus(HealthReporter.STATUS_INVALID_LICENSE_KEY)
243247
} else if (this._isProxyMisconfigured(error)) {
244248
logger.warn(
245249
error,
@@ -248,6 +252,17 @@ CollectorAPI.prototype._retry = function _retry(ctx, error, response) {
248252
'SSL(https). If your proxy is configured to accept connections over http, try ' +
249253
'setting `proxy` to a fully qualified URL(e.g http://proxy-host:8080).'
250254
)
255+
this._agent.healthReporter.setStatus(HealthReporter.STATUS_HTTP_PROXY_MISCONFIGURED)
256+
} else {
257+
// Sometimes we get a `CollectorResponse` instance instead of an
258+
// `http.ServerResponse`. In such cases, we do not have access to the
259+
// status code.
260+
let msg = 'Unexpected error communicating with New Relic backend.'
261+
if (response.status) {
262+
msg = `Received error status code from New Relic backend: ${response.status}.`
263+
}
264+
logger.warn(error, msg)
265+
this._agent.healthReporter.setStatus(HealthReporter.STATUS_BACKEND_ERROR)
251266
}
252267

253268
const backoff = BACKOFFS[Math.min(ctx.attempts, ctx.max) - 1]

lib/config/default.js

+39
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,45 @@ defaultConfig.definition = () => ({
147147
formatter: boolean,
148148
default: true
149149
},
150+
151+
/**
152+
* Collects configuration related to New Relic Agent Control, i.e. centralized
153+
* agent management in container-based environments.
154+
*/
155+
agent_control: {
156+
/**
157+
* Indicates that the agent is being managed by Agent Control. Must be set
158+
* to true to enable health monitoring.
159+
*/
160+
enabled: {
161+
formatter: boolean,
162+
default: false
163+
},
164+
165+
/**
166+
* Settings specific to the health monitoring aspect of Agent Control.
167+
*/
168+
health: {
169+
/**
170+
* A file URI (e.g. `file:///newrelic/apm/health`) naming the directory that the agent is expected to write
171+
* health status files to. Must be set for health monitoring to be
172+
* enabled.
173+
*/
174+
delivery_location: {
175+
default: 'file:///newrelic/apm/health'
176+
},
177+
178+
/**
179+
* The time, in seconds, that the agent should wait between writing
180+
* updates to its health status. The default interval is 5 seconds.
181+
*/
182+
frequency: {
183+
formatter: int,
184+
default: 5
185+
}
186+
}
187+
},
188+
150189
/**
151190
* The default Apdex tolerating / threshold value for applications, in
152191
* seconds. The default for Node is apdexT to 100 milliseconds, which is

0 commit comments

Comments
 (0)