Skip to content

Commit

Permalink
Added Events config, Latency warning events, Tags refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander Shovdra committed May 23, 2017
1 parent d89d884 commit 649dfba
Showing 1 changed file with 71 additions and 22 deletions.
93 changes: 71 additions & 22 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ var config = (function () {
url: process.env.WATCHMEN_GRAPHITE_API,
user: process.env.WATCHMEN_GRAPHITE_API_USER,
pass: process.env.WATCHMEN_GRAPHITE_API_PASS,
},
graphite_event: {
failedCheck: process.env.WATCHMEN_GRAPHITE_EVENT_FAILEDCHECK || true,
newOutage: process.env.WATCHMEN_GRAPHITE_EVENT_NEWOUTAGE || true,
serviceBack: process.env.WATCHMEN_GRAPHITE_EVENT_SERVICEBACK || true,
latencyWarning: process.env.WATCHMEN_GRAPHITE_EVENT_LATENCYWARNING || true,
}
};
})();
Expand All @@ -23,17 +29,26 @@ var graphite = new Graphite(config.graphite_host, config.graphite_port);

/**
* Normalise a service name before it is sent to Graphite.
* Removes every "http"/"https", ":" and "//" occurrence, then replaces
* each "/" and "." with "_".
* @param {String} name
* @returns {String}
*/
function filterName (name) {
var stripped = name.replace(/http(s)?|:|\/\//g, '');
var sanitized = stripped.replace(/\/|\./g, '_');
return sanitized;
}

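/**
* Send Graphite event
* @param {Object} service - watched service; its name is used to build the event tags
* @param {Object} body - event payload; body.tags is a space-separated tag string
*/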
function sendEvent (body) {
function sendEvent (service, body) {
var serviceName = filterName(service.name);
var tags = body.tags.split(' ');

body.tags = 'watchmen ' + serviceName;
tags.forEach(function (tag) {
body.tags += ' ' + tag + ' ' + serviceName + '_' + tag;
});

return request(
{
method: 'POST',
Expand All @@ -56,6 +71,34 @@ var eventHandlers = {
onFailedCheck: function (service, data) {
// Send new outage indication with failureInterval in ms. for full downtime calculations
graphite.push('watchmen.' + filterName(service.name) + '.failedCheck', service.failureInterval);

if (config.graphite_event.failedCheck) {
sendEvent(service, {
what: 'FAILED CHECK',
tags: 'failed',
when: Math.round(new Date().getTime() / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Error: ' + JSON.stringify(data.error)
});
}
},

onLatencyWarning: function (service, data) {
// Send latency warning indication with elapsed time
graphite.push('watchmen.' + filterName(service.name) + '.latencyWarning', data.elapsedTime);

if (config.graphite_event.latencyWarning) {
sendEvent(service, {
what: 'LATENCY WARNING',
tags: 'latency',
when: Math.round(new Date().getTime() / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Default: ' + service.warningThreshold + ' ms. ' +
'Current: ' + data.elapsedTime + ' ms.'
});
}
},

onServiceOk: function (service, data) {
Expand All @@ -66,32 +109,37 @@ var eventHandlers = {
onNewOutage: function (service, outage) {
// Send new outage indication with real start date calculated from failureInterval and failuresToBeOutage
var failuresToBeOutage = isNaN(service.failuresToBeOutage) ? 1 : service.failuresToBeOutage;
if (failuresToBeOutage > 1) failuresToBeOutage -= 1;
graphite.push('watchmen.' + filterName(service.name) + '.newOutage', service.failureInterval * failuresToBeOutage);
sendEvent({
what: 'OUTAGE',
tags: 'watchmen ' + service.name + ' outage',
when: Math.round(outage.timestamp / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Error: ' + JSON.stringify(outage.error)
});

if (config.graphite_event.newOutage) {
sendEvent(service, {
what: 'NEW OUTAGE',
tags: 'outage',
when: Math.round(outage.timestamp / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Error: ' + JSON.stringify(outage.error)
});
}
},

onServiceBack: function (service, lastOutage) {
// Send downtime duration in ms. (from latest outage)
graphite.push('watchmen.' + filterName(service.name) + '.serviceBack', new Date().getTime() - lastOutage.timestamp);

var duration = Math.round((new Date().getTime() - lastOutage.timestamp) / 1000);
var minutes = Math.floor(duration / 60) + ':' + (duration % 60 ? duration % 60 : '00');
sendEvent({
what: 'RECOVERY',
tags: 'watchmen ' + service.name + ' recovery',
when: Math.round(new Date().getTime() / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Error: ' + JSON.stringify(lastOutage.error) + '. ' +
'Duration: ' + minutes + ' min.'
});
if (config.graphite_event.serviceBack) {
var duration = Math.round((new Date().getTime() - lastOutage.timestamp) / 1000);
sendEvent(service, {
what: 'RECOVERY',
tags: 'recovery',
when: Math.round(new Date().getTime() / 1000),
data: 'Service: ' + service.name + ' (' + service.url + '). ' +
'Type: ' + service.pingServiceName + '. ' +
'Error: ' + JSON.stringify(lastOutage.error) + '. ' +
'Duration: ' + (duration / 60).toFixed(2) + ' min.'
});
}
},
};

Expand All @@ -100,6 +148,7 @@ function GraphitePlugin (watchmen) {
watchmen.on('service-ok', eventHandlers.onServiceOk);
watchmen.on('service-back', eventHandlers.onServiceBack);
watchmen.on('new-outage', eventHandlers.onNewOutage);
watchmen.on('latency-warning', eventHandlers.onLatencyWarning);
}

// Expose GraphitePlugin as this module's export (the local `exports` alias is rebound to it as well).
exports = module.exports = GraphitePlugin;

0 comments on commit 649dfba

Please sign in to comment.