Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving MongoDB to checks.d - fixes #387 #397

Merged
merged 15 commits into from
Mar 11, 2013
Merged
234 changes: 234 additions & 0 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import re
import types
import time
from datetime import datetime

from checks import AgentCheck
from util import get_hostname

# Fallback used when running with pymongo < 2.0 (no uri_parser module).
# This is not the full spec for mongo URIs -- it only extracts the username
# and password from "mongodb://user:pass@...", and neither of them may
# contain ':' or '@'.
# http://www.mongodb.org/display/DOCS/connections6
mongo_uri_re = re.compile(
    r'mongodb://(?P<username>[^:@]+):(?P<password>[^:@]+)@.*'
)

class MongoDb(AgentCheck):

# Values submitted with self.gauge().  Each entry is a dotted lookup path
# into the status document that check() assembles, not the final metric
# name: check() lower-cases the path and passes it through
# self.normalize(..., 'mongodb') before submitting it (requested in review so
# that the translation happens in the check itself).
GAUGES = [
    # from serverStatus
    "indexCounters.btree.missRatio",
    "globalLock.ratio",
    "connections.current",
    "connections.available",
    "mem.resident",
    "mem.virtual",
    "mem.mapped",
    "cursors.totalOpen",
    "cursors.timedOut",
    "uptime",

    # from dbstats, stored by check() under "stats"
    "stats.indexes",
    "stats.indexSize",
    "stats.objects",
    "stats.dataSize",
    "stats.storageSize",

    # derived from replSetGetStatus, stored by check() under "replSet"
    "replSet.health",
    "replSet.state",
    "replSet.replicationLag",
]

# Values submitted with self.rate().  Same dotted-path convention as the
# gauge list; check() appends "ps" to the normalized name of each of these.
RATES = [
    # index counters
    "indexCounters.btree.accesses",
    "indexCounters.btree.hits",
    "indexCounters.btree.misses",

    # operation counters
    "opcounters.insert",
    "opcounters.query",
    "opcounters.update",
    "opcounters.delete",
    "opcounters.getmore",
    "opcounters.command",

    # assertion counters
    "asserts.regular",
    "asserts.warning",
    "asserts.msg",
    "asserts.user",
    "asserts.rollovers",
]

# Every metric path that check() looks up: all gauges, then all rates.
METRICS = GAUGES + RATES

def __init__(self, name, init_config, agentConfig):
    """Initialise the base check and start with no known replica-set state."""
    AgentCheck.__init__(self, name, init_config, agentConfig)

    # -1 is not one of the state codes described in create_event(), so the
    # first state handed to checkLastState() always counts as a change.
    self._last_state = -1

def checkLastState(self, state, agentConfig):
    """Create a replication-state event when *state* differs from the last one.

    The new state is remembered before the event is created.  Returns
    whatever create_event() returns when the state changed, and None when
    it did not.
    """
    if state == self._last_state:
        # Nothing changed since the previous call: no event.
        return None

    self._last_state = state
    return self.create_event(state, agentConfig)

def create_event(self, state, agentConfig):
    """Submit an event describing the replication state of a mongo node.

    state       -- numeric replica-set state code (check() passes the
                   ``myState`` value returned by replSetGetStatus).
    agentConfig -- agent configuration; ``api_key`` is read from it and the
                   whole dict is passed to get_hostname().

    The event is handed to self.event(); nothing is returned.
    """
    state_descriptions = {
        0: 'Starting Up',
        1: 'Primary',
        2: 'Secondary',
        3: 'Recovering',
        4: 'Fatal',
        5: 'Starting up (forking threads)',
        6: 'Unknown',
        7: 'Arbiter',
        8: 'Down',
        9: 'Rollback',
    }

    # A code missing from the table used to end up as the text "None" in
    # the event; describe it as 'Unknown' instead.
    status = state_descriptions.get(state, 'Unknown')
    hostname = get_hostname(agentConfig)
    msg_title = "%s is %s" % (hostname, status)
    msg = "MongoDB: %s just reported as %s" % (hostname, status)

    self.event({
        # time.time() already is the epoch timestamp.  The previous
        # datetime.now() -> timetuple() -> time.mktime() round trip went
        # through local time for no benefit (raised in code review, which
        # asked for exactly this replacement).
        'timestamp': int(time.time()),
        'event_type': 'Mongo',
        'api_key': agentConfig['api_key'],
        'msg_title': msg_title,
        'msg_text': msg,
        'host': hostname
    })

def check(self, instance):
    """Collect MongoDB metrics for one configured instance.

    instance -- dict from the check configuration.  ``server`` (a
                mongodb:// URI) is required; ``tags`` is an optional list
                attached to every submitted metric.

    Nothing is returned.  The method builds a document that looks a lot
    like what's sent back by db.serverStatus(), extended with ``stats``
    (the dbstats output) and, when replSetGetStatus succeeds, ``replSet``.
    Every path in METRICS that is present in that document is then
    submitted as a gauge or a rate.

    Raises Exception when pymongo cannot be imported.  An OperationFailure
    from replSetGetStatus is ignored when its message mentions
    "replSetGetStatus" (presumably a server that is not part of a replica
    set -- confirm); any other error propagates.
    """
    if 'server' not in instance:
        self.log.warn("Missing 'server' in mongo config")
        return

    server = instance['server']
    tags = instance.get('tags', [])

    try:
        from pymongo import Connection
    except ImportError:
        self.log.error('mongo.yaml exists but pymongo module can not be imported. Skipping check.')
        raise Exception('Python PyMongo Module can not be imported. Please check the installation instruction on the Datadog Website')

    try:
        from pymongo import uri_parser
        # Configuration a URL, mongodb://user:pass@server/db
        parsed = uri_parser.parse_uri(server)
    except ImportError:
        # uri_parser is pymongo 2.0+
        matches = mongo_uri_re.match(server)
        if matches:
            parsed = matches.groupdict()
        else:
            parsed = {}
    username = parsed.get('username')
    password = parsed.get('password')

    # NOTE(review): the two log lines below print the configured URI, which
    # can carry the password -- consider masking it.
    do_auth = username is not None and password is not None
    if not do_auth:
        self.log.debug("Mongo: cannot extract username and password from config %s" % server)

    conn = Connection(server)
    db = conn['admin']
    if do_auth and not db.authenticate(username, password):
        self.log.error("Mongo: cannot connect with config %s" % server)

    status = db.command('serverStatus')  # Shorthand for {'serverStatus': 1}
    status['stats'] = db.command('dbstats')

    # Handle replica data, if any
    # See http://www.mongodb.org/display/DOCS/Replica+Set+Commands#ReplicaSetCommands-replSetGetStatus
    from pymongo.errors import OperationFailure
    try:
        replSet = db.command('replSetGetStatus')
    except OperationFailure as e:
        if "replSetGetStatus" not in str(e):
            # Bare raise keeps the original traceback.
            raise
        replSet = None

    if replSet:
        data = {}
        primary = None
        current = None

        # find nodes: master and current node (ourself)
        for member in replSet.get('members') or []:
            if member.get('self'):
                current = member
            if int(member.get('state')) == 1:
                primary = member

        # If we have both we can compute a lag time.  A member without an
        # optimeDate (presumably an arbiter -- confirm) is skipped rather
        # than failing the whole check with a KeyError.
        if (current is not None and primary is not None
                and 'optimeDate' in current and 'optimeDate' in primary):
            # NOTE(review): as written this is negative whenever this node
            # is behind the primary -- confirm the intended sign before
            # changing it, since existing graphs depend on it.
            lag = current['optimeDate'] - primary['optimeDate']
            # Python 2.7 has this built in, python < 2.7 don't...
            if hasattr(lag, 'total_seconds'):
                data['replicationLag'] = lag.total_seconds()
            else:
                data['replicationLag'] = (
                    lag.microseconds
                    + (lag.seconds + lag.days * 24 * 3600) * 10**6
                ) / 10.0**6

        if current is not None:
            data['health'] = current['health']

        data['state'] = replSet['myState']
        self.checkLastState(data['state'], self.agentConfig)
        status['replSet'] = data

    # If these keys exist, remove them for now as they cannot be serialized
    status.get('backgroundFlushing', {}).pop('last_finished', None)
    status.pop('localTime', None)

    numeric_types = (types.IntType, types.LongType, types.FloatType)

    # Go through the metrics and save the values
    for m in self.METRICS:
        # each metric is of the form: x.y.z with z optional
        # and can be found at status[x][y][z]
        value = status
        try:
            for c in m.split("."):
                value = value[c]
        except (KeyError, TypeError):
            # Path not present in this server's output.
            continue

        # value is now status[x][y][z].  This used to be an assert, which
        # disappears under "python -O" and otherwise aborts every remaining
        # metric; skip just the offending one instead.
        if type(value) not in numeric_types:
            self.log.warn("Mongo: skipping metric %s, value is not numeric: %r" % (m, value))
            continue

        # Check if metric is a gauge or rate.  The submitted name is kept
        # in its own variable so the membership tests always see the
        # original path.
        if m in self.GAUGES:
            metric_name = self.normalize(m.lower(), 'mongodb')
            self.gauge(metric_name, value, tags=tags)
        elif m in self.RATES:
            metric_name = self.normalize(m.lower(), 'mongodb') + "ps"
            self.rate(metric_name, value, tags=tags)

@staticmethod
def parse_agent_config(agentConfig):
    """Build this check's configuration from the agent-level settings.

    Returns False when ``mongodb_server`` is missing or empty in
    *agentConfig*; otherwise a dict with a single instance whose ``server``
    is that value.
    """
    server = agentConfig.get('mongodb_server')
    if not server:
        return False

    instance = {'server': server}
    return {'instances': [instance]}

# Manual run: load the check and its instances from conf.d/mongo.yaml, call
# check() on each instance and print what get_metrics() returns.
# NOTE(review): indentation was lost in this copy, so it is unclear whether the
# print runs once per instance (inside the loop) or once at the end -- confirm
# against the repository before reformatting.
if __name__ == "__main__":
check, instances = MongoDb.from_yaml('conf.d/mongo.yaml')
for instance in instances:
check.check(instance)
print check.get_metrics()
10 changes: 0 additions & 10 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from checks.nagios import Nagios
from checks.build import Hudson
from checks.db.mysql import MySql
from checks.db.mongo import MongoDb
from checks.db.mcache import Memcache
from checks.queue import RabbitMq
from checks.ganglia import Ganglia
Expand Down Expand Up @@ -77,7 +76,6 @@ def __init__(self, agentConfig, emitters, systemStats):
}

# Old-style metric checks
self._mongodb = MongoDb(log)
self._mysql = MySql(log)
self._rabbitmq = RabbitMq()
self._ganglia = Ganglia(log)
Expand Down Expand Up @@ -199,7 +197,6 @@ def run(self, checksd=None, start_event=True):
# Run old-style checks
mysqlStatus = self._mysql.check(self.agentConfig)
rabbitmq = self._rabbitmq.check(log, self.agentConfig)
mongodb = self._mongodb.check(self.agentConfig)
gangliaData = self._ganglia.check(self.agentConfig)
cassandraData = self._cassandra.check(log, self.agentConfig)
dogstreamData = self._dogstream.check(self.agentConfig)
Expand All @@ -218,13 +215,6 @@ def run(self, checksd=None, start_event=True):
# RabbitMQ
if rabbitmq:
payload['rabbitMQ'] = rabbitmq

# MongoDB
if mongodb:
if mongodb.has_key('events'):
events['Mongo'] = mongodb['events']['Mongo']
del mongodb['events']
payload['mongoDB'] = mongodb

# dogstream
if dogstreamData:
Expand Down
Loading