-
Notifications
You must be signed in to change notification settings - Fork 813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Moving MongoDB to checks.d - fixes #387 #397
Changes from all commits
e0412d7
5030fc0
8eb564a
ac4048c
04876ac
97ef4b2
c54f6d6
464ceaf
30a298d
6acdc6c
b5c1982
80f0d9c
490612d
d0b35b4
c990e64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
import re | ||
import types | ||
import time | ||
from datetime import datetime | ||
|
||
from checks import AgentCheck | ||
from util import get_hostname | ||
|
||
# Fallback used when running with pymongo < 2.0, where pymongo.uri_parser
# cannot be imported.
# Not the full spec for mongo URIs -- just extract username and password
# from a URI of the form mongodb://username:password@...
# http://www.mongodb.org/display/DOCS/connections6
mongo_uri_re=re.compile(r'mongodb://(?P<username>[^:@]+):(?P<password>[^:@]+)@.*')
|
||
class MongoDb(AgentCheck): | ||
|
||
GAUGES = [ | ||
"indexCounters.btree.missRatio", | ||
"globalLock.ratio", | ||
"connections.current", | ||
"connections.available", | ||
"mem.resident", | ||
"mem.virtual", | ||
"mem.mapped", | ||
"cursors.totalOpen", | ||
"cursors.timedOut", | ||
"uptime", | ||
|
||
"stats.indexes", | ||
"stats.indexSize", | ||
"stats.objects", | ||
"stats.dataSize", | ||
"stats.storageSize", | ||
|
||
"replSet.health", | ||
"replSet.state", | ||
"replSet.replicationLag" | ||
] | ||
|
||
RATES = [ | ||
"indexCounters.btree.accesses", | ||
"indexCounters.btree.hits", | ||
"indexCounters.btree.misses", | ||
"opcounters.insert", | ||
"opcounters.query", | ||
"opcounters.update", | ||
"opcounters.delete", | ||
"opcounters.getmore", | ||
"opcounters.command", | ||
"asserts.regular", | ||
"asserts.warning", | ||
"asserts.msg", | ||
"asserts.user", | ||
"asserts.rollovers" | ||
] | ||
|
||
METRICS = GAUGES + RATES | ||
|
||
def __init__(self, name, init_config, agentConfig): | ||
AgentCheck.__init__(self, name, init_config, agentConfig) | ||
|
||
self._last_state = -1 | ||
|
||
def checkLastState(self, state, agentConfig): | ||
if self._last_state != state: | ||
self._last_state = state | ||
return self.create_event(state, agentConfig) | ||
|
||
def create_event(self, state, agentConfig): | ||
"""Create an event with a message describing the replication | ||
state of a mongo node""" | ||
|
||
def get_state_description(state): | ||
if state == 0: return 'Starting Up' | ||
elif state == 1: return 'Primary' | ||
elif state == 2: return 'Secondary' | ||
elif state == 3: return 'Recovering' | ||
elif state == 4: return 'Fatal' | ||
elif state == 5: return 'Starting up (forking threads)' | ||
elif state == 6: return 'Unknown' | ||
elif state == 7: return 'Arbiter' | ||
elif state == 8: return 'Down' | ||
elif state == 9: return 'Rollback' | ||
|
||
status = get_state_description(state) | ||
hostname = get_hostname(agentConfig) | ||
msg_title = "%s is %s" % (hostname, status) | ||
msg = "MongoDB: %s just reported as %s" % (hostname, status) | ||
|
||
self.event({ | ||
'timestamp': int(time.mktime(datetime.now().timetuple())), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably a carryover from the old code, but this will give the wrong time, since it's converting local time assuming it's utc time. It should just be int(time.time()) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will change now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made a fix for this on master - literally changed that line to: Build failed though - failures were unrelated to Mongo (Lighttpd and Apache) |
||
'event_type': 'Mongo', | ||
'api_key': agentConfig['api_key'], | ||
'msg_title': msg_title, | ||
'msg_text': msg, | ||
'host': hostname | ||
}) | ||
|
||
def check(self, instance): | ||
""" | ||
Returns a dictionary that looks a lot like what's sent back by db.serverStatus() | ||
""" | ||
if 'server' not in instance: | ||
self.log.warn("Missing 'server' in mongo config") | ||
return | ||
|
||
tags = instance.get('tags', []) | ||
|
||
try: | ||
from pymongo import Connection | ||
except ImportError: | ||
self.log.error('mongo.yaml exists but pymongo module can not be imported. Skipping check.') | ||
raise Exception('Python PyMongo Module can not be imported. Please check the installation instruction on the Datadog Website') | ||
|
||
try: | ||
from pymongo import uri_parser | ||
# Configuration a URL, mongodb://user:pass@server/db | ||
parsed = uri_parser.parse_uri(instance['server']) | ||
except ImportError: | ||
# uri_parser is pymongo 2.0+ | ||
matches = mongo_uri_re.match(instance['server']) | ||
if matches: | ||
parsed = matches.groupdict() | ||
else: | ||
parsed = {} | ||
username = parsed.get('username') | ||
password = parsed.get('password') | ||
|
||
do_auth = True | ||
if username is None or password is None: | ||
self.log.debug("Mongo: cannot extract username and password from config %s" % instance['server']) | ||
do_auth = False | ||
|
||
conn = Connection(instance['server']) | ||
db = conn['admin'] | ||
if do_auth: | ||
if not db.authenticate(username, password): | ||
self.log.error("Mongo: cannot connect with config %s" % instance['server']) | ||
|
||
status = db.command('serverStatus') # Shorthand for {'serverStatus': 1} | ||
status['stats'] = db.command('dbstats') | ||
|
||
results = {} | ||
|
||
# Handle replica data, if any | ||
# See http://www.mongodb.org/display/DOCS/Replica+Set+Commands#ReplicaSetCommands-replSetGetStatus | ||
try: | ||
data = {} | ||
|
||
replSet = db.command('replSetGetStatus') | ||
if replSet: | ||
primary = None | ||
current = None | ||
|
||
# find nodes: master and current node (ourself) | ||
for member in replSet.get('members'): | ||
if member.get('self'): | ||
current = member | ||
if int(member.get('state')) == 1: | ||
primary = member | ||
|
||
# If we have both we can compute a lag time | ||
if current is not None and primary is not None: | ||
lag = current['optimeDate'] - primary['optimeDate'] | ||
# Python 2.7 has this built in, python < 2.7 don't... | ||
if hasattr(lag,'total_seconds'): | ||
data['replicationLag'] = lag.total_seconds() | ||
else: | ||
data['replicationLag'] = (lag.microseconds + \ | ||
(lag.seconds + lag.days * 24 * 3600) * 10**6) / 10.0**6 | ||
|
||
if current is not None: | ||
data['health'] = current['health'] | ||
|
||
data['state'] = replSet['myState'] | ||
self.checkLastState(data['state'], self.agentConfig) | ||
status['replSet'] = data | ||
except Exception, e: | ||
|
||
from pymongo.errors import OperationFailure | ||
|
||
if type(e) == OperationFailure and "replSetGetStatus" in str(e): | ||
pass | ||
else: | ||
raise e | ||
|
||
# If these keys exist, remove them for now as they cannot be serialized | ||
try: | ||
status['backgroundFlushing'].pop('last_finished') | ||
except KeyError: | ||
pass | ||
try: | ||
status.pop('localTime') | ||
except KeyError: | ||
pass | ||
|
||
# Go through the metrics and save the values | ||
for m in self.METRICS: | ||
# each metric is of the form: x.y.z with z optional | ||
# and can be found at status[x][y][z] | ||
value = status | ||
try: | ||
for c in m.split("."): | ||
value = value[c] | ||
except KeyError: | ||
continue | ||
|
||
# value is now status[x][y][z] | ||
assert type(value) in (types.IntType, types.LongType, types.FloatType) | ||
|
||
# Check if metric is a gauge or rate | ||
if m in self.GAUGES: | ||
m = self.normalize(m.lower(), 'mongodb') | ||
self.gauge(m, value, tags=tags) | ||
|
||
if m in self.RATES: | ||
m = self.normalize(m.lower(), 'mongodb') + "ps" | ||
self.rate(m, value, tags=tags) | ||
|
||
@staticmethod | ||
def parse_agent_config(agentConfig): | ||
if not agentConfig.get('mongodb_server'): | ||
return False | ||
|
||
return { | ||
'instances': [{ | ||
'server': agentConfig.get('mongodb_server') | ||
}] | ||
} | ||
|
||
# Manual run: load the check and its instances from the YAML config, run the
# check against each instance and print the metrics it collected.
if __name__ == "__main__":
    check, instances = MongoDb.from_yaml('conf.d/mongo.yaml')
    for instance in instances:
        check.check(instance)
        # NOTE(review): indentation was lost in this view; the print is
        # assumed to sit inside the loop -- confirm.
        print check.get_metrics()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I missed that.
Can you change the metric names to their proper names?
Currently, these metric names are "translated" on the server side to their proper names (mongodb.the_metric).
Can you do the translation in the check, please?