From 90d835b37a35972cb5d699f145d740703dc65ad4 Mon Sep 17 00:00:00 2001 From: Conor Branagan Date: Mon, 3 Nov 2014 22:54:20 +0000 Subject: [PATCH] Add a service check for Riak. Remove the timeout and status code events because they provide the same functionality as a monitor on this service check. --- checks.d/riak.py | 46 ++++++++++++---------------------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/checks.d/riak.py b/checks.d/riak.py index b9f5f67b97..a6bd9601b8 100644 --- a/checks.d/riak.py +++ b/checks.d/riak.py @@ -11,6 +11,7 @@ from httplib2 import Http, HttpLib2Error class Riak(AgentCheck): + SERVICE_CHECK_NAME = 'riak.can_connect' keys = [ "vnode_gets", @@ -55,34 +56,29 @@ def __init__(self, name, init_config, agentConfig, instances=None): self.prev_coord_redirs_total = -1 - def check(self, instance): url = instance['url'] default_timeout = self.init_config.get('default_timeout', 5) timeout = float(instance.get('timeout', default_timeout)) - - aggregation_key = md5(url).hexdigest() + service_check_tags = ['url:%s' % url] try: h = Http(timeout=timeout) resp, content = h.request(url, "GET") - - except socket.timeout, e: - self.timeout_event(url, timeout, aggregation_key) - return - - except socket.error, e: - self.timeout_event(url, timeout, aggregation_key) - return - - except HttpLib2Error, e: - self.timeout_event(url, timeout, aggregation_key) - return + except (socket.timeout, socket.error, HttpLib2Error) as e: + self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, + message="Unable to fetch Riak stats: %s" % str(e), + tags=service_check_tags) if resp.status != 200: - self.status_code_event(url, resp, aggregation_key) + self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, + tags=service_check_tags, + message="Unexpected status of %s when fetching Riak stats, " \ + "response: %s" % (resp.status, content)) stats = json.loads(content) + self.service_check( + self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=service_check_tags) [self.gauge("riak." + k, stats[k]) for k in self.keys if k in stats] @@ -92,21 +88,3 @@ def check(self, instance): self.gauge('riak.coord_redirs', count) self.prev_coord_redirs_total = coord_redirs_total - - def timeout_event(self, url, timeout, aggregation_key): - self.event({ - 'timestamp': int(time.time()), - 'event_type': 'riak_check', - 'msg_title': 'riak check timeout', - 'msg_text': '%s timed out after %s seconds.' % (url, timeout), - 'aggregation_key': aggregation_key - }) - - def status_code_event(self, url, r, aggregation_key): - self.event({ - 'timestamp': int(time.time()), - 'event_type': 'riak_check', - 'msg_title': 'Invalid reponse code for riak check', - 'msg_text': '%s returned a status of %s' % (url, r.status_code), - 'aggregation_key': aggregation_key - }) \ No newline at end of file