From 2302ad9479124e96253e5776c5b37e755e6ece67 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 4 Dec 2012 17:13:48 -0500 Subject: [PATCH 01/25] consistent logging using centrally configured python logger --- agent.py | 67 ++++++++++---------------------------- aggregator.py | 2 +- checks/__init__.py | 6 ++-- checks/build.py | 2 +- checks/check_status.py | 2 +- checks/collector.py | 4 +-- checks/datadog.py | 4 +-- checks/db/elastic.py | 2 +- checks/nagios.py | 9 ++--- compat/tornadotcpserver.py | 4 ++- config.py | 50 ++++++++++++++++------------ daemon.py | 27 +++++++-------- datadog.conf.example | 41 ++++++----------------- ddagent.py | 27 ++++++++------- dogstatsd.py | 16 +++++---- graphite.py | 26 ++++++++------- pup/pup.py | 11 ++++--- tests/badconfig.conf | 6 +--- transaction.py | 26 +++++++-------- util.py | 37 ++++++++------------- win32/agent.py | 2 +- 21 files changed, 162 insertions(+), 209 deletions(-) diff --git a/agent.py b/agent.py index bf6ceacbf8..39d266eca1 100755 --- a/agent.py +++ b/agent.py @@ -10,6 +10,9 @@ (C) Datadog, Inc. 2010 all rights reserved ''' +# set up logging before importing any other components +from config import initialize_logging; initialize_logging() + # Core modules import logging import modules @@ -42,8 +45,7 @@ WATCHDOG_MULTIPLIER = 10 # Globals -agent_logger = logging.getLogger('agent') - +logger = logging.getLogger('ddagent.agent') class Agent(Daemon): """ @@ -56,7 +58,7 @@ def __init__(self, pidfile): self.collector = None def _handle_sigterm(self, signum, frame): - agent_logger.debug("Caught sigterm. Stopping run loop.") + logger.debug("Caught sigterm. Stopping run loop.") self.run_forever = False if self.collector: self.collector.stop() @@ -82,7 +84,7 @@ def run(self): # Configure the watchdog. check_frequency = int(agentConfig['check_freq']) watchdog = self._get_watchdog(check_frequency, agentConfig) - + # Run the main loop. while self.run_forever: # Do the work. @@ -103,7 +105,7 @@ def run(self): # Explicitly kill the process, because it might be running # as a daemon. - agent_logger.info("Exiting. Bye bye.") + logger.info("Exiting. Bye bye.") sys.exit(0) def _get_emitters(self, agentConfig): @@ -127,51 +129,14 @@ def _set_agent_config_hostname(self, agentConfig): if agentConfig.get('hostname') is None and agentConfig.get('use_ec2_instance_id'): instanceId = EC2.get_instance_id() if instanceId is not None: - agent_logger.info("Running on EC2, instanceId: %s" % instanceId) + logger.info("Running on EC2, instanceId: %s" % instanceId) agentConfig['hostname'] = instanceId else: - agent_logger.info('Not running on EC2, using hostname to identify this server') + logger.info('Not running on EC2, using hostname to identify this server') return agentConfig - -def setup_logging(agentConfig): - """Configure logging to use syslog whenever possible. 
- Also controls debug_mode.""" - if agentConfig['debug_mode']: - logFile = "/tmp/dd-agent.log" - logging.basicConfig(filename=logFile, filemode='w', - level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - logging.info("Logging to %s" % logFile) - else: - try: - from logging.handlers import SysLogHandler - rootLog = logging.getLogger() - rootLog.setLevel(logging.INFO) - - sys_log_addr = "/dev/log" - - # Special-case macs - if sys.platform == 'darwin': - sys_log_addr = "/var/run/syslog" - - handler = SysLogHandler(address=sys_log_addr, facility=SysLogHandler.LOG_DAEMON) - formatter = logging.Formatter("dd-agent - %(name)s - %(levelname)s - %(message)s") - handler.setFormatter(formatter) - rootLog.addHandler(handler) - logging.info('Logging to syslog is set up') - except Exception,e: - sys.stderr.write("Error while setting up syslog logging (%s). No logging available" % str(e)) - logging.disable(logging.ERROR) - - def main(): options, args = get_parsed_args() - agentConfig = get_config() - - # Logging - setup_logging(agentConfig) - - COMMANDS = [ 'start', 'stop', @@ -192,7 +157,7 @@ def main(): return 3 pid_file = PidFile('dd-agent') - + # Only initialize the Agent if we're starting or stopping it. if command in ['start', 'stop', 'restart', 'foreground']: @@ -202,19 +167,19 @@ def main(): agent = Agent(pid_file.get_path()) if 'start' == command: - logging.info('Start daemon') + logger.info('Start daemon') agent.start() elif 'stop' == command: - logging.info('Stop daemon') + logger.info('Stop daemon') agent.stop() elif 'restart' == command: - logging.info('Restart daemon') + logger.info('Restart daemon') agent.restart() elif 'foreground' == command: - logging.info('Running in foreground') + logger.info('Running in foreground') agent.run() # Commands that don't need the agent to be initialized. @@ -223,8 +188,10 @@ def main(): pid = pid_file.get_pid() if pid is not None: sys.stdout.write('dd-agent is running as pid %s.\n' % pid) + logger.info("dd-agent is running as pid %s." % pid) else: sys.stdout.write('dd-agent is not running.\n') + logger.info("dd-agent is not running.") elif 'info' == command: return CollectorStatus.print_latest_status() @@ -238,7 +205,7 @@ def main(): except Exception: # Try our best to log the error. 
try: - agent_logger.exception("Uncaught error running the agent") + logger.exception("Uncaught error running the agent") except: pass raise diff --git a/aggregator.py b/aggregator.py index 4f30843ef2..9e7e644183 100644 --- a/aggregator.py +++ b/aggregator.py @@ -2,7 +2,7 @@ from time import time -logger = logging.getLogger(__name__) +logger = logging.getLogger('ddagent.aggregator') class Infinity(Exception): pass diff --git a/checks/__init__.py b/checks/__init__.py index 120848c322..3ea26833d6 100644 --- a/checks/__init__.py +++ b/checks/__init__.py @@ -21,6 +21,8 @@ from util import LaconicFilter from checks import check_status +logger = logging.getLogger('ddagent.checks') + # Konstants class CheckException(Exception): pass class Infinity(CheckException): pass @@ -268,7 +270,7 @@ def __init__(self, name, init_config, agentConfig, instances=None): self.init_config = init_config self.agentConfig = agentConfig self.hostname = gethostname(agentConfig) - self.log = logging.getLogger('checks.%s' % name) + self.log = logging.getLogger('ddagent.checks.%s' % name) self.aggregator = MetricsAggregator(self.hostname, formatter=agent_formatter) self.events = [] self.instances = instances or [] @@ -476,7 +478,7 @@ def gethostname(agentConfig): try: return socket.getfqdn() except socket.error, e: - logging.debug("processes: unable to get hostname: " + str(e)) + logger.debug("processes: unable to get hostname: " + str(e)) def agent_formatter(metric, value, timestamp, tags, hostname, device_name=None): """ Formats metrics coming from the MetricsAggregator. Will look like: diff --git a/checks/build.py b/checks/build.py index 1faae92121..c403505ef2 100644 --- a/checks/build.py +++ b/checks/build.py @@ -130,7 +130,7 @@ def check(self, logger, agentConfig): hudson_home, apiKey = sys.argv[1:3] - logger = logging.getLogger('hudson') + logger = logging.getLogger('ddagent.checks.hudson') logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) hudson = Hudson() diff --git a/checks/check_status.py b/checks/check_status.py index 012b3c780c..d652f7f6c5 100644 --- a/checks/check_status.py +++ b/checks/check_status.py @@ -20,7 +20,7 @@ STATUS_ERROR = 'ERROR' -log = logging.getLogger(__name__) +log = logging.getLogger('ddagent.checks.check_status') class Stylizer(object): diff --git a/checks/collector.py b/checks/collector.py index ef835f857a..d5d772b3b7 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -34,8 +34,8 @@ from resources.processes import Processes as ResProcesses -logger = logging.getLogger('collector') -checks_logger = logging.getLogger('checks') +logger = logging.getLogger('ddagent.collector') +checks_logger = logging.getLogger('ddagent.checks') class Collector(object): diff --git a/checks/datadog.py b/checks/datadog.py index bdfdf6df8d..3f8c6eff34 100644 --- a/checks/datadog.py +++ b/checks/datadog.py @@ -568,7 +568,7 @@ def check(self, agentConfig, move_end=True): def testDogStream(): import logging - logger = logging.getLogger("datadog") + logger = logging.getLogger("ddagent.checks.datadog") logger.setLevel(logging.DEBUG) logger.addHandler(logging.StreamHandler()) dogstream = Dogstream(logger) @@ -582,7 +582,7 @@ def testDogStream(): def testddForwarder(): import logging - logger = logging.getLogger("datadog") + logger = logging.getLogger("ddagent.checks.datadog") logger.setLevel(logging.DEBUG) logger.addHandler(logging.StreamHandler()) diff --git a/checks/db/elastic.py b/checks/db/elastic.py index 950eaed7b2..304ca127bd 100644 --- a/checks/db/elastic.py +++ 
b/checks/db/elastic.py @@ -369,7 +369,7 @@ def check(self, config, url_suffix=STATS_URL): import logging from config import get_version logging.basicConfig() - logger = logging.getLogger() + logger = logging.getLogger('ddagent.checks.elastic') c = ElasticSearch(logger) config = {"elasticsearch": "http://localhost:9200", "version": get_version(), "api_key":"apiKey 2"} pprint.pprint(c.check(config)) diff --git a/checks/nagios.py b/checks/nagios.py index 15cad708f2..fbd0c0d564 100644 --- a/checks/nagios.py +++ b/checks/nagios.py @@ -153,9 +153,7 @@ def parse_log(api_key, log_file): import socket import sys - logger = logging.getLogger("nagios") - logger.setLevel(logging.DEBUG) - logger.addHandler(logging.StreamHandler()) + logger = logging.getLogger("ddagent.checks.nagios") nagios = Nagios(socket.gethostname()) events = nagios.check(logger, {'api_key': api_key, 'nagios_log': log_file}, move_end=False) @@ -165,9 +163,8 @@ def parse_log(api_key, log_file): if __name__ == "__main__": import logging import socket - logger = logging.getLogger("nagios") - logger.setLevel(logging.DEBUG) - logger.addHandler(logging.StreamHandler()) + + logger = logging.getLogger("ddagent.checks.nagios") nagios = Nagios(socket.gethostname()) config = {'api_key':'apikey_2','nagios_log': '/var/log/nagios3/nagios.log'} diff --git a/compat/tornadotcpserver.py b/compat/tornadotcpserver.py index 8ff0eec9ec..2007f892bf 100644 --- a/compat/tornadotcpserver.py +++ b/compat/tornadotcpserver.py @@ -12,6 +12,8 @@ import fcntl +logger = logging.getLogger('ddagent.tornado') + def set_close_exec(fd): flags = fcntl.fcntl(fd, fcntl.F_GETFD) fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) @@ -201,7 +203,7 @@ def _handle_connection(self, connection, address): stream = IOStream(connection, io_loop=self.io_loop) self.handle_stream(stream, address) except Exception: - logging.error("Error in connection callback", exc_info=True) + logger.error("Error in connection callback", exc_info=True) def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128): diff --git a/config.py b/config.py index 9f90454369..8d3406a152 100644 --- a/config.py +++ b/config.py @@ -10,7 +10,6 @@ import inspect from optparse import OptionParser, Values from cStringIO import StringIO -from util import getOS # CONSTANTS DATADOG_CONF = "datadog.conf" @@ -18,6 +17,8 @@ DEFAULT_STATSD_FREQUENCY = 10 # seconds PUP_STATSD_FREQUENCY = 2 # seconds +logger = logging.getLogger('ddagent.config') + class PathNotFound(Exception): pass def get_parsed_args(): @@ -46,10 +47,15 @@ def skip_leading_wsp(f): "Works on a file, returns a file-like object" return StringIO("\n".join(map(string.strip, f.readlines()))) -def initialize_logging(config_path, os_name=None): +def initialize_logging(config_path=None, os_name=None): + if os_name is None: + os_name = getOS() + if config_path is None: + config_path = get_config_path(None, os_name=getOS()), try: logging.config.fileConfig(config_path) except Exception, e: + raise sys.stderr.write("Couldn't initialize logging: %s" % str(e)) def _windows_commondata_path(): @@ -150,7 +156,6 @@ def get_config(parse_args = True, cfg_path=None, init_logging=False, options=Non # General config agentConfig = { 'check_freq': DEFAULT_CHECK_FREQUENCY, - 'debug_mode': False, 'dogstatsd_interval': DEFAULT_STATSD_FREQUENCY, 'dogstatsd_normalize': 'yes', 'dogstatsd_port': 8125, @@ -234,9 +239,6 @@ def get_config(parse_args = True, cfg_path=None, init_logging=False, options=Non # Which API key to use agentConfig['api_key'] = config.get('Main', 
'api_key') - # Debug mode - agentConfig['debug_mode'] = config.get('Main', 'debug_mode').lower() in ("yes", "true") - # local traffic only? Default to no agentConfig['non_local_traffic'] = False if config.has_option('Main', 'non_local_traffic'): @@ -402,7 +404,6 @@ def set_win32_cert_path(): tornado.simple_httpclient._DEFAULT_CA_CERTS = crt_path def get_confd_path(osname): - log = logging.getLogger('config') if osname == 'windows': try: @@ -421,12 +422,10 @@ def get_confd_path(osname): if os.path.exists(cur_path): return cur_path - log.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % exc.message) + logger.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % exc.message) sys.exit(3) def get_checksd_path(osname): - log = logging.getLogger('config') - # Unix only will look up based on the current directory # because checks.d will hang with the other python modules cur_path = os.path.dirname(os.path.realpath(__file__)) @@ -438,9 +437,9 @@ def get_checksd_path(osname): try: return _windows_checksd_path() except PathNotFound, e: - log.error("No checks.d folder found in '%s'.\n" % e.message) + logger.error("No checks.d folder found in '%s'.\n" % e.message) - log.error("No checks.d folder at '%s'.\n" % checksd_path) + logger.error("No checks.d folder at '%s'.\n" % checksd_path) sys.exit(3) def load_check_directory(agentConfig): @@ -451,7 +450,6 @@ def load_check_directory(agentConfig): checks = [] - log = logging.getLogger('checks') osname = getOS() checks_path = get_checksd_path(osname) confd_path = get_confd_path(osname) @@ -471,7 +469,7 @@ def load_check_directory(agentConfig): try: check_module = __import__(check_name) except: - log.exception('Unable to import check module %s.py from checks.d' % check_name) + logger.exception('Unable to import check module %s.py from checks.d' % check_name) continue check_class = None @@ -487,7 +485,7 @@ def load_check_directory(agentConfig): break if not check_class: - log.error('No check class (inheriting from AgentCheck) found in %s.py' % check_name) + logger.error('No check class (inheriting from AgentCheck) found in %s.py' % check_name) continue # Check if the config exists OR we match the old-style config @@ -500,7 +498,7 @@ def load_check_directory(agentConfig): f.close() except: f.close() - log.exception("Unable to parse yaml config in %s" % conf_path) + logger.exception("Unable to parse yaml config in %s" % conf_path) continue elif hasattr(check_class, 'parse_agent_config'): # FIXME: Remove this check once all old-style checks are gone @@ -508,12 +506,12 @@ def load_check_directory(agentConfig): if not check_config: continue else: - log.debug('No conf.d/%s.yaml found for checks.d/%s.py' % (check_name, check_name)) + logger.debug('No conf.d/%s.yaml found for checks.d/%s.py' % (check_name, check_name)) continue # Look for the per-check config, which *must* exist if not check_config.get('instances'): - log.error("Config %s is missing 'instances'" % conf_path) + logger.error("Config %s is missing 'instances'" % conf_path) continue # Accept instances as a list, as a single dict, or as non-existant @@ -549,7 +547,19 @@ def load_check_directory(agentConfig): pythonpath = [pythonpath] sys.path.extend(pythonpath) - log.debug('Loaded check.d/%s.py' % check_name) + logger.debug('Loaded check.d/%s.py' % check_name) - log.info('checks.d checks: %s' % [c.name for c in checks]) + logger.info('checks.d checks: %s' % [c.name for c in checks]) return checks + +def 
getOS(): + if sys.platform == 'darwin': + return 'mac' + elif sys.platform.find('freebsd') != -1: + return 'freebsd' + elif sys.platform.find('linux') != -1: + return 'linux' + elif sys.platform.find('win32') != -1: + return 'windows' + else: + return sys.platform diff --git a/daemon.py b/daemon.py index 16f3f9ce2f..6b3ed508b5 100644 --- a/daemon.py +++ b/daemon.py @@ -26,6 +26,7 @@ import time import logging +logger = logging.getLogger('ddagent.daemon') class Daemon: """ @@ -52,11 +53,11 @@ def daemonize(self): sys.exit(0) except OSError, e: msg = "fork #1 failed: %d (%s)" % (e.errno, e.strerror) - logging.error(msg) + logger.error(msg) sys.stderr.write(msg + "\n") sys.exit(1) - logging.debug("Fork 1 ok") + logger.debug("Fork 1 ok") # Decouple from parent environment os.chdir("/") @@ -71,7 +72,7 @@ def daemonize(self): sys.exit(0) except OSError, e: msg = "fork #2 failed: %d (%s)" % (e.errno, e.strerror) - logging.error(msg) + logger.error(msg) sys.stderr.write(msg + "\n") sys.exit(1) @@ -87,7 +88,7 @@ def daemonize(self): os.dup2(so.fileno(), sys.stdout.fileno()) os.dup2(se.fileno(), sys.stderr.fileno()) - logging.info("Started") + logger.info("Started") # Write pidfile atexit.register(self.delpid) # Make sure pid file is removed if we quit @@ -96,7 +97,7 @@ def daemonize(self): file(self.pidfile,'w+').write("%s\n" % pid) except Exception, e: msg = "Unable to write pidfile: %s" % self.pidfile - logging.exception(msg) + logger.exception(msg) sys.stderr.write(msg + "\n") sys.exit(1) @@ -111,7 +112,7 @@ def start(self): Start the daemon """ - logging.info("Starting...") + logger.info("Starting...") # Check for a pidfile to see if the daemon already runs try: pf = file(self.pidfile,'r') @@ -124,14 +125,14 @@ def start(self): if pid: message = "pidfile %s already exists. Is it already running?\n" - logging.error(message % self.pidfile) + logger.error(message % self.pidfile) sys.stderr.write(message % self.pidfile) sys.exit(1) # Start the daemon - logging.info("Pidfile: %s" % self.pidfile) + logger.info("Pidfile: %s" % self.pidfile) self.daemonize() - logging.debug("Calling run method") + logger.debug("Calling run method") self.run() def stop(self): @@ -141,7 +142,7 @@ def stop(self): from signal import SIGTERM - logging.info("Stopping...") + logger.info("Stopping...") # Get the pid from the pidfile try: pf = file(self.pidfile,'r') @@ -164,11 +165,11 @@ def stop(self): time.sleep(0.1) except OSError, err: if str(err).find("No such process") <= 0: - logging.exception("Cannot kill agent daemon at pid %s" % pid) + logger.exception("Cannot kill agent daemon at pid %s" % pid) sys.stderr.write(str(err) + "\n") else: message = "Pidfile %s does not exist. Not running?\n" % self.pidfile - logging.info(message) + logger.info(message) sys.stderr.write(message) # Just to be sure. A ValueError might occur if the PID file is empty but does actually exist @@ -178,7 +179,7 @@ def stop(self): return # Not an error in a restart - logging.info("Stopped") + logger.info("Stopped") def restart(self): "Restart the daemon" diff --git a/datadog.conf.example b/datadog.conf.example index 2a7c171cb6..033c9c9338 100644 --- a/datadog.conf.example +++ b/datadog.conf.example @@ -8,10 +8,6 @@ dd_url: https://app.datadoghq.com # https://app.datadoghq.com/account/settings api_key: -# Boolean to enable debug_mode, which outputs massive amounts of log messages -# to the /tmp/ directory. -debug_mode: no - # Force the hostname to whatever you want. 
#hostname: mymachine.mydomain @@ -436,45 +432,28 @@ use_mount: no # -------------------------------------------------------------------------- # [loggers] -keys:root,dogstatsd,checks +keys:root,agent [handlers] -keys:dogstatsd,checks +keys:stream [formatters] keys:ddagent -# DogStatsd logging - [logger_root] -level:INFO -handlers: -propagate:0 -qualname:root +level:ERROR +handlers:stream -[logger_dogstatsd] +[logger_agent] level:INFO -handlers:dogstatsd +handlers:stream +qualname:ddagent propagate:0 -qualname:dogstatsd - -[logger_checks] -level:WARN -handlers:checks -propagate:0 -qualname:checks - -[handler_dogstatsd] -class:FileHandler -level:INFO -formatter:ddagent -args:('/tmp/dogstatsd.log', 'a') -[handler_checks] -class:FileHandler -level:WARN +[handler_stream] +class:StreamHandler formatter:ddagent -args:('/tmp/dd-agent.log', 'a') +args:() [formatter_ddagent] format: %(asctime)s | %(name)s | %(levelname)s | %(message)s diff --git a/ddagent.py b/ddagent.py index a6181867da..1d32933dfc 100755 --- a/ddagent.py +++ b/ddagent.py @@ -10,6 +10,9 @@ (C) Datadog, Inc. 2010-2012 all rights reserved ''' +# set up logging before importing any other components +from config import initialize_logging; initialize_logging() + # Standard imports import logging import os @@ -33,6 +36,8 @@ from checks.check_status import ForwarderStatus from transaction import Transaction, TransactionManager +logger = logging.getLogger('ddagent.forwarder') + TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval @@ -64,6 +69,7 @@ def get_tr_manager(cls): @classmethod def set_endpoints(cls): + if 'use_pup' in cls._application._agentConfig: if cls._application._agentConfig['use_pup']: cls._endpoints.append('pup_url') @@ -76,10 +82,10 @@ def set_endpoints(cls): and cls._application._agentConfig.get('api_key') is not None\ and cls._application._agentConfig.get('api_key', "pup") not in ("", "pup") if is_dd_user: - logging.warn("You are a Datadog user so we will send data to https://app.datadoghq.com") + logger.warn("You are a Datadog user so we will send data to https://app.datadoghq.com") cls._endpoints.append('dd_url') except: - logging.info("Not a Datadog user") + logger.info("Not a Datadog user") def __init__(self, data, headers): self._data = data @@ -90,7 +96,7 @@ def __init__(self, data, headers): # Insert the transaction in the Manager self._trManager.append(self) - logging.debug("Created transaction %d" % self.get_id()) + logger.debug("Created transaction %d" % self.get_id()) self._trManager.flush() def __sizeof__(self): @@ -105,7 +111,7 @@ def get_url(self, endpoint): def flush(self): for endpoint in self._endpoints: url = self.get_url(endpoint) - logging.info("Sending metrics to endpoint %s at %s" % (endpoint, url)) + logger.info("Sending metrics to endpoint %s at %s" % (endpoint, url)) req = tornado.httpclient.HTTPRequest(url, method="POST", body=self._data, headers=self._headers) @@ -123,7 +129,7 @@ def flush(self): def on_response(self, response): if response.error: - logging.error("Response: %s" % response.error) + logger.error("Response: %s" % response.error) self._trManager.tr_error(self) else: self._trManager.tr_success(self) @@ -236,7 +242,6 @@ def _postMetrics(self): self._metrics = {} def run(self): - handlers = [ (r"/intake/?", AgentInputHandler), (r"/api/v1/series/?", ApiInputHandler), @@ -259,7 +264,7 @@ def run(self): else: # localhost in lieu of 127.0.0.1 to support IPv6 http_server.listen(self._port, address = "localhost") - 
logging.info("Listening on port %d" % self._port) + logger.info("Listening on port %d" % self._port) # Register callbacks self.mloop = tornado.ioloop.IOLoop.instance() @@ -276,7 +281,7 @@ def flush_trs(): # Register optional Graphite listener gport = self._agentConfig.get("graphite_listen_port", None) if gport is not None: - logging.info("Starting graphite listener on port %s" % gport) + logger.info("Starting graphite listener on port %s" % gport) from graphite import GraphiteServer gs = GraphiteServer(self, gethostname(self._agentConfig), io_loop=self.mloop) if non_local_traffic is True: @@ -290,7 +295,7 @@ def flush_trs(): tr_sched.start() self.mloop.start() - logging.info("Stopped") + logger.info("Stopped") def stop(self): self.mloop.stop() @@ -307,7 +312,7 @@ def init(): app = Application(port, agentConfig) def sigterm_handler(signum, frame): - logging.info("caught sigterm. stopping") + logger.info("caught sigterm. stopping") app.stop() import signal @@ -330,7 +335,7 @@ def main(): app.run() finally: ForwarderStatus.remove_latest_status() - + else: usage = "%s [help|info]. Run with no commands to start the server" % ( sys.argv[0]) diff --git a/dogstatsd.py b/dogstatsd.py index cdd94284ac..ec6aab6cdc 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -3,6 +3,9 @@ A Python Statsd implementation with some datadog special sauce. """ +# set up logging before importing any other components +from config import initialize_logging; initialize_logging() + # stdlib import httplib as http_client import logging @@ -25,12 +28,11 @@ from daemon import Daemon from util import json, PidFile - -WATCHDOG_TIMEOUT = 120 -UDP_SOCKET_TIMEOUT = 5 +logger = logging.getLogger('ddagent.dogstatsd') -logger = logging.getLogger('dogstatsd') +WATCHDOG_TIMEOUT = 120 +UDP_SOCKET_TIMEOUT = 5 class Reporter(threading.Thread): @@ -68,6 +70,7 @@ def stop(self): self.finished.set() def run(self): + logger.info("Reporting to %s every %ss" % (self.api_host, self.interval)) logger.debug("Watchdog enabled: %s" % bool(self.watchdog)) @@ -112,7 +115,6 @@ def flush(self): logger.exception("Error flushing metrics") def submit(self, metrics): - # HACK - Copy and pasted from dogapi, because it's a bit of a pain to distribute python # dependencies with the agent. body = json.dumps({"series" : metrics}) @@ -163,6 +165,7 @@ def start(self): """ Run the server. """ # Bind to the UDP socket. self.socket.bind(self.address) + logger.info('Listening on host & port: %s' % str(self.address)) # Inline variables for quick look-up. 
@@ -223,8 +226,7 @@ def _handle_sigterm(self, signum, frame): def init(config_path=None, use_watchdog=False, use_forwarder=False): - c = get_config(parse_args=False, cfg_path=config_path, init_logging=True) - + c = get_config(parse_args=False, cfg_path=config_path) logger.debug("Configuration dogstatsd") port = c['dogstatsd_port'] diff --git a/graphite.py b/graphite.py index 3677605dfd..54da810343 100644 --- a/graphite.py +++ b/graphite.py @@ -5,17 +5,19 @@ from tornado.ioloop import IOLoop from tornado.iostream import IOStream +logger = logging.getLogger('ddagent.graphite') + try: from tornado.netutil import TCPServer except Exception, e: - logging.warn("Tornado < 2.1.1 detected, using compatibility TCPServer") + logger.warn("Tornado < 2.1.1 detected, using compatibility TCPServer") from compat.tornadotcpserver import TCPServer class GraphiteServer(TCPServer): def __init__(self, app, hostname, io_loop=None, ssl_options=None, **kwargs): - logging.info('Graphite listener is started') + logger.info('Graphite listener is started') self.app = app self.hostname = hostname TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options, **kwargs) @@ -27,7 +29,7 @@ def handle_stream(self, stream, address): class GraphiteConnection(object): def __init__(self, stream, address, app, hostname): - logging.debug('received a new connection from %s', address) + logger.debug('received a new connection from %s', address) self.app = app self.stream = stream self.address = address @@ -38,17 +40,17 @@ def __init__(self, stream, address, app, hostname): def _on_read_header(self,data): try: size = struct.unpack("!I",data)[0] - logging.debug("Receiving a string of size:" + str(size)) + logger.debug("Receiving a string of size:" + str(size)) self.stream.read_bytes(size, self._on_read_line) except Exception, e: - logging.error(e) + logger.error(e) def _on_read_line(self, data): - logging.debug('read a new line from %s', self.address) + logger.debug('read a new line from %s', self.address) self._decode(data) def _on_close(self): - logging.debug('client quit %s', self.address) + logger.debug('client quit %s', self.address) def _parseMetric(self, metric): """Graphite does not impose a particular metric structure. 
@@ -68,7 +70,7 @@ def _parseMetric(self, metric):
             return metric, host, device

         except Exception, e:
-            logging.exception("Unparsable metric: {0}".format(metric))
+            logger.exception("Unparsable metric: {0}".format(metric))
             return None, None, None

     def _postMetric(self, name, host, device, datapoint):
@@ -81,25 +83,25 @@ def _processMetric(self, metric, datapoint):
         """Parse the metric name to fetch (host, metric, device) and send
         the datapoint to datadog"""

-        logging.debug("New metric: %s, values: %s" % (metric, datapoint))
+        logger.debug("New metric: %s, values: %s" % (metric, datapoint))
         (metric,host,device) = self._parseMetric(metric)

         if metric is not None:
             self._postMetric(metric,host,device, datapoint)
-            logging.info("Posted metric: %s, host: %s, device: %s" % (metric, host, device))
+            logger.info("Posted metric: %s, host: %s, device: %s" % (metric, host, device))

     def _decode(self,data):

         try:
             datapoints = pickle.loads(data)
         except:
-            logging.exception("Cannot decode grapite points")
+            logger.exception("Cannot decode graphite points")
             return

         for (metric, datapoint) in datapoints:
             try:
                 datapoint = ( float(datapoint[0]), float(datapoint[1]) )
             except Exception, e:
-                logging.error(e)
+                logger.error(e)
                 continue

             self._processMetric(metric,datapoint)
diff --git a/pup/pup.py b/pup/pup.py
index 5ed46a45ca..768cd0f54f 100644
--- a/pup/pup.py
+++ b/pup/pup.py
@@ -10,6 +10,9 @@
 (C) Datadog, Inc. 2012 all rights reserved
 """

+# set up logging before importing any other components
+from config import initialize_logging; initialize_logging()
+
 # stdlib
 from collections import defaultdict
 import sys
@@ -30,9 +33,7 @@
 from config import get_config
 from util import json

-
-logger = logging.getLogger('pup')
-
+logger = logging.getLogger('ddagent.pup')

 AGENT_TRANSLATION = {
     'cpuUser'      : 'CPU user (%)',
@@ -244,10 +245,10 @@ def main():
     is_enabled = c['use_pup']

     if is_enabled:
-        logging.info("Starting pup")
+        logger.info("Starting pup")
         run_pup(c)
     else:
-        logging.info("Pup is disabled. Exiting")
+        logger.info("Pup is disabled. Exiting")
         # We're exiting purposefully, so exit with zero (supervisor's expected
         # code). HACK: Sleep a little bit so supervisor thinks we've started cleanly
         # and thus can exit cleanly.
diff --git a/tests/badconfig.conf b/tests/badconfig.conf
index bfd80c35ed..a9077f56e2 100644
--- a/tests/badconfig.conf
+++ b/tests/badconfig.conf
@@ -8,10 +8,6 @@ dd_url: https://app.datadoghq.com
 # https://app.datadoghq.com/account/settings
 api_key: 1234

-# Boolean to enable debug_mode, which outputs massive amounts of log messages
-# to the /tmp/ directory.
-debug_mode: no
-
 # Force the hostname to whatever you want.
#hostname: mymachine.mydomain @@ -29,4 +25,4 @@ use_mount: no nagios_log: /var/log/nagios3/nagios.log nagios_perf_cfg: /var/log/blah.log -graphite_listen_port: 17126 \ No newline at end of file +graphite_listen_port: 17126 diff --git a/transaction.py b/transaction.py index 517080da4c..843f03bafd 100644 --- a/transaction.py +++ b/transaction.py @@ -1,5 +1,3 @@ - - # stdlib import sys import time @@ -13,6 +11,8 @@ # project from checks.check_status import ForwarderStatus +logger = logging.getLogger('ddagent.transaction') + def plural(count): if count > 1: return "s" @@ -98,7 +98,7 @@ def get_transactions(self): return self._transactions def print_queue_stats(self): - logging.info("Queue size: at %s, %s transaction(s), %s KB" % + logger.info("Queue size: at %s, %s transaction(s), %s KB" % (time.time(), self._total_count, (self._total_size/1024))) def get_tr_id(self): @@ -113,15 +113,15 @@ def append(self,tr): # Check the size tr_size = tr.get_size() - logging.info("New transaction to add, total size of queue would be: %s KB" % + logger.info("New transaction to add, total size of queue would be: %s KB" % ((self._total_size + tr_size)/ 1024)) if (self._total_size + tr_size) > self._MAX_QUEUE_SIZE: - logging.warn("Queue is too big, removing old messages...") + logger.warn("Queue is too big, removing old messages...") new_trs = sorted(self._transactions,key=attrgetter('_next_flush'), reverse = True) for tr2 in new_trs: if (self._total_size + tr_size) > self._MAX_QUEUE_SIZE: - logging.warn("Removing transaction %s from queue" % tr2.get_id()) + logger.warn("Removing transaction %s from queue" % tr2.get_id()) self._transactions.remove(tr2) self._total_count = self._total_count - 1 self._total_size = self._total_size - tr2.get_size() @@ -131,13 +131,13 @@ def append(self,tr): self._total_count = self._total_count + 1 self._total_size = self._total_size + tr_size - logging.info("Transaction %s added" % (tr.get_id())) + logger.info("Transaction %s added" % (tr.get_id())) self.print_queue_stats() def flush(self): if self._trs_to_flush is not None: - logging.info("A flush is already in progress, not doing anything") + logger.info("A flush is already in progress, not doing anything") return to_flush = [] @@ -149,7 +149,7 @@ def flush(self): count = len(to_flush) if count > 0: - logging.info("Flushing %s transaction%s" % (count,plural(count))) + logger.info("Flushing %s transaction%s" % (count,plural(count))) self._trs_to_flush = to_flush self.flush_next() self._flush_count += 1 @@ -173,11 +173,11 @@ def flush_next(self): if delay <= 0: tr = self._trs_to_flush.pop() self._last_flush = datetime.now() - logging.debug("Flushing transaction %d" % tr.get_id()) + logger.debug("Flushing transaction %d" % tr.get_id()) try: tr.flush() except Exception,e : - logging.exception(e) + logger.exception(e) self.tr_error(tr) self.flush_next() else: @@ -195,12 +195,12 @@ def flush_next(self): def tr_error(self,tr): tr.inc_error_count() tr.compute_next_flush(self._MAX_WAIT_FOR_REPLAY) - logging.info("Transaction %d in error (%s error%s), it will be replayed after %s" % + logger.info("Transaction %d in error (%s error%s), it will be replayed after %s" % (tr.get_id(), tr.get_error_count(), plural(tr.get_error_count()), tr.get_next_flush())) def tr_success(self,tr): - logging.info("Transaction %d completed" % tr.get_id()) + logger.info("Transaction %d completed" % tr.get_id()) self._transactions.remove(tr) self._total_count = self._total_count - 1 self._total_size = self._total_size - tr.get_size() diff --git a/util.py 
b/util.py index 6984d72e95..6c41c8edee 100644 --- a/util.py +++ b/util.py @@ -1,4 +1,3 @@ -import logging import os import platform import signal @@ -6,6 +5,7 @@ import math import time import uuid +from config import getOS try: from hashlib import md5 @@ -41,6 +41,9 @@ def loads(data): except ImportError: from compat.namedtuple import namedtuple +import logging +logger = logging.getLogger('ddagent.agent') + NumericTypes = (float, int, long) @@ -56,8 +59,6 @@ def get_uuid(): # on the back-end if need be, based on mac addresses. return uuid.uuid5(uuid.NAMESPACE_DNS, platform.node() + str(uuid.getnode())).hex - - def headers(agentConfig): # Build the request headers return { @@ -66,18 +67,6 @@ def headers(agentConfig): 'Accept': 'text/html, */*', } -def getOS(): - if sys.platform == 'darwin': - return 'mac' - elif sys.platform.find('freebsd') != -1: - return 'freebsd' - elif sys.platform.find('linux') != -1: - return 'linux' - elif sys.platform.find('win32') != -1: - return 'windows' - else: - return sys.platform - def getTopIndex(): macV = None if sys.platform == 'darwin': @@ -128,13 +117,13 @@ def __init__(self, duration): def self_destruct(signum, frame): try: import traceback - logging.error("Self-destructing...") - logging.error(traceback.format_exc()) + logger.error("Self-destructing...") + logger.error(traceback.format_exc()) finally: os.kill(os.getpid(), signal.SIGKILL) def reset(self): - logging.debug("Resetting watchdog for %d" % self._duration) + logger.debug("Resetting watchdog for %d" % self._duration) signal.alarm(self._duration) @@ -152,29 +141,29 @@ def get_path(self): # Can we write to the directory try: if os.access(self.pid_dir, os.W_OK): - logging.debug("Pid file is: %s" % self.pid_path) + logger.info("Pid file is: %s" % self.pid_path) return self.pid_path except: - logging.exception("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE) + logger.exception("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE) # if all else fails if os.access("/tmp", os.W_OK): tmp_path = os.path.join('/tmp', self.pid_file) - logging.debug("Using temporary pid file: %s" % tmp_path) + logger.debug("Using temporary pid file: %s" % tmp_path) return tmp_path else: # Can't save pid file, bail out - logging.error("Cannot save pid file anywhere") + logger.error("Cannot save pid file anywhere") raise Exception("Cannot save pid file anywhere") def clean(self): try: path = self.get_path() - logging.debug("Cleaning up pid file %s" % path) + logger.debug("Cleaning up pid file %s" % path) os.remove(path) return True except: - logging.exception("Could not clean up pid file") + logger.exception("Could not clean up pid file") return False def get_pid(self): diff --git a/win32/agent.py b/win32/agent.py index d35041f631..a38aeafb3e 100644 --- a/win32/agent.py +++ b/win32/agent.py @@ -27,7 +27,7 @@ class AgentSvc(win32serviceutil.ServiceFramework): def __init__(self, args): win32serviceutil.ServiceFramework.__init__(self, args) self.hWaitStop = win32event.CreateEvent(None, 0, 0, None) - config = get_config(parse_args=False) + config = get_config(parse_args=False, init_logging=True) self.forwarder = DDForwarder(config) self.dogstatsd = DogstatsdThread(config) From f9fb135b7b3ed0d58a1f5352db9c640f646b1849 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 4 Dec 2012 18:39:44 -0500 Subject: [PATCH 02/25] consistent logger naming --- agent.py | 6 +++--- aggregator.py | 4 ++-- checks/__init__.py | 5 +++-- checks/check_status.py | 2 +- checks/collector.py | 8 ++++---- compat/tornadotcpserver.py | 4 
+++- config.py | 15 +++++++++++++-- daemon.py | 4 +++- ddagent.py | 6 +++--- dogstatsd.py | 6 +++--- graphite.py | 4 +++- pup/pup.py | 6 +++--- transaction.py | 3 ++- util.py | 5 ++--- 14 files changed, 48 insertions(+), 30 deletions(-) diff --git a/agent.py b/agent.py index 39d266eca1..9c884064dc 100755 --- a/agent.py +++ b/agent.py @@ -11,7 +11,7 @@ ''' # set up logging before importing any other components -from config import initialize_logging; initialize_logging() +from config import initialize_logging; initialize_logging(logger_name='agent') # Core modules import logging @@ -34,7 +34,7 @@ from checks.collector import Collector from checks.check_status import CollectorStatus from checks.ec2 import EC2 -from config import get_config, get_system_stats, get_parsed_args, load_check_directory +from config import get_config, get_system_stats, get_parsed_args, load_check_directory, get_logger_name from daemon import Daemon from emitter import http_emitter from util import Watchdog, PidFile @@ -45,7 +45,7 @@ WATCHDOG_MULTIPLIER = 10 # Globals -logger = logging.getLogger('ddagent.agent') +logger = logging.getLogger(get_logger_name()) class Agent(Daemon): """ diff --git a/aggregator.py b/aggregator.py index 9e7e644183..7f8c27a6cf 100644 --- a/aggregator.py +++ b/aggregator.py @@ -1,9 +1,9 @@ import logging from time import time +from config import get_logger_name -logger = logging.getLogger('ddagent.aggregator') - +logger = logging.getLogger(get_logger_name()) class Infinity(Exception): pass class UnknownValue(Exception): pass diff --git a/checks/__init__.py b/checks/__init__.py index 3ea26833d6..407600eea2 100644 --- a/checks/__init__.py +++ b/checks/__init__.py @@ -20,8 +20,9 @@ from util import LaconicFilter from checks import check_status +from config import get_logger_name -logger = logging.getLogger('ddagent.checks') +logger = logging.getLogger('%s.checks' % get_logger_name()) # Konstants class CheckException(Exception): pass @@ -270,7 +271,7 @@ def __init__(self, name, init_config, agentConfig, instances=None): self.init_config = init_config self.agentConfig = agentConfig self.hostname = gethostname(agentConfig) - self.log = logging.getLogger('ddagent.checks.%s' % name) + self.log = logging.getLogger('%s.checks.%s' % (get_logger_name(), name)) self.aggregator = MetricsAggregator(self.hostname, formatter=agent_formatter) self.events = [] self.instances = instances or [] diff --git a/checks/check_status.py b/checks/check_status.py index d652f7f6c5..4f9e8fda0d 100644 --- a/checks/check_status.py +++ b/checks/check_status.py @@ -20,7 +20,7 @@ STATUS_ERROR = 'ERROR' -log = logging.getLogger('ddagent.checks.check_status') +log = logging.getLogger('%s.checks.check_status' % config.get_logger_name()) class Stylizer(object): diff --git a/checks/collector.py b/checks/collector.py index d5d772b3b7..7d4febfe41 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -11,7 +11,7 @@ import modules from util import getOS, get_uuid, md5, Timer -from config import get_version +from config import get_version, get_logger_name from checks import gethostname import checks.system.unix as u @@ -34,8 +34,8 @@ from resources.processes import Processes as ResProcesses -logger = logging.getLogger('ddagent.collector') -checks_logger = logging.getLogger('ddagent.checks') +logger = logging.getLogger('%s.collector' % get_logger_name()) +checks_logger = logging.getLogger('%s.checks' % get_logger_name()) class Collector(object): @@ -285,7 +285,7 @@ def run(self, checksd=None): for check in checksd: if not 
self.continue_running: return - logger.debug("Running check %s" % check.name) + logger.info("Running check %s" % check.name) instance_statuses = [] metric_count = 0 event_count = 0 diff --git a/compat/tornadotcpserver.py b/compat/tornadotcpserver.py index 2007f892bf..2344082a08 100644 --- a/compat/tornadotcpserver.py +++ b/compat/tornadotcpserver.py @@ -12,7 +12,9 @@ import fcntl -logger = logging.getLogger('ddagent.tornado') +from config import get_logger_name + +logger = logging.getLogger(get_logger_name()) def set_close_exec(fd): flags = fcntl.fcntl(fd, fcntl.F_GETFD) diff --git a/config.py b/config.py index 8d3406a152..f735ea08eb 100644 --- a/config.py +++ b/config.py @@ -17,7 +17,13 @@ DEFAULT_STATSD_FREQUENCY = 10 # seconds PUP_STATSD_FREQUENCY = 2 # seconds -logger = logging.getLogger('ddagent.config') +logger_name_fragment = None +def get_logger_name(): + if logger_name_fragment is None: + return 'dd' + return 'dd.%s' % logger_name_fragment + +logger = logging.getLogger(get_logger_name()) class PathNotFound(Exception): pass @@ -47,7 +53,7 @@ def skip_leading_wsp(f): "Works on a file, returns a file-like object" return StringIO("\n".join(map(string.strip, f.readlines()))) -def initialize_logging(config_path=None, os_name=None): +def initialize_logging(config_path=None, os_name=None, logger_name=None): if os_name is None: os_name = getOS() if config_path is None: @@ -57,6 +63,11 @@ def initialize_logging(config_path=None, os_name=None): except Exception, e: raise sys.stderr.write("Couldn't initialize logging: %s" % str(e)) + if logger_name is not None: + global logger + global logger_name_fragment + logger_name_fragment = logger_name + logger = logging.getLogger(get_logger_name()) def _windows_commondata_path(): ''' Return the common appdata path, using ctypes diff --git a/daemon.py b/daemon.py index 6b3ed508b5..1ba6dc96d2 100644 --- a/daemon.py +++ b/daemon.py @@ -26,7 +26,9 @@ import time import logging -logger = logging.getLogger('ddagent.daemon') +from config import get_logger_name + +logger = logging.getLogger(get_logger_name()) class Daemon: """ diff --git a/ddagent.py b/ddagent.py index 1d32933dfc..487f883b47 100755 --- a/ddagent.py +++ b/ddagent.py @@ -11,7 +11,7 @@ ''' # set up logging before importing any other components -from config import initialize_logging; initialize_logging() +from config import initialize_logging; initialize_logging(logger_name='forwarder') # Standard imports import logging @@ -31,12 +31,12 @@ # agent import from util import Watchdog, getOS, get_uuid from emitter import http_emitter, format_body -from config import get_config +from config import get_config, get_logger_name from checks import gethostname from checks.check_status import ForwarderStatus from transaction import Transaction, TransactionManager -logger = logging.getLogger('ddagent.forwarder') +logger = logging.getLogger(get_logger_name()) TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval diff --git a/dogstatsd.py b/dogstatsd.py index ec6aab6cdc..2bee5cf62f 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -4,7 +4,7 @@ """ # set up logging before importing any other components -from config import initialize_logging; initialize_logging() +from config import initialize_logging; initialize_logging(logger_name='dogstatsd') # stdlib import httplib as http_client @@ -24,11 +24,11 @@ from aggregator import MetricsAggregator from checks import gethostname from checks.check_status import DogstatsdStatus -from config import get_config +from 
config import get_config, get_logger_name from daemon import Daemon from util import json, PidFile -logger = logging.getLogger('ddagent.dogstatsd') +logger = logging.getLogger(get_logger_name()) WATCHDOG_TIMEOUT = 120 diff --git a/graphite.py b/graphite.py index 54da810343..b381398800 100644 --- a/graphite.py +++ b/graphite.py @@ -5,7 +5,9 @@ from tornado.ioloop import IOLoop from tornado.iostream import IOStream -logger = logging.getLogger('ddagent.graphite') +from config import get_logger_name + +logger = logging.getLogger(get_logger_name()) try: from tornado.netutil import TCPServer diff --git a/pup/pup.py b/pup/pup.py index 768cd0f54f..5e219d328f 100644 --- a/pup/pup.py +++ b/pup/pup.py @@ -11,7 +11,7 @@ """ # set up logging before importing any other components -from config import initialize_logging; initialize_logging() +from config import initialize_logging; initialize_logging(logger_name='pup') # stdlib from collections import defaultdict @@ -30,10 +30,10 @@ from tornado import websocket # project -from config import get_config +from config import get_config, get_logger_name from util import json -logger = logging.getLogger('ddagent.pup') +logger = logging.getLogger(get_logger_name()) AGENT_TRANSLATION = { 'cpuUser' : 'CPU user (%)', diff --git a/transaction.py b/transaction.py index 843f03bafd..70fbad3034 100644 --- a/transaction.py +++ b/transaction.py @@ -10,8 +10,9 @@ # project from checks.check_status import ForwarderStatus +from config import get_logger_name -logger = logging.getLogger('ddagent.transaction') +logger = logging.getLogger(get_logger_name()) def plural(count): if count > 1: diff --git a/util.py b/util.py index 6c41c8edee..b30d721c02 100644 --- a/util.py +++ b/util.py @@ -5,7 +5,7 @@ import math import time import uuid -from config import getOS +from config import getOS, get_logger_name try: from hashlib import md5 @@ -42,8 +42,7 @@ def loads(data): from compat.namedtuple import namedtuple import logging -logger = logging.getLogger('ddagent.agent') - +logger = logging.getLogger(get_logger_name()) NumericTypes = (float, int, long) From efd30dc9ab5b059e915b60fc9a77c0b401f8d654 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 10 Dec 2012 17:05:18 -0500 Subject: [PATCH 03/25] programmatically configure loggers to clear up logging config --- agent.py | 2 +- checks/collector.py | 2 +- config.py | 103 ++++++++++++++++++++++++++++++++++--------- datadog.conf.example | 64 +++++---------------------- ddagent.py | 2 +- dogstatsd.py | 2 +- pup/pup.py | 2 +- win32/agent.py | 8 ++-- 8 files changed, 102 insertions(+), 83 deletions(-) diff --git a/agent.py b/agent.py index 9c884064dc..c558cabf85 100755 --- a/agent.py +++ b/agent.py @@ -11,7 +11,7 @@ ''' # set up logging before importing any other components -from config import initialize_logging; initialize_logging(logger_name='agent') +from config import initialize_logging; initialize_logging('collector') # Core modules import logging diff --git a/checks/collector.py b/checks/collector.py index 7d4febfe41..8c3e9f3048 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -34,7 +34,7 @@ from resources.processes import Processes as ResProcesses -logger = logging.getLogger('%s.collector' % get_logger_name()) +logger = logging.getLogger('%s' % get_logger_name()) checks_logger = logging.getLogger('%s.checks' % get_logger_name()) diff --git a/config.py b/config.py index f735ea08eb..b713b2878f 100644 --- a/config.py +++ b/config.py @@ -53,22 +53,6 @@ def skip_leading_wsp(f): "Works on a file, returns a file-like object" 
return StringIO("\n".join(map(string.strip, f.readlines()))) -def initialize_logging(config_path=None, os_name=None, logger_name=None): - if os_name is None: - os_name = getOS() - if config_path is None: - config_path = get_config_path(None, os_name=getOS()), - try: - logging.config.fileConfig(config_path) - except Exception, e: - raise - sys.stderr.write("Couldn't initialize logging: %s" % str(e)) - if logger_name is not None: - global logger - global logger_name_fragment - logger_name_fragment = logger_name - logger = logging.getLogger(get_logger_name()) - def _windows_commondata_path(): ''' Return the common appdata path, using ctypes From: http://stackoverflow.com/questions/626796/how-do-i-find-the-windows-common-application-data-folder-using-python @@ -158,7 +142,7 @@ def get_config_path(cfg_path=None, os_name=None): sys.stderr.write("Please supply a configuration file at %s or in the directory where the agent is currently deployed.\n" % exc.message) sys.exit(3) -def get_config(parse_args = True, cfg_path=None, init_logging=False, options=None): +def get_config(parse_args=True, cfg_path=None, options=None): if parse_args: options, args = get_parsed_args() elif not options: @@ -192,10 +176,6 @@ def get_config(parse_args = True, cfg_path=None, init_logging=False, options=Non config = ConfigParser.ConfigParser() config.readfp(skip_leading_wsp(open(config_path))) - if init_logging: - initialize_logging(config_path, os_name=getOS()) - - # bulk import for option in config.options('Main'): agentConfig[option] = config.get('Main', option) @@ -574,3 +554,84 @@ def getOS(): return 'windows' else: return sys.platform + +# +# logging + +def get_logging_config(cfg_path=None): + logging_config = { + 'collector_log_file': '/var/log/datadog/collector.log', + 'forwarder_log_file': '/var/log/datadog/forwarder.log', + 'dogstatsd_log_file': '/var/log/datadog/dogstatsd.log', + 'pup_log_file': '/var/log/datadog/pup.log', + 'format': '%(asctime)s | %(levelname)s | %(name)s | %(filename)s:%(lineno)s | %(message)s', + 'level': None + } + + config_path = get_config_path(cfg_path, os_name=getOS()) + config = ConfigParser.ConfigParser() + config.readfp(skip_leading_wsp(open(config_path))) + + for option in logging_config: + if config.has_option('Main', option): + logging_config[option] = config.get('Main', option) + + levels = { + 'CRITICAL': logging.CRITICAL, + 'DEBUG': logging.DEBUG, + 'ERROR': logging.ERROR, + 'FATAL': logging.FATAL, + 'INFO': logging.INFO, + 'WARN': logging.WARN, + 'WARNING': logging.WARNING, + } + if config.has_option('Main', 'level'): + logging_config['level'] = levels.get(config.get('Main', 'level')) + + return logging_config + +def initialize_logging(logger_name): + try: + if getOS() == 'windows': + logging.config.fileConfig(get_config_path()) + + else: + logging_config = get_logging_config() + + logging.basicConfig( + format=logging_config['format'], + level=logging_config['level'] or logging.ERROR, + ) + + log_file = logging_config.get('%s_log_file' % logger_name) + if log_file is not None: + # make sure the log file exists and is writeable + try: + fp = open(log_file, 'a') + fp.write('') + fp.close() + except Exception, e: + sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) + else: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(logging.Formatter(logging_config['format'])) + log = logging.getLogger() + log.addHandler(file_handler) + + dd_log = logging.getLogger('dd') + dd_log.setLevel(logging_config['level'] or logging.INFO) + + except Exception, 
e: + sys.stderr.write("Couldn't initialize logging: %s\n" % str(e)) + + # if config fails entirely, enable basic stdout logging as a fallback + logging.basicConfig( + format="%(asctime)s | %(name)s | %(levelname)s | %(filename)s:%(lineno)s | %(message)s", + level=logging.INFO, + ) + + if logger_name is not None: + global logger + global logger_name_fragment + logger_name_fragment = logger_name + logger = logging.getLogger(get_logger_name()) diff --git a/datadog.conf.example b/datadog.conf.example index 033c9c9338..2f42e2f30c 100644 --- a/datadog.conf.example +++ b/datadog.conf.example @@ -14,6 +14,10 @@ api_key: # Set the host's tags #tags: mytag0, mytag1 +# Use the amazon EC2 instance-id instead of hostname (unless hostname is +# explicitly set) +use_ec2_instance_id: yes + # Use mount points instead of volumes to track disk and fs metrics use_mount: no @@ -405,60 +409,12 @@ use_mount: no # If the name of the check is not specified, 'Check' is assumed. # ========================================================================== # -# WMI Metrics +# Logging # ========================================================================== # -[WMI] -# Each value should have the following structure: -# -# my.custom.metric_name: Win32_Metric_Name:value_to_use -# -# Example: -# -# system.users.count: Win32_OperatingSystem:NumberOfUsers - -############################################################################## -#################### NOTHING TO MODIFY AFTER THIS LINE ####################### -############################################################################## - -# -------------------------------------------------------------------------- # -# Logging Configuration -# -# PLEASE READ ME! -# -# DO NOT PUT ANY INTEGRATION CONFIGURATION AFTER THIS SECTION -# IT WILL BE IGNORED. -# I REPEAT, IT WILL BE IGNORED. 
-# -# -------------------------------------------------------------------------- # - -[loggers] -keys:root,agent - -[handlers] -keys:stream - -[formatters] -keys:ddagent - -[logger_root] -level:ERROR -handlers:stream - -[logger_agent] -level:INFO -handlers:stream -qualname:ddagent -propagate:0 - -[handler_stream] -class:StreamHandler -formatter:ddagent -args:() -[formatter_ddagent] -format: %(asctime)s | %(name)s | %(levelname)s | %(message)s -class:logging.Formatter +# collector_log_file: /var/log/datadog/collector.log +# forwarder_log_file: /var/log/datadog/forwarder.log +# dogstatsd_log_file: /var/log/datadog/dogstatsd.log +# pup_log_file: /var/log/datadog/pup.log -# IF YOU ADD ANYTHING HERE, IT WILL BE IGNORED -# SO IF YOU WANT TO CONFIGURE AN INTEGRATION, -# DO IT IN THE SECTION ABOVE +# level: INFO diff --git a/ddagent.py b/ddagent.py index 487f883b47..b7ee211815 100755 --- a/ddagent.py +++ b/ddagent.py @@ -11,7 +11,7 @@ ''' # set up logging before importing any other components -from config import initialize_logging; initialize_logging(logger_name='forwarder') +from config import initialize_logging; initialize_logging('forwarder') # Standard imports import logging diff --git a/dogstatsd.py b/dogstatsd.py index 2bee5cf62f..ca9a0cd13c 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -4,7 +4,7 @@ """ # set up logging before importing any other components -from config import initialize_logging; initialize_logging(logger_name='dogstatsd') +from config import initialize_logging; initialize_logging('dogstatsd') # stdlib import httplib as http_client diff --git a/pup/pup.py b/pup/pup.py index 5e219d328f..bc8c8bc507 100644 --- a/pup/pup.py +++ b/pup/pup.py @@ -11,7 +11,7 @@ """ # set up logging before importing any other components -from config import initialize_logging; initialize_logging(logger_name='pup') +from config import initialize_logging; initialize_logging('pup') # stdlib from collections import defaultdict diff --git a/win32/agent.py b/win32/agent.py index a38aeafb3e..16c2b4cd57 100644 --- a/win32/agent.py +++ b/win32/agent.py @@ -1,3 +1,6 @@ +# set up logging before importing any other components +from config import initialize_logging; initialize_logging('collector') + import win32serviceutil import win32service import win32event @@ -27,7 +30,7 @@ class AgentSvc(win32serviceutil.ServiceFramework): def __init__(self, args): win32serviceutil.ServiceFramework.__init__(self, args) self.hWaitStop = win32event.CreateEvent(None, 0, 0, None) - config = get_config(parse_args=False, init_logging=True) + config = get_config(parse_args=False) self.forwarder = DDForwarder(config) self.dogstatsd = DogstatsdThread(config) @@ -38,8 +41,7 @@ def __init__(self, args): 'use_forwarder': True, 'disabled_dd': False }), [] - agentConfig = get_config(init_logging=True, parse_args=False, - options=opts) + agentConfig = get_config(parse_args=False, options=opts) self.agent = DDAgent(agentConfig) def SvcStop(self): From 14ea3a3200d09cb99352713bc3fa1793c42ad305 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Mon, 10 Dec 2012 21:23:36 -0500 Subject: [PATCH 04/25] deb packaging for new logging --- packaging/Makefile | 3 +++ packaging/datadog-agent-base-deb/datadog-agent.init | 8 ++++---- packaging/datadog-agent-base-deb/postinst | 7 ++++++- packaging/datadog-agent-deb/postinst | 8 +++++++- packaging/datadog-agent-deb/supervisor.conf | 12 ++++++------ 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/packaging/Makefile b/packaging/Makefile index e2c0630032..6d29ef2b52 100644 --- 
a/packaging/Makefile +++ b/packaging/Makefile @@ -45,6 +45,7 @@ source: cp -r $(SRC)/*.py $(ROOT)/ cp -r $(SRC)/LICENSE* $(ROOT)/ cp -r $(SRC)/datadog.conf.example $(ROOT)/ + find $(ROOT) -name "*.pyc" -exec rm {} \; # Layout all of the files common to both versions of the agent in @@ -54,6 +55,7 @@ install_base: source mkdir -p $(BUILD)/usr/share/datadog/agent mkdir -p $(BUILD)/etc/dd-agent mkdir -p $(BUILD)/usr/bin + mkdir -p $(BUILD)/var/log/datadog # Install the common source & config. cp -r $(ROOT)/* $(BUILD)/usr/share/datadog/agent/ cp $(ROOT)/datadog.conf.example $(BUILD)/etc/dd-agent @@ -62,6 +64,7 @@ install_base: source ln -sf ../share/datadog/agent/agent.py $(BUILD)/usr/bin/dd-agent chmod 755 $(BUILD)/usr/bin/dogstatsd chmod 755 $(BUILD)/usr/bin/dd-agent + chmod 755 $(BUILD)/var/log/datadog install_full: source # Install the forwarder. diff --git a/packaging/datadog-agent-base-deb/datadog-agent.init b/packaging/datadog-agent-base-deb/datadog-agent.init index 3d95208d90..d6804801e2 100755 --- a/packaging/datadog-agent-base-deb/datadog-agent.init +++ b/packaging/datadog-agent-base-deb/datadog-agent.init @@ -43,8 +43,8 @@ case "$1" in mkdir -p $PIDPATH fi chown $AGENTUSER $PIDPATH - su $AGENTUSER -c "env LANG=POSIX $AGENTPATH start init --clean" - su $AGENTUSER -c "env LANG=POSIX $DOGSTATSDPATH start" + su $AGENTUSER -c "env LANG=POSIX $AGENTPATH start init --clean > /dev/null 2>&1" + su $AGENTUSER -c "env LANG=POSIX $DOGSTATSDPATH start > /dev/null 2>&1" fi echo "$NAME." ;; @@ -66,8 +66,8 @@ case "$1" in echo -n "(warning: datadog-agent supervisor config is missing) " fi else - su $AGENTUSER -c "$AGENTPATH stop init" - su $AGENTUSER -c "$DOGSTATSDPATH stop" + su $AGENTUSER -c "$AGENTPATH stop init > /dev/null 2>&1" + su $AGENTUSER -c "$DOGSTATSDPATH stop > /dev/null 2>&1" fi echo "$NAME." diff --git a/packaging/datadog-agent-base-deb/postinst b/packaging/datadog-agent-base-deb/postinst index fe5be37e6b..2996d8345c 100644 --- a/packaging/datadog-agent-base-deb/postinst +++ b/packaging/datadog-agent-base-deb/postinst @@ -22,7 +22,12 @@ case "$1" in configure) update-rc.d datadog-agent defaults adduser --system dd-agent --shell /bin/sh --no-create-home --quiet - chown -R dd-agent /etc/dd-agent + chown root:root /etc/init.d/datadog-agent + chown -R dd-agent:root /etc/dd-agent + chown -R dd-agent:root /var/log/datadog + chown -R root:root /usr/share/datadog + chown -h root:root /usr/bin/dogstatsd + chown -h root:root /usr/bin/dd-agent if which invoke-rc.d >/dev/null 2>&1; then invoke-rc.d datadog-agent restart diff --git a/packaging/datadog-agent-deb/postinst b/packaging/datadog-agent-deb/postinst index 995f588cab..aa270cfd0f 100644 --- a/packaging/datadog-agent-deb/postinst +++ b/packaging/datadog-agent-deb/postinst @@ -6,7 +6,13 @@ case "$1" in configure) update-rc.d datadog-agent defaults adduser --system dd-agent --shell /bin/sh --no-create-home --quiet - chown -R dd-agent /etc/dd-agent + chown root:root /etc/init.d/datadog-agent + chown -R dd-agent:root /etc/dd-agent + chown -R dd-agent:root /var/log/datadog + chown -R root:root /usr/share/datadog + chown -h root:root /usr/bin/dogstatsd + chown -h root:root /usr/bin/dd-agent + chown -h root:root /usr/bin/dd-forwarder # Update the supervisor process group to ensure it has the proper # configuration when we restart the datadog-agent. 
diff --git a/packaging/datadog-agent-deb/supervisor.conf b/packaging/datadog-agent-deb/supervisor.conf index b1a913586d..51570f76af 100644 --- a/packaging/datadog-agent-deb/supervisor.conf +++ b/packaging/datadog-agent-deb/supervisor.conf @@ -9,24 +9,24 @@ environment=LANG=POSIX [program:forwarder] command=/usr/bin/dd-forwarder -redirect_stderr=true -stdout_logfile=/var/log/ddforwarder.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=3 priority=998 user=dd-agent [program:dogstatsd] command=/usr/bin/dogstatsd --use-local-forwarder -redirect_stderr=true -stdout_logfile=/var/log/dogstatsd.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=3 priority=998 user=dd-agent [program:pup] command=python /usr/share/datadog/agent/pup/pup.py -redirect_stderr=true -stdout_logfile=/var/log/datadog-pup.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=1 priority=998 user=dd-agent From 81e3227c080fe0be64703e942cec600cffcbb755 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 14:54:24 -0500 Subject: [PATCH 05/25] clean up some py24 incompatiblities --- agent.py | 2 +- config.py | 24 ++++++++++++++++-------- util.py | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/agent.py b/agent.py index c558cabf85..57998dfc82 100755 --- a/agent.py +++ b/agent.py @@ -202,7 +202,7 @@ def main(): if __name__ == '__main__': try: sys.exit(main()) - except Exception: + except StandardError: # Try our best to log the error. try: logger.exception("Uncaught error running the agent") diff --git a/config.py b/config.py index b713b2878f..902f6ff414 100644 --- a/config.py +++ b/config.py @@ -120,17 +120,19 @@ def get_config_path(cfg_path=None, os_name=None): return cfg_path # Check for an OS-specific path, continue on not-found exceptions - exc = None + bad_path = '' if os_name == 'windows': try: return _windows_config_path() except PathNotFound, e: - exc = e + if len(e.args) > 0: + bad_path = e.args[0] else: try: return _unix_config_path() except PathNotFound, e: - exc = e + if len(e.args) > 0: + bad_path = e.args[0] # Check if there's a config stored in the current agent directory path = os.path.realpath(__file__) @@ -139,7 +141,7 @@ def get_config_path(cfg_path=None, os_name=None): return os.path.join(path, DATADOG_CONF) # If all searches fail, exit the agent with an error - sys.stderr.write("Please supply a configuration file at %s or in the directory where the agent is currently deployed.\n" % exc.message) + sys.stderr.write("Please supply a configuration file at %s or in the directory where the agent is currently deployed.\n" % bad_path) sys.exit(3) def get_config(parse_args=True, cfg_path=None, options=None): @@ -396,16 +398,19 @@ def set_win32_cert_path(): def get_confd_path(osname): + bad_path = '' if osname == 'windows': try: return _windows_confd_path() except PathNotFound, e: - exc = e + if len(e.args) > 0: + bad_path = e.args[0] else: try: return _unix_confd_path() except PathNotFound, e: - exc = e + if len(e.args) > 0: + bad_path = e.args[0] cur_path = os.path.dirname(os.path.realpath(__file__)) cur_path = os.path.join(cur_path, 'conf.d') @@ -413,7 +418,7 @@ def get_confd_path(osname): if os.path.exists(cur_path): return cur_path - logger.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % exc.message) + logger.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % bad_path) sys.exit(3) def get_checksd_path(osname): @@ -428,7 +433,10 @@ def get_checksd_path(osname): 
try: return _windows_checksd_path() except PathNotFound, e: - logger.error("No checks.d folder found in '%s'.\n" % e.message) + if len(e.args) > 0: + logger.error("No checks.d folder found in '%s'.\n" % e.args[0]) + else: + logger.error("No checks.d folder found.\n") logger.error("No checks.d folder at '%s'.\n" % checksd_path) sys.exit(3) diff --git a/util.py b/util.py index b30d721c02..912cad5b25 100644 --- a/util.py +++ b/util.py @@ -143,7 +143,7 @@ def get_path(self): logger.info("Pid file is: %s" % self.pid_path) return self.pid_path except: - logger.exception("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE) + logger.warn("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE) # if all else fails if os.access("/tmp", os.W_OK): @@ -162,7 +162,7 @@ def clean(self): os.remove(path) return True except: - logger.exception("Could not clean up pid file") + logger.warn("Could not clean up pid file") return False def get_pid(self): From d245f44ace063c21c76c1780a62b27ae3fc28e15 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 15:28:00 -0500 Subject: [PATCH 06/25] rpm packaging for new logging --- packaging/Makefile | 4 ++-- .../datadog-agent-base-rpm/datadog-agent-redhat | 8 ++++---- packaging/datadog-agent-base-rpm/post_install | 1 + packaging/datadog-agent-rpm/postinst | 1 + packaging/datadog-agent-rpm/supervisor.conf | 16 ++++++++-------- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/packaging/Makefile b/packaging/Makefile index 6d29ef2b52..e33cc89185 100644 --- a/packaging/Makefile +++ b/packaging/Makefile @@ -200,8 +200,8 @@ deb_repo: sudo dpkg-scanpackages . /dev/null | gzip -9c > Packages.gz tmp: - cp *.deb /tmp/shared || true - cp *.rpm /tmp/shared || true + cp ../artifacts/*.deb /tmp/shared || true + cp ../dist/*.rpm /tmp/shared || true all: clean deb rpm diff --git a/packaging/datadog-agent-base-rpm/datadog-agent-redhat b/packaging/datadog-agent-base-rpm/datadog-agent-redhat index 017cc3f374..6a35d46d7d 100755 --- a/packaging/datadog-agent-base-rpm/datadog-agent-redhat +++ b/packaging/datadog-agent-base-rpm/datadog-agent-redhat @@ -120,9 +120,9 @@ start() { fi echo -n 'Starting Datadog agent: ' install -d -o $AGENTUSER $PIDPATH - daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $AGENTPATH start init --clean >/dev/null" + daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $AGENTPATH start init --clean > /dev/null 2>&1" RETURNVALUE=$? - daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $DOGSTATSDPATH start > /dev/null" + daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $DOGSTATSDPATH start > /dev/null 2>&1" RETURNVALUE=$(($RETURNVALUE || $?)) echo [ $RETURNVALUE -eq 0 ] && touch $LOCKFILE @@ -147,9 +147,9 @@ stop() { return fi echo -n 'Stopping Datadog agent: ' - daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $AGENTPATH stop >/dev/null" + daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $AGENTPATH stop > /dev/null 2>&1" RETURNVALUE=$? 
- daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $DOGSTATSDPATH stop > /dev/null" + daemon --user $AGENTUSER "env LANG=POSIX $PYTHON $DOGSTATSDPATH stop > /dev/null 2>&1" RETURNVALUE=$(($RETURNVALUE || $?)) [ "$RETURNVALUE" -eq "0" ] && rm -f $LOCKFILE && rm -f $PIDFILE fi diff --git a/packaging/datadog-agent-base-rpm/post_install b/packaging/datadog-agent-base-rpm/post_install index 6e0fbc2151..31694c3a6b 100644 --- a/packaging/datadog-agent-base-rpm/post_install +++ b/packaging/datadog-agent-base-rpm/post_install @@ -1,4 +1,5 @@ chown -R dd-agent /etc/dd-agent/ +chown -R dd-agent /var/log/datadog/ chkconfig --add datadog-agent /etc/init.d/datadog-agent restart diff --git a/packaging/datadog-agent-rpm/postinst b/packaging/datadog-agent-rpm/postinst index 21f6e0cb1f..ebdf02afa5 100644 --- a/packaging/datadog-agent-rpm/postinst +++ b/packaging/datadog-agent-rpm/postinst @@ -1,5 +1,6 @@ # start chown -R dd-agent /etc/dd-agent/ +chown -R dd-agent /var/log/datadog/ chkconfig --add datadog-agent /etc/init.d/datadog-agent restart diff --git a/packaging/datadog-agent-rpm/supervisor.conf b/packaging/datadog-agent-rpm/supervisor.conf index 0c931182ea..fcae631922 100644 --- a/packaging/datadog-agent-rpm/supervisor.conf +++ b/packaging/datadog-agent-rpm/supervisor.conf @@ -21,8 +21,8 @@ environment=PYTHONPATH=/usr/share/datadog/agent:/usr/share/datadog/agent/checks, [program:collector] command=/usr/bin/python2.6 /usr/share/datadog/agent/agent.py foreground --use-local-forwarder -redirect_stderr=true ; redirect proc stderr to stdout (default false) -stdout_logfile=/var/log/supervisor/datadog-collector.log +stdout_logfile=NONE +stderr_logfile=NONE priority=999 startsecs=5 startretries=1 @@ -30,8 +30,8 @@ user=dd-agent [program:forwarder] command=/usr/bin/python2.6 /usr/share/datadog/agent/ddagent.py --pycurl=0 -redirect_stderr=true ; redirect proc stderr to stdout (default false) -stdout_logfile=/var/log/ddforwarder.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=5 startretries=1 priority=998 @@ -39,8 +39,8 @@ user=dd-agent [program:dogstatsd] command=/usr/bin/python2.6 /usr/share/datadog/agent/dogstatsd.py --use-local-forwarder -redirect_stderr=true -stdout_logfile=/var/log/dogstatsd.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=5 startretries=1 priority=998 @@ -48,8 +48,8 @@ user=dd-agent [program:pup] command=/usr/bin/python2.6 /usr/share/datadog/agent/pup/pup.py -redirect_stderr=true -stdout_logfile=/var/log/datadog-pup.log +stdout_logfile=NONE +stderr_logfile=NONE startsecs=1 priority=998 user=dd-agent From 8cf9df5108bec0793e2533c35c9eba10966f2cc8 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 16:17:39 -0500 Subject: [PATCH 07/25] give pid files safe permissions. fixes #293. 
--- daemon.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/daemon.py b/daemon.py index 1ba6dc96d2..faea3867e5 100644 --- a/daemon.py +++ b/daemon.py @@ -96,7 +96,10 @@ def daemonize(self): atexit.register(self.delpid) # Make sure pid file is removed if we quit pid = str(os.getpid()) try: - file(self.pidfile,'w+').write("%s\n" % pid) + fp = os.fdopen(os.open(self.pidfile, os.O_RDWR | os.O_CREAT | os.O_APPEND, 0644), 'w+') + fp.write("%s\n" % pid) + fp.close() + os.chmod(self.pidfile, 0644) except Exception, e: msg = "Unable to write pidfile: %s" % self.pidfile logger.exception(msg) From e1146143297aa6447920ec9f9fc173ed06878b5a Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 16:31:52 -0500 Subject: [PATCH 08/25] don't log during 'info' command --- agent.py | 1 + ddagent.py | 1 + dogstatsd.py | 26 ++++++++++++++++---------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/agent.py b/agent.py index 57998dfc82..787538440c 100755 --- a/agent.py +++ b/agent.py @@ -194,6 +194,7 @@ def main(): logger.info("dd-agent is not running.") elif 'info' == command: + logger.setLevel(logging.ERROR) return CollectorStatus.print_latest_status() return 0 diff --git a/ddagent.py b/ddagent.py index b7ee211815..58a3550e0f 100755 --- a/ddagent.py +++ b/ddagent.py @@ -341,6 +341,7 @@ def main(): sys.argv[0]) command = args[0] if command == 'info': + logger.setLevel(logging.ERROR) return ForwarderStatus.print_latest_status() elif command == 'help': print usage diff --git a/dogstatsd.py b/dogstatsd.py index ca9a0cd13c..d5eb266022 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -261,6 +261,22 @@ def main(config_path=None): dest="use_forwarder", default=False) opts, args = parser.parse_args() + # commands that don't need the daemon + if args and args[0] in ['info', 'status']: + command = args[0] + if command == 'info': + logger.setLevel(logging.ERROR) + return DogstatsdStatus.print_latest_status() + elif command == 'status': + pid = pid_file.get_pid() + if pid: + message = 'dogstatsd is running with pid %s' % pid + else: + message = 'dogstatsd is not running' + logger.info(message) + sys.stdout.write(message + "\n") + return 0 + reporter, server = init(config_path, use_watchdog=True, use_forwarder=opts.use_forwarder) pid_file = PidFile('dogstatsd') daemon = Dogstatsd(pid_file.get_path(), server, reporter) @@ -273,8 +289,6 @@ def main(config_path=None): # Otherwise, we're process the deamon command. 
else: command = args[0] - if command == 'info': - return DogstatsdStatus.print_latest_status() if command == 'start': daemon.start() @@ -282,14 +296,6 @@ def main(config_path=None): daemon.stop() elif command == 'restart': daemon.restart() - elif command == 'status': - pid = pid_file.get_pid() - if pid: - message = 'dogstatsd is running with pid %s' % pid - else: - message = 'dogstatsd is not running' - logger.info(message) - sys.stdout.write(message + "\n") else: sys.stderr.write("Unknown command: %s\n\n" % command) parser.print_help() From 9f3fde1044e4fb1d0d5c79d997968f7707c43ef2 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 17:16:08 -0500 Subject: [PATCH 09/25] show log locations in info command --- checks/check_status.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/checks/check_status.py b/checks/check_status.py index 4f9e8fda0d..ba7e30c9b6 100644 --- a/checks/check_status.py +++ b/checks/check_status.py @@ -67,6 +67,16 @@ def stylize(cls, text, *styles): def style(*args): return Stylizer.stylize(*args) +def logger_info(): + loggers = [] + root_logger = logging.getLogger() + if len(root_logger.handlers) > 0: + for handler in root_logger.handlers: + if isinstance(handler, logging.StreamHandler): + loggers.append(handler.stream.name) + else: + loggers.append("No loggers configured") + return ', '.join(loggers) class AgentStatus(object): @@ -124,6 +134,7 @@ def _header_lines(self, indent): ("Pid", self.created_by_pid), ("Platform", platform.platform()), ("Python Version", platform.python_version()), + ("Logs", logger_info()), ] for key, value in fields: From 364e771d7267ea6354a74e498643de35c6b587fc Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Tue, 11 Dec 2012 17:16:39 -0500 Subject: [PATCH 10/25] version 3.5.0 --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index 902f6ff414..b918b6d4ce 100644 --- a/config.py +++ b/config.py @@ -47,7 +47,7 @@ def get_parsed_args(): return options, args def get_version(): - return "3.4.1" + return "3.5.0" def skip_leading_wsp(f): "Works on a file, returns a file-like object" From 8f9d191448ccf52f047a2ab49d3f1f340d575fe1 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 13:00:23 -0500 Subject: [PATCH 11/25] clean up logging config --- config.py | 19 ++++++++++--------- datadog.conf.example | 8 ++------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/config.py b/config.py index b918b6d4ce..18abd9a875 100644 --- a/config.py +++ b/config.py @@ -566,14 +566,15 @@ def getOS(): # # logging +LOG_FORMAT = '%(asctime)s | %(levelname)s | %(name)s | %(filename)s:%(lineno)s | %(message)s' + def get_logging_config(cfg_path=None): logging_config = { 'collector_log_file': '/var/log/datadog/collector.log', 'forwarder_log_file': '/var/log/datadog/forwarder.log', 'dogstatsd_log_file': '/var/log/datadog/dogstatsd.log', 'pup_log_file': '/var/log/datadog/pup.log', - 'format': '%(asctime)s | %(levelname)s | %(name)s | %(filename)s:%(lineno)s | %(message)s', - 'level': None + 'log_level': None } config_path = get_config_path(cfg_path, os_name=getOS()) @@ -593,8 +594,8 @@ def get_logging_config(cfg_path=None): 'WARN': logging.WARN, 'WARNING': logging.WARNING, } - if config.has_option('Main', 'level'): - logging_config['level'] = levels.get(config.get('Main', 'level')) + if config.has_option('Main', 'log_level'): + logging_config['log_level'] = levels.get(config.get('Main', 'log_level')) return logging_config @@ -607,8 +608,8 @@ def 
initialize_logging(logger_name): logging_config = get_logging_config() logging.basicConfig( - format=logging_config['format'], - level=logging_config['level'] or logging.ERROR, + format=LOG_FORMAT, + level=logging_config['log_level'] or logging.ERROR, ) log_file = logging_config.get('%s_log_file' % logger_name) @@ -622,19 +623,19 @@ def initialize_logging(logger_name): sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) else: file_handler = logging.FileHandler(log_file) - file_handler.setFormatter(logging.Formatter(logging_config['format'])) + file_handler.setFormatter(logging.Formatter(LOG_FORMAT)) log = logging.getLogger() log.addHandler(file_handler) dd_log = logging.getLogger('dd') - dd_log.setLevel(logging_config['level'] or logging.INFO) + dd_log.setLevel(logging_config['log_level'] or logging.INFO) except Exception, e: sys.stderr.write("Couldn't initialize logging: %s\n" % str(e)) # if config fails entirely, enable basic stdout logging as a fallback logging.basicConfig( - format="%(asctime)s | %(name)s | %(levelname)s | %(filename)s:%(lineno)s | %(message)s", + format=LOG_FORMAT, level=logging.INFO, ) diff --git a/datadog.conf.example b/datadog.conf.example index 2f42e2f30c..9fb7448c96 100644 --- a/datadog.conf.example +++ b/datadog.conf.example @@ -14,10 +14,6 @@ api_key: # Set the host's tags #tags: mytag0, mytag1 -# Use the amazon EC2 instance-id instead of hostname (unless hostname is -# explicitly set) -use_ec2_instance_id: yes - # Use mount points instead of volumes to track disk and fs metrics use_mount: no @@ -412,9 +408,9 @@ use_mount: no # Logging # ========================================================================== # +# log_level: INFO + # collector_log_file: /var/log/datadog/collector.log # forwarder_log_file: /var/log/datadog/forwarder.log # dogstatsd_log_file: /var/log/datadog/dogstatsd.log # pup_log_file: /var/log/datadog/pup.log - -# level: INFO From 80a54814fd655c6a37d26d5fa660b8bbe41cdcb2 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 14:15:36 -0500 Subject: [PATCH 12/25] rotate logs --- config.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/config.py b/config.py index 18abd9a875..782153e450 100644 --- a/config.py +++ b/config.py @@ -2,6 +2,7 @@ import os import logging import logging.config +import logging.handlers import platform import string import subprocess @@ -614,19 +615,15 @@ def initialize_logging(logger_name): log_file = logging_config.get('%s_log_file' % logger_name) if log_file is not None: - # make sure the log file exists and is writeable - try: - fp = open(log_file, 'a') - fp.write('') - fp.close() - except Exception, e: - sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) - else: - file_handler = logging.FileHandler(log_file) + # make sure the log directory is writeable + # NOTE: the entire directory needs to be writable so that rotation works + if os.access(os.path.dirname(log_file), os.R_OK | os.W_OK): + file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=1) file_handler.setFormatter(logging.Formatter(LOG_FORMAT)) log = logging.getLogger() log.addHandler(file_handler) - + else: + sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) dd_log = logging.getLogger('dd') dd_log.setLevel(logging_config['log_level'] or logging.INFO) From e1912d02dbe33d917a17613fa6f4f4f5d2f21e04 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 15:47:22 -0500 Subject: [PATCH 13/25] dump the vestigal 
'get_logger_name()' --- agent.py | 4 +- aggregator.py | 4 +- checks/__init__.py | 5 +-- checks/check_status.py | 2 +- checks/collector.py | 75 +++++++++++++++++++------------------- compat/tornadotcpserver.py | 4 +- config.py | 29 +++++---------- daemon.py | 4 +- ddagent.py | 4 +- dogstatsd.py | 4 +- graphite.py | 4 +- pup/pup.py | 4 +- transaction.py | 3 +- util.py | 4 +- 14 files changed, 65 insertions(+), 85 deletions(-) diff --git a/agent.py b/agent.py index 787538440c..7f33bdb850 100755 --- a/agent.py +++ b/agent.py @@ -34,7 +34,7 @@ from checks.collector import Collector from checks.check_status import CollectorStatus from checks.ec2 import EC2 -from config import get_config, get_system_stats, get_parsed_args, load_check_directory, get_logger_name +from config import get_config, get_system_stats, get_parsed_args, load_check_directory from daemon import Daemon from emitter import http_emitter from util import Watchdog, PidFile @@ -45,7 +45,7 @@ WATCHDOG_MULTIPLIER = 10 # Globals -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger('collector') class Agent(Daemon): """ diff --git a/aggregator.py b/aggregator.py index 7f8c27a6cf..421d4e011c 100644 --- a/aggregator.py +++ b/aggregator.py @@ -1,9 +1,7 @@ import logging from time import time -from config import get_logger_name - -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) class Infinity(Exception): pass class UnknownValue(Exception): pass diff --git a/checks/__init__.py b/checks/__init__.py index 407600eea2..89af71e96f 100644 --- a/checks/__init__.py +++ b/checks/__init__.py @@ -20,9 +20,8 @@ from util import LaconicFilter from checks import check_status -from config import get_logger_name -logger = logging.getLogger('%s.checks' % get_logger_name()) +logger = logging.getLogger(__name__) # Konstants class CheckException(Exception): pass @@ -271,7 +270,7 @@ def __init__(self, name, init_config, agentConfig, instances=None): self.init_config = init_config self.agentConfig = agentConfig self.hostname = gethostname(agentConfig) - self.log = logging.getLogger('%s.checks.%s' % (get_logger_name(), name)) + self.log = logging.getLogger('%s.%s' % (__name__, name)) self.aggregator = MetricsAggregator(self.hostname, formatter=agent_formatter) self.events = [] self.instances = instances or [] diff --git a/checks/check_status.py b/checks/check_status.py index ba7e30c9b6..725f0c7b91 100644 --- a/checks/check_status.py +++ b/checks/check_status.py @@ -20,7 +20,7 @@ STATUS_ERROR = 'ERROR' -log = logging.getLogger('%s.checks.check_status' % config.get_logger_name()) +log = logging.getLogger(__name__) class Stylizer(object): diff --git a/checks/collector.py b/checks/collector.py index 8c3e9f3048..11eaa0e64c 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -11,7 +11,7 @@ import modules from util import getOS, get_uuid, md5, Timer -from config import get_version, get_logger_name +from config import get_version from checks import gethostname import checks.system.unix as u @@ -34,8 +34,7 @@ from resources.processes import Processes as ResProcesses -logger = logging.getLogger('%s' % get_logger_name()) -checks_logger = logging.getLogger('%s.checks' % get_logger_name()) +logger = logging.getLogger(__name__) class Collector(object): @@ -60,66 +59,66 @@ def __init__(self, agentConfig, emitters, systemStats): # Unix System Checks self._unix_system_checks = { - 'disk': u.Disk(checks_logger), + 'disk': u.Disk(logger), 'io': u.IO(), - 'load': u.Load(checks_logger), - 'memory': 
u.Memory(checks_logger), - 'network': u.Network(checks_logger), + 'load': u.Load(logger), + 'memory': u.Memory(logger), + 'network': u.Network(logger), 'processes': u.Processes(), - 'cpu': u.Cpu(checks_logger) + 'cpu': u.Cpu(logger) } # Win32 System `Checks self._win32_system_checks = { - 'disk': w32.Disk(checks_logger), - 'io': w32.IO(checks_logger), - 'proc': w32.Processes(checks_logger), - 'memory': w32.Memory(checks_logger), - 'network': w32.Network(checks_logger), - 'cpu': w32.Cpu(checks_logger) + 'disk': w32.Disk(logger), + 'io': w32.IO(logger), + 'proc': w32.Processes(logger), + 'memory': w32.Memory(logger), + 'network': w32.Network(logger), + 'cpu': w32.Cpu(logger) } # Old-style metric checks - self._couchdb = CouchDb(checks_logger) - self._mongodb = MongoDb(checks_logger) - self._mysql = MySql(checks_logger) + self._couchdb = CouchDb(logger) + self._mongodb = MongoDb(logger) + self._mysql = MySql(logger) self._rabbitmq = RabbitMq() - self._ganglia = Ganglia(checks_logger) + self._ganglia = Ganglia(logger) self._cassandra = Cassandra() - self._dogstream = Dogstreams.init(checks_logger, self.agentConfig) - self._ddforwarder = DdForwarder(checks_logger, self.agentConfig) - self._ec2 = EC2(checks_logger) + self._dogstream = Dogstreams.init(logger, self.agentConfig) + self._ddforwarder = DdForwarder(logger, self.agentConfig) + self._ec2 = EC2(logger) # Metric Checks self._metrics_checks = [ - ElasticSearch(checks_logger), - Jvm(checks_logger), - Tomcat(checks_logger), - ActiveMQ(checks_logger), - Solr(checks_logger), - WMICheck(checks_logger), - Memcache(checks_logger), + ElasticSearch(logger), + Jvm(logger), + Tomcat(logger), + ActiveMQ(logger), + Solr(logger), + WMICheck(logger), + Memcache(logger), ] # Custom metric checks for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]: if len(module_spec) == 0: continue try: - self._metrics_checks.append(modules.load(module_spec, 'Check')(checks_logger)) + self._metrics_checks.append(modules.load(module_spec, 'Check')(logger)) logger.info("Registered custom check %s" % module_spec) except Exception, e: logger.exception('Unable to load custom check module %s' % module_spec) # Event Checks self._event_checks = [ - ElasticSearchClusterStatus(checks_logger), + ElasticSearchClusterStatus(logger), Nagios(socket.gethostname()), Hudson() ] # Resource Checks self._resources_checks = [ - ResProcesses(checks_logger,self.agentConfig) + ResProcesses(logger,self.agentConfig) ] def stop(self): @@ -181,11 +180,11 @@ def run(self, checksd=None): 'memShared': memory.get('physShared') }) - ioStats = sys_checks['io'].check(checks_logger, self.agentConfig) + ioStats = sys_checks['io'].check(logger, self.agentConfig) if ioStats: payload['ioStats'] = ioStats - processes = sys_checks['processes'].check(checks_logger, self.agentConfig) + processes = sys_checks['processes'].check(logger, self.agentConfig) payload.update({'processes': processes}) networkTraffic = sys_checks['network'].check(self.agentConfig) @@ -197,11 +196,11 @@ def run(self, checksd=None): # Run old-style checks mysqlStatus = self._mysql.check(self.agentConfig) - rabbitmq = self._rabbitmq.check(checks_logger, self.agentConfig) + rabbitmq = self._rabbitmq.check(logger, self.agentConfig) mongodb = self._mongodb.check(self.agentConfig) couchdb = self._couchdb.check(self.agentConfig) gangliaData = self._ganglia.check(self.agentConfig) - cassandraData = self._cassandra.check(checks_logger, self.agentConfig) + cassandraData = self._cassandra.check(logger, 
self.agentConfig) dogstreamData = self._dogstream.check(self.agentConfig) ddforwarderData = self._ddforwarder.check(self.agentConfig) @@ -248,7 +247,7 @@ def run(self, checksd=None): # Process the event checks. for event_check in self._event_checks: - event_data = event_check.check(checks_logger, self.agentConfig) + event_data = event_check.check(logger, self.agentConfig) if event_data: events[event_check.key] = event_data @@ -340,7 +339,7 @@ def _emit(self, payload): name = emitter.__name__ emitter_status = EmitterStatus(name) try: - emitter(payload, checks_logger, self.agentConfig) + emitter(payload, logger, self.agentConfig) except Exception, e: logger.exception("Error running emitter: %s" % emitter.__name__) emitter_status = EmitterStatus(name, e) @@ -416,7 +415,7 @@ def _should_send_metadata(self): # If the interval has passed, send the metadata again now = time.time() if now - self.metadata_start >= self.metadata_interval: - checks_logger.debug('Metadata interval has passed. Sending metadata.') + logger.debug('Metadata interval has passed. Sending metadata.') self.metadata_start = now return True diff --git a/compat/tornadotcpserver.py b/compat/tornadotcpserver.py index 2344082a08..e7eadb58e4 100644 --- a/compat/tornadotcpserver.py +++ b/compat/tornadotcpserver.py @@ -12,9 +12,7 @@ import fcntl -from config import get_logger_name - -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) def set_close_exec(fd): flags = fcntl.fcntl(fd, fcntl.F_GETFD) diff --git a/config.py b/config.py index 782153e450..67af6e0c91 100644 --- a/config.py +++ b/config.py @@ -18,13 +18,7 @@ DEFAULT_STATSD_FREQUENCY = 10 # seconds PUP_STATSD_FREQUENCY = 2 # seconds -logger_name_fragment = None -def get_logger_name(): - if logger_name_fragment is None: - return 'dd' - return 'dd.%s' % logger_name_fragment - -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) class PathNotFound(Exception): pass @@ -567,7 +561,8 @@ def getOS(): # # logging -LOG_FORMAT = '%(asctime)s | %(levelname)s | %(name)s | %(filename)s:%(lineno)s | %(message)s' +def get_log_format(logger_name): + return '%%(asctime)s | %%(levelname)s | dd.%s | %%(name)s(%%(filename)s:%%(lineno)s) | %%(message)s' % logger_name def get_logging_config(cfg_path=None): logging_config = { @@ -609,8 +604,8 @@ def initialize_logging(logger_name): logging_config = get_logging_config() logging.basicConfig( - format=LOG_FORMAT, - level=logging_config['log_level'] or logging.ERROR, + format=get_log_format(logger_name), + level=logging_config['log_level'] or logging.INFO, ) log_file = logging_config.get('%s_log_file' % logger_name) @@ -619,25 +614,21 @@ def initialize_logging(logger_name): # NOTE: the entire directory needs to be writable so that rotation works if os.access(os.path.dirname(log_file), os.R_OK | os.W_OK): file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=1) - file_handler.setFormatter(logging.Formatter(LOG_FORMAT)) + file_handler.setFormatter(logging.Formatter(get_log_format(logger_name))) log = logging.getLogger() log.addHandler(file_handler) else: sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) - dd_log = logging.getLogger('dd') - dd_log.setLevel(logging_config['log_level'] or logging.INFO) except Exception, e: sys.stderr.write("Couldn't initialize logging: %s\n" % str(e)) # if config fails entirely, enable basic stdout logging as a fallback logging.basicConfig( - format=LOG_FORMAT, + format=get_log_format(logger_name), 
level=logging.INFO, ) - if logger_name is not None: - global logger - global logger_name_fragment - logger_name_fragment = logger_name - logger = logging.getLogger(get_logger_name()) + # re-get the logger after logging is initialized + global logger + logger = logging.getLogger(__name__) diff --git a/daemon.py b/daemon.py index faea3867e5..5eaa698456 100644 --- a/daemon.py +++ b/daemon.py @@ -26,9 +26,7 @@ import time import logging -from config import get_logger_name - -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) class Daemon: """ diff --git a/ddagent.py b/ddagent.py index 58a3550e0f..04c83c4d82 100755 --- a/ddagent.py +++ b/ddagent.py @@ -31,12 +31,12 @@ # agent import from util import Watchdog, getOS, get_uuid from emitter import http_emitter, format_body -from config import get_config, get_logger_name +from config import get_config from checks import gethostname from checks.check_status import ForwarderStatus from transaction import Transaction, TransactionManager -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger('forwarder') TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval diff --git a/dogstatsd.py b/dogstatsd.py index d5eb266022..57737fcab4 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -24,11 +24,11 @@ from aggregator import MetricsAggregator from checks import gethostname from checks.check_status import DogstatsdStatus -from config import get_config, get_logger_name +from config import get_config from daemon import Daemon from util import json, PidFile -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger('dogstatsd') WATCHDOG_TIMEOUT = 120 diff --git a/graphite.py b/graphite.py index b381398800..63a7d0340a 100644 --- a/graphite.py +++ b/graphite.py @@ -5,9 +5,7 @@ from tornado.ioloop import IOLoop from tornado.iostream import IOStream -from config import get_logger_name - -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) try: from tornado.netutil import TCPServer diff --git a/pup/pup.py b/pup/pup.py index bc8c8bc507..cc3c3624c2 100644 --- a/pup/pup.py +++ b/pup/pup.py @@ -30,10 +30,10 @@ from tornado import websocket # project -from config import get_config, get_logger_name +from config import get_config from util import json -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger('pup') AGENT_TRANSLATION = { 'cpuUser' : 'CPU user (%)', diff --git a/transaction.py b/transaction.py index 70fbad3034..5552bfb999 100644 --- a/transaction.py +++ b/transaction.py @@ -10,9 +10,8 @@ # project from checks.check_status import ForwarderStatus -from config import get_logger_name -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) def plural(count): if count > 1: diff --git a/util.py b/util.py index 912cad5b25..474c052862 100644 --- a/util.py +++ b/util.py @@ -5,7 +5,7 @@ import math import time import uuid -from config import getOS, get_logger_name +from config import getOS try: from hashlib import md5 @@ -42,7 +42,7 @@ def loads(data): from compat.namedtuple import namedtuple import logging -logger = logging.getLogger(get_logger_name()) +logger = logging.getLogger(__name__) NumericTypes = (float, int, long) From 96572d0f6be46758fdd490004e43d4f53e8cb644 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 16:00:44 -0500 Subject: [PATCH 14/25] print the whole traceback when logging fails --- config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/config.py b/config.py index 67af6e0c91..572f3c8a82 100644 --- a/config.py +++ b/config.py @@ -9,6 +9,7 @@ import sys import glob import inspect +import traceback from optparse import OptionParser, Values from cStringIO import StringIO @@ -622,6 +623,7 @@ def initialize_logging(logger_name): except Exception, e: sys.stderr.write("Couldn't initialize logging: %s\n" % str(e)) + traceback.print_exc() # if config fails entirely, enable basic stdout logging as a fallback logging.basicConfig( From b6294cc5676eeb0a3a6061ac6b66214eb1408d6d Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 17:21:49 -0500 Subject: [PATCH 15/25] syslog logging. on by default, and configurable. --- agent.py | 2 +- checks/check_status.py | 5 +++++ config.py | 46 +++++++++++++++++++++++++++++++++++++++++- datadog.conf.example | 7 +++++++ ddagent.py | 2 +- dogstatsd.py | 2 +- 6 files changed, 60 insertions(+), 4 deletions(-) diff --git a/agent.py b/agent.py index 7f33bdb850..3192bec62a 100755 --- a/agent.py +++ b/agent.py @@ -194,7 +194,7 @@ def main(): logger.info("dd-agent is not running.") elif 'info' == command: - logger.setLevel(logging.ERROR) + logging.getLogger().setLevel(logging.ERROR) return CollectorStatus.print_latest_status() return 0 diff --git a/checks/check_status.py b/checks/check_status.py index 725f0c7b91..05b3be6eb4 100644 --- a/checks/check_status.py +++ b/checks/check_status.py @@ -74,6 +74,11 @@ def logger_info(): for handler in root_logger.handlers: if isinstance(handler, logging.StreamHandler): loggers.append(handler.stream.name) + if isinstance(handler, logging.handlers.SysLogHandler): + if isinstance(handler.address, basestring): + loggers.append('syslog:%s' % handler.address) + else: + loggers.append('syslog:(%s, %s)' % handler.address) else: loggers.append("No loggers configured") return ', '.join(loggers) diff --git a/config.py b/config.py index 572f3c8a82..8d498eb0b7 100644 --- a/config.py +++ b/config.py @@ -565,13 +565,19 @@ def getOS(): def get_log_format(logger_name): return '%%(asctime)s | %%(levelname)s | dd.%s | %%(name)s(%%(filename)s:%%(lineno)s) | %%(message)s' % logger_name +def get_syslog_format(logger_name): + return 'dd.%s | %%(name)s(%%(filename)s:%%(lineno)s) | %%(message)s' % logger_name + def get_logging_config(cfg_path=None): logging_config = { + 'log_level': None, 'collector_log_file': '/var/log/datadog/collector.log', 'forwarder_log_file': '/var/log/datadog/forwarder.log', 'dogstatsd_log_file': '/var/log/datadog/dogstatsd.log', 'pup_log_file': '/var/log/datadog/pup.log', - 'log_level': None + 'log_to_syslog': True, + 'syslog_host': None, + 'syslog_port': None, } config_path = get_config_path(cfg_path, os_name=getOS()) @@ -594,6 +600,23 @@ def get_logging_config(cfg_path=None): if config.has_option('Main', 'log_level'): logging_config['log_level'] = levels.get(config.get('Main', 'log_level')) + if config.has_option('Main', 'log_to_syslog'): + logging_config['log_to_syslog'] = config.get('Main', 'log_to_syslog').strip().lower() in ['yes', 'true', 1] + + if config.has_option('Main', 'syslog_host'): + host = config.get('Main', 'syslog_host').strip() + if host: + logging_config['syslog_host'] = host + else: + logging_config['syslog_host'] = None + + if config.has_option('Main', 'syslog_port'): + port = config.get('Main', 'syslog_port').strip() + try: + logging_config['syslog_port'] = int(port) + except: + logging_config['syslog_port'] = None + return logging_config def initialize_logging(logger_name): @@ -609,6 +632,7 @@ def 
initialize_logging(logger_name): level=logging_config['log_level'] or logging.INFO, ) + # set up file loggers log_file = logging_config.get('%s_log_file' % logger_name) if log_file is not None: # make sure the log directory is writeable @@ -621,6 +645,26 @@ def initialize_logging(logger_name): else: sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) + # set up syslog + if logging_config['log_to_syslog']: + try: + from logging.handlers import SysLogHandler + + if logging_config['syslog_host'] is not None and logging_config['syslog_port'] is not None: + sys_log_addr = (logging_config['syslog_host'], logging_config['syslog_port']) + else: + sys_log_addr = "/dev/log" + # Special-case macs + if sys.platform == 'darwin': + sys_log_addr = "/var/run/syslog" + + handler = SysLogHandler(address=sys_log_addr, facility=SysLogHandler.LOG_DAEMON) + handler.setFormatter(logging.Formatter(get_syslog_format(logger_name))) + log.addHandler(handler) + except Exception, e: + sys.stderr.write("Error setting up syslog: '%s'\n" % str(e)) + traceback.print_exc() + except Exception, e: sys.stderr.write("Couldn't initialize logging: %s\n" % str(e)) traceback.print_exc() diff --git a/datadog.conf.example b/datadog.conf.example index 9fb7448c96..21a30c3e4b 100644 --- a/datadog.conf.example +++ b/datadog.conf.example @@ -414,3 +414,10 @@ use_mount: no # forwarder_log_file: /var/log/datadog/forwarder.log # dogstatsd_log_file: /var/log/datadog/dogstatsd.log # pup_log_file: /var/log/datadog/pup.log + +# if syslog is enabled but a host and port are not set, a local domain socket +# connection will be attempted +# +# log_to_syslog: yes +# syslog_host: +# syslog_port: diff --git a/ddagent.py b/ddagent.py index 04c83c4d82..fd3eab182c 100755 --- a/ddagent.py +++ b/ddagent.py @@ -341,7 +341,7 @@ def main(): sys.argv[0]) command = args[0] if command == 'info': - logger.setLevel(logging.ERROR) + logging.getLogger().setLevel(logging.ERROR) return ForwarderStatus.print_latest_status() elif command == 'help': print usage diff --git a/dogstatsd.py b/dogstatsd.py index 57737fcab4..491dc0c302 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -265,7 +265,7 @@ def main(config_path=None): if args and args[0] in ['info', 'status']: command = args[0] if command == 'info': - logger.setLevel(logging.ERROR) + logging.getLogger().setLevel(logging.ERROR) return DogstatsdStatus.print_latest_status() elif command == 'status': pid = pid_file.get_pid() From ad880888194821e0e2ee062e45ead185ca4481c6 Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 17:32:07 -0500 Subject: [PATCH 16/25] show deprecation warning if old-style python logging config exists --- config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config.py b/config.py index 8d498eb0b7..745e5a4d57 100644 --- a/config.py +++ b/config.py @@ -584,6 +584,9 @@ def get_logging_config(cfg_path=None): config = ConfigParser.ConfigParser() config.readfp(skip_leading_wsp(open(config_path))) + if config.has_section('handlers') or config.has_section('loggers') or config.has_section('formatters'): + sys.stderr.write("Python logging config is no longer supported and will be ignored.\nTo configure logging, update the logging portion of 'datadog.conf' to match:\n 'https://github.com/DataDog/dd-agent/blob/master/datadog.conf.example'.\n") + for option in logging_config: if config.has_option('Main', option): logging_config[option] = config.get('Main', option) From d70b556cef0f75938dca0c7f5396da007c6524db Mon Sep 17 00:00:00 2001 From: Matt Singleton Date: Wed, 12 Dec 2012 
18:11:48 -0500 Subject: [PATCH 17/25] rename 'logger' to 'log' for oli --- agent.py | 24 +++++------ aggregator.py | 10 ++--- checks/__init__.py | 4 +- checks/collector.py | 88 +++++++++++++++++++------------------- compat/tornadotcpserver.py | 4 +- config.py | 37 ++++++++-------- daemon.py | 28 ++++++------ ddagent.py | 20 ++++----- dogstatsd.py | 32 +++++++------- emitter.py | 2 +- graphite.py | 26 +++++------ pup/pup.py | 6 +-- transaction.py | 24 +++++------ util.py | 20 ++++----- 14 files changed, 163 insertions(+), 162 deletions(-) diff --git a/agent.py b/agent.py index 3192bec62a..70fa915712 100755 --- a/agent.py +++ b/agent.py @@ -45,7 +45,7 @@ WATCHDOG_MULTIPLIER = 10 # Globals -logger = logging.getLogger('collector') +log = logging.getLogger('collector') class Agent(Daemon): """ @@ -58,7 +58,7 @@ def __init__(self, pidfile): self.collector = None def _handle_sigterm(self, signum, frame): - logger.debug("Caught sigterm. Stopping run loop.") + log.debug("Caught sigterm. Stopping run loop.") self.run_forever = False if self.collector: self.collector.stop() @@ -105,7 +105,7 @@ def run(self): # Explicitly kill the process, because it might be running # as a daemon. - logger.info("Exiting. Bye bye.") + log.info("Exiting. Bye bye.") sys.exit(0) def _get_emitters(self, agentConfig): @@ -129,10 +129,10 @@ def _set_agent_config_hostname(self, agentConfig): if agentConfig.get('hostname') is None and agentConfig.get('use_ec2_instance_id'): instanceId = EC2.get_instance_id() if instanceId is not None: - logger.info("Running on EC2, instanceId: %s" % instanceId) + log.info("Running on EC2, instanceId: %s" % instanceId) agentConfig['hostname'] = instanceId else: - logger.info('Not running on EC2, using hostname to identify this server') + log.info('Not running on EC2, using hostname to identify this server') return agentConfig def main(): @@ -167,19 +167,19 @@ def main(): agent = Agent(pid_file.get_path()) if 'start' == command: - logger.info('Start daemon') + log.info('Start daemon') agent.start() elif 'stop' == command: - logger.info('Stop daemon') + log.info('Stop daemon') agent.stop() elif 'restart' == command: - logger.info('Restart daemon') + log.info('Restart daemon') agent.restart() elif 'foreground' == command: - logger.info('Running in foreground') + log.info('Running in foreground') agent.run() # Commands that don't need the agent to be initialized. @@ -188,10 +188,10 @@ def main(): pid = pid_file.get_pid() if pid is not None: sys.stdout.write('dd-agent is running as pid %s.\n' % pid) - logger.info("dd-agent is running as pid %s." % pid) + log.info("dd-agent is running as pid %s." % pid) else: sys.stdout.write('dd-agent is not running.\n') - logger.info("dd-agent is not running.") + log.info("dd-agent is not running.") elif 'info' == command: logging.getLogger().setLevel(logging.ERROR) @@ -206,7 +206,7 @@ def main(): except StandardError: # Try our best to log the error. 
try: - logger.exception("Uncaught error running the agent") + log.exception("Uncaught error running the agent") except: pass raise diff --git a/aggregator.py b/aggregator.py index 421d4e011c..9c087c433a 100644 --- a/aggregator.py +++ b/aggregator.py @@ -1,7 +1,7 @@ import logging from time import time -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) class Infinity(Exception): pass class UnknownValue(Exception): pass @@ -197,12 +197,12 @@ def sample(self, value, sample_rate): def _rate(self, sample1, sample2): interval = sample2[0] - sample1[0] if interval == 0: - logger.warn('Metric %s has an interval of 0. Not flushing.' % self.name) + log.warn('Metric %s has an interval of 0. Not flushing.' % self.name) raise Infinity() delta = sample2[1] - sample1[1] if delta < 0: - logger.warn('Metric %s has a rate < 0. Not flushing.' % self.name) + log.warn('Metric %s has a rate < 0. Not flushing.' % self.name) raise UnknownValue() return (delta / interval) @@ -334,13 +334,13 @@ def flush(self): metrics = [] for context, metric in self.metrics.items(): if metric.last_sample_time < expiry_timestamp: - logger.debug("%s hasn't been submitted in %ss. Expiring." % (context, self.expiry_seconds)) + log.debug("%s hasn't been submitted in %ss. Expiring." % (context, self.expiry_seconds)) del self.metrics[context] else: metrics += metric.flush(timestamp, self.interval) # Save some stats. - logger.debug("received %s payloads since last flush" % self.count) + log.debug("received %s payloads since last flush" % self.count) self.total_count += self.count self.count = 0 return metrics diff --git a/checks/__init__.py b/checks/__init__.py index 89af71e96f..93cb79a859 100644 --- a/checks/__init__.py +++ b/checks/__init__.py @@ -21,7 +21,7 @@ from util import LaconicFilter from checks import check_status -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) # Konstants class CheckException(Exception): pass @@ -478,7 +478,7 @@ def gethostname(agentConfig): try: return socket.getfqdn() except socket.error, e: - logger.debug("processes: unable to get hostname: " + str(e)) + log.debug("processes: unable to get hostname: " + str(e)) def agent_formatter(metric, value, timestamp, tags, hostname, device_name=None): """ Formats metrics coming from the MetricsAggregator. 
Will look like: diff --git a/checks/collector.py b/checks/collector.py index 11eaa0e64c..24d5d2e24d 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -34,7 +34,7 @@ from resources.processes import Processes as ResProcesses -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) class Collector(object): @@ -59,66 +59,66 @@ def __init__(self, agentConfig, emitters, systemStats): # Unix System Checks self._unix_system_checks = { - 'disk': u.Disk(logger), + 'disk': u.Disk(log), 'io': u.IO(), - 'load': u.Load(logger), - 'memory': u.Memory(logger), - 'network': u.Network(logger), + 'load': u.Load(log), + 'memory': u.Memory(log), + 'network': u.Network(log), 'processes': u.Processes(), - 'cpu': u.Cpu(logger) + 'cpu': u.Cpu(log) } # Win32 System `Checks self._win32_system_checks = { - 'disk': w32.Disk(logger), - 'io': w32.IO(logger), - 'proc': w32.Processes(logger), - 'memory': w32.Memory(logger), - 'network': w32.Network(logger), - 'cpu': w32.Cpu(logger) + 'disk': w32.Disk(log), + 'io': w32.IO(log), + 'proc': w32.Processes(log), + 'memory': w32.Memory(log), + 'network': w32.Network(log), + 'cpu': w32.Cpu(log) } # Old-style metric checks - self._couchdb = CouchDb(logger) - self._mongodb = MongoDb(logger) - self._mysql = MySql(logger) + self._couchdb = CouchDb(log) + self._mongodb = MongoDb(log) + self._mysql = MySql(log) self._rabbitmq = RabbitMq() - self._ganglia = Ganglia(logger) + self._ganglia = Ganglia(log) self._cassandra = Cassandra() - self._dogstream = Dogstreams.init(logger, self.agentConfig) - self._ddforwarder = DdForwarder(logger, self.agentConfig) - self._ec2 = EC2(logger) + self._dogstream = Dogstreams.init(log, self.agentConfig) + self._ddforwarder = DdForwarder(log, self.agentConfig) + self._ec2 = EC2(log) # Metric Checks self._metrics_checks = [ - ElasticSearch(logger), - Jvm(logger), - Tomcat(logger), - ActiveMQ(logger), - Solr(logger), - WMICheck(logger), - Memcache(logger), + ElasticSearch(log), + Jvm(log), + Tomcat(log), + ActiveMQ(log), + Solr(log), + WMICheck(log), + Memcache(log), ] # Custom metric checks for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]: if len(module_spec) == 0: continue try: - self._metrics_checks.append(modules.load(module_spec, 'Check')(logger)) - logger.info("Registered custom check %s" % module_spec) + self._metrics_checks.append(modules.load(module_spec, 'Check')(log)) + log.info("Registered custom check %s" % module_spec) except Exception, e: - logger.exception('Unable to load custom check module %s' % module_spec) + log.exception('Unable to load custom check module %s' % module_spec) # Event Checks self._event_checks = [ - ElasticSearchClusterStatus(logger), + ElasticSearchClusterStatus(log), Nagios(socket.gethostname()), Hudson() ] # Resource Checks self._resources_checks = [ - ResProcesses(logger,self.agentConfig) + ResProcesses(log,self.agentConfig) ] def stop(self): @@ -139,7 +139,7 @@ def run(self, checksd=None): """ timer = Timer() self.run_count += 1 - logger.info("Starting collection run #%s" % self.run_count) + log.info("Starting collection run #%s" % self.run_count) payload = self._build_payload() metrics = payload['metrics'] @@ -180,11 +180,11 @@ def run(self, checksd=None): 'memShared': memory.get('physShared') }) - ioStats = sys_checks['io'].check(logger, self.agentConfig) + ioStats = sys_checks['io'].check(log, self.agentConfig) if ioStats: payload['ioStats'] = ioStats - processes = sys_checks['processes'].check(logger, self.agentConfig) + processes = 
sys_checks['processes'].check(log, self.agentConfig) payload.update({'processes': processes}) networkTraffic = sys_checks['network'].check(self.agentConfig) @@ -196,11 +196,11 @@ def run(self, checksd=None): # Run old-style checks mysqlStatus = self._mysql.check(self.agentConfig) - rabbitmq = self._rabbitmq.check(logger, self.agentConfig) + rabbitmq = self._rabbitmq.check(log, self.agentConfig) mongodb = self._mongodb.check(self.agentConfig) couchdb = self._couchdb.check(self.agentConfig) gangliaData = self._ganglia.check(self.agentConfig) - cassandraData = self._cassandra.check(logger, self.agentConfig) + cassandraData = self._cassandra.check(log, self.agentConfig) dogstreamData = self._dogstream.check(self.agentConfig) ddforwarderData = self._ddforwarder.check(self.agentConfig) @@ -247,7 +247,7 @@ def run(self, checksd=None): # Process the event checks. for event_check in self._event_checks: - event_data = event_check.check(logger, self.agentConfig) + event_data = event_check.check(log, self.agentConfig) if event_data: events[event_check.key] = event_data @@ -284,7 +284,7 @@ def run(self, checksd=None): for check in checksd: if not self.continue_running: return - logger.info("Running check %s" % check.name) + log.info("Running check %s" % check.name) instance_statuses = [] metric_count = 0 event_count = 0 @@ -308,7 +308,7 @@ def run(self, checksd=None): metric_count = len(current_check_metrics) event_count = len(current_check_events) except Exception, e: - logger.exception("Error running check %s" % check.name) + log.exception("Error running check %s" % check.name) check_status = CheckStatus(check.name, instance_statuses, metric_count, event_count) check_statuses.append(check_status) @@ -324,9 +324,9 @@ def run(self, checksd=None): try: CollectorStatus(check_statuses, emitter_statuses).persist() except Exception: - logger.exception("Error persisting collector status") + log.exception("Error persisting collector status") - logger.info("Finished run #%s. Collection time: %ss. Emit time: %ss" % + log.info("Finished run #%s. Collection time: %ss. Emit time: %ss" % (self.run_count, round(collect_duration, 2), round(emit_duration, 2))) def _emit(self, payload): @@ -339,9 +339,9 @@ def _emit(self, payload): name = emitter.__name__ emitter_status = EmitterStatus(name) try: - emitter(payload, logger, self.agentConfig) + emitter(payload, log, self.agentConfig) except Exception, e: - logger.exception("Error running emitter: %s" % emitter.__name__) + log.exception("Error running emitter: %s" % emitter.__name__) emitter_status = EmitterStatus(name, e) statuses.append(emitter_status) return statuses @@ -415,7 +415,7 @@ def _should_send_metadata(self): # If the interval has passed, send the metadata again now = time.time() if now - self.metadata_start >= self.metadata_interval: - logger.debug('Metadata interval has passed. Sending metadata.') + log.debug('Metadata interval has passed. 
Sending metadata.') self.metadata_start = now return True diff --git a/compat/tornadotcpserver.py b/compat/tornadotcpserver.py index e7eadb58e4..dcc8909899 100644 --- a/compat/tornadotcpserver.py +++ b/compat/tornadotcpserver.py @@ -12,7 +12,7 @@ import fcntl -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) def set_close_exec(fd): flags = fcntl.fcntl(fd, fcntl.F_GETFD) @@ -203,7 +203,7 @@ def _handle_connection(self, connection, address): stream = IOStream(connection, io_loop=self.io_loop) self.handle_stream(stream, address) except Exception: - logger.error("Error in connection callback", exc_info=True) + log.error("Error in connection callback", exc_info=True) def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128): diff --git a/config.py b/config.py index 745e5a4d57..31944bda79 100644 --- a/config.py +++ b/config.py @@ -19,7 +19,7 @@ DEFAULT_STATSD_FREQUENCY = 10 # seconds PUP_STATSD_FREQUENCY = 2 # seconds -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) class PathNotFound(Exception): pass @@ -414,7 +414,7 @@ def get_confd_path(osname): if os.path.exists(cur_path): return cur_path - logger.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % bad_path) + log.error("No conf.d folder found at '%s' or in the directory where the agent is currently deployed.\n" % bad_path) sys.exit(3) def get_checksd_path(osname): @@ -430,11 +430,11 @@ def get_checksd_path(osname): return _windows_checksd_path() except PathNotFound, e: if len(e.args) > 0: - logger.error("No checks.d folder found in '%s'.\n" % e.args[0]) + log.error("No checks.d folder found in '%s'.\n" % e.args[0]) else: - logger.error("No checks.d folder found.\n") + log.error("No checks.d folder found.\n") - logger.error("No checks.d folder at '%s'.\n" % checksd_path) + log.error("No checks.d folder at '%s'.\n" % checksd_path) sys.exit(3) def load_check_directory(agentConfig): @@ -464,7 +464,7 @@ def load_check_directory(agentConfig): try: check_module = __import__(check_name) except: - logger.exception('Unable to import check module %s.py from checks.d' % check_name) + log.exception('Unable to import check module %s.py from checks.d' % check_name) continue check_class = None @@ -480,7 +480,7 @@ def load_check_directory(agentConfig): break if not check_class: - logger.error('No check class (inheriting from AgentCheck) found in %s.py' % check_name) + log.error('No check class (inheriting from AgentCheck) found in %s.py' % check_name) continue # Check if the config exists OR we match the old-style config @@ -493,7 +493,7 @@ def load_check_directory(agentConfig): f.close() except: f.close() - logger.exception("Unable to parse yaml config in %s" % conf_path) + log.exception("Unable to parse yaml config in %s" % conf_path) continue elif hasattr(check_class, 'parse_agent_config'): # FIXME: Remove this check once all old-style checks are gone @@ -501,12 +501,12 @@ def load_check_directory(agentConfig): if not check_config: continue else: - logger.debug('No conf.d/%s.yaml found for checks.d/%s.py' % (check_name, check_name)) + log.debug('No conf.d/%s.yaml found for checks.d/%s.py' % (check_name, check_name)) continue # Look for the per-check config, which *must* exist if not check_config.get('instances'): - logger.error("Config %s is missing 'instances'" % conf_path) + log.error("Config %s is missing 'instances'" % conf_path) continue # Accept instances as a list, as a single dict, or as non-existant @@ -542,9 +542,9 @@ def 
load_check_directory(agentConfig): pythonpath = [pythonpath] sys.path.extend(pythonpath) - logger.debug('Loaded check.d/%s.py' % check_name) + log.debug('Loaded check.d/%s.py' % check_name) - logger.info('checks.d checks: %s' % [c.name for c in checks]) + log.info('checks.d checks: %s' % [c.name for c in checks]) return checks def getOS(): @@ -643,8 +643,8 @@ def initialize_logging(logger_name): if os.access(os.path.dirname(log_file), os.R_OK | os.W_OK): file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=5*1024*1024, backupCount=1) file_handler.setFormatter(logging.Formatter(get_log_format(logger_name))) - log = logging.getLogger() - log.addHandler(file_handler) + root_log = logging.getLogger() + root_log.addHandler(file_handler) else: sys.stderr.write("Log file is unwritable: '%s'\n" % log_file) @@ -663,7 +663,8 @@ def initialize_logging(logger_name): handler = SysLogHandler(address=sys_log_addr, facility=SysLogHandler.LOG_DAEMON) handler.setFormatter(logging.Formatter(get_syslog_format(logger_name))) - log.addHandler(handler) + root_log = logging.getLogger() + root_log.addHandler(handler) except Exception, e: sys.stderr.write("Error setting up syslog: '%s'\n" % str(e)) traceback.print_exc() @@ -678,6 +679,6 @@ def initialize_logging(logger_name): level=logging.INFO, ) - # re-get the logger after logging is initialized - global logger - logger = logging.getLogger(__name__) + # re-get the log after logging is initialized + global log + log = logging.getLogger(__name__) diff --git a/daemon.py b/daemon.py index 5eaa698456..08e6eb68c4 100644 --- a/daemon.py +++ b/daemon.py @@ -26,7 +26,7 @@ import time import logging -logger = logging.getLogger(__name__) +log = logging.getLogger(__name__) class Daemon: """ @@ -53,11 +53,11 @@ def daemonize(self): sys.exit(0) except OSError, e: msg = "fork #1 failed: %d (%s)" % (e.errno, e.strerror) - logger.error(msg) + log.error(msg) sys.stderr.write(msg + "\n") sys.exit(1) - logger.debug("Fork 1 ok") + log.debug("Fork 1 ok") # Decouple from parent environment os.chdir("/") @@ -72,7 +72,7 @@ def daemonize(self): sys.exit(0) except OSError, e: msg = "fork #2 failed: %d (%s)" % (e.errno, e.strerror) - logger.error(msg) + log.error(msg) sys.stderr.write(msg + "\n") sys.exit(1) @@ -88,7 +88,7 @@ def daemonize(self): os.dup2(so.fileno(), sys.stdout.fileno()) os.dup2(se.fileno(), sys.stderr.fileno()) - logger.info("Started") + log.info("Started") # Write pidfile atexit.register(self.delpid) # Make sure pid file is removed if we quit @@ -100,7 +100,7 @@ def daemonize(self): os.chmod(self.pidfile, 0644) except Exception, e: msg = "Unable to write pidfile: %s" % self.pidfile - logger.exception(msg) + log.exception(msg) sys.stderr.write(msg + "\n") sys.exit(1) @@ -115,7 +115,7 @@ def start(self): Start the daemon """ - logger.info("Starting...") + log.info("Starting...") # Check for a pidfile to see if the daemon already runs try: pf = file(self.pidfile,'r') @@ -128,14 +128,14 @@ def start(self): if pid: message = "pidfile %s already exists. 
-            logger.error(message % self.pidfile)
+            log.error(message % self.pidfile)
             sys.stderr.write(message % self.pidfile)
             sys.exit(1)

         # Start the daemon
-        logger.info("Pidfile: %s" % self.pidfile)
+        log.info("Pidfile: %s" % self.pidfile)
         self.daemonize()
-        logger.debug("Calling run method")
+        log.debug("Calling run method")
         self.run()

     def stop(self):
@@ -145,7 +145,7 @@ def stop(self):

         from signal import SIGTERM

-        logger.info("Stopping...")
+        log.info("Stopping...")
         # Get the pid from the pidfile
         try:
             pf = file(self.pidfile,'r')
@@ -168,11 +168,11 @@ def stop(self):
                     time.sleep(0.1)
         except OSError, err:
             if str(err).find("No such process") <= 0:
-                logger.exception("Cannot kill agent daemon at pid %s" % pid)
+                log.exception("Cannot kill agent daemon at pid %s" % pid)
                 sys.stderr.write(str(err) + "\n")
         else:
             message = "Pidfile %s does not exist. Not running?\n" % self.pidfile
-            logger.info(message)
+            log.info(message)
             sys.stderr.write(message)

             # Just to be sure. A ValueError might occur if the PID file is empty but does actually exist
@@ -182,7 +182,7 @@ def stop(self):
             return # Not an error in a restart

-        logger.info("Stopped")
+        log.info("Stopped")

     def restart(self):
         "Restart the daemon"
diff --git a/ddagent.py b/ddagent.py
index fd3eab182c..91d9084983 100755
--- a/ddagent.py
+++ b/ddagent.py
@@ -36,7 +36,7 @@
 from checks.check_status import ForwarderStatus
 from transaction import Transaction, TransactionManager

-logger = logging.getLogger('forwarder')
+log = logging.getLogger('forwarder')

 TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds
 WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval
@@ -82,10 +82,10 @@ def set_endpoints(cls):
                 and cls._application._agentConfig.get('api_key') is not None\
                 and cls._application._agentConfig.get('api_key', "pup") not in ("", "pup")
             if is_dd_user:
-                logger.warn("You are a Datadog user so we will send data to https://app.datadoghq.com")
+                log.warn("You are a Datadog user so we will send data to https://app.datadoghq.com")
                 cls._endpoints.append('dd_url')
         except:
-            logger.info("Not a Datadog user")
+            log.info("Not a Datadog user")

     def __init__(self, data, headers):
         self._data = data
@@ -96,7 +96,7 @@ def __init__(self, data, headers):

         # Insert the transaction in the Manager
         self._trManager.append(self)
-        logger.debug("Created transaction %d" % self.get_id())
+        log.debug("Created transaction %d" % self.get_id())
         self._trManager.flush()

     def __sizeof__(self):
@@ -111,7 +111,7 @@ def get_url(self, endpoint):
     def flush(self):
         for endpoint in self._endpoints:
             url = self.get_url(endpoint)
-            logger.info("Sending metrics to endpoint %s at %s" % (endpoint, url))
+            log.info("Sending metrics to endpoint %s at %s" % (endpoint, url))
             req = tornado.httpclient.HTTPRequest(url, method="POST",
                 body=self._data, headers=self._headers)
@@ -129,7 +129,7 @@ def flush(self):
     def on_response(self, response):
         if response.error:
-            logger.error("Response: %s" % response.error)
+            log.error("Response: %s" % response.error)
             self._trManager.tr_error(self)
         else:
             self._trManager.tr_success(self)
@@ -264,7 +264,7 @@ def run(self):
         else:
             # localhost in lieu of 127.0.0.1 to support IPv6
             http_server.listen(self._port, address = "localhost")
-        logger.info("Listening on port %d" % self._port)
+        log.info("Listening on port %d" % self._port)

         # Register callbacks
         self.mloop = tornado.ioloop.IOLoop.instance()
@@ -281,7 +281,7 @@ def flush_trs():
         # Register optional Graphite listener
         gport = self._agentConfig.get("graphite_listen_port", None)
         if gport is not None:
logger.info("Starting graphite listener on port %s" % gport) + log.info("Starting graphite listener on port %s" % gport) from graphite import GraphiteServer gs = GraphiteServer(self, gethostname(self._agentConfig), io_loop=self.mloop) if non_local_traffic is True: @@ -295,7 +295,7 @@ def flush_trs(): tr_sched.start() self.mloop.start() - logger.info("Stopped") + log.info("Stopped") def stop(self): self.mloop.stop() @@ -312,7 +312,7 @@ def init(): app = Application(port, agentConfig) def sigterm_handler(signum, frame): - logger.info("caught sigterm. stopping") + log.info("caught sigterm. stopping") app.stop() import signal diff --git a/dogstatsd.py b/dogstatsd.py index 491dc0c302..b811dfc18e 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -28,7 +28,7 @@ from daemon import Daemon from util import json, PidFile -logger = logging.getLogger('dogstatsd') +log = logging.getLogger('dogstatsd') WATCHDOG_TIMEOUT = 120 @@ -66,13 +66,13 @@ def __init__(self, interval, metrics_aggregator, api_host, api_key=None, use_wat self.http_conn_cls = http_client.HTTPConnection def stop(self): - logger.info("Stopping reporter") + log.info("Stopping reporter") self.finished.set() def run(self): - logger.info("Reporting to %s every %ss" % (self.api_host, self.interval)) - logger.debug("Watchdog enabled: %s" % bool(self.watchdog)) + log.info("Reporting to %s every %ss" % (self.api_host, self.interval)) + log.debug("Watchdog enabled: %s" % bool(self.watchdog)) # Persist a start-up message. DogstatsdStatus().persist() @@ -85,7 +85,7 @@ def run(self): self.watchdog.reset() # Clean up the status messages. - logger.debug("Stopped reporter") + log.debug("Stopped reporter") DogstatsdStatus.remove_latest_status() def flush(self): @@ -97,9 +97,9 @@ def flush(self): metrics = self.metrics_aggregator.flush() count = len(metrics) if not count: - logger.info("Flush #%s: No metrics to flush." % self.flush_count) + log.info("Flush #%s: No metrics to flush." % self.flush_count) else: - logger.info("Flush #%s: flushing %s metrics" % (self.flush_count, count)) + log.info("Flush #%s: flushing %s metrics" % (self.flush_count, count)) self.submit(metrics) # Persist a status message. @@ -112,7 +112,7 @@ def flush(self): ).persist() except: - logger.exception("Error flushing metrics") + log.exception("Error flushing metrics") def submit(self, metrics): # HACK - Copy and pasted from dogapi, because it's a bit of a pain to distribute python @@ -140,7 +140,7 @@ def submit(self, metrics): finally: conn.close() duration = round((time() - start_time) * 1000.0, 4) - logger.info("%s %s %s%s (%sms)" % ( + log.info("%s %s %s%s (%sms)" % ( status, method, self.api_host, url, duration)) return duration @@ -166,7 +166,7 @@ def start(self): # Bind to the UDP socket. self.socket.bind(self.address) - logger.info('Listening on host & port: %s' % str(self.address)) + log.info('Listening on host & port: %s' % str(self.address)) # Inline variables for quick look-up. buffer_size = self.buffer_size @@ -192,7 +192,7 @@ def start(self): except (KeyboardInterrupt, SystemExit): break except Exception, e: - logger.exception('Error receiving datagram') + log.exception('Error receiving datagram') def stop(self): self.running = False @@ -208,7 +208,7 @@ def __init__(self, pid_file, server, reporter): def run(self): # Gracefully exit on sigterm. - logger.info("Adding sig handler") + log.info("Adding sig handler") signal.signal(signal.SIGTERM, self._handle_sigterm) self.reporter.start() try: @@ -218,16 +218,16 @@ def run(self): # the reporting thread. 
         self.reporter.stop()
         self.reporter.join()
-        logger.info("Stopped")
+        log.info("Stopped")

     def _handle_sigterm(self, signum, frame):
-        logger.info("Caught sigterm. Stopping run loop.")
+        log.info("Caught sigterm. Stopping run loop.")
         self.server.stop()

 def init(config_path=None, use_watchdog=False, use_forwarder=False):
     c = get_config(parse_args=False, cfg_path=config_path)
-    logger.debug("Configuration dogstatsd")
+    log.debug("Configuring dogstatsd")

     port = c['dogstatsd_port']
     interval = int(c['dogstatsd_interval'])
@@ -273,7 +273,7 @@ def main(config_path=None):
             message = 'dogstatsd is running with pid %s' % pid
         else:
             message = 'dogstatsd is not running'
-        logger.info(message)
+        log.info(message)
         sys.stdout.write(message + "\n")

     return 0
diff --git a/emitter.py b/emitter.py
index ba85dddc07..bcba0fef8a 100644
--- a/emitter.py
+++ b/emitter.py
@@ -37,7 +37,7 @@ def http_emitter(message, logger, agentConfig):
     try:
         request = urllib2.Request(url, postBackData, headers)

-        # Do the request, log any errors
+        # Do the request and log any errors
         response = urllib2.urlopen(request)
         try:
             logger.debug('http_emitter: postback response: ' + str(response.read()))
diff --git a/graphite.py b/graphite.py
index 63a7d0340a..5292f68da5 100644
--- a/graphite.py
+++ b/graphite.py
@@ -5,19 +5,19 @@
 from tornado.ioloop import IOLoop
 from tornado.iostream import IOStream

-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)

 try:
     from tornado.netutil import TCPServer
 except Exception, e:
-    logger.warn("Tornado < 2.1.1 detected, using compatibility TCPServer")
+    log.warn("Tornado < 2.1.1 detected, using compatibility TCPServer")
     from compat.tornadotcpserver import TCPServer

 class GraphiteServer(TCPServer):

     def __init__(self, app, hostname, io_loop=None, ssl_options=None, **kwargs):
-        logger.info('Graphite listener is started')
+        log.info('Graphite listener is started')
         self.app = app
         self.hostname = hostname
         TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options, **kwargs)
@@ -29,7 +29,7 @@ def handle_stream(self, stream, address):

 class GraphiteConnection(object):

     def __init__(self, stream, address, app, hostname):
-        logger.debug('received a new connection from %s', address)
+        log.debug('received a new connection from %s', address)
         self.app = app
         self.stream = stream
         self.address = address
@@ -40,17 +40,17 @@ def __init__(self, stream, address, app, hostname):
     def _on_read_header(self,data):
         try:
             size = struct.unpack("!I",data)[0]
-            logger.debug("Receiving a string of size:" + str(size))
+            log.debug("Receiving a string of size:" + str(size))
             self.stream.read_bytes(size, self._on_read_line)
         except Exception, e:
-            logger.error(e)
+            log.error(e)

     def _on_read_line(self, data):
-        logger.debug('read a new line from %s', self.address)
+        log.debug('read a new line from %s', self.address)
         self._decode(data)

     def _on_close(self):
-        logger.debug('client quit %s', self.address)
+        log.debug('client quit %s', self.address)

     def _parseMetric(self, metric):
         """Graphite does not impose a particular metric structure.
@@ -70,7 +70,7 @@ def _parseMetric(self, metric):
             return metric, host, device

         except Exception, e:
-            logger.exception("Unparsable metric: {0}".format(metric))
+            log.exception("Unparsable metric: {0}".format(metric))
             return None, None, None

     def _postMetric(self, name, host, device, datapoint):
@@ -83,25 +83,25 @@ def _processMetric(self, metric, datapoint):
         """Parse the metric name to fetch (host, metric, device) and send
        the datapoint to datadog"""

-        logger.debug("New metric: %s, values: %s" % (metric, datapoint))
+        log.debug("New metric: %s, values: %s" % (metric, datapoint))
         (metric,host,device) = self._parseMetric(metric)

         if metric is not None:
             self._postMetric(metric,host,device, datapoint)
-            logger.info("Posted metric: %s, host: %s, device: %s" % (metric, host, device))
+            log.info("Posted metric: %s, host: %s, device: %s" % (metric, host, device))

     def _decode(self,data):
         try:
             datapoints = pickle.loads(data)
         except:
-            logger.exception("Cannot decode grapite points")
+            log.exception("Cannot decode graphite points")
             return

         for (metric, datapoint) in datapoints:
             try:
                 datapoint = ( float(datapoint[0]), float(datapoint[1]) )
             except Exception, e:
-                logger.error(e)
+                log.error(e)
                 continue

             self._processMetric(metric,datapoint)
diff --git a/pup/pup.py b/pup/pup.py
index cc3c3624c2..43120fb839 100644
--- a/pup/pup.py
+++ b/pup/pup.py
@@ -33,7 +33,7 @@
 from config import get_config
 from util import json

-logger = logging.getLogger('pup')
+log = logging.getLogger('pup')

 AGENT_TRANSLATION = {
     'cpuUser'       : 'CPU user (%)',
@@ -245,10 +245,10 @@ def main():
     is_enabled = c['use_pup']

     if is_enabled:
-        logger.info("Starting pup")
+        log.info("Starting pup")
         run_pup(c)
     else:
-        logger.info("Pup is disabled. Exiting")
+        log.info("Pup is disabled. Exiting")
         # We're exiting purposefully, so exit with zero (supervisor's expected
         # code). HACK: Sleep a little bit so supervisor thinks we've started cleanly
         # and thus can exit cleanly.
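The HACK comment that closes the pup hunk encodes a real supervisord constraint: a process that exits before supervisord's startsecs window is counted as a failed start and retried, so a deliberately disabled service has to linger before exiting 0. A minimal standalone sketch of that shutdown path (the sleep length is illustrative, not the value pup uses):

    import sys
    import time

    def exit_for_supervisor():
        # Outlive supervisord's startsecs so this exit is recorded as a
        # clean shutdown of a started program, not a launch failure.
        time.sleep(4)  # illustrative; should exceed startsecs
        sys.exit(0)    # the exit code supervisor expects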
diff --git a/transaction.py b/transaction.py
index 5552bfb999..c135334ffb 100644
--- a/transaction.py
+++ b/transaction.py
@@ -11,7 +11,7 @@
 # project
 from checks.check_status import ForwarderStatus

-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)

 def plural(count):
     if count > 1:
@@ -98,7 +98,7 @@ def get_transactions(self):
         return self._transactions

     def print_queue_stats(self):
-        logger.info("Queue size: at %s, %s transaction(s), %s KB" %
+        log.info("Queue size: at %s, %s transaction(s), %s KB" %
             (time.time(), self._total_count, (self._total_size/1024)))

     def get_tr_id(self):
@@ -113,15 +113,15 @@ def append(self,tr):

         # Check the size
         tr_size = tr.get_size()
-        logger.info("New transaction to add, total size of queue would be: %s KB" %
+        log.info("New transaction to add, total size of queue would be: %s KB" %
             ((self._total_size + tr_size)/ 1024))

         if (self._total_size + tr_size) > self._MAX_QUEUE_SIZE:
-            logger.warn("Queue is too big, removing old messages...")
+            log.warn("Queue is too big, removing old messages...")
             new_trs = sorted(self._transactions,key=attrgetter('_next_flush'), reverse = True)
             for tr2 in new_trs:
                 if (self._total_size + tr_size) > self._MAX_QUEUE_SIZE:
-                    logger.warn("Removing transaction %s from queue" % tr2.get_id())
+                    log.warn("Removing transaction %s from queue" % tr2.get_id())
                     self._transactions.remove(tr2)
                     self._total_count = self._total_count - 1
                     self._total_size = self._total_size - tr2.get_size()
@@ -131,13 +131,13 @@ def append(self,tr):
         self._total_count = self._total_count + 1
         self._total_size = self._total_size + tr_size

-        logger.info("Transaction %s added" % (tr.get_id()))
+        log.info("Transaction %s added" % (tr.get_id()))
         self.print_queue_stats()

     def flush(self):

         if self._trs_to_flush is not None:
-            logger.info("A flush is already in progress, not doing anything")
+            log.info("A flush is already in progress, not doing anything")
             return

         to_flush = []
@@ -149,7 +149,7 @@ def flush(self):

         count = len(to_flush)
         if count > 0:
-            logger.info("Flushing %s transaction%s" % (count,plural(count)))
+            log.info("Flushing %s transaction%s" % (count,plural(count)))
             self._trs_to_flush = to_flush
             self.flush_next()
         self._flush_count += 1
@@ -173,11 +173,11 @@ def flush_next(self):
             if delay <= 0:
                 tr = self._trs_to_flush.pop()
                 self._last_flush = datetime.now()
-                logger.debug("Flushing transaction %d" % tr.get_id())
+                log.debug("Flushing transaction %d" % tr.get_id())
                 try:
                     tr.flush()
                 except Exception,e :
-                    logger.exception(e)
+                    log.exception(e)
                     self.tr_error(tr)
                     self.flush_next()
             else:
@@ -195,12 +195,12 @@ def flush_next(self):
     def tr_error(self,tr):
         tr.inc_error_count()
         tr.compute_next_flush(self._MAX_WAIT_FOR_REPLAY)
-        logger.info("Transaction %d in error (%s error%s), it will be replayed after %s" %
+        log.info("Transaction %d in error (%s error%s), it will be replayed after %s" %
             (tr.get_id(), tr.get_error_count(), plural(tr.get_error_count()),
              tr.get_next_flush()))

     def tr_success(self,tr):
-        logger.info("Transaction %d completed" % tr.get_id())
+        log.info("Transaction %d completed" % tr.get_id())
         self._transactions.remove(tr)
         self._total_count = self._total_count - 1
         self._total_size = self._total_size - tr.get_size()
diff --git a/util.py b/util.py
index 474c052862..e8778e4a37 100644
--- a/util.py
+++ b/util.py
@@ -42,7 +42,7 @@ def loads(data):
 from compat.namedtuple import namedtuple

 import logging

-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)

 NumericTypes = (float, int, long)
@@ -116,13 +116,13 @@ def __init__(self, duration):
         def self_destruct(signum, frame):
             try:
                 import traceback
-                logger.error("Self-destructing...")
-                logger.error(traceback.format_exc())
+                log.error("Self-destructing...")
+                log.error(traceback.format_exc())
             finally:
                 os.kill(os.getpid(), signal.SIGKILL)

     def reset(self):
-        logger.debug("Resetting watchdog for %d" % self._duration)
+        log.debug("Resetting watchdog for %d" % self._duration)
         signal.alarm(self._duration)
@@ -140,29 +140,29 @@ def get_path(self):
         # Can we write to the directory
         try:
             if os.access(self.pid_dir, os.W_OK):
-                logger.info("Pid file is: %s" % self.pid_path)
+                log.info("Pid file is: %s" % self.pid_path)
                 return self.pid_path
         except:
-            logger.warn("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE)
+            log.warn("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE)

         # if all else fails
         if os.access("/tmp", os.W_OK):
             tmp_path = os.path.join('/tmp', self.pid_file)
-            logger.debug("Using temporary pid file: %s" % tmp_path)
+            log.debug("Using temporary pid file: %s" % tmp_path)
             return tmp_path
         else:
             # Can't save pid file, bail out
-            logger.error("Cannot save pid file anywhere")
+            log.error("Cannot save pid file anywhere")
             raise Exception("Cannot save pid file anywhere")

     def clean(self):
         try:
             path = self.get_path()
-            logger.debug("Cleaning up pid file %s" % path)
+            log.debug("Cleaning up pid file %s" % path)
             os.remove(path)
             return True
         except:
-            logger.warn("Could not clean up pid file")
+            log.warn("Could not clean up pid file")
             return False

     def get_pid(self):

From f80a706b65c6256335b59dbf82ac2ecb19bb0384 Mon Sep 17 00:00:00 2001
From: Conor Branagan
Date: Wed, 9 Jan 2013 16:08:12 -0500
Subject: [PATCH 18/25] Only log "starting collector run.." every 20 runs.

---
 checks/collector.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/checks/collector.py b/checks/collector.py
index c61eb56827..f2b7cec4fb 100644
--- a/checks/collector.py
+++ b/checks/collector.py
@@ -34,6 +34,7 @@

 log = logging.getLogger(__name__)

+LOGGING_INTERVAL = 20

 class Collector(object):
@@ -134,7 +135,8 @@ def run(self, checksd=None):
         """
         timer = Timer()
         self.run_count += 1
-        logger.debug("Starting collection run #%s" % self.run_count)
+        if self.run_count == 1 or self.run_count % LOGGING_INTERVAL == 0:
+            log.debug("Starting collection run #%s" % self.run_count)

         payload = self._build_payload()
         metrics = payload['metrics']
@@ -323,8 +325,9 @@ def run(self, checksd=None):
         except Exception:
             log.exception("Error persisting collector status")

-        log.info("Finished run #%s. Collection time: %ss. Emit time: %ss" %
-            (self.run_count, round(collect_duration, 2), round(emit_duration, 2)))
+        if self.run_count == 1 or self.run_count % LOGGING_INTERVAL == 0:
+            log.info("Finished run #%s. Collection time: %ss. Emit time: %ss" %
+                (self.run_count, round(collect_duration, 2), round(emit_duration, 2)))

     def _emit(self, payload):
         """ Send the payload via the emitters.
         """
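The gating introduced in patch 18 is a counter-based rate limiter: log the first run, then only every Nth run, so a frequent collection loop does not flood the log. A standalone sketch of the pattern, using a hypothetical task class rather than the agent's Collector:

    import logging

    log = logging.getLogger(__name__)

    LOGGING_INTERVAL = 20  # emit run markers on every 20th run

    class PeriodicTask(object):
        def __init__(self):
            self.run_count = 0

        def run(self):
            self.run_count += 1
            # Always log the first run, then every LOGGING_INTERVAL-th run.
            if self.run_count == 1 or self.run_count % LOGGING_INTERVAL == 0:
                log.info("Starting run #%s" % self.run_count)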
""" From 342c0c50ffcc01ad19ab1febd40096cd437811e9 Mon Sep 17 00:00:00 2001 From: Conor Branagan Date: Thu, 10 Jan 2013 22:02:07 +0000 Subject: [PATCH 19/25] fix some issues from the conflicting merge --- checks/collector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/checks/collector.py b/checks/collector.py index f2b7cec4fb..157937d38a 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -60,11 +60,11 @@ def __init__(self, agentConfig, emitters, systemStats): # Unix System Checks self._unix_system_checks = { 'disk': u.Disk(log), - 'io': u.IO(), + 'io': u.IO(log), 'load': u.Load(log), 'memory': u.Memory(log), 'network': u.Network(log), - 'processes': u.Processes(), + 'processes': u.Processes(log), 'cpu': u.Cpu(log) } @@ -179,11 +179,11 @@ def run(self, checksd=None): 'memShared': memory.get('physShared') }) - ioStats = sys_checks['io'].check(log, self.agentConfig) + ioStats = sys_checks['io'].check(self.agentConfig) if ioStats: payload['ioStats'] = ioStats - processes = sys_checks['processes'].check(log, self.agentConfig) + processes = sys_checks['processes'].check(self.agentConfig) payload.update({'processes': processes}) networkTraffic = sys_checks['network'].check(self.agentConfig) From b3ae1e52c7387a5ae62837b723ad1979ede8aa8f Mon Sep 17 00:00:00 2001 From: Conor Branagan Date: Thu, 10 Jan 2013 22:08:04 +0000 Subject: [PATCH 20/25] remove old setup_logging call --- agent.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/agent.py b/agent.py index 8b77b6bc31..a0d0ace8b2 100755 --- a/agent.py +++ b/agent.py @@ -144,9 +144,6 @@ def main(): options, args = get_parsed_args() agentConfig = get_config(options=options) - # Logging - setup_logging(agentConfig) - COMMANDS = [ 'start', 'stop', From fcabffc0ec8155a1edbff256a90e496c73c6bbc0 Mon Sep 17 00:00:00 2001 From: Conor Branagan Date: Thu, 10 Jan 2013 22:25:02 +0000 Subject: [PATCH 21/25] Always show "starting run #x" logging because it's at debug level. --- checks/collector.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/checks/collector.py b/checks/collector.py index a140b9699f..47d306ffb9 100644 --- a/checks/collector.py +++ b/checks/collector.py @@ -34,7 +34,6 @@ log = logging.getLogger(__name__) -LOGGING_INTERVAL = 20 class Collector(object): @@ -136,8 +135,7 @@ def run(self, checksd=None): """ timer = Timer() self.run_count += 1 - if self.run_count == 1 or self.run_count % LOGGING_INTERVAL == 0: - log.debug("Starting collection run #%s" % self.run_count) + log.debug("Starting collection run #%s" % self.run_count) payload = self._build_payload() metrics = payload['metrics'] @@ -328,9 +326,8 @@ def run(self, checksd=None): except Exception: log.exception("Error persisting collector status") - if self.run_count == 1 or self.run_count % LOGGING_INTERVAL == 0: - log.info("Finished run #%s. Collection time: %ss. Emit time: %ss" % - (self.run_count, round(collect_duration, 2), round(emit_duration, 2))) + log.debug("Finished run #%s. Collection time: %ss. Emit time: %ss" % + (self.run_count, round(collect_duration, 2), round(emit_duration, 2))) def _emit(self, payload): """ Send the payload via the emitters. 
""" From 8a57fd4962607d65f06116378e5d547620694398 Mon Sep 17 00:00:00 2001 From: Conor Branagan Date: Fri, 11 Jan 2013 14:19:30 -0500 Subject: [PATCH 22/25] update travis to use master branch for downloads --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index e2735e6087..acac507db1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,18 +20,18 @@ before_script: - sudo apt-get install sysstat - sudo apt-get install haproxy - sudo apt-get install python-mysqldb - - curl -L https://raw.github.com/DataDog/dd-agent/check-haproxy/tests/haproxy.cfg > /tmp/haproxy.cfg + - curl -L https://raw.github.com/DataDog/dd-agent/master/tests/haproxy.cfg > /tmp/haproxy.cfg - curl -L http://mirror.sdunix.com/apache/tomcat/tomcat-6/v6.0.36/bin/apache-tomcat-6.0.36.tar.gz | tar -C /tmp -xzf - && mv /tmp/apache-tomcat-6.0.36 /tmp/apache-tomcat-6 && echo 'export CATALINA_OPTS="-Dcom.sun.management.jmxremote.port=8090 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" export CATALINA_OUT="/tmp/apache-tomcat-6/catalina.out"' > /tmp/apache-tomcat-6/bin/setenv.sh - curl -L http://mirrors.ibiblio.org/apache/tomcat/tomcat-7/v7.0.34/bin/apache-tomcat-7.0.34.tar.gz | tar -C /tmp -xzf - && mv /tmp/apache-tomcat-7.0.34/ /tmp/apache-tomcat-7 && echo 'export CATALINA_OPTS="-Dcom.sun.management.jmxremote.port=8091 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.password.file=/tmp/apache-tomcat-7/conf/jmxremote.password -Dcom.sun.management.jmxremote.access.file=/tmp/apache-tomcat-7/conf/jmxremote.access -Dcom.sun.management.jmxremote.ssl=false" export CATALINA_OUT="/tmp/apache-tomcat-7/catalina.out"' > /tmp/apache-tomcat-7/bin/setenv.sh && echo 'monitorRole readonly' > /tmp/apache-tomcat-7/conf/jmxremote.access && echo 'monitorRole tomcat' > /tmp/apache-tomcat-7/conf/jmxremote.password && chmod 400 /tmp/apache-tomcat-7/conf/jmxremote.password - - curl -L https://raw.github.com/DataDog/dd-agent/jmx_multiple_checks/tests/tomcat_cfg.xml > /tmp/apache-tomcat-6/conf/server.xml + - curl -L https://raw.github.com/DataDog/dd-agent/master/tests/tomcat_cfg.xml > /tmp/apache-tomcat-6/conf/server.xml - curl -L http://mirror.cc.columbia.edu/pub/software/apache/lucene/solr/3.6.1/apache-solr-3.6.1.tgz > /tmp/solr.tgz && tar -C /tmp -xzf /tmp/solr.tgz && mv /tmp/apache-solr-3.6.1 /tmp/apache-solr-3 && echo 'monitorRole readonly' > /tmp/apache-solr-3/example/jmxremote.access && echo 'monitorRole solr' > /tmp/apache-solr-3/example/jmxremote.password && chmod 400 /tmp/apache-solr-3/example/jmxremote.password - sudo apt-get install nginx - - curl -L https://raw.github.com/DataDog/dd-agent/multiple_instances/tests/nginx.conf > /tmp/default.conf + - curl -L https://raw.github.com/DataDog/dd-agent/master/tests/nginx.conf > /tmp/default.conf - sudo cp /tmp/default.conf /etc/nginx/conf.d/default.conf - sudo /etc/init.d/nginx restart - sudo apt-get install apache2 - - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/checks_to_checksd/tests/apache/ports.conf > /etc/apache2/ports.conf" - - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/checks_to_checksd/tests/apache/apache.conf > /etc/apache2/apache.conf" + - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/tests/apache/ports.conf > /etc/apache2/ports.conf" + - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/tests/apache/apache.conf > /etc/apache2/apache.conf" - sudo /etc/init.d/apache2 restart - 
   - sudo apt-get install memcached

From cb6abeb51afa6aa51da17881b6e91479e9467bf2 Mon Sep 17 00:00:00 2001
From: Conor Branagan
Date: Fri, 11 Jan 2013 14:19:40 -0500
Subject: [PATCH 23/25] generate a datadog.conf file before testing with
 travis

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index acac507db1..a5b5867b49 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,5 +35,7 @@ before_script:
   - sudo /etc/init.d/apache2 restart
   - sudo apt-get remove memcached
   - sudo apt-get install memcached
+  - sudo mkdir -p /etc/dd-agent/
+  - sudo basch -c "curl -L https://raw.github.com/DataDog/dd-agent/datadog.conf.example > /etc/dd-agent/datadog.conf"
 env:
   - DB=redis

From cdbf8b0df343346d9c4a3fc3c441226d1e059d1f Mon Sep 17 00:00:00 2001
From: Conor Branagan
Date: Sat, 12 Jan 2013 10:18:11 -0500
Subject: [PATCH 24/25] Fix syntax in travis.yml

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index a5b5867b49..282a86bb13 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,6 +36,6 @@ before_script:
   - sudo apt-get remove memcached
   - sudo apt-get install memcached
   - sudo mkdir -p /etc/dd-agent/
-  - sudo basch -c "curl -L https://raw.github.com/DataDog/dd-agent/datadog.conf.example > /etc/dd-agent/datadog.conf"
+  - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/datadog.conf.example > /etc/dd-agent/datadog.conf"
 env:
   - DB=redis

From d7194b842b9a9e6dd85ab980c76623669352b3ec Mon Sep 17 00:00:00 2001
From: Conor Branagan
Date: Sat, 12 Jan 2013 10:27:13 -0500
Subject: [PATCH 25/25] Add a branch for datadog.conf file in travis.yml

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 282a86bb13..62d1c3a6b3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,6 +36,6 @@ before_script:
   - sudo apt-get remove memcached
   - sudo apt-get install memcached
   - sudo mkdir -p /etc/dd-agent/
-  - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/datadog.conf.example > /etc/dd-agent/datadog.conf"
+  - sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/datadog.conf.example > /etc/dd-agent/datadog.conf"
 env:
   - DB=redis
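The net effect of patches 23 through 25 is one working line of before_script: fetch datadog.conf.example from the master branch and install it as /etc/dd-agent/datadog.conf before the tests run. For reference, a rough Python 2 equivalent of that step (the helper itself is not part of the series; only the URL and destination come from the final patch):

    import urllib2

    CONF_URL = ("https://raw.github.com/DataDog/dd-agent/"
                "master/datadog.conf.example")

    def install_example_conf(dest="/etc/dd-agent/datadog.conf"):
        # Download the sample config and write it where the agent expects it.
        data = urllib2.urlopen(CONF_URL).read()
        with open(dest, "w") as f:
            f.write(data)

    if __name__ == "__main__":
        install_example_conf()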