Merge pull request #297 from DataDog/logging
Logging
conorbranagan committed Jan 12, 2013
2 parents 4a6afbf + d7194b8 commit c939854
Showing 32 changed files with 437 additions and 347 deletions.
12 changes: 7 additions & 5 deletions .travis.yml
@@ -20,20 +20,22 @@ before_script:
- sudo apt-get install sysstat
- sudo apt-get install haproxy
- sudo apt-get install python-mysqldb
- curl -L https://raw.github.com/DataDog/dd-agent/check-haproxy/tests/haproxy.cfg > /tmp/haproxy.cfg
- curl -L https://raw.github.com/DataDog/dd-agent/master/tests/haproxy.cfg > /tmp/haproxy.cfg
- curl -L http://mirror.sdunix.com/apache/tomcat/tomcat-6/v6.0.36/bin/apache-tomcat-6.0.36.tar.gz | tar -C /tmp -xzf - && mv /tmp/apache-tomcat-6.0.36 /tmp/apache-tomcat-6 && echo 'export CATALINA_OPTS="-Dcom.sun.management.jmxremote.port=8090 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" export CATALINA_OUT="/tmp/apache-tomcat-6/catalina.out"' > /tmp/apache-tomcat-6/bin/setenv.sh
- curl -L http://mirrors.ibiblio.org/apache/tomcat/tomcat-7/v7.0.34/bin/apache-tomcat-7.0.34.tar.gz | tar -C /tmp -xzf - && mv /tmp/apache-tomcat-7.0.34/ /tmp/apache-tomcat-7 && echo 'export CATALINA_OPTS="-Dcom.sun.management.jmxremote.port=8091 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.password.file=/tmp/apache-tomcat-7/conf/jmxremote.password -Dcom.sun.management.jmxremote.access.file=/tmp/apache-tomcat-7/conf/jmxremote.access -Dcom.sun.management.jmxremote.ssl=false" export CATALINA_OUT="/tmp/apache-tomcat-7/catalina.out"' > /tmp/apache-tomcat-7/bin/setenv.sh && echo 'monitorRole readonly' > /tmp/apache-tomcat-7/conf/jmxremote.access && echo 'monitorRole tomcat' > /tmp/apache-tomcat-7/conf/jmxremote.password && chmod 400 /tmp/apache-tomcat-7/conf/jmxremote.password
- curl -L https://raw.github.com/DataDog/dd-agent/jmx_multiple_checks/tests/tomcat_cfg.xml > /tmp/apache-tomcat-6/conf/server.xml
- curl -L https://raw.github.com/DataDog/dd-agent/master/tests/tomcat_cfg.xml > /tmp/apache-tomcat-6/conf/server.xml
- curl -L http://mirror.cc.columbia.edu/pub/software/apache/lucene/solr/3.6.1/apache-solr-3.6.1.tgz > /tmp/solr.tgz && tar -C /tmp -xzf /tmp/solr.tgz && mv /tmp/apache-solr-3.6.1 /tmp/apache-solr-3 && echo 'monitorRole readonly' > /tmp/apache-solr-3/example/jmxremote.access && echo 'monitorRole solr' > /tmp/apache-solr-3/example/jmxremote.password && chmod 400 /tmp/apache-solr-3/example/jmxremote.password
- sudo apt-get install nginx
- curl -L https://raw.github.com/DataDog/dd-agent/multiple_instances/tests/nginx.conf > /tmp/default.conf
- curl -L https://raw.github.com/DataDog/dd-agent/master/tests/nginx.conf > /tmp/default.conf
- sudo cp /tmp/default.conf /etc/nginx/conf.d/default.conf
- sudo /etc/init.d/nginx restart
- sudo apt-get install apache2
- sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/checks_to_checksd/tests/apache/ports.conf > /etc/apache2/ports.conf"
- sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/checks_to_checksd/tests/apache/apache.conf > /etc/apache2/apache.conf"
- sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/tests/apache/ports.conf > /etc/apache2/ports.conf"
- sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/tests/apache/apache.conf > /etc/apache2/apache.conf"
- sudo /etc/init.d/apache2 restart
- sudo apt-get remove memcached
- sudo apt-get install memcached
- sudo mkdir -p /etc/dd-agent/
- sudo bash -c "curl -L https://raw.github.com/DataDog/dd-agent/master/datadog.conf.example > /etc/dd-agent/datadog.conf"
env:
- DB=redis
66 changes: 18 additions & 48 deletions agent.py
@@ -10,6 +10,9 @@
(C) Datadog, Inc. 2010 all rights reserved
'''

# set up logging before importing any other components
from config import initialize_logging; initialize_logging('collector')

import os; os.umask(022)

# Core modules
@@ -44,8 +47,7 @@
WATCHDOG_MULTIPLIER = 10

# Globals
agent_logger = logging.getLogger('agent')

log = logging.getLogger('collector')

class Agent(Daemon):
"""
@@ -58,7 +60,7 @@ def __init__(self, pidfile):
self.collector = None

def _handle_sigterm(self, signum, frame):
agent_logger.debug("Caught sigterm. Stopping run loop.")
log.debug("Caught sigterm. Stopping run loop.")
self.run_forever = False
if self.collector:
self.collector.stop()
@@ -87,7 +89,7 @@ def run(self, config=None):
# Configure the watchdog.
check_frequency = int(agentConfig['check_freq'])
watchdog = self._get_watchdog(check_frequency, agentConfig)

# Run the main loop.
while self.run_forever:
# Do the work.
@@ -108,7 +110,7 @@ def run(self, config=None):

# Explicitly kill the process, because it might be running
# as a daemon.
agent_logger.info("Exiting. Bye bye.")
log.info("Exiting. Bye bye.")
sys.exit(0)

def _get_emitters(self, agentConfig):
@@ -132,51 +134,16 @@ def _set_agent_config_hostname(self, agentConfig):
if agentConfig.get('hostname') is None and agentConfig.get('use_ec2_instance_id'):
instanceId = EC2.get_instance_id()
if instanceId is not None:
agent_logger.info("Running on EC2, instanceId: %s" % instanceId)
log.info("Running on EC2, instanceId: %s" % instanceId)
agentConfig['hostname'] = instanceId
else:
agent_logger.info('Not running on EC2, using hostname to identify this server')
log.info('Not running on EC2, using hostname to identify this server')
return agentConfig


def setup_logging(agentConfig):
"""Configure logging to use syslog whenever possible.
Also controls debug_mode."""
if agentConfig['debug_mode']:
logFile = "/tmp/dd-agent.log"
logging.basicConfig(filename=logFile, filemode='w',
level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.info("Logging to %s" % logFile)
else:
try:
from logging.handlers import SysLogHandler
rootLog = logging.getLogger()
rootLog.setLevel(logging.INFO)

sys_log_addr = "/dev/log"

# Special-case macs
if sys.platform == 'darwin':
sys_log_addr = "/var/run/syslog"

handler = SysLogHandler(address=sys_log_addr, facility=SysLogHandler.LOG_DAEMON)
formatter = logging.Formatter("dd-agent - %(name)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
rootLog.addHandler(handler)
logging.info('Logging to syslog is set up')
except Exception,e:
sys.stderr.write("Error while setting up syslog logging (%s). No logging available" % str(e))
logging.disable(logging.ERROR)


def main():
options, args = get_parsed_args()
agentConfig = get_config(options=options)

# Logging
setup_logging(agentConfig)


COMMANDS = [
'start',
'stop',
@@ -196,7 +163,7 @@ def main():
return 3

pid_file = PidFile('dd-agent')

# Only initialize the Agent if we're starting or stopping it.
if command in ['start', 'stop', 'restart', 'foreground']:

@@ -206,15 +173,15 @@ def main():
agent = Agent(pid_file.get_path())

if 'start' == command:
logging.info('Start daemon')
log.info('Start daemon')
agent.start()

elif 'stop' == command:
logging.info('Stop daemon')
log.info('Stop daemon')
agent.stop()

elif 'restart' == command:
logging.info('Restart daemon')
log.info('Restart daemon')
agent.restart()

elif 'foreground' == command:
@@ -227,10 +194,13 @@ def main():
pid = pid_file.get_pid()
if pid is not None:
sys.stdout.write('dd-agent is running as pid %s.\n' % pid)
log.info("dd-agent is running as pid %s." % pid)
else:
sys.stdout.write('dd-agent is not running.\n')
log.info("dd-agent is not running.")

elif 'info' == command:
logging.getLogger().setLevel(logging.ERROR)
return CollectorStatus.print_latest_status(verbose=options.verbose)

return 0
@@ -239,10 +209,10 @@ def main():
if __name__ == '__main__':
try:
sys.exit(main())
except Exception:
except StandardError:
# Try our best to log the error.
try:
agent_logger.exception("Uncaught error running the agent")
log.exception("Uncaught error running the agent")
except:
pass
raise
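
Note: the setup_logging() helper deleted above is replaced by the single config.initialize_logging('collector') call at the top of agent.py, so handlers are attached before any other module grabs a logger. config.py is not included in the hunks shown here, so the following is only a minimal sketch of what such an initializer might look like, reusing the syslog defaults from the removed function; the function name and its single string argument come from the import line, everything else is an assumption.

    import logging
    import sys
    from logging.handlers import SysLogHandler

    def initialize_logging(name):
        # Hypothetical sketch -- the real implementation lives in config.py,
        # which is not part of this excerpt.
        log = logging.getLogger(name)
        log.setLevel(logging.INFO)
        try:
            # Mirror the platform special case from the removed setup_logging():
            # OS X exposes syslog at /var/run/syslog rather than /dev/log.
            address = '/var/run/syslog' if sys.platform == 'darwin' else '/dev/log'
            handler = SysLogHandler(address=address, facility=SysLogHandler.LOG_DAEMON)
            handler.setFormatter(logging.Formatter(
                "dd-agent - %(name)s - %(levelname)s - %(message)s"))
        except Exception:
            # Fall back to stderr if the syslog socket is unavailable.
            handler = logging.StreamHandler(sys.stderr)
        log.addHandler(handler)

With something like this in place, agent.py only needs log = logging.getLogger('collector'), as in the hunks above.
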
12 changes: 5 additions & 7 deletions aggregator.py
@@ -1,9 +1,7 @@
import logging
from time import time


logger = logging.getLogger(__name__)

log = logging.getLogger(__name__)

class Infinity(Exception): pass
class UnknownValue(Exception): pass
@@ -199,12 +197,12 @@ def sample(self, value, sample_rate):
def _rate(self, sample1, sample2):
interval = sample2[0] - sample1[0]
if interval == 0:
logger.warn('Metric %s has an interval of 0. Not flushing.' % self.name)
log.warn('Metric %s has an interval of 0. Not flushing.' % self.name)
raise Infinity()

delta = sample2[1] - sample1[1]
if delta < 0:
logger.warn('Metric %s has a rate < 0. Not flushing.' % self.name)
log.warn('Metric %s has a rate < 0. Not flushing.' % self.name)
raise UnknownValue()

return (delta / interval)
@@ -349,13 +347,13 @@ def flush(self):
metrics = []
for context, metric in self.metrics.items():
if metric.last_sample_time < expiry_timestamp:
logger.debug("%s hasn't been submitted in %ss. Expiring." % (context, self.expiry_seconds))
log.debug("%s hasn't been submitted in %ss. Expiring." % (context, self.expiry_seconds))
del self.metrics[context]
else:
metrics += metric.flush(timestamp, self.interval)

# Save some stats.
logger.debug("received %s payloads since last flush" % self.count)
log.debug("received %s payloads since last flush" % self.count)
self.total_count += self.count
self.count = 0
return metrics
6 changes: 4 additions & 2 deletions checks/__init__.py
@@ -15,6 +15,8 @@
from util import LaconicFilter
from checks import check_status

log = logging.getLogger(__name__)

# Konstants
class CheckException(Exception): pass
class Infinity(CheckException): pass
@@ -273,7 +275,7 @@ def __init__(self, name, init_config, agentConfig, instances=None):
self.init_config = init_config
self.agentConfig = agentConfig
self.hostname = gethostname(agentConfig)
self.log = logging.getLogger('checks.%s' % name)
self.log = logging.getLogger('%s.%s' % (__name__, name))
self.aggregator = MetricsAggregator(self.hostname, formatter=agent_formatter)
self.events = []
self.instances = instances or []
@@ -482,7 +484,7 @@ def gethostname(agentConfig):
try:
return socket.getfqdn()
except socket.error, e:
logging.debug("processes: unable to get hostname: " + str(e))
log.debug("processes: unable to get hostname: " + str(e))

def agent_formatter(metric, value, timestamp, tags, hostname, device_name=None):
""" Formats metrics coming from the MetricsAggregator. Will look like:
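
Note: because each check's logger is now derived from the package name ('%s.%s' % (__name__, name) resolves to checks.<check name>), the standard logging hierarchy can tune a single check without touching the rest. A small illustration, with 'nginx' as a hypothetical check name:

    import logging

    # Quiet all checks, then raise a single one to DEBUG.
    logging.getLogger('checks').setLevel(logging.WARN)
    logging.getLogger('checks.nginx').setLevel(logging.DEBUG)
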
2 changes: 1 addition & 1 deletion checks/build.py
@@ -130,7 +130,7 @@ def check(self, logger, agentConfig):

hudson_home, apiKey = sys.argv[1:3]

logger = logging.getLogger('hudson')
logger = logging.getLogger('ddagent.checks.hudson')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
hudson = Hudson()
16 changes: 16 additions & 0 deletions checks/check_status.py
@@ -68,6 +68,21 @@ def stylize(cls, text, *styles):
def style(*args):
return Stylizer.stylize(*args)

def logger_info():
loggers = []
root_logger = logging.getLogger()
if len(root_logger.handlers) > 0:
for handler in root_logger.handlers:
if isinstance(handler, logging.StreamHandler):
loggers.append(handler.stream.name)
if isinstance(handler, logging.handlers.SysLogHandler):
if isinstance(handler.address, basestring):
loggers.append('syslog:%s' % handler.address)
else:
loggers.append('syslog:(%s, %s)' % handler.address)
else:
loggers.append("No loggers configured")
return ', '.join(loggers)


class AgentStatus(object):
@@ -125,6 +140,7 @@ def _header_lines(self, indent):
("Pid", self.created_by_pid),
("Platform", platform.platform()),
("Python Version", platform.python_version()),
("Logs", logger_info()),
]

for key, value in fields:
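
Note: logger_info() is what feeds the new "Logs" field in the status header above; it simply walks the root logger's handlers. A hedged usage sketch (the handlers below are illustrative, not the agent's actual configuration):

    import logging
    import sys
    from logging.handlers import SysLogHandler

    root = logging.getLogger()
    root.addHandler(logging.StreamHandler(sys.stderr))
    root.addHandler(SysLogHandler(address='/dev/log'))  # Linux syslog socket

    from checks.check_status import logger_info  # requires the dd-agent source tree
    print(logger_info())  # e.g. "<stderr>, syslog:/dev/log"
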