Skip to content

Commit

Permalink
Merge pull request #52 from DataDog/51-pid-file
Browse files Browse the repository at this point in the history
Fixing pid file location
  • Loading branch information
alq666 committed May 16, 2012
2 parents a302cf4 + e6bb6d7 commit 3ba7d92
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 131 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ before_script:
- sudo apt-get install python-mysqldb
- sudo apt-get install nginx
- sudo apt-get install apache2
- sudo apt-get install sysstat
123 changes: 61 additions & 62 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import time
import urllib

# Constants
PID_DIR="/var/run/dd-agent"
PID_FILE="dd-agent.pid"

# Watchdog implementation
from threading import Timer
WATCHDOG_MULTIPLIER = 10 # will fire if no checks have been collected in N * checkFreq, 150s by default
Expand All @@ -31,29 +35,14 @@

# Custom modules
from checks.common import checks
from checks.ec2 import EC2
from config import get_config, get_system_stats, get_parsed_args
from daemon import Daemon
from emitter import http_emitter

# Override the generic daemon class to run our checks
class agent(Daemon):

EC2_URL = "http://169.254.169.254/latest/meta-data/instance-id"

@staticmethod
def get_ec2_instance_id():
"""Fetch EC2 instance ID if possible. If not on EC2 returns None"""
try:
url = urllib.urlopen(agent.EC2_URL)
instanceId = url.read()
assert instanceId.startswith("i-"), "Malformed instance-id: %s" % instanceId
return instanceId

except Exception, e:
logging.getLogger('agent').exception('Cannot determine instance-id. Is this machine on EC2?')

return None

def late(self, cks, threshold, crash=True):
"""Determine whether the agent run is late and optionally kill it if so.
"""
Expand All @@ -75,27 +64,25 @@ def run(self, agentConfig=None, run_forever=True):
agentLogger.debug('Creating checks instance')

if agentConfig is None:
agentConfig, rawConfig = get_config()
else:
rawConfig = {}
agentConfig = get_config()

# Try to fetch the instance ID from EC2 if no hostname has been set
# in the config file
if agentConfig.get('hostname') is None and agentConfig.get('useEC2InstanceId'):
instanceId = self.get_ec2_instance_id()
instanceId = EC2.get_instance_id()
if instanceId is not None:
agentLogger.info("Running on EC2, instanceId: %s" % instanceId)
agentConfig['hostname'] = instanceId
else:
agentLogger.info('Not running on EC2')
agentLogger.info('Not running on EC2, using hostname to identify this server')

emitter = http_emitter

checkFreq = int(agentConfig['checkFreq'])
lateThresh = checkFreq * WATCHDOG_MULTIPLIER

# Checks instance
c = checks(agentConfig, rawConfig, emitter)
c = checks(agentConfig, emitter)

# Run once
c.doChecks(True, systemStats)
Expand All @@ -120,9 +107,10 @@ def run(self, agentConfig=None, run_forever=True):
agentLogger.debug("Getting ready to sleep for %s seconds." % lateThresh)

def setupLogging(agentConfig):
"""Used by ddagent.py as well"""
"""Configure logging to use syslog whenever possible.
Also controls debugMode."""
if agentConfig['debugMode']:
logFile = os.path.join(agentConfig['tmpDirectory'], 'dd-agent.log')
logFile = "/tmp/dd-agent.log"
logging.basicConfig(filename=logFile, filemode='w', level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.info("Logging to %s" % logFile)
else:
Expand All @@ -146,49 +134,68 @@ def setupLogging(agentConfig):
sys.stderr.write("Error while setting up syslog logging (%s). No logging available" % str(e))
logging.disable(logging.ERROR)

def getPidFile(command, agentConfig, clean):
"""Used by ddagent.py as well"""

if command == 'init':
# This path added for newer Linux packages which run under
# a separate dd-agent user account.
if os.path.exists('/var/run/dd-agent/'):
pidFile = '/var/run/dd-agent/dd-agent.pid'
else:
pidFile = '/var/run/dd-agent.pid'

def getPidFile(pid_dir=PID_DIR):
"""Find a good spot for the pid file.
By default PID_DIR/PID_FILE
"""
try:
# Can we write to the directory
if os.access(pid_dir, os.W_OK):
pidfile = os.path.join(pid_dir, PID_FILE)
logging.info("Pid file is: %s" % pidfile)
return pidfile
except:
logging.exception("Cannot locate pid file, defaulting to /tmp/%s" % PID_FILE)
# continue

# if all else fails
if os.access("/tmp", os.W_OK):
logging.warn("Pid file: /tmp/%s" % PID_FILE)
return os.path.join("/tmp", PID_FILE)
else:
pidFile = os.path.join(agentConfig['pidfileDirectory'], 'dd-agent.pid')

if clean:
logging.debug('Agent called with --clean option, removing .pid')
try:
os.remove(pidFile)
except OSError:
# Did not find pid file
pass

return pidFile
# Can't save pid file, bail out
logging.error("Cannot save pid file anywhere")
sys.exit(-2)

def cleanPidFile(pid_dir=PID_DIR):
    """Remove the agent's pid file.

    pid_dir -- directory handed to getPidFile() to locate the pid file.

    Returns True if the file was removed, False otherwise (the failure is
    logged, never raised).
    """
    try:
        # Resolve the path once: getPidFile() probes the filesystem and logs
        # on every call, and the path we log must be the one we remove.
        pidfile = getPidFile(pid_dir)
        logging.debug("Cleaning up pid file %s" % pidfile)
        os.remove(pidfile)
        return True
    except (Exception, SystemExit):
        # SystemExit is caught on purpose: getPidFile() calls sys.exit() when
        # no writable location exists, and cleanup is best-effort.
        logging.exception("Could not clean up pid file")
        return False

def getPid(pid_dir=PID_DIR):
    """Retrieve the pid stored in the agent's pid file.

    pid_dir -- directory handed to getPidFile() to locate the pid file.

    Returns the pid as an int, or None if the file cannot be located, read
    or parsed (the failure is logged, never raised).
    """
    try:
        pf = open(getPidFile(pid_dir))
        try:
            pid_s = pf.read()
        finally:
            # Always release the handle, even if the read fails
            pf.close()

        return int(pid_s.strip())
    except (Exception, SystemExit):
        # SystemExit is caught on purpose: getPidFile() calls sys.exit() when
        # no writable location exists; callers only expect a pid or None.
        logging.exception("Cannot read pid")
        return None

# Control of daemon
if __name__ == '__main__':

options, args = get_parsed_args()
agentConfig, rawConfig = get_config()
agentConfig = get_config()

# Logging
setupLogging(agentConfig)

# FIXME
# Ever heard of optparse?

argLen = len(sys.argv)

if len(args) > 0:
command = args[0]

if options.clean:
cleanPidFile()

# Daemon instance from agent class
pidFile = getPidFile(command, agentConfig, options.clean)
pidFile = getPidFile()
daemon = agent(pidFile)

if 'start' == command:
Expand All @@ -208,16 +215,8 @@ def getPidFile(command, agentConfig, clean):
daemon.run()

elif 'status' == command:
try:
pf = file(pidFile,'r')
pid = int(pf.read().strip())
pf.close()
except IOError:
pid = None
except SystemExit:
pid = None

if pid:
pid = getPid()
if pid is not None:
sys.stdout.write('dd-agent is running as pid %s.\n' % pid)
logging.info("dd-agent is running as pid %s." % pid)
else:
Expand Down
3 changes: 1 addition & 2 deletions checks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ def wrapper(*args, **kwargs):
return wrapper

class checks:
def __init__(self, agentConfig, rawConfig, emitter):
def __init__(self, agentConfig, emitter):
self.agentConfig = agentConfig
self.rawConfig = rawConfig
self.plugins = None
self.emitter = emitter
self.last_post_ts = None
Expand Down
14 changes: 11 additions & 3 deletions checks/ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
class EC2(Check):
"""Retrieve EC2 metadata
"""
URL = "http://169.254.169.254/1.0/meta-data/"
URL = "http://169.254.169.254/latest/meta-data/"
TIMEOUT = 0.1 # second

def __init__(self, logger):
Check.__init__(self, logger)

def get_metadata(self):
@staticmethod
def get_metadata():
"""Use the ec2 http service to introspect the instance. This adds latency if not running on EC2
"""
# >>> import urllib2
Expand Down Expand Up @@ -42,7 +43,7 @@ def get_metadata(self):
assert type(v) in (types.StringType, types.UnicodeType) and len(v) > 0, "%s is not a string" % v
metadata[k] = v
except:
self.logger.exception("(Ignore if !ec2) Cannot extract EC2 metadata %s" % k)
pass

try:
if socket_to is None:
Expand All @@ -52,3 +53,10 @@ def get_metadata(self):
pass

return metadata

@staticmethod
def get_instance_id():
    """Return the "instance-id" entry of EC2.get_metadata().

    Returns None when the entry is missing or the metadata lookup fails.
    """
    try:
        return EC2.get_metadata().get("instance-id")
    except Exception:
        # Best-effort lookup; interpreter-exit signals (KeyboardInterrupt,
        # SystemExit) are deliberately not swallowed here.
        return None
40 changes: 15 additions & 25 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

# CONSTANTS
DATADOG_CONF = "datadog.conf"
DEFAULT_CHECK_FREQUENCY = 15 # seconds

def get_parsed_args():
parser = OptionParser()
Expand All @@ -25,7 +26,7 @@ def get_parsed_args():
return options, args

def get_version():
return "2.2.16"
return "2.2.17"

def skip_leading_wsp(f):
"Works on a file, returns a file-like object"
Expand All @@ -42,11 +43,9 @@ def get_config(parse_args = True, cfg_path=None):
agentConfig = {}
agentConfig['debugMode'] = False
# not really a frequency, but the time to sleep between checks
agentConfig['checkFreq'] = 15
agentConfig['checkFreq'] = DEFAULT_CHECK_FREQUENCY
agentConfig['version'] = get_version()

rawConfig = {}

# Config handling
try:
# Find the right config file
Expand All @@ -67,7 +66,11 @@ def get_config(parse_args = True, cfg_path=None):
config = ConfigParser.ConfigParser()
config.readfp(skip_leading_wsp(open(config_path)))

#
# Core config
#

# Where to send the data
if options is not None and options.use_forwarder:
listen_port = 17123
if config.has_option('Main','listen_port'):
Expand All @@ -80,14 +83,10 @@ def get_config(parse_args = True, cfg_path=None):
if agentConfig['ddUrl'].endswith('/'):
agentConfig['ddUrl'] = agentConfig['ddUrl'][:-1]

# Which API key to use
agentConfig['apiKey'] = config.get('Main', 'api_key')

if os.path.exists('/var/log/dd-agent/'):
agentConfig['tmpDirectory'] = '/var/log/dd-agent/'
else:
agentConfig['tmpDirectory'] = '/tmp/' # default which may be overriden in the config later
agentConfig['pidfileDirectory'] = agentConfig['tmpDirectory']


# Debug mode
agentConfig['debugMode'] = config.get('Main', 'debug_mode').lower() in ("yes", "true")

if config.has_option('Main', 'use_ec2_instance_id'):
Expand All @@ -98,7 +97,10 @@ def get_config(parse_args = True, cfg_path=None):
agentConfig['useEC2InstanceId'] = False

if config.has_option('Main', 'check_freq'):
agentConfig['checkFreq'] = int(config.get('Main', 'check_freq'))
try:
agentConfig['checkFreq'] = int(config.get('Main', 'check_freq'))
except:
agentConfig['checkFreq'] = DEFAULT_CHECK_FREQUENCY

if config.has_option('Main','hostname'):
agentConfig['hostname'] = config.get('Main','hostname')
Expand Down Expand Up @@ -161,12 +163,6 @@ def get_config(parse_args = True, cfg_path=None):
if config.has_option('Main', 'nginx_status_url'):
agentConfig['nginxStatusUrl'] = config.get('Main', 'nginx_status_url')

if config.has_option('Main', 'tmp_directory'):
agentConfig['tmpDirectory'] = config.get('Main', 'tmp_directory')

if config.has_option('Main', 'pidfile_directory'):
agentConfig['pidfileDirectory'] = config.get('Main', 'pidfile_directory')

if config.has_option('Main', 'plugin_directory'):
agentConfig['pluginDirectory'] = config.get('Main', 'plugin_directory')

Expand Down Expand Up @@ -308,13 +304,7 @@ def get_config(parse_args = True, cfg_path=None):
sys.stderr.write('You have configured MongoDB for monitoring, but the pymongo module is not installed.\n')
sys.exit(2)

for section in config.sections():
rawConfig[section] = {}

for option in config.options(section):
rawConfig[section][option] = config.get(section, option)

return agentConfig, rawConfig
return agentConfig

def get_system_stats():
systemStats = {
Expand Down
2 changes: 1 addition & 1 deletion datadog.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ debug_mode: no

# Use the amazon EC2 instance-id instead of hostname (unless hostname is
# explicitly set)
use_ec2_instance_id: no
use_ec2_instance_id: yes

# Use mount points instead of volumes to track disk and fs metrics
use_mount: no
Expand Down
2 changes: 1 addition & 1 deletion ddagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def main():

import tornado.httpclient

agentConfig, rawConfig = get_config(parse_args = False)
agentConfig = get_config(parse_args = False)

port = agentConfig.get('listen_port', None)
if port is None:
Expand Down
Loading

0 comments on commit 3ba7d92

Please sign in to comment.