Skip to content

Commit

Permalink
Merge pull request #615 from DataDog/limit-devices
Browse files Browse the repository at this point in the history
Filter disk, io & network metrics by device
  • Loading branch information
clutchski committed Aug 20, 2013
2 parents e6ed2df + e52f0a0 commit 564bef4
Show file tree
Hide file tree
Showing 8 changed files with 465 additions and 223 deletions.
172 changes: 135 additions & 37 deletions checks.d/network.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from checks import AgentCheck
"""
Collects network metrics.
"""

# stdlib
import platform
import subprocess
import sys
import re

# project
from checks import AgentCheck
from checks.system import Platform


class Network(AgentCheck):
Expand Down Expand Up @@ -39,16 +47,61 @@ class Network(AgentCheck):
def check(self, instance):
if instance is None:
instance = {}
self.excluded_interfaces = instance.get('excluded_interfaces', [])
self.collect_connection_state = instance.get('collect_connection_state', False)

if sys.platform == 'linux2':
self._excluded_ifaces = instance.get('excluded_interfaces', [])
self._collect_cx_state = instance.get('collect_connection_state', False)

self._exclude_iface_re = None
exclude_re = instance.get('excluded_interface_re', None)
if exclude_re:
self.log.debug("Excluding network devices matching: %s" % exclude_re)
self._exclude_iface_re = re.compile(exclude_re)

if Platform.is_linux():
self._check_linux(instance)
elif sys.platform == "darwin" or sys.platform.startswith("freebsd"):
elif Platform.is_bsd():
self._check_bsd(instance)
elif sys.platform == "sunos5":
elif Plaform.is_solaris():
self._check_solaris(instance)

def _submit_devicemetrics(self, iface, vals_by_metric):
if self._exclude_iface_re and self._exclude_iface_re.match(iface):
# Skip this network interface.
return False

expected_metrics = [
'bytes_rcvd',
'bytes_sent',
'packets_in.count',
'packets_in.error',
'packets_out.count',
'packets_out.error',
]
for m in expected_metrics:
assert m in vals_by_metric
assert len(vals_by_metric) == len(expected_metrics)

# For reasons i don't understand only these metrics are skipped if a
# particular interface is in the `excluded_interfaces` config list.
# Not sure why the others aren't included. Until I understand why, I'm
# going to keep the same behaviour.
exclude_iface_metrics = [
'packets_in.count',
'packets_in.error',
'packets_out.count',
'packets_out.error',
]

count = 0
for metric, val in vals_by_metric.iteritems():
if iface in self._excluded_ifaces and metric in exclude_iface_metrics:
# skip it!
continue
self.rate('system.net.%s' % metric, val, device_name=iface)
count += 1
self.log.debug("tracked %s network metrics for interface %s" % (count, iface))


def _parse_value(self, v):
if v == "-":
return 0
Expand All @@ -59,7 +112,7 @@ def _parse_value(self, v):
return 0

def _check_linux(self, instance):
if self.collect_connection_state:
if self._collect_cx_state:
netstat = subprocess.Popen(["netstat", "-n", "-u", "-t", "-a"],
stdout=subprocess.PIPE,
close_fds=True).communicate()[0]
Expand Down Expand Up @@ -95,8 +148,10 @@ def _check_linux(self, instance):


proc = open('/proc/net/dev', 'r')
lines = proc.readlines()
proc.close()
try:
lines = proc.readlines()
finally:
proc.close()
# Inter-| Receive | Transmit
# face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
# lo:45890956 112797 0 0 0 0 0 0 45890956 112797 0 0 0 0 0 0
Expand All @@ -108,13 +163,15 @@ def _check_linux(self, instance):
# Filter inactive interfaces
if self._parse_value(x[0]) or self._parse_value(x[8]):
iface = cols[0].strip()
self.rate('system.net.bytes_rcvd', self._parse_value(x[0]), device_name=iface)
self.rate('system.net.bytes_sent', self._parse_value(x[8]), device_name=iface)
if iface not in self.excluded_interfaces:
self.rate('system.net.packets_in.count', self._parse_value(x[1]), device_name=iface)
self.rate('system.net.packets_in.error', self._parse_value(x[2]) + self._parse_value(x[3]), device_name=iface)
self.rate('system.net.packets_out.count', self._parse_value(x[9]), device_name=iface)
self.rate('system.net.packets_out.error', self._parse_value(x[10]) + self._parse_value(x[11]), device_name=iface)
metrics = {
'bytes_rcvd': self._parse_value(x[0]),
'bytes_sent': self._parse_value(x[8]),
'packets_in.count': self._parse_value(x[1]),
'packets_in.error': self._parse_value(x[2]) + self._parse_value(x[3]),
'packets_out.count': self._parse_value(x[9]),
'packets_out.error':self._parse_value(x[10]) + self._parse_value(x[11]),
}
self._submit_devicemetrics(iface, metrics)

def _check_bsd(self, instance):
netstat = subprocess.Popen(["netstat", "-i", "-b"],
Expand Down Expand Up @@ -171,20 +228,40 @@ def _check_bsd(self, instance):

# Filter inactive interfaces
if self._parse_value(x[-5]) or self._parse_value(x[-2]):
self.rate('system.net.bytes_rcvd', self._parse_value(x[-5]), device_name=iface)
self.rate('system.net.bytes_sent', self._parse_value(x[-2]), device_name=iface)
if iface not in self.excluded_interfaces:
self.rate('system.net.packets_in.count', self._parse_value(x[-7]), device_name=iface)
self.rate('system.net.packets_in.error', self._parse_value(x[-6]), device_name=iface)
self.rate('system.net.packets_out.count', self._parse_value(x[-4]), device_name=iface)
self.rate('system.net.packets_out.error', self._parse_value(x[-3]), device_name=iface)
iface = current
metrics = {
'bytes_rcvd': self._parse_value(x[-5]),
'bytes_sent': self._parse_value(x[-2]),
'packets_in.count': self._parse_value(x[-7]),
'packets_in.error': self._parse_value(x[-6]),
'packets_out.count': self._parse_value(x[-4]),
'packets_out.error':self._parse_value(x[-3]),
}
self._submit_devicemetrics(iface, metrics)

def _check_solaris(self, instance):
# Can't get bytes sent and received via netstat
# Default to kstat -p link:0:
netstat = subprocess.Popen(["kstat", "-p", "link:0:"],
stdout=subprocess.PIPE,
close_fds=True).communicate()[0]
metrics_by_interface = self._parse_solaris_netstat(netstat)
for interface, metrics in metrics_by_interface.iteritems():
self._submit_devicemetrics(interface, metrics)

def _parse_solaris_netstat(self, netstat_output):
"""
Return a mapping of network metrics by interface. For example:
{ interface:
{'bytes_sent': 0,
'bytes_rcvd': 0,
'bytes_rcvd': 0,
...
}
}
"""
# Here's an example of the netstat output:
#
# link:0:net0:brdcstrcv 527336
# link:0:net0:brdcstxmt 1595
# link:0:net0:class net
Expand Down Expand Up @@ -236,23 +313,44 @@ def _check_solaris(self, instance):
# link:0:net1:unknowns 0
# link:0:net1:zonename 53aa9b7e-48ba-4152-a52b-a6368c3d9e7c

lines = [l for l in netstat.split("\n") if len(l) > 0]
# A mapping of solaris names -> datadog names
metric_by_solaris_name = {
'rbytes64':'bytes_rcvd',
'obytes64':'bytes_rcvd',
'ipackets64':'packets_in.count',
'ierrors':'packets_in.error',
'opackets64':'packets_out.count',
'oerrors':'packets_out.error',
}

lines = [l for l in netstat_output.split("\n") if len(l) > 0]

metrics_by_interface = {}

for l in lines:
# Parse the metric & interface.
cols = l.split()
link, n, iface, name = cols[0].split(":")
assert link == "link"

if name == "rbytes64":
self.rate('system.net.bytes_rcvd', self._parse_value(cols[1]), device_name=iface)
elif name == "obytes64":
self.rate('system.net.bytes_sent', self._parse_value(cols[1]), device_name=iface)
elif iface not in self.excluded_interfaces:
if name == "ipackets64":
self.rate('system.net.packets_in.count', self._parse_value(cols[1]), device_name=iface)
elif name == "ierrors":
self.rate('system.net.packets_in.error', self._parse_value(cols[1]), device_name=iface)
elif name == "opackets64":
self.rate('system.net.packets_out.count', self._parse_value(cols[1]), device_name=iface)
elif name == "oerrors":
self.rate('system.net.packets_out.error', self._parse_value(cols[1]), device_name=iface)
# Get the datadog metric name.
ddname = metric_by_solaris_name.get(name, None)
if ddname is None:
continue

# Add it to this interface's list of metrics.
metrics = metrics_by_interface.get(iface, {})
metrics[ddname] = self._parse_value(cols[1])
metrics_by_interface[iface] = metrics

return metrics_by_interface


if __name__ == '__main__':
check, instances = Network.from_yaml('conf.d/network.yaml')
for instance in instances:
check.check(instance)
check.check(instance)
if check.has_events():
print 'Events: %s' % (check.get_events())
print 'Metrics: %s' % (check.get_metrics())
2 changes: 2 additions & 0 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@


log = logging.getLogger(__name__)


FLUSH_LOGGING_PERIOD = 10
FLUSH_LOGGING_INITIAL = 5

Expand Down
45 changes: 45 additions & 0 deletions checks/system/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Return information about the given platform.
"""


import sys


class Platform(object):

@staticmethod
def is_darwin(name=None):
name = name or sys.platform
return 'darwin' in name

@staticmethod
def is_freebsd(name=None):
name = name or sys.platform
return name.startswith("freebsd")

@staticmethod
def is_linux(name=None):
name = name or sys.platform
return 'linux' in name

@staticmethod
def is_bsd(name=None):
""" Return true if this is a BSD like operarting system. """
name = name or sys.platform
return Platform.is_darwin(name) or Platform.is_freebsd(name)

@staticmethod
def is_solaris(name=None):
name = name or sys.platform
return name == "sunos5"

@staticmethod
def is_unix(name=None):
""" Return true if the platform is a unix, False otherwise. """
name = name or sys.platform
return (Platform.is_darwin()
or Platform.is_linux()
or Platform.is_freebsd()
)

Loading

0 comments on commit 564bef4

Please sign in to comment.