From c56015fea2333ea57f998d078f08ed2e629aabac Mon Sep 17 00:00:00 2001 From: Quentin Madec Date: Wed, 17 Jun 2015 15:44:25 -0400 Subject: [PATCH] [disk] backward compatibility with old Disk check The new disk check was ignoring by default tmpfs disks and some others, breaking compatibility with the old check. It is fixed by filtering on device name (exclusion on '' and 'none') and total disk usage (!= 0), and setting all_partitions to True for psutil. The new all_partitions option will send metrics from the non excluded disks with 0 total disk usage. --- checks.d/disk.py | 28 +++++++++++++++++----------- conf.d/disk.yaml.default | 9 ++++----- tests/checks/mock/test_disk.py | 33 +++++++++------------------------ 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/checks.d/disk.py b/checks.d/disk.py index 8852eff1d6..630c967e6e 100644 --- a/checks.d/disk.py +++ b/checks.d/disk.py @@ -19,7 +19,6 @@ class Disk(AgentCheck): """ Collects metrics about the machine's disks. """ # -T for filesystem info DF_COMMAND = ['df', '-T'] - FAKE_DEVICES = ['udev', 'sysfs', 'rpc_pipefs', 'proc', 'devpts'] METRIC_DISK = 'system.disk.{0}' METRIC_INODE = 'system.fs.inodes.{0}' @@ -38,6 +37,7 @@ def check(self, instance): if self._psutil(): self.collect_metrics_psutil() else: + # FIXME: implement all_partitions (df -a) self.collect_metrics_manually() @classmethod @@ -49,12 +49,8 @@ def _load_conf(self, instance): self._excluded_disks = instance.get('excluded_disks', []) self._tag_by_filesystem = _is_affirmative( instance.get('tag_by_filesystem', False)) - # On Windows, we need all_partitions to True by default to collect - # metrics about remote disks - # On Linux, we need all_partitions to False to avoid collecting metrics - # about nodev filesystems self._all_partitions = _is_affirmative( - instance.get('all_partitions', Platform.is_win32())) + instance.get('all_partitions', False)) # FIXME: 6.x, drop use_mount option in datadog.conf self._load_legacy_option(instance, 'use_mount', False, @@ -79,20 +75,31 @@ def _load_legacy_option(self, instance, option, default, def collect_metrics_psutil(self): self._valid_disks = {} - for part in psutil.disk_partitions(all=self._all_partitions): + for part in psutil.disk_partitions(all=True): # we check all exclude conditions if self._exclude_disk_psutil(part): continue + # Get disk metrics here to be able to exclude on total usage + try: + disk_usage = psutil.disk_usage(part.mountpoint) + except Exception, e: + self.log.debug("Unable to get disk metrics for %s: %s", + part.mountpoint, e) + continue + # Exclude disks with total disk size 0 + if disk_usage.total == 0: + continue # For later, latency metrics self._valid_disks[part.device] = (part.fstype, part.mountpoint) self.log.debug('Passed: {0}'.format(part.device)) tags = [part.fstype] if self._tag_by_filesystem else [] device_name = part.mountpoint if self._use_mount else part.device + # legacy check names c: vs psutil name C:\\ if Platform.is_win32(): device_name = device_name.strip('\\').lower() - for metric_name, metric_value in self._collect_part_metrics(part).iteritems(): + for metric_name, metric_value in self._collect_part_metrics(part, disk_usage).iteritems(): self.gauge(metric_name, metric_value, tags=tags, device_name=device_name) # And finally, latency metrics, a legacy gift from the old Windows Check @@ -110,13 +117,12 @@ def _exclude_disk_psutil(self, part): # We don't want all those incorrect devices def _exclude_disk(self, name, filesystem): - return (name in self.FAKE_DEVICES or + return (((not name or name == 'none') and not self._all_partitions) or name in self._excluded_disks or self._excluded_disk_re.match(name) or filesystem in self._excluded_filesystems) - def _collect_part_metrics(self, part): - usage = psutil.disk_usage(part.mountpoint) + def _collect_part_metrics(self, part, usage): metrics = {} for name in ['total', 'used', 'free']: # For legacy reasons, the standard unit it kB diff --git a/conf.d/disk.yaml.default b/conf.d/disk.yaml.default index 9c61a10ed0..3bb2134b34 100644 --- a/conf.d/disk.yaml.default +++ b/conf.d/disk.yaml.default @@ -6,8 +6,8 @@ instances: - use_mount: no # The (optional) excluded_filesystems parameter will instruct the check to # ignore disks using these filesystems - excluded_filesystems: - - tmpfs + # excluded_filesystems: + # - tmpfs # The (optional) excluded_disks parameter will instruct the check to # ignore this list of disks @@ -24,7 +24,6 @@ instances: # tag_by_filesystem: no # # The (optional) all_partitions parameter will instruct the check to - # get metrics for all partitions (and not only physical disks) - # On Windows, this parameter default is 'yes' in order to get remote disks - # metrics; if they are not needed, set it to 'no' + # get metrics for all partitions. use_mount should be set to yes (to avoid + # collecting empty device names) when using this option. # all_partitions: no diff --git a/tests/checks/mock/test_disk.py b/tests/checks/mock/test_disk.py index 955cc1a56e..278b2dcdeb 100644 --- a/tests/checks/mock/test_disk.py +++ b/tests/checks/mock/test_disk.py @@ -67,7 +67,9 @@ def test_device_exclusion_logic(self): self.assertFalse(exclude_disk(MockPart())) # standard fake devices - self.assertTrue(exclude_disk(MockPart(device='udev'))) + self.assertTrue(exclude_disk(MockPart(device=''))) + self.assertTrue(exclude_disk(MockPart(device='none'))) + self.assertFalse(exclude_disk(MockPart(device='udev'))) # excluded filesystems list self.assertTrue(exclude_disk(MockPart(fstype='aaaaaa'))) @@ -81,6 +83,12 @@ def test_device_exclusion_logic(self): self.assertTrue(exclude_disk(MockPart(device='tevvv'))) self.assertFalse(exclude_disk(MockPart(device='tevvs'))) + # and now with all_partitions + self.check._all_partitions = True + self.assertFalse(exclude_disk(MockPart(device=''))) + self.assertFalse(exclude_disk(MockPart(device='none'))) + self.assertFalse(exclude_disk(MockPart(device='udev'))) + @mock.patch('psutil.disk_partitions', return_value=[MockPart()]) @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics()) @mock.patch('os.statvfs', return_value=MockInodesMetrics()) @@ -97,29 +105,6 @@ def test_psutil(self, mock_partitions, mock_usage, mock_inodes): self.coverage_report() - # FIXME: patch the import of Platform to be able to test mocked Windows - # @mock.patch('psutil.disk_partitions', - # return_value=[MockPart(device='C:\\', fstype='ntfs', - # mountpoint='C:\\')]) - # @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics()) - # @mock.patch('psutil.disk_io_counters', - # return_value={'PhysicalDisk0': MockIoCountersMetrics()}) - # @mock.patch('util.Platform', return_value=WindowsPlatform) - # def test_psutil_windows(self, mock_partitions, mock_usage, - # mock_io_counters, mock_platform): - # self.run_check({'instances': [{'use_mount': 'no'}]}, - # mocks={'_psutil': lambda: True}) - # - # # Assert metrics - # for metric, value in self.GAUGES_VALUES.iteritems(): - # self.assertMetric(metric, value=value, tags=[], - # device_name='c:') - # for metric, value in self.GAUGES_VALUES_PSUTIL.iteritems(): - # self.assertMetric(metric, value=value, tags=[], - # device_name='PhysicalDisk0') - # - # self.coverage_report() - @mock.patch('psutil.disk_partitions', return_value=[MockPart()]) @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics()) @mock.patch('os.statvfs', return_value=MockInodesMetrics())