[disk] backward compatibility with old Disk check

The new disk check was ignoring by default tmpfs disks and some others, breaking compatibility with the old check. It is fixed by filtering on device name (exclusion on '' and 'none') and total disk usage (!= 0), and setting all_partitions to True for psutil. The new all_partitions option will send metrics from the non excluded disks with 0 total disk usage.
DataDog · Jun 17, 2015 · c56015f · c56015f
1 parent 8081ee9
commit c56015f
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 40 deletions.
diff --git a/checks.d/disk.py b/checks.d/disk.py
@@ -19,7 +19,6 @@ class Disk(AgentCheck):
     """ Collects metrics about the machine's disks. """
     # -T for filesystem info
     DF_COMMAND = ['df', '-T']
-    FAKE_DEVICES = ['udev', 'sysfs', 'rpc_pipefs', 'proc', 'devpts']
     METRIC_DISK = 'system.disk.{0}'
     METRIC_INODE = 'system.fs.inodes.{0}'
 
@@ -38,6 +37,7 @@ def check(self, instance):
         if self._psutil():
             self.collect_metrics_psutil()
         else:
+            # FIXME: implement all_partitions (df -a)
             self.collect_metrics_manually()
 
     @classmethod
@@ -49,12 +49,8 @@ def _load_conf(self, instance):
         self._excluded_disks = instance.get('excluded_disks', [])
         self._tag_by_filesystem = _is_affirmative(
             instance.get('tag_by_filesystem', False))
-        # On Windows, we need all_partitions to True by default to collect
-        # metrics about remote disks
-        # On Linux, we need all_partitions to False to avoid collecting metrics
-        # about nodev filesystems
         self._all_partitions = _is_affirmative(
-            instance.get('all_partitions', Platform.is_win32()))
+            instance.get('all_partitions', False))
 
         # FIXME: 6.x, drop use_mount option in datadog.conf
         self._load_legacy_option(instance, 'use_mount', False,
@@ -79,20 +75,31 @@ def _load_legacy_option(self, instance, option, default,
 
     def collect_metrics_psutil(self):
         self._valid_disks = {}
-        for part in psutil.disk_partitions(all=self._all_partitions):
+        for part in psutil.disk_partitions(all=True):
             # we check all exclude conditions
             if self._exclude_disk_psutil(part):
                 continue
+            # Get disk metrics here to be able to exclude on total usage
+            try:
+                disk_usage = psutil.disk_usage(part.mountpoint)
+            except Exception, e:
+                self.log.debug("Unable to get disk metrics for %s: %s",
+                               part.mountpoint, e)
+                continue
+            # Exclude disks with total disk size 0
+            if disk_usage.total == 0:
+                continue
             # For later, latency metrics
             self._valid_disks[part.device] = (part.fstype, part.mountpoint)
             self.log.debug('Passed: {0}'.format(part.device))
 
             tags = [part.fstype] if self._tag_by_filesystem else []
             device_name = part.mountpoint if self._use_mount else part.device
+
             # legacy check names c: vs psutil name C:\\
             if Platform.is_win32():
                 device_name = device_name.strip('\\').lower()
-            for metric_name, metric_value in self._collect_part_metrics(part).iteritems():
+            for metric_name, metric_value in self._collect_part_metrics(part, disk_usage).iteritems():
                 self.gauge(metric_name, metric_value,
                            tags=tags, device_name=device_name)
         # And finally, latency metrics, a legacy gift from the old Windows Check
@@ -110,13 +117,12 @@ def _exclude_disk_psutil(self, part):
 
     # We don't want all those incorrect devices
     def _exclude_disk(self, name, filesystem):
-        return (name in self.FAKE_DEVICES or
+        return (((not name or name == 'none') and not self._all_partitions) or
                 name in self._excluded_disks or
                 self._excluded_disk_re.match(name) or
                 filesystem in self._excluded_filesystems)
 
-    def _collect_part_metrics(self, part):
-        usage = psutil.disk_usage(part.mountpoint)
+    def _collect_part_metrics(self, part, usage):
         metrics = {}
         for name in ['total', 'used', 'free']:
             # For legacy reasons,  the standard unit it kB

diff --git a/conf.d/disk.yaml.default b/conf.d/disk.yaml.default
@@ -6,8 +6,8 @@ instances:
   - use_mount: no
     # The (optional) excluded_filesystems parameter will instruct the check to
     # ignore disks using these filesystems
-    excluded_filesystems:
-      - tmpfs
+    # excluded_filesystems:
+    #   - tmpfs
 
     # The (optional) excluded_disks parameter will instruct the check to
     # ignore this list of disks
@@ -24,7 +24,6 @@ instances:
     # tag_by_filesystem: no
     #
     # The (optional) all_partitions parameter will instruct the check to
-    # get metrics for all partitions (and not only physical disks)
-    # On Windows, this parameter default is 'yes' in order to get remote disks
-    # metrics; if they are not needed, set it to 'no'
+    # get metrics for all partitions. use_mount should be set to yes (to avoid
+    # collecting empty device names) when using this option.
     # all_partitions: no
diff --git a/tests/checks/mock/test_disk.py b/tests/checks/mock/test_disk.py
@@ -67,7 +67,9 @@ def test_device_exclusion_logic(self):
         self.assertFalse(exclude_disk(MockPart()))
 
         # standard fake devices
-        self.assertTrue(exclude_disk(MockPart(device='udev')))
+        self.assertTrue(exclude_disk(MockPart(device='')))
+        self.assertTrue(exclude_disk(MockPart(device='none')))
+        self.assertFalse(exclude_disk(MockPart(device='udev')))
 
         # excluded filesystems list
         self.assertTrue(exclude_disk(MockPart(fstype='aaaaaa')))
@@ -81,6 +83,12 @@ def test_device_exclusion_logic(self):
         self.assertTrue(exclude_disk(MockPart(device='tevvv')))
         self.assertFalse(exclude_disk(MockPart(device='tevvs')))
 
+        # and now with all_partitions
+        self.check._all_partitions = True
+        self.assertFalse(exclude_disk(MockPart(device='')))
+        self.assertFalse(exclude_disk(MockPart(device='none')))
+        self.assertFalse(exclude_disk(MockPart(device='udev')))
+
     @mock.patch('psutil.disk_partitions', return_value=[MockPart()])
     @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics())
     @mock.patch('os.statvfs', return_value=MockInodesMetrics())
@@ -97,29 +105,6 @@ def test_psutil(self, mock_partitions, mock_usage, mock_inodes):
 
             self.coverage_report()
 
-    # FIXME: patch the import of Platform to be able to test mocked Windows
-    # @mock.patch('psutil.disk_partitions',
-    #             return_value=[MockPart(device='C:\\', fstype='ntfs',
-    #                                    mountpoint='C:\\')])
-    # @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics())
-    # @mock.patch('psutil.disk_io_counters',
-    #             return_value={'PhysicalDisk0': MockIoCountersMetrics()})
-    # @mock.patch('util.Platform', return_value=WindowsPlatform)
-    # def test_psutil_windows(self, mock_partitions, mock_usage,
-    #                         mock_io_counters, mock_platform):
-    #     self.run_check({'instances': [{'use_mount': 'no'}]},
-    #                    mocks={'_psutil': lambda: True})
-    #
-    #     # Assert metrics
-    #     for metric, value in self.GAUGES_VALUES.iteritems():
-    #         self.assertMetric(metric, value=value, tags=[],
-    #                           device_name='c:')
-    #     for metric, value in self.GAUGES_VALUES_PSUTIL.iteritems():
-    #         self.assertMetric(metric, value=value, tags=[],
-    #                           device_name='PhysicalDisk0')
-    #
-    #     self.coverage_report()
-
     @mock.patch('psutil.disk_partitions', return_value=[MockPart()])
     @mock.patch('psutil.disk_usage', return_value=MockDiskMetrics())
     @mock.patch('os.statvfs', return_value=MockInodesMetrics())