Skip to content

Commit

Permalink
Minor refactoring of cgroup telemetry (#1607)
Browse files Browse the repository at this point in the history
* Minor refactoring of cgroup telemetry

* Update tests
  • Loading branch information
narrieta authored Aug 8, 2019
1 parent 5346d4a commit fce905a
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 204 deletions.
34 changes: 6 additions & 28 deletions azurelinuxagent/common/cgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ def _get_parameters(self, parameter_name, first_line_only=False):
raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e)
return result

def collect(self):
raise NotImplementedError()

def is_active(self):
try:
tasks = self._get_parameters("tasks")
Expand Down Expand Up @@ -184,15 +181,14 @@ def _get_cpu_percent(self):

return round(float(cpu_delta * self._osutil.get_processor_cores() * 100) / float(system_delta), 3)

def collect(self):
def get_cpu_usage(self):
"""
Collect and return a list of all cpu metrics. If no metrics are collected, return an empty list.
Collects and return the cpu usage.
:rtype: [(str, str, float)]
:rtype: float
"""
self._update_cpu_data()
usage = self._get_cpu_percent()
return [CollectedMetrics("cpu", "% Processor Time", usage)]
return self._get_cpu_percent()


class MemoryCgroup(CGroup):
Expand All @@ -209,7 +205,7 @@ def __str__(self):
self.name, self.path, self.controller
)

def _get_memory_usage(self):
def get_memory_usage(self):
"""
Collect memory.usage_in_bytes from the cgroup.
Expand All @@ -231,7 +227,7 @@ def _get_memory_usage(self):
usage = "0"
return int(usage)

def _get_memory_max_usage(self):
def get_max_memory_usage(self):
"""
Collect memory.usage_in_bytes from the cgroup.
Expand All @@ -250,21 +246,3 @@ def _get_memory_max_usage(self):
if not usage:
usage = "0"
return int(usage)

def collect(self):
"""
Collect and return a list of all memory metrics
:rtype: [(str, str, float)]
"""
usage = self._get_memory_usage()
max_usage = self._get_memory_max_usage()
return [CollectedMetrics("memory", "Total Memory Usage", usage),
CollectedMetrics("memory", "Max Memory Usage", max_usage)]


class CollectedMetrics(object):
def __init__(self, controller, metric_name, value):
self.controller = controller
self.metric_name = metric_name
self.value = value
87 changes: 40 additions & 47 deletions azurelinuxagent/common/cgroupstelemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,26 @@ def _get_metrics_list(metric):

@staticmethod
def _process_cgroup_metric(cgroup_metrics):
current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metrics.get_metrics()
memory_usage = cgroup_metrics.get_memory_usage()
max_memory_usage = cgroup_metrics.get_max_memory_usage()
cpu_usage = cgroup_metrics.get_cpu_usage()

processed_extension = {}

if current_cpu_usage.count() > 0:
processed_extension["cpu"] = {"cur_cpu": CGroupsTelemetry._get_metrics_list(current_cpu_usage)}
if cpu_usage.count() > 0:
processed_extension["cpu"] = {"cur_cpu": CGroupsTelemetry._get_metrics_list(cpu_usage)}

if current_memory_usage.count() > 0:
if memory_usage.count() > 0:
if "memory" in processed_extension:
processed_extension["memory"]["cur_mem"] = CGroupsTelemetry._get_metrics_list(current_memory_usage)
processed_extension["memory"]["cur_mem"] = CGroupsTelemetry._get_metrics_list(memory_usage)
else:
processed_extension["memory"] = {"cur_mem": CGroupsTelemetry._get_metrics_list(current_memory_usage)}
processed_extension["memory"] = {"cur_mem": CGroupsTelemetry._get_metrics_list(memory_usage)}

if max_memory_levels.count() > 0:
if max_memory_usage.count() > 0:
if "memory" in processed_extension:
processed_extension["memory"]["max_mem"] = CGroupsTelemetry._get_metrics_list(max_memory_levels)
processed_extension["memory"]["max_mem"] = CGroupsTelemetry._get_metrics_list(max_memory_usage)
else:
processed_extension["memory"] = {"max_mem": CGroupsTelemetry._get_metrics_list(max_memory_levels)}
processed_extension["memory"] = {"max_mem": CGroupsTelemetry._get_metrics_list(max_memory_usage)}

return processed_extension

Expand Down Expand Up @@ -118,19 +120,7 @@ def poll_all_tracked():
if cgroup.name not in CGroupsTelemetry._cgroup_metrics:
CGroupsTelemetry._cgroup_metrics[cgroup.name] = CgroupMetrics()

metric = None
# noinspection PyBroadException
try:
metric = cgroup.collect()
except Exception as e:
if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT:
pass
else:
logger.periodic_warn(logger.EVERY_HALF_HOUR,
'Could not collect the cgroup metrics for cgroup path {0}. '
'Internal error : {1}'.format(cgroup.path, ustr(e)))
if metric:
CGroupsTelemetry._cgroup_metrics[cgroup.name].add_new_data(cgroup.controller, metric)
CGroupsTelemetry._cgroup_metrics[cgroup.name].collect_data(cgroup)

if not cgroup.is_active():
CGroupsTelemetry.stop_tracking(cgroup)
Expand All @@ -152,34 +142,38 @@ def cleanup():

class CgroupMetrics(object):
def __init__(self):
self._current_memory_usage = Metric()
self._max_memory_levels = Metric()
self._current_cpu_usage = Metric()
self._memory_usage = Metric()
self._max_memory_usage = Metric()
self._cpu_usage = Metric()
self.marked_for_delete = False

def _add_memory_usage(self, metric):
self._current_memory_usage.append(metric[0].value)
self._max_memory_levels.append(metric[1].value)

def _add_cpu_usage(self, metric):
self._current_cpu_usage.append(metric[0].value)

def add_new_data(self, controller, metric):
if metric:
if controller == "cpu":
self._add_cpu_usage(metric)
elif controller == "memory":
self._add_memory_usage(metric)
def collect_data(self, cgroup):
# noinspection PyBroadException
try:
if cgroup.controller == "cpu":
self._cpu_usage.append(cgroup.get_cpu_usage())
elif cgroup.controller == "memory":
self._memory_usage.append(cgroup.get_memory_usage())
self._max_memory_usage.append(cgroup.get_max_memory_usage())
else:
raise CGroupsException('CGroup controller {0} is not supported'.format(controller))
except Exception as e:
if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT:
logger.periodic_warn(logger.EVERY_HALF_HOUR, 'Could not collect metrics for cgroup {0}. Error : {1}'.format(cgroup.path, ustr(e)))

def get_memory_usage(self):
return self._memory_usage

def get_max_memory_usage(self):
return self._max_memory_usage

def get_metrics(self):
return self._current_memory_usage, self._max_memory_levels, self._current_cpu_usage
def get_cpu_usage(self):
return self._cpu_usage

def clear(self):
self._current_memory_usage.clear()
self._max_memory_levels.clear()
self._current_cpu_usage.clear()
self._memory_usage.clear()
self._max_memory_usage.clear()
self._cpu_usage.clear()


class Metric(object):
Expand All @@ -188,12 +182,12 @@ def __init__(self):
self._first_poll_time = None
self._last_poll_time = None

def append(self, metric):
def append(self, data):
if not self._first_poll_time:
# We only want to do it first time.
self._first_poll_time = dt.utcnow()

self._data.append(metric)
self._data.append(data)
self._last_poll_time = dt.utcnow()

def clear(self):
Expand All @@ -202,8 +196,7 @@ def clear(self):
self._data *= 0

def average(self):
return float(sum(self._data)
) / float(len(self._data)) if self._data else None
return float(sum(self._data)) / float(len(self._data)) if self._data else None

def max(self):
return max(self._data) if self._data else None
Expand Down
42 changes: 13 additions & 29 deletions tests/common/test_cgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,17 @@ def test_cpu_cgroup_create(self, patch_get_proc_stat):

@patch("azurelinuxagent.common.osutil.default.DefaultOSUtil.get_processor_cores", return_value=1)
@patch("azurelinuxagent.common.osutil.default.DefaultOSUtil._get_proc_stat")
def test_collect(self, patch_get_proc_stat, *args):
def test_get_cpu_usage(self, patch_get_proc_stat, *args):
patch_get_proc_stat.return_value = fileutil.read_file(os.path.join(data_dir, "cgroups", "dummy_proc_stat"))
test_cpu_cg = CpuCgroup("test_extension", os.path.join(data_dir, "cgroups", "cpu_mount"))

# Mocking CPU consumption
patch_get_proc_stat.return_value = fileutil.read_file(os.path.join(data_dir, "cgroups",
"dummy_proc_stat_updated"))

collected_metric = test_cpu_cg.collect()[0]
cpu_usage = test_cpu_cg.get_cpu_usage()

self.assertEqual("cpu", collected_metric.controller)
self.assertEqual("% Processor Time", collected_metric.metric_name)
self.assertEqual(5.114, collected_metric.value)
self.assertEqual(5.114, cpu_usage)

def test_get_current_cpu_total_exception_handling(self):
test_cpu_cg = CpuCgroup("test_extension", "dummy_path")
Expand All @@ -141,34 +139,20 @@ def test_memory_cgroup_create(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
self.assertEqual("memory", test_mem_cg.controller)

def test_collect(self):
def test_get_metrics(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
metrics = test_mem_cg.collect()

current_mem_collected_metric = metrics[0]
memory_usage = test_mem_cg.get_memory_usage()
self.assertEqual(100000, memory_usage)

self.assertEqual("memory", current_mem_collected_metric.controller)
self.assertEqual("Total Memory Usage", current_mem_collected_metric.metric_name)
self.assertEqual(100000, current_mem_collected_metric.value)
max_memory_usage = test_mem_cg.get_max_memory_usage()
self.assertEqual(1000000, max_memory_usage)

max_mem_collected_metric = metrics[1]

self.assertEqual("memory", max_mem_collected_metric.controller)
self.assertEqual("Max Memory Usage", max_mem_collected_metric.metric_name)
self.assertEqual(1000000, max_mem_collected_metric.value)

def test_collect_when_files_not_present(self):
def test_get_metrics_when_files_not_present(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups"))
metrics = test_mem_cg.collect()

current_mem_collected_metric = metrics[0]

self.assertEqual("memory", current_mem_collected_metric.controller)
self.assertEqual("Total Memory Usage", current_mem_collected_metric.metric_name)
self.assertEqual(0, current_mem_collected_metric.value)

max_mem_collected_metric = metrics[1]
memory_usage = test_mem_cg.get_memory_usage()
self.assertEqual(0, memory_usage)

self.assertEqual("memory", max_mem_collected_metric.controller)
self.assertEqual("Max Memory Usage", max_mem_collected_metric.metric_name)
self.assertEqual(0, max_mem_collected_metric.value)
max_memory_usage = test_mem_cg.get_max_memory_usage()
self.assertEqual(0, max_memory_usage)
Loading

0 comments on commit fce905a

Please sign in to comment.