-
Notifications
You must be signed in to change notification settings - Fork 372
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Minor refactoring of cgroup telemetry #1607
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,9 +95,6 @@ def _get_parameters(self, parameter_name, first_line_only=False): | |
raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e) | ||
return result | ||
|
||
def collect(self): | ||
raise NotImplementedError() | ||
|
||
def is_active(self): | ||
try: | ||
tasks = self._get_parameters("tasks") | ||
|
@@ -184,15 +181,14 @@ def _get_cpu_percent(self): | |
|
||
return round(float(cpu_delta * self._osutil.get_processor_cores() * 100) / float(system_delta), 3) | ||
|
||
def collect(self): | ||
def get_cpu_usage(self): | ||
""" | ||
Collect and return a list of all cpu metrics. If no metrics are collected, return an empty list. | ||
Collects and return the cpu usage. | ||
|
||
:rtype: [(str, str, float)] | ||
:rtype: float | ||
""" | ||
self._update_cpu_data() | ||
usage = self._get_cpu_percent() | ||
return [CollectedMetrics("cpu", "% Processor Time", usage)] | ||
return self._get_cpu_percent() | ||
|
||
|
||
class MemoryCgroup(CGroup): | ||
|
@@ -209,7 +205,7 @@ def __str__(self): | |
self.name, self.path, self.controller | ||
) | ||
|
||
def _get_memory_usage(self): | ||
def get_memory_usage(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. made it public |
||
""" | ||
Collect memory.usage_in_bytes from the cgroup. | ||
|
||
|
@@ -231,7 +227,7 @@ def _get_memory_usage(self): | |
usage = "0" | ||
return int(usage) | ||
|
||
def _get_memory_max_usage(self): | ||
def get_max_memory_usage(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. made it public + rename |
||
""" | ||
Collect memory.usage_in_bytes from the cgroup. | ||
|
||
|
@@ -250,21 +246,3 @@ def _get_memory_max_usage(self): | |
if not usage: | ||
usage = "0" | ||
return int(usage) | ||
|
||
def collect(self): | ||
""" | ||
Collect and return a list of all memory metrics | ||
|
||
:rtype: [(str, str, float)] | ||
""" | ||
usage = self._get_memory_usage() | ||
max_usage = self._get_memory_max_usage() | ||
return [CollectedMetrics("memory", "Total Memory Usage", usage), | ||
CollectedMetrics("memory", "Max Memory Usage", max_usage)] | ||
|
||
|
||
class CollectedMetrics(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was not needed |
||
def __init__(self, controller, metric_name, value): | ||
self.controller = controller | ||
self.metric_name = metric_name | ||
self.value = value |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,24 +36,26 @@ def _get_metrics_list(metric): | |
|
||
@staticmethod | ||
def _process_cgroup_metric(cgroup_metrics): | ||
current_memory_usage, max_memory_levels, current_cpu_usage = cgroup_metrics.get_metrics() | ||
memory_usage = cgroup_metrics.get_memory_usage() | ||
max_memory_usage = cgroup_metrics.get_max_memory_usage() | ||
cpu_usage = cgroup_metrics.get_cpu_usage() | ||
|
||
processed_extension = {} | ||
|
||
if current_cpu_usage.count() > 0: | ||
processed_extension["cpu"] = {"cur_cpu": CGroupsTelemetry._get_metrics_list(current_cpu_usage)} | ||
if cpu_usage.count() > 0: | ||
processed_extension["cpu"] = {"cur_cpu": CGroupsTelemetry._get_metrics_list(cpu_usage)} | ||
|
||
if current_memory_usage.count() > 0: | ||
if memory_usage.count() > 0: | ||
if "memory" in processed_extension: | ||
processed_extension["memory"]["cur_mem"] = CGroupsTelemetry._get_metrics_list(current_memory_usage) | ||
processed_extension["memory"]["cur_mem"] = CGroupsTelemetry._get_metrics_list(memory_usage) | ||
else: | ||
processed_extension["memory"] = {"cur_mem": CGroupsTelemetry._get_metrics_list(current_memory_usage)} | ||
processed_extension["memory"] = {"cur_mem": CGroupsTelemetry._get_metrics_list(memory_usage)} | ||
|
||
if max_memory_levels.count() > 0: | ||
if max_memory_usage.count() > 0: | ||
if "memory" in processed_extension: | ||
processed_extension["memory"]["max_mem"] = CGroupsTelemetry._get_metrics_list(max_memory_levels) | ||
processed_extension["memory"]["max_mem"] = CGroupsTelemetry._get_metrics_list(max_memory_usage) | ||
else: | ||
processed_extension["memory"] = {"max_mem": CGroupsTelemetry._get_metrics_list(max_memory_levels)} | ||
processed_extension["memory"] = {"max_mem": CGroupsTelemetry._get_metrics_list(max_memory_usage)} | ||
|
||
return processed_extension | ||
|
||
|
@@ -118,19 +120,7 @@ def poll_all_tracked(): | |
if cgroup.name not in CGroupsTelemetry._cgroup_metrics: | ||
CGroupsTelemetry._cgroup_metrics[cgroup.name] = CgroupMetrics() | ||
|
||
metric = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. moved the exception handling to collect_data |
||
# noinspection PyBroadException | ||
try: | ||
metric = cgroup.collect() | ||
except Exception as e: | ||
if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: | ||
pass | ||
else: | ||
logger.periodic_warn(logger.EVERY_HALF_HOUR, | ||
'Could not collect the cgroup metrics for cgroup path {0}. ' | ||
'Internal error : {1}'.format(cgroup.path, ustr(e))) | ||
if metric: | ||
CGroupsTelemetry._cgroup_metrics[cgroup.name].add_new_data(cgroup.controller, metric) | ||
CGroupsTelemetry._cgroup_metrics[cgroup.name].collect_data(cgroup) | ||
|
||
if not cgroup.is_active(): | ||
CGroupsTelemetry.stop_tracking(cgroup) | ||
|
@@ -152,34 +142,38 @@ def cleanup(): | |
|
||
class CgroupMetrics(object): | ||
def __init__(self): | ||
self._current_memory_usage = Metric() | ||
self._max_memory_levels = Metric() | ||
self._current_cpu_usage = Metric() | ||
self._memory_usage = Metric() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. renamed those members to have consistent names with the rest of the code |
||
self._max_memory_usage = Metric() | ||
self._cpu_usage = Metric() | ||
self.marked_for_delete = False | ||
|
||
def _add_memory_usage(self, metric): | ||
self._current_memory_usage.append(metric[0].value) | ||
self._max_memory_levels.append(metric[1].value) | ||
|
||
def _add_cpu_usage(self, metric): | ||
self._current_cpu_usage.append(metric[0].value) | ||
|
||
def add_new_data(self, controller, metric): | ||
if metric: | ||
if controller == "cpu": | ||
self._add_cpu_usage(metric) | ||
elif controller == "memory": | ||
self._add_memory_usage(metric) | ||
def collect_data(self, cgroup): | ||
# noinspection PyBroadException | ||
try: | ||
if cgroup.controller == "cpu": | ||
self._cpu_usage.append(cgroup.get_cpu_usage()) | ||
elif cgroup.controller == "memory": | ||
self._memory_usage.append(cgroup.get_memory_usage()) | ||
self._max_memory_usage.append(cgroup.get_max_memory_usage()) | ||
else: | ||
raise CGroupsException('CGroup controller {0} is not supported'.format(controller)) | ||
except Exception as e: | ||
if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: | ||
logger.periodic_warn(logger.EVERY_HALF_HOUR, 'Could not collect metrics for cgroup {0}. Error : {1}'.format(cgroup.path, ustr(e))) | ||
|
||
def get_memory_usage(self): | ||
return self._memory_usage | ||
|
||
def get_max_memory_usage(self): | ||
return self._max_memory_usage | ||
|
||
def get_metrics(self): | ||
return self._current_memory_usage, self._max_memory_levels, self._current_cpu_usage | ||
def get_cpu_usage(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. get_metrics was returning a list of values, replaced it with explicit methods for each value |
||
return self._cpu_usage | ||
|
||
def clear(self): | ||
self._current_memory_usage.clear() | ||
self._max_memory_levels.clear() | ||
self._current_cpu_usage.clear() | ||
self._memory_usage.clear() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. renamed for consistency |
||
self._max_memory_usage.clear() | ||
self._cpu_usage.clear() | ||
|
||
|
||
class Metric(object): | ||
|
@@ -188,12 +182,12 @@ def __init__(self): | |
self._first_poll_time = None | ||
self._last_poll_time = None | ||
|
||
def append(self, metric): | ||
def append(self, data): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 'metric' was confusing since we are not adding a Metric, but a number |
||
if not self._first_poll_time: | ||
# We only want to do it first time. | ||
self._first_poll_time = dt.utcnow() | ||
|
||
self._data.append(metric) | ||
self._data.append(data) | ||
self._last_poll_time = dt.utcnow() | ||
|
||
def clear(self): | ||
|
@@ -202,8 +196,7 @@ def clear(self): | |
self._data *= 0 | ||
|
||
def average(self): | ||
return float(sum(self._data) | ||
) / float(len(self._data)) if self._data else None | ||
return float(sum(self._data)) / float(len(self._data)) if self._data else None | ||
|
||
def max(self): | ||
return max(self._data) if self._data else None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The implementation of collect returned an array of different length with different items for each of the controllers. This makes it difficult to use and error prone. I removed it and instead added explicit methods to get each metric.