From ac3803a6dc978a1635c2c0a050cfbf7ad2a8c308 Mon Sep 17 00:00:00 2001 From: Varad Meru Date: Mon, 12 Nov 2018 15:49:20 -0800 Subject: [PATCH] Correct osutil and add tests for threshold --- azurelinuxagent/common/cgroups.py | 82 +++++++++++++------------------ azurelinuxagent/ga/monitor.py | 4 +- tests/common/test_cgroups.py | 7 +++ 3 files changed, 44 insertions(+), 49 deletions(-) diff --git a/azurelinuxagent/common/cgroups.py b/azurelinuxagent/common/cgroups.py index 8d956fb17d..5a75accedf 100644 --- a/azurelinuxagent/common/cgroups.py +++ b/azurelinuxagent/common/cgroups.py @@ -186,8 +186,6 @@ class CGroupsTelemetry(object): tracked_names = set() - _osutil = get_osutil() - @staticmethod def metrics_hierarchies(): return CGroupsTelemetry._hierarchies @@ -269,24 +267,6 @@ def stop_tracking(name): if CGroupsTelemetry.is_tracked(name): del (CGroupsTelemetry._tracked[name]) - @staticmethod - def get_cpu_limits(name): - # default values - cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in name.lower() else DEFAULT_CPU_LIMIT_EXT - - return cpu_limit - - @staticmethod - def get_memory_limits(name): - # default values - mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(_osutil.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0)) - - # agent values - if AGENT_NAME.lower() in name.lower(): - mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit) - - return mem_limit - @staticmethod def collect_all_tracked(): """ @@ -304,8 +284,7 @@ def collect_all_tracked(): for cgroup_name, collector in CGroupsTelemetry._tracked.copy().items(): cgroup_name = cgroup_name if cgroup_name else WRAPPER_CGROUP_NAME results[cgroup_name] = collector.collect() - limits[cgroup_name] = {"cpu": CGroupsTelemetry.get_cpu_limits(cgroup_name), - "memory": CGroupsTelemetry.get_memory_limits(cgroup_name)} + limits[cgroup_name] = collector.cgroup.threshold return results, limits @@ -392,12 +371,12 @@ def _construct_systemd_path_for_hierarchy(hierarchy, cgroup_name): return os.path.join(BASE_CGROUPS, 
hierarchy, 'system.slice', cgroup_name).rstrip(os.path.sep) @staticmethod - def for_extension(name): - return CGroups(name, CGroups._construct_custom_path_for_hierarchy) + def for_extension(name, limits=None): + return CGroups(name, CGroups._construct_custom_path_for_hierarchy, limits) @staticmethod - def for_systemd_service(name): - return CGroups(name.lower(), CGroups._construct_systemd_path_for_hierarchy) + def for_systemd_service(name, limits=None): + return CGroups(name.lower(), CGroups._construct_systemd_path_for_hierarchy, limits) @staticmethod def enabled(): @@ -411,7 +390,7 @@ def disable(): def enable(): CGroups._enabled = True - def __init__(self, name, path_maker): + def __init__(self, name, path_maker, limits=None): """ Construct CGroups object. Create appropriately-named directory for each hierarchy of interest. @@ -426,7 +405,9 @@ def __init__(self, name, path_maker): self.is_wrapper_cgroup = False self.cgroups = {} - self.threshold = None + + self.threshold = CGroupLimits(self.name, cpu_threshold=limits["cpu"], + memory_threshold=limits["memory"]) if limits else CGroupLimits(self.name) if not self.enabled(): return @@ -503,22 +484,6 @@ def add(self, pid): tasks_file = self._get_cgroup_file(hierarchy, 'cgroup.procs') fileutil.append_file(tasks_file, "{0}\n".format(pid)) - def get_cpu_limits(self): - # default values - cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in self.name.lower() else DEFAULT_CPU_LIMIT_EXT - - return cpu_limit - - def get_memory_limits(self): - # default values - mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(self._osutil.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0)) - - # agent values - if AGENT_NAME.lower() in self.name.lower(): - mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit) - - return mem_limit - def set_limits(self): """ Set per-hierarchy limits based on the cgroup name (agent or particular extension) @@ -535,9 +500,8 @@ def set_limits(self): logger.info('No cgroups limits for {0}'.format(self.name)) 
return - # default values - cpu_limit = self.get_cpu_limits() - mem_limit = self.get_memory_limits() + cpu_limit = self.threshold.cpu_limit + mem_limit = self.threshold.memory_limit msg = '{0}: {1}% {2}mb'.format(self.name, cpu_limit, mem_limit) logger.info("Setting cgroups limits for {0}".format(msg)) @@ -815,3 +779,27 @@ def set_memory_limit(self, limit=None, unit='megabytes'): fileutil.write_file(memory_limit_file, '{0}\n'.format(value)) else: raise CGroupsException("Memory hierarchy not available in this cgroup") + + +class CGroupLimits(object): + def __init__(self, cgroup_name, cpu_threshold=None, memory_threshold=None): + self.cpu_limit = cpu_threshold if cpu_threshold else self.get_default_cpu_limits(cgroup_name) + self.memory_limit = memory_threshold if memory_threshold else self.get_default_memory_limits(cgroup_name) + + @staticmethod + def get_default_cpu_limits(cgroup_name): + # default values + cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in cgroup_name.lower() else DEFAULT_CPU_LIMIT_EXT + return cpu_limit + + @staticmethod + def get_default_memory_limits(cgroup_name): + os_util = get_osutil() + + # default values + mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(os_util.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0)) + + # agent values + if AGENT_NAME.lower() in cgroup_name.lower(): + mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit) + return mem_limit diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index f3967d35f8..aae67df5f5 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -427,7 +427,7 @@ def send_cgroup_telemetry(self): report_metric(metric_group, metric_name, cgroup_name, value) if metric_group == "Memory": - if value >= thresholds["memory"]: + if value >= thresholds.memory_limit: msg = "CGroup {0}: Crossed the Memory Threshold. 
Current Value:{1}, Threshold:{2}.".format( - cgroup_name, value, thresholds["memory"]) + cgroup_name, value, thresholds.memory_limit) add_event(name=AGENT_NAME, @@ -438,7 +438,7 @@ def send_cgroup_telemetry(self): log_event=True) if metric_group == "Process": - if value >= thresholds["cpu"]: + if value >= thresholds.cpu_limit: msg = "CGroup {0}: Crossed the Processor Threshold. Current Value:{1}, Threshold:{2}.".format( - cgroup_name, value, thresholds["cpu"]) + cgroup_name, value, thresholds.cpu_limit) add_event(name=AGENT_NAME, diff --git a/tests/common/test_cgroups.py b/tests/common/test_cgroups.py index 56d5a1df31..5e24966b24 100644 --- a/tests/common/test_cgroups.py +++ b/tests/common/test_cgroups.py @@ -144,6 +144,10 @@ def exercise_telemetry_instantiation(self, test_cgroup): else: self.fail("Unknown metric {0}/{1} value {2}".format(metric_family, metric_name, metric_value)) + my_limits = limits[test_extension_name] + self.assertGreater(my_limits.cpu_limit, 0.0) + self.assertGreater(my_limits.memory_limit, 0) + @skip_if_predicate_false(i_am_root, "Test does not run when non-root") def test_telemetry_instantiation_as_superuser(self): """