Skip to content

Commit

Permalink
Correct osutil and add tests for threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
vrdmr committed Nov 14, 2018
1 parent c76e43b commit 91a3371
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 51 deletions.
82 changes: 35 additions & 47 deletions azurelinuxagent/common/cgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,6 @@ class CGroupsTelemetry(object):

tracked_names = set()

_osutil = get_osutil()

@staticmethod
def metrics_hierarchies():
return CGroupsTelemetry._hierarchies
Expand Down Expand Up @@ -269,24 +267,6 @@ def stop_tracking(name):
if CGroupsTelemetry.is_tracked(name):
del (CGroupsTelemetry._tracked[name])

@staticmethod
def get_cpu_limits(name):
# default values
cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in name.lower() else DEFAULT_CPU_LIMIT_EXT

return cpu_limit

@staticmethod
def get_memory_limits(name):
# default values
mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(_osutil.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0))

# agent values
if AGENT_NAME.lower() in name.lower():
mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit)

return mem_limit

@staticmethod
def collect_all_tracked():
"""
Expand All @@ -304,8 +284,7 @@ def collect_all_tracked():
for cgroup_name, collector in CGroupsTelemetry._tracked.copy().items():
cgroup_name = cgroup_name if cgroup_name else WRAPPER_CGROUP_NAME
results[cgroup_name] = collector.collect()
limits[cgroup_name] = {"cpu": CGroupsTelemetry.get_cpu_limits(cgroup_name),
"memory": CGroupsTelemetry.get_memory_limits(cgroup_name)}
limits[cgroup_name] = collector.cgroup.threshold

return results, limits

Expand Down Expand Up @@ -392,12 +371,12 @@ def _construct_systemd_path_for_hierarchy(hierarchy, cgroup_name):
return os.path.join(BASE_CGROUPS, hierarchy, 'system.slice', cgroup_name).rstrip(os.path.sep)

@staticmethod
def for_extension(name):
return CGroups(name, CGroups._construct_custom_path_for_hierarchy)
def for_extension(name, limits=None):
return CGroups(name, CGroups._construct_custom_path_for_hierarchy, limits)

@staticmethod
def for_systemd_service(name):
return CGroups(name.lower(), CGroups._construct_systemd_path_for_hierarchy)
def for_systemd_service(name, limits=None):
return CGroups(name.lower(), CGroups._construct_systemd_path_for_hierarchy, limits)

@staticmethod
def enabled():
Expand All @@ -411,7 +390,7 @@ def disable():
def enable():
CGroups._enabled = True

def __init__(self, name, path_maker):
def __init__(self, name, path_maker, limits=None):
"""
Construct CGroups object. Create appropriately-named directory for each hierarchy of interest.
Expand All @@ -426,7 +405,9 @@ def __init__(self, name, path_maker):
self.is_wrapper_cgroup = False

self.cgroups = {}
self.threshold = None

self.threshold = CGroupsLimits(self.name, cpu_threshold=limits["cpu"],
memory_threshold=limits["memory"]) if limits else CGroupsLimits(self.name)

if not self.enabled():
return
Expand Down Expand Up @@ -503,22 +484,6 @@ def add(self, pid):
tasks_file = self._get_cgroup_file(hierarchy, 'cgroup.procs')
fileutil.append_file(tasks_file, "{0}\n".format(pid))

def get_cpu_limits(self):
# default values
cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in self.name.lower() else DEFAULT_CPU_LIMIT_EXT

return cpu_limit

def get_memory_limits(self):
# default values
mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(self._osutil.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0))

# agent values
if AGENT_NAME.lower() in self.name.lower():
mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit)

return mem_limit

def set_limits(self):
"""
Set per-hierarchy limits based on the cgroup name (agent or particular extension)
Expand All @@ -535,9 +500,8 @@ def set_limits(self):
logger.info('No cgroups limits for {0}'.format(self.name))
return

# default values
cpu_limit = self.get_cpu_limits()
mem_limit = self.get_memory_limits()
cpu_limit = self.threshold.cpu_limit
mem_limit = self.threshold.memory_limit

msg = '{0}: {1}% {2}mb'.format(self.name, cpu_limit, mem_limit)
logger.info("Setting cgroups limits for {0}".format(msg))
Expand Down Expand Up @@ -815,3 +779,27 @@ def set_memory_limit(self, limit=None, unit='megabytes'):
fileutil.write_file(memory_limit_file, '{0}\n'.format(value))
else:
raise CGroupsException("Memory hierarchy not available in this cgroup")


class CGroupsLimits(object):
    """
    CPU and memory thresholds associated with a single cgroup.

    :param cgroup_name: name of the cgroup; decides whether the agent or the
        extension defaults apply when a threshold is not given explicitly
    :param cpu_threshold: explicit CPU limit; a falsy value (None or 0) selects
        the default from get_default_cpu_limits
    :param memory_threshold: explicit memory limit; a falsy value (None or 0)
        selects the default from get_default_memory_limits
    """

    def __init__(self, cgroup_name, cpu_threshold=None, memory_threshold=None):
        self.cpu_limit = cpu_threshold if cpu_threshold else self.get_default_cpu_limits(cgroup_name)
        # The memory default has to come from the memory helper; falling back to the
        # CPU helper would store a CPU value as the memory limit.
        self.memory_limit = memory_threshold if memory_threshold \
            else self.get_default_memory_limits(cgroup_name)

    @staticmethod
    def get_default_cpu_limits(cgroup_name):
        """
        Return the default CPU limit for the named cgroup.

        The agent default is used when AGENT_NAME occurs (case-insensitively)
        in cgroup_name; otherwise the extension default is used.
        """
        # default values
        cpu_limit = DEFAULT_CPU_LIMIT_AGENT if AGENT_NAME.lower() in cgroup_name.lower() else DEFAULT_CPU_LIMIT_EXT
        return cpu_limit

    @staticmethod
    def get_default_memory_limits(cgroup_name):
        """
        Return the default memory limit for the named cgroup.

        The limit is DEFAULT_MEM_LIMIT_PCT percent of the total memory reported
        by the OS utility, never lower than DEFAULT_MEM_LIMIT_MIN_MB. For the
        agent's own cgroup it is additionally capped at DEFAULT_MEM_LIMIT_MAX_MB.
        """
        # Resolve the OS utility on each call rather than caching it on the class
        os_util = get_osutil()

        # default values
        mem_limit = max(DEFAULT_MEM_LIMIT_MIN_MB, round(os_util.get_total_mem() * DEFAULT_MEM_LIMIT_PCT / 100, 0))

        # agent values
        if AGENT_NAME.lower() in cgroup_name.lower():
            mem_limit = min(DEFAULT_MEM_LIMIT_MAX_MB, mem_limit)
        return mem_limit
4 changes: 2 additions & 2 deletions azurelinuxagent/ga/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def send_cgroup_telemetry(self):
report_metric(metric_group, metric_name, cgroup_name, value)

if metric_group == "Memory":
if value >= thresholds["memory"]:
if value >= thresholds.memory_limit:
# Read the same attribute the threshold check uses; subscripting the limits object would fail here.
msg = "CGroup {0}: Crossed the Memory Threshold. Current Value:{1}, Threshold:{2}.".format(
cgroup_name, value, thresholds.memory_limit)
add_event(name=AGENT_NAME,
Expand All @@ -438,7 +438,7 @@ def send_cgroup_telemetry(self):
log_event=True)

if metric_group == "Process":
if value >= thresholds["cpu"]:
if value >= thresholds.cpu_limit:
# Read the same attribute the threshold check uses; subscripting the limits object would fail here.
msg = "CGroup {0}: Crossed the Processor Threshold. Current Value:{1}, Threshold:{2}.".format(
cgroup_name, value, thresholds.cpu_limit)
add_event(name=AGENT_NAME,
Expand Down
9 changes: 7 additions & 2 deletions tests/common/test_cgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from __future__ import print_function

from azurelinuxagent.common.cgroups import CGroupsTelemetry, CGroups, CGroupsException, BASE_CGROUPS, Cpu, Memory, \
DEFAULT_MEM_LIMIT_MIN_MB
from azurelinuxagent.common.cgroups import CGroupsTelemetry, CGroups, CGroupsException, CGroupsLimits, BASE_CGROUPS, \
Cpu, Memory, DEFAULT_MEM_LIMIT_MIN_MB
from azurelinuxagent.common.version import AGENT_NAME
from tests.tools import *

Expand Down Expand Up @@ -144,6 +144,11 @@ def exercise_telemetry_instantiation(self, test_cgroup):
else:
self.fail("Unknown metric {0}/{1} value {2}".format(metric_family, metric_name, metric_value))

my_limits = limits[test_extension_name]
self.assertIsInstance(my_limits, CGroupsLimits, msg="is not the correct instance")
self.assertGreater(my_limits.cpu_limit, 0.0)
self.assertGreater(my_limits.memory_limit, 0.0)

@skip_if_predicate_false(i_am_root, "Test does not run when non-root")
def test_telemetry_instantiation_as_superuser(self):
"""
Expand Down

0 comments on commit 91a3371

Please sign in to comment.