Skip to content

Commit

Permalink
enforce extension cpu limits (#2555)
Browse files Browse the repository at this point in the history
* enforce ext cpu limits

* fix UTs
  • Loading branch information
nagworld9 authored Apr 20, 2022
1 parent 1da0ef6 commit a2ce6c8
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 10 deletions.
22 changes: 18 additions & 4 deletions azurelinuxagent/common/cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
Before=slices.target
[Slice]
CPUAccounting=yes
CPUQuota={cpu_quota}
"""
LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice"
# More info on resource limits properties in systemd here:
Expand Down Expand Up @@ -762,21 +763,22 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh
process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) # pylint: disable=W1509
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code)

def setup_extension_slice(self, extension_name):
def setup_extension_slice(self, extension_name, cpu_quota):
"""
Each extension runs under its own slice (Ex "Microsoft.CPlat.Extension.slice"). All the slices for
extensions are grouped under "azure-vmextensions.slice".
This method ensures that the extension slice is created. Setup should create the unit file
under /lib/systemd/system if it does not exist.
TODO: set cpu and memory quotas
TODO: set memory quotas
"""
if self.enabled():
unit_file_install_path = systemd.get_unit_file_install_path()
extension_slice_path = os.path.join(unit_file_install_path,
SystemdCgroupsApi.get_extension_slice_name(extension_name))
try:
slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name)
cpu_quota = str(cpu_quota) + "%" if cpu_quota is not None else ""
slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name, cpu_quota=cpu_quota)
CGroupConfigurator._Impl.__create_unit_file(extension_slice_path, slice_contents)
except Exception as exception:
_log_cgroup_warning("Failed to create unit files for the extension slice: {0}", ustr(exception))
Expand All @@ -800,7 +802,7 @@ def set_extension_services_cpu_memory_quota(self, services_list):
Each extension service will have a name, systemd path and its quotas.
This method ensures that drop-in files are created under service.d folder if quotas given.
ex: /lib/systemd/system/extension.service.d/11-CPUAccounting.conf
TODO: set cpu and memory quotas
TODO: set memory quotas
"""
if self.enabled() and services_list is not None:
for service in services_list:
Expand All @@ -813,6 +815,13 @@ def set_extension_services_cpu_memory_quota(self, services_list):
_DROP_IN_FILE_CPU_ACCOUNTING)
files_to_create.append((drop_in_file_cpu_accounting, _DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS))

cpu_quota = service.get('cpuQuotaPercentage', None)
if cpu_quota is not None:
cpu_quota = str(cpu_quota) + "%"
drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
cpu_quota_contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(cpu_quota)
files_to_create.append((drop_in_file_cpu_quota, cpu_quota_contents))

self.__create_all_files(files_to_create)

# reload the systemd configuration; the new unit will be used once the service restarts
Expand All @@ -836,6 +845,11 @@ def remove_extension_services_drop_in_files(self, services_list):
drop_in_file_cpu_accounting = os.path.join(drop_in_path,
_DROP_IN_FILE_CPU_ACCOUNTING)
files_to_cleanup.append(drop_in_file_cpu_accounting)
cpu_quota = service.get('cpuQuotaPercentage', None)
if cpu_quota is not None:
drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA)
files_to_cleanup.append(drop_in_file_cpu_quota)

CGroupConfigurator._Impl.__cleanup_all_files(files_to_cleanup)
_log_cgroup_info("Drop in files removed for {0}".format(service_name))

Expand Down
6 changes: 3 additions & 3 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ def set_extension_resource_limits(self):
man = self.load_manifest()
resource_limits = man.get_resource_limits(extension_name, self.ext_handler.version)
CGroupConfigurator.get_instance().setup_extension_slice(
extension_name=extension_name)
extension_name=extension_name, cpu_quota=resource_limits.get_extension_slice_cpu_quota())
CGroupConfigurator.get_instance().set_extension_services_cpu_memory_quota(resource_limits.get_service_list())

def create_status_file_if_not_exist(self, extension, status, code, operation, message):
Expand Down Expand Up @@ -2318,12 +2318,12 @@ def __init__(self, data):

def get_extension_slice_cpu_quota(self):
if self.data is not None:
return self.data.get('cpuQuota', None)
return self.data.get('cpuQuotaPercentage', None)
return None

def get_extension_slice_memory_quota(self):
if self.data is not None:
return self.data.get('memoryQuota', None)
return self.data.get('memoryQuotaInMB', None)
return None

def get_service_list(self):
Expand Down
22 changes: 19 additions & 3 deletions tests/common/test_cgroupconfigurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,18 @@ def test_setup_extension_slice_should_create_unit_files(self):
# get the paths to the mocked files
extension_slice_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.extensionslice)

configurator.setup_extension_slice(extension_name="Microsoft.CPlat.Extension")
configurator.setup_extension_slice(extension_name="Microsoft.CPlat.Extension", cpu_quota=5)

expected_cpu_accounting = "CPUAccounting=yes"
expected_cpu_quota_percentage = "5%"

self.assertTrue(os.path.exists(extension_slice_unit_file), "{0} was not created".format(extension_slice_unit_file))
self.assertTrue(fileutil.findre_in_file(extension_slice_unit_file, expected_cpu_accounting),
"CPUAccounting was not set correctly. Expected: {0}. Got:\n{1}".format(expected_cpu_accounting, fileutil.read_file(
extension_slice_unit_file)))
self.assertTrue(fileutil.findre_in_file(extension_slice_unit_file, expected_cpu_quota_percentage),
"CPUQuota was not set correctly. Expected: {0}. Got:\n{1}".format(expected_cpu_quota_percentage, fileutil.read_file(
extension_slice_unit_file)))

def test_remove_extension_slice_should_remove_unit_files(self):
with self._get_cgroup_configurator() as configurator:
Expand Down Expand Up @@ -583,22 +587,30 @@ def mock_popen(command, *args, **kwargs):
def test_it_should_set_extension_services_cpu_memory_quota(self):
service_list = [
{
"name": "extension.service"
"name": "extension.service",
"cpuQuotaPercentage": 5
}
]
with self._get_cgroup_configurator() as configurator:
# get the paths to the mocked files
extension_service_cpu_accounting = configurator.mocks.get_mapped_path(UnitFilePaths.extension_service_cpu_accounting)
extension_service_cpu_quota = configurator.mocks.get_mapped_path(UnitFilePaths.extension_service_cpu_quota)

configurator.set_extension_services_cpu_memory_quota(service_list)
expected_cpu_accounting = "CPUAccounting=yes"
expected_cpu_quota_percentage = "CPUQuota=5%"

# create drop in files to set those properties
self.assertTrue(os.path.exists(extension_service_cpu_accounting), "{0} was not created".format(extension_service_cpu_accounting))
self.assertTrue(
fileutil.findre_in_file(extension_service_cpu_accounting, expected_cpu_accounting),
"CPUAccounting was not enabled. Expected: {0}. Got:\n{1}".format(expected_cpu_accounting, fileutil.read_file(extension_service_cpu_accounting)))

self.assertTrue(os.path.exists(extension_service_cpu_quota), "{0} was not created".format(extension_service_cpu_quota))
self.assertTrue(
fileutil.findre_in_file(extension_service_cpu_quota, expected_cpu_quota_percentage),
"CPUQuota was not set. Expected: {0}. Got:\n{1}".format(expected_cpu_quota_percentage, fileutil.read_file(extension_service_cpu_quota)))

def test_it_should_set_extension_services_when_quotas_not_defined(self):
service_list = [
{
Expand Down Expand Up @@ -654,15 +666,19 @@ def test_it_should_stop_tracking_extension_services_cgroups(self):
def test_it_should_remove_extension_services_drop_in_files(self):
service_list = [
{
"name": "extension.service"
"name": "extension.service",
"cpuQuotaPercentage": 5
}
]
with self._get_cgroup_configurator() as configurator:
extension_service_cpu_accounting = configurator.mocks.get_mapped_path(
UnitFilePaths.extension_service_cpu_accounting)
extension_service_cpu_quota = configurator.mocks.get_mapped_path(UnitFilePaths.extension_service_cpu_quota)
configurator.remove_extension_services_drop_in_files(service_list)
self.assertFalse(os.path.exists(extension_service_cpu_accounting),
"{0} should not have been created".format(extension_service_cpu_accounting))
self.assertFalse(os.path.exists(extension_service_cpu_quota),
"{0} should not have been created".format(extension_service_cpu_quota))

def test_it_should_start_tracking_unit_cgroups(self):

Expand Down

0 comments on commit a2ce6c8

Please sign in to comment.