Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use alternate systemd detection #1677

Merged
merged 2 commits into from
Oct 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 7 additions & 16 deletions azurelinuxagent/common/cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,10 @@ def create():
@staticmethod
def _is_systemd():
"""
Determine if systemd is managing system services. If this process (presumed to be the agent) is in a CPU cgroup
that looks like one created by systemd, we can assume systemd is in use.

TODO: We need to re-evaluate whether this the right logic to determine if Systemd is managing cgroups.

:return: True if systemd is managing system services
:rtype: Bool
Determine if systemd is managing system services; the implementation follows the same strategy as, for example,
sd_booted() in libsystemd, or /usr/sbin/service
"""
controller_id = CGroupsApi._get_controller_id('cpu')
current_process_cgroup_path = CGroupsApi._get_current_process_cgroup_relative_path(controller_id)
is_systemd = current_process_cgroup_path == 'system.slice/walinuxagent.service'

return is_systemd
return os.path.exists('/run/systemd/system/')

@staticmethod
def _get_current_process_cgroup_relative_path(controller_id):
Expand Down Expand Up @@ -394,8 +385,8 @@ def create_and_start_unit(unit_filename, unit_contents):
try:
unit_path = os.path.join(UNIT_FILES_FILE_SYSTEM_PATH, unit_filename)
fileutil.write_file(unit_path, unit_contents)
shellutil.run_get_output("systemctl daemon-reload")
shellutil.run_get_output("systemctl start {0}".format(unit_filename))
shellutil.run_command(["systemctl", "daemon-reload"])
shellutil.run_command(["systemctl", "start", unit_filename])
except Exception as e:
raise CGroupsException("Failed to create and start {0}. Error: {1}".format(unit_filename, ustr(e)))

Expand Down Expand Up @@ -463,9 +454,9 @@ def remove_extension_cgroups(self, extension_name):
unit_filename = self._get_extension_slice_name(extension_name)
try:
unit_path = os.path.join(UNIT_FILES_FILE_SYSTEM_PATH, unit_filename)
shellutil.run_get_output("systemctl stop {0}".format(unit_filename))
shellutil.run_command(["systemctl", "stop", unit_filename])
fileutil.rm_files(unit_path)
shellutil.run_get_output("systemctl daemon-reload")
shellutil.run_command(["systemctl", "daemon-reload"])
except Exception as e:
raise CGroupsException("Failed to remove {0}. Error: {1}".format(unit_filename, ustr(e)))

Expand Down
9 changes: 3 additions & 6 deletions azurelinuxagent/common/utils/shellutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,10 @@ def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]):
output = subprocess.check_output(cmd,
stderr=subprocess.STDOUT,
shell=True)
output = ustr(output,
encoding='utf-8',
errors="backslashreplace")
output = _encode_command_output(output)
except subprocess.CalledProcessError as e:
output = ustr(e.output,
encoding='utf-8',
errors="backslashreplace")
output = _encode_command_output(e.output)

if chk_err:
msg = u"Command: [{0}], " \
u"return code: [{1}], " \
Expand Down
92 changes: 25 additions & 67 deletions tests/common/test_cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,83 +55,41 @@ def test_create_should_return_a_FileSystemCgroupsApi_on_non_systemd_platforms(se
self.assertTrue(type(api) == FileSystemCgroupsApi)

def test_is_systemd_should_return_true_when_systemd_manages_current_process(self):
fileutil_read_file = fileutil.read_file

def mock_read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'):
if filepath == "/proc/cgroups":
return """
#subsys_name hierarchy num_cgroups enabled
cpuset 11 1 1
cpu 3 77 1
cpuacct 3 77 1
blkio 10 70 1
memory 12 124 1
devices 9 70 1
freezer 4 1 1
net_cls 2 1 1
perf_event 7 1 1
net_prio 2 1 1
hugetlb 8 1 1
pids 5 76 1
rdma 6 1 1
"""
if filepath == "/proc/self/cgroup":
return """
12:memory:/system.slice/walinuxagent.service
11:cpuset:/
10:blkio:/system.slice/walinuxagent.service
9:devices:/system.slice/walinuxagent.service
8:hugetlb:/
7:perf_event:/
6:rdma:/
5:pids:/system.slice/walinuxagent.service
4:freezer:/
3:cpu,cpuacct:/system.slice/walinuxagent.service
2:net_cls,net_prio:/
1:name=systemd:/system.slice/walinuxagent.service
0::/system.slice/walinuxagent.service
"""
return fileutil_read_file(filepath, asbin=asbin, remove_bom=remove_bom, encoding=encoding)
path_exists = os.path.exists

def mock_path_exists(path):
if path == "/run/systemd/system/":
mock_path_exists.path_tested = True
return True
return path_exists(path)

with patch("azurelinuxagent.common.cgroupapi.fileutil.read_file", mock_read_file):
mock_path_exists.path_tested = False

with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists):
is_systemd = CGroupsApi._is_systemd()

self.assertTrue(is_systemd)

self.assertTrue(mock_path_exists.path_tested, 'The expected path was not tested; the implementation of CGroupsApi._is_systemd() may have changed.')

def test_is_systemd_should_return_false_when_systemd_does_not_manage_current_process(self):
fileutil_read_file = fileutil.read_file

def mock_read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'):
if filepath == "/proc/cgroups":
return """
#subsys_name hierarchy num_cgroups enabled
cpuset 11 1 1
cpu 3 77 1
cpuacct 3 77 1
blkio 10 70 1
memory 12 124 1
devices 9 70 1
freezer 4 1 1
net_cls 2 1 1
perf_event 7 1 1
net_prio 2 1 1
hugetlb 8 1 1
pids 5 76 1
rdma 6 1 1
"""
if filepath == "/proc/self/cgroup":
return """
3:name=systemd:/
2:memory:/walinuxagent.service
1:cpu,cpuacct:/walinuxagent.service
"""
return fileutil_read_file(filepath, asbin=asbin, remove_bom=remove_bom, encoding=encoding)

with patch("azurelinuxagent.common.cgroupapi.fileutil.read_file", mock_read_file):
path_exists = os.path.exists

def mock_path_exists(path):
if path == "/run/systemd/system/":
mock_path_exists.path_tested = True
return False
return path_exists(path)

mock_path_exists.path_tested = False

with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists):
is_systemd = CGroupsApi._is_systemd()

self.assertFalse(is_systemd)

self.assertTrue(mock_path_exists.path_tested, 'The expected path was not tested; the implementation of CGroupsApi._is_systemd() may have changed.')

def test_foreach_controller_should_execute_operation_on_all_mounted_controllers(self):
executed_controllers = []

Expand Down
128 changes: 73 additions & 55 deletions tests/common/test_cgroupstelemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,21 +579,28 @@ def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args):
def test_telemetry_with_tracked_cgroup(self):
self.assertTrue(i_am_root(), "Test does not run when non-root")

max_num_polls = 30
time_to_wait = 3
extn_name = "foobar-1.0.0"
num_summarization_values = 7

cgs = make_new_cgroup(extn_name)
self.assertEqual(len(cgs), 2)

ext_handler_properties = ExtHandlerProperties()
ext_handler_properties.version = "1.0.0"
self.ext_handler = ExtHandler(name='foobar')
self.ext_handler.properties = ext_handler_properties
self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None)

command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", '''
# This test has some timing issues when systemd is managing cgroups, so we force the file system API
# by creating a new instance of the CGroupConfigurator
with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False):
cgroup_configurator_instance = CGroupConfigurator._instance
CGroupConfigurator._instance = None

try:
max_num_polls = 30
time_to_wait = 3
extn_name = "foobar-1.0.0"
num_summarization_values = 7

cgs = make_new_cgroup(extn_name)
self.assertEqual(len(cgs), 2)

ext_handler_properties = ExtHandlerProperties()
ext_handler_properties.version = "1.0.0"
self.ext_handler = ExtHandler(name='foobar')
self.ext_handler.properties = ext_handler_properties
self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None)

command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", '''
nohup python -c "import time

for i in range(5):
Expand All @@ -603,48 +610,59 @@ def test_telemetry_with_tracked_cgroup(self):
print('Test loop')" &
'''.format(time_to_wait))

self.log_dir = os.path.join(self.tmp_dir, "log")

with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \
patch_get_base_dir:
with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \
patch_get_log_dir:
self.ext_handler_instance.launch_command(command)

self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0")))
self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0")))

for i in range(max_num_polls):
CGroupsTelemetry.poll_all_tracked()
time.sleep(0.5)
self.log_dir = os.path.join(self.tmp_dir, "log")

with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \
patch_get_base_dir:
with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \
patch_get_log_dir:
self.ext_handler_instance.launch_command(command)

#
# If the test is made to run using the systemd API, then the paths of the cgroups need to be checked differently:
#
# self.assertEquals(len(CGroupsTelemetry._tracked), 2)
# cpu = os.path.join(BASE_CGROUPS, "cpu", "system.slice", r"foobar_1.0.0_.*\.scope")
# self.assertTrue(any(re.match(cpu, tracked.path) for tracked in CGroupsTelemetry._tracked))
# memory = os.path.join(BASE_CGROUPS, "memory", "system.slice", r"foobar_1.0.0_.*\.scope")
# self.assertTrue(any(re.match(memory, tracked.path) for tracked in CGroupsTelemetry._tracked))
#
self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0")))
self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join(
BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0")))

for i in range(max_num_polls):
CGroupsTelemetry.poll_all_tracked()
time.sleep(0.5)

collected_metrics = CGroupsTelemetry.report_all_tracked()
collected_metrics = CGroupsTelemetry.report_all_tracked()

self.assertIn("memory", collected_metrics[extn_name])
self.assertIn("cur_mem", collected_metrics[extn_name]["memory"])
self.assertIn("max_mem", collected_metrics[extn_name]["memory"])
self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values)
self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values)

self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str)

self.assertIn("cpu", collected_metrics[extn_name])
self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"])
self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values)

self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str)

for i in range(5):
self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0)
self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0)
self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0)
# Equal because CPU could be zero for minimum value.
self.assertIn("memory", collected_metrics[extn_name])
self.assertIn("cur_mem", collected_metrics[extn_name]["memory"])
self.assertIn("max_mem", collected_metrics[extn_name]["memory"])
self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values)
self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values)

self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str)

self.assertIn("cpu", collected_metrics[extn_name])
self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"])
self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values)

self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str)
self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str)

for i in range(5):
self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0)
self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0)
self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0)
# Equal because CPU could be zero for minimum value.
finally:
CGroupConfigurator._instance = cgroup_configurator_instance


class TestMetric(AgentTestCase):
Expand Down