From ee51eb90e42b05613c906e0d2c5307375ae7e7aa Mon Sep 17 00:00:00 2001 From: nam Date: Fri, 18 Oct 2019 16:51:39 -0700 Subject: [PATCH 1/2] use alternate systemd detection --- azurelinuxagent/common/cgroupapi.py | 23 ++-- azurelinuxagent/common/utils/shellutil.py | 9 +- tests/common/test_cgroupapi.py | 94 +++++----------- tests/common/test_cgroupstelemetry.py | 128 ++++++++++++---------- 4 files changed, 109 insertions(+), 145 deletions(-) diff --git a/azurelinuxagent/common/cgroupapi.py b/azurelinuxagent/common/cgroupapi.py index d497c68cc4..06bafb1b14 100644 --- a/azurelinuxagent/common/cgroupapi.py +++ b/azurelinuxagent/common/cgroupapi.py @@ -88,19 +88,10 @@ def create(): @staticmethod def _is_systemd(): """ - Determine if systemd is managing system services. If this process (presumed to be the agent) is in a CPU cgroup - that looks like one created by systemd, we can assume systemd is in use. - - TODO: We need to re-evaluate whether this the right logic to determine if Systemd is managing cgroups. - - :return: True if systemd is managing system services - :rtype: Bool + Determine if systemd is managing system services; the implementation follows the same strategy as, for example, + sd_booted() in libsystemd, or /usr/sbin/service """ - controller_id = CGroupsApi._get_controller_id('cpu') - current_process_cgroup_path = CGroupsApi._get_current_process_cgroup_relative_path(controller_id) - is_systemd = current_process_cgroup_path == 'system.slice/walinuxagent.service' - - return is_systemd + return os.path.exists('/run/systemd/system/') @staticmethod def _get_current_process_cgroup_relative_path(controller_id): @@ -394,8 +385,8 @@ def create_and_start_unit(unit_filename, unit_contents): try: unit_path = os.path.join(UNIT_FILES_FILE_SYSTEM_PATH, unit_filename) fileutil.write_file(unit_path, unit_contents) - shellutil.run_get_output("systemctl daemon-reload") - shellutil.run_get_output("systemctl start {0}".format(unit_filename)) + shellutil.run_command(["systemctl", "daemon-reload"]) + shellutil.run_command(["systemctl", "start", unit_filename]) except Exception as e: raise CGroupsException("Failed to create and start {0}. Error: {1}".format(unit_filename, ustr(e))) @@ -463,9 +454,9 @@ def remove_extension_cgroups(self, extension_name): unit_filename = self._get_extension_slice_name(extension_name) try: unit_path = os.path.join(UNIT_FILES_FILE_SYSTEM_PATH, unit_filename) - shellutil.run_get_output("systemctl stop {0}".format(unit_filename)) + shellutil.run_command(["systemctl", "stop", unit_filename]) fileutil.rm_files(unit_path) - shellutil.run_get_output("systemctl daemon-reload") + shellutil.run_command(["systemctl", "daemon-reload"]) except Exception as e: raise CGroupsException("Failed to remove {0}. Error: {1}".format(unit_filename, ustr(e))) diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index 96897635d0..1260f9d928 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -89,13 +89,10 @@ def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]): output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) - output = ustr(output, - encoding='utf-8', - errors="backslashreplace") + output = _encode_command_output(output) except subprocess.CalledProcessError as e: - output = ustr(e.output, - encoding='utf-8', - errors="backslashreplace") + output = _encode_command_output(e.output) + if chk_err: msg = u"Command: [{0}], " \ u"return code: [{1}], " \ diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index 324104b5e8..107878f453 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -55,82 +55,40 @@ def test_create_should_return_a_FileSystemCgroupsApi_on_non_systemd_platforms(se self.assertTrue(type(api) == FileSystemCgroupsApi) def test_is_systemd_should_return_true_when_systemd_manages_current_process(self): - fileutil_read_file = fileutil.read_file - - def mock_read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'): - if filepath == "/proc/cgroups": - return """ -#subsys_name hierarchy num_cgroups enabled -cpuset 11 1 1 -cpu 3 77 1 -cpuacct 3 77 1 -blkio 10 70 1 -memory 12 124 1 -devices 9 70 1 -freezer 4 1 1 -net_cls 2 1 1 -perf_event 7 1 1 -net_prio 2 1 1 -hugetlb 8 1 1 -pids 5 76 1 -rdma 6 1 1 -""" - if filepath == "/proc/self/cgroup": - return """ -12:memory:/system.slice/walinuxagent.service -11:cpuset:/ -10:blkio:/system.slice/walinuxagent.service -9:devices:/system.slice/walinuxagent.service -8:hugetlb:/ -7:perf_event:/ -6:rdma:/ -5:pids:/system.slice/walinuxagent.service -4:freezer:/ -3:cpu,cpuacct:/system.slice/walinuxagent.service -2:net_cls,net_prio:/ -1:name=systemd:/system.slice/walinuxagent.service -0::/system.slice/walinuxagent.service -""" - return fileutil_read_file(filepath, asbin=asbin, remove_bom=remove_bom, encoding=encoding) + path_exists = os.path.exists + + def mock_path_exists(path): + if path in ("/run/systemd/system/", "/run/systemd/system"): + mock_path_exists.path_tested = True + return True + return path_exists(path) - with patch("azurelinuxagent.common.cgroupapi.fileutil.read_file", mock_read_file): + mock_path_exists.path_tested = False + + with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists): is_systemd = CGroupsApi._is_systemd() self.assertTrue(is_systemd) + self.assertTrue(mock_path_exists.path_tested, 'The expected path was not tested; the implementation of CGroupsApi._is_systemd() may have changed.') + def test_is_systemd_should_return_false_when_systemd_does_not_manage_current_process(self): - fileutil_read_file = fileutil.read_file - - def mock_read_file(filepath, asbin=False, remove_bom=False, encoding='utf-8'): - if filepath == "/proc/cgroups": - return """ -#subsys_name hierarchy num_cgroups enabled -cpuset 11 1 1 -cpu 3 77 1 -cpuacct 3 77 1 -blkio 10 70 1 -memory 12 124 1 -devices 9 70 1 -freezer 4 1 1 -net_cls 2 1 1 -perf_event 7 1 1 -net_prio 2 1 1 -hugetlb 8 1 1 -pids 5 76 1 -rdma 6 1 1 -""" - if filepath == "/proc/self/cgroup": - return """ -3:name=systemd:/ -2:memory:/walinuxagent.service -1:cpu,cpuacct:/walinuxagent.service -""" - return fileutil_read_file(filepath, asbin=asbin, remove_bom=remove_bom, encoding=encoding) - - with patch("azurelinuxagent.common.cgroupapi.fileutil.read_file", mock_read_file): + path_exists = os.path.exists + + def mock_path_exists(path): + if path in ("/run/systemd/system/", "/run/systemd/system"): + mock_path_exists.path_tested = True + return True + return path_exists(path) + + mock_path_exists.path_tested = False + + with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists): is_systemd = CGroupsApi._is_systemd() - self.assertFalse(is_systemd) + self.assertTrue(is_systemd) + + self.assertTrue(mock_path_exists.path_tested, 'The expected path was not tested; the implementation of CGroupsApi._is_systemd() may have changed.') def test_foreach_controller_should_execute_operation_on_all_mounted_controllers(self): executed_controllers = [] diff --git a/tests/common/test_cgroupstelemetry.py b/tests/common/test_cgroupstelemetry.py index 7fb6ba8299..34abfb0880 100644 --- a/tests/common/test_cgroupstelemetry.py +++ b/tests/common/test_cgroupstelemetry.py @@ -579,21 +579,28 @@ def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args): def test_telemetry_with_tracked_cgroup(self): self.assertTrue(i_am_root(), "Test does not run when non-root") - max_num_polls = 30 - time_to_wait = 3 - extn_name = "foobar-1.0.0" - num_summarization_values = 7 - - cgs = make_new_cgroup(extn_name) - self.assertEqual(len(cgs), 2) - - ext_handler_properties = ExtHandlerProperties() - ext_handler_properties.version = "1.0.0" - self.ext_handler = ExtHandler(name='foobar') - self.ext_handler.properties = ext_handler_properties - self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None) - - command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", ''' + # This test has some timing issues when systemd is managing cgroups, so we force the file system API + # by creating a new instance of the CGroupConfigurator + with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False): + cgroup_configurator_instance = CGroupConfigurator._instance + CGroupConfigurator._instance = None + + try: + max_num_polls = 30 + time_to_wait = 3 + extn_name = "foobar-1.0.0" + num_summarization_values = 7 + + cgs = make_new_cgroup(extn_name) + self.assertEqual(len(cgs), 2) + + ext_handler_properties = ExtHandlerProperties() + ext_handler_properties.version = "1.0.0" + self.ext_handler = ExtHandler(name='foobar') + self.ext_handler.properties = ext_handler_properties + self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None) + + command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", ''' nohup python -c "import time for i in range(5): @@ -603,48 +610,59 @@ def test_telemetry_with_tracked_cgroup(self): print('Test loop')" & '''.format(time_to_wait)) - self.log_dir = os.path.join(self.tmp_dir, "log") - - with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ - patch_get_base_dir: - with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ - patch_get_log_dir: - self.ext_handler_instance.launch_command(command) - - self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( - BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) - self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( - BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) - - for i in range(max_num_polls): - CGroupsTelemetry.poll_all_tracked() - time.sleep(0.5) + self.log_dir = os.path.join(self.tmp_dir, "log") + + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ + patch_get_base_dir: + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ + patch_get_log_dir: + self.ext_handler_instance.launch_command(command) + + # + # If the test is made to run using the systemd API, then the paths of the cgroups need to be checked differently: + # + # self.assertEquals(len(CGroupsTelemetry._tracked), 2) + # cpu = os.path.join(BASE_CGROUPS, "cpu", "system.slice", r"foobar_1.0.0_.*\.scope") + # self.assertTrue(any(re.match(cpu, tracked.path) for tracked in CGroupsTelemetry._tracked)) + # memory = os.path.join(BASE_CGROUPS, "memory", "system.slice", r"foobar_1.0.0_.*\.scope") + # self.assertTrue(any(re.match(memory, tracked.path) for tracked in CGroupsTelemetry._tracked)) + # + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) + + for i in range(max_num_polls): + CGroupsTelemetry.poll_all_tracked() + time.sleep(0.5) - collected_metrics = CGroupsTelemetry.report_all_tracked() + collected_metrics = CGroupsTelemetry.report_all_tracked() - self.assertIn("memory", collected_metrics[extn_name]) - self.assertIn("cur_mem", collected_metrics[extn_name]["memory"]) - self.assertIn("max_mem", collected_metrics[extn_name]["memory"]) - self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values) - self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values) - - self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str) - - self.assertIn("cpu", collected_metrics[extn_name]) - self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"]) - self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values) - - self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str) - - for i in range(5): - self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0) - self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0) - self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0) - # Equal because CPU could be zero for minimum value. + self.assertIn("memory", collected_metrics[extn_name]) + self.assertIn("cur_mem", collected_metrics[extn_name]["memory"]) + self.assertIn("max_mem", collected_metrics[extn_name]["memory"]) + self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values) + self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values) + + self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str) + + self.assertIn("cpu", collected_metrics[extn_name]) + self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"]) + self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values) + + self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str) + + for i in range(5): + self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0) + self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0) + self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0) + # Equal because CPU could be zero for minimum value. + finally: + CGroupConfigurator._instance = cgroup_configurator_instance class TestMetric(AgentTestCase): From 41526c6692894a6b91308131fa63764155b9fbee Mon Sep 17 00:00:00 2001 From: nam Date: Tue, 22 Oct 2019 17:20:55 -0700 Subject: [PATCH 2/2] Fix unit test --- tests/common/test_cgroupapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index 107878f453..8d892e841a 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -58,7 +58,7 @@ def test_is_systemd_should_return_true_when_systemd_manages_current_process(self path_exists = os.path.exists def mock_path_exists(path): - if path in ("/run/systemd/system/", "/run/systemd/system"): + if path == "/run/systemd/system/": mock_path_exists.path_tested = True return True return path_exists(path) @@ -76,9 +76,9 @@ def test_is_systemd_should_return_false_when_systemd_does_not_manage_current_pro path_exists = os.path.exists def mock_path_exists(path): - if path in ("/run/systemd/system/", "/run/systemd/system"): + if path == "/run/systemd/system/": mock_path_exists.path_tested = True - return True + return False return path_exists(path) mock_path_exists.path_tested = False @@ -86,7 +86,7 @@ def mock_path_exists(path): with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists): is_systemd = CGroupsApi._is_systemd() - self.assertTrue(is_systemd) + self.assertFalse(is_systemd) self.assertTrue(mock_path_exists.path_tested, 'The expected path was not tested; the implementation of CGroupsApi._is_systemd() may have changed.')