diff --git a/azurelinuxagent/common/cgroupapi.py b/azurelinuxagent/common/cgroupapi.py index 0d0e030874..b72a2b31a2 100644 --- a/azurelinuxagent/common/cgroupapi.py +++ b/azurelinuxagent/common/cgroupapi.py @@ -510,6 +510,8 @@ def create_cgroup(controller): stderr.truncate(0) # Try invoking the process again, this time without systemd-run + logger.info('Extension invocation using systemd failed, falling back to regular invocation ' + 'without cgroups tracking.') process = subprocess.Popen(command, shell=shell, cwd=cwd, diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index 8d892e841a..12575860ce 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -507,16 +507,18 @@ def mock_popen(*args, **kwargs): @patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)) def test_start_extension_command_should_use_fallback_option_if_systemd_times_out(self, _): - # Mock systemd timeout and make sure the failure is only attributed to the extension if the command fails - # using the fallback option + # Systemd has its own internal timeout which is shorter than what we define for extension operation timeout. + # When systemd times out, it will write a message to stderr and exit with exit code 1. + # In that case, we will internally recognize the failure due to the non-zero exit code, not as a timeout. original_popen = subprocess.Popen - success_cmd = "echo 'success'" + systemd_timeout_command = "echo 'Failed to start transient scope unit: Connection timed out' >&2 && exit 1" def mock_popen(*args, **kwargs): - # Inject a syntax error to the call + # If trying to invoke systemd, mock what would happen if systemd timed out internally: + # write failure to stderr and exit with exit code 1. new_args = args if "systemd-run" in args[0]: - new_args = (args[0].replace(success_cmd, "sleep 1s"),) # Inject sleep for timeout + new_args = (systemd_timeout_command,) return original_popen(new_args, **kwargs) @@ -524,21 +526,28 @@ def mock_popen(*args, **kwargs): with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): - with patch("azurelinuxagent.common.cgroupapi.SystemdCgroupsApi._is_systemd_failure", - return_value=True): - extension_cgroups, process_output = SystemdCgroupsApi().start_extension_command( - extension_name="Microsoft.Compute.TestExtension-1.2.3", - command="echo 'success'", - timeout=300, - shell=True, - cwd=self.tmp_dir, - env={}, - stdout=stdout, - stderr=stderr) + with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen) \ + as patch_mock_popen: + extension_cgroups, process_output = SystemdCgroupsApi().start_extension_command( + extension_name="Microsoft.Compute.TestExtension-1.2.3", + command="echo 'success'", + timeout=300, + shell=True, + cwd=self.tmp_dir, + env={}, + stdout=stdout, + stderr=stderr) - self.assertEquals(extension_cgroups, []) - self.assertEquals(expected_output.format("success"), process_output) + # We expect two calls to Popen, first for the systemd-run call, second for the fallback option + self.assertEquals(2, patch_mock_popen.call_count) + + first_call_args = patch_mock_popen.mock_calls[0][1][0] + second_call_args = patch_mock_popen.mock_calls[1][1][0] + self.assertIn("systemd-run --unit", first_call_args) + self.assertNotIn("systemd-run --unit", second_call_args) + + self.assertEquals(extension_cgroups, []) + self.assertEquals(expected_output.format("success"), process_output) @attr('requires_sudo') @patch("azurelinuxagent.common.cgroupapi.add_event")