Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix check for systemd-run failure when invoking extensions #1943

Merged
merged 9 commits into from
Jul 16, 2020
11 changes: 7 additions & 4 deletions azurelinuxagent/common/cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil, shellutil
from azurelinuxagent.common.utils.extensionprocessutil import handle_process_completion, read_output
from azurelinuxagent.common.utils.extensionprocessutil import handle_process_completion, read_output, \
TELEMETRY_MESSAGE_MAX_LEN
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import get_distro

Expand Down Expand Up @@ -652,9 +653,11 @@ def get_processes_in_cgroup(cgroup_path):
return processes

@staticmethod
def _is_systemd_failure(scope_name, process_output):
def _is_systemd_failure(scope_name, stderr):
stderr.seek(0)
stderr = ustr(stderr.read(TELEMETRY_MESSAGE_MAX_LEN), encoding='utf-8', errors='backslashreplace')
unit_not_found = "Unit {0} not found.".format(scope_name)
return unit_not_found in process_output or scope_name not in process_output
return unit_not_found in stderr or scope_name not in stderr

def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure):
Expand Down Expand Up @@ -709,8 +712,8 @@ def start_extension_command(self, extension_name, command, timeout, shell, cwd,
except ExtensionError as e:
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or
# extension errors.
systemd_failure = self._is_systemd_failure(scope, stderr)
process_output = read_output(stdout, stderr)
systemd_failure = self._is_systemd_failure(scope, process_output)

if not systemd_failure:
# There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
Expand Down
3 changes: 2 additions & 1 deletion azurelinuxagent/common/utils/extensionprocessutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def read_output(stdout, stderr):
return format_stdout_stderr("", "Cannot read stdout/stderr: {0}".format(ustr(e)))


def format_stdout_stderr(stdout, stderr, max_len=TELEMETRY_MESSAGE_MAX_LEN):
def format_stdout_stderr(stdout, stderr):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

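Lesson learned: don't use a module-level constant as a default argument value in a function signature. Default values are evaluated once, when the function is defined, rather than each time the body runs, so patching the constant afterwards (for example, in a unit test) has no effect on the function.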

"""
Format stdout and stderr's output to make it suitable in telemetry.
The goal is to maximize the amount of output given the constraints
Expand All @@ -118,6 +118,7 @@ def format_stdout_stderr(stdout, stderr, max_len=TELEMETRY_MESSAGE_MAX_LEN):
"""
template = "[stdout]\n{0}\n\n[stderr]\n{1}"
# +6 == len("{0}") + len("{1}")
max_len = TELEMETRY_MESSAGE_MAX_LEN
max_len_each = int((max_len - len(template) + 6) / 2)

if max_len_each <= 0:
Expand Down
Loading