diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 733362c0d0..a1e8f6bc12 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -24,21 +24,23 @@ from __future__ import print_function import os -import sys import re import subprocess +import sys import threading import traceback -import azurelinuxagent.common.logger as logger -import azurelinuxagent.common.event as event import azurelinuxagent.common.conf as conf -from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_VERSION, \ - DISTRO_NAME, DISTRO_VERSION, \ - PY_VERSION_MAJOR, PY_VERSION_MINOR, \ - PY_VERSION_MICRO, GOAL_STATE_AGENT_VERSION +import azurelinuxagent.common.event as event +import azurelinuxagent.common.logger as logger +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_VERSION, \ + DISTRO_NAME, DISTRO_VERSION, \ + PY_VERSION_MAJOR, PY_VERSION_MINOR, \ + PY_VERSION_MICRO, GOAL_STATE_AGENT_VERSION class Agent(object): @@ -49,17 +51,17 @@ def __init__(self, verbose, conf_file_path=None): self.conf_file_path = conf_file_path self.osutil = get_osutil() - #Init stdout log + # Init stdout log level = logger.LogLevel.VERBOSE if verbose else logger.LogLevel.INFO logger.add_logger_appender(logger.AppenderType.STDOUT, level) - #Init config + # Init config conf_file_path = self.conf_file_path \ if self.conf_file_path is not None \ else self.osutil.get_agent_conf_file_path() conf.load_conf_from_file(conf_file_path) - #Init log + # Init log verbose = verbose or conf.get_logs_verbose() level = logger.LogLevel.VERBOSE if verbose else logger.LogLevel.INFO logger.add_logger_appender(logger.AppenderType.FILE, level, @@ -104,7 +106,6 @@ def daemon(self): child_args = None \ if self.conf_file_path is None \ else "-configuration-path:{0}".format(self.conf_file_path) - from azurelinuxagent.daemon import get_daemon_handler daemon_handler = get_daemon_handler() daemon_handler.run(child_args=child_args) @@ -151,6 +152,22 @@ def show_configuration(self): for k in sorted(configuration.keys()): print("{0} = {1}".format(k, configuration[k])) + def collect_logs(self, is_full_mode): + if is_full_mode: + print("Running log collector mode full") + else: + print("Running log collector mode normal") + + try: + log_collector = LogCollector(is_full_mode) + archive = log_collector.collect_logs_and_get_archive() + print("Log collection successfully completed. Archive can be found at {0} " + "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH)) + except Exception as e: # pylint: disable=C0103 + print("Log collection completed unsuccessfully. Error: {0}".format(ustr(e))) + print("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH)) + sys.exit(1) + def main(args=[]): # pylint: disable=R0912,W0102 """ @@ -159,7 +176,7 @@ def main(args=[]): # pylint: disable=R0912,W0102 """ if len(args) <= 0: # pylint: disable=len-as-condition args = sys.argv[1:] - command, force, verbose, debug, conf_file_path = parse_args(args) + command, force, verbose, debug, conf_file_path, log_collector_full_mode = parse_args(args) if command == "version": version() elif command == "help": @@ -183,11 +200,14 @@ def main(args=[]): # pylint: disable=R0912,W0102 agent.run_exthandlers(debug) elif command == "show-configuration": agent.show_configuration() + elif command == "collect-logs": + agent.collect_logs(log_collector_full_mode) except Exception: logger.error(u"Failed to run '{0}': {1}", command, traceback.format_exc()) + def parse_args(sys_args): # pylint: disable=R0912 """ Parse command line arguments @@ -197,45 +217,50 @@ def parse_args(sys_args): # pylint: disable=R0912 verbose = False debug = False conf_file_path = None - for a in sys_args: # pylint: disable=C0103 - m = re.match("^(?:[-/]*)configuration-path:([\w/\.\-_]+)", a) # pylint: disable=W1401,C0103 + log_collector_full_mode = False + + for arg in sys_args: + m = re.match("^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg) # pylint: disable=W1401,C0103 if not m is None: conf_file_path = m.group(1) if not os.path.exists(conf_file_path): print("Error: Configuration file {0} does not exist".format( conf_file_path), file=sys.stderr) - usage() + print(usage()) sys.exit(1) - - elif re.match("^([-/]*)deprovision\\+user", a): + elif re.match("^([-/]*)deprovision\\+user", arg): cmd = "deprovision+user" - elif re.match("^([-/]*)deprovision", a): + elif re.match("^([-/]*)deprovision", arg): cmd = "deprovision" - elif re.match("^([-/]*)daemon", a): + elif re.match("^([-/]*)daemon", arg): cmd = "daemon" - elif re.match("^([-/]*)start", a): + elif re.match("^([-/]*)start", arg): cmd = "start" - elif re.match("^([-/]*)register-service", a): + elif re.match("^([-/]*)register-service", arg): cmd = "register-service" - elif re.match("^([-/]*)run-exthandlers", a): + elif re.match("^([-/]*)run-exthandlers", arg): cmd = "run-exthandlers" - elif re.match("^([-/]*)version", a): + elif re.match("^([-/]*)version", arg): cmd = "version" - elif re.match("^([-/]*)verbose", a): + elif re.match("^([-/]*)verbose", arg): verbose = True - elif re.match("^([-/]*)debug", a): + elif re.match("^([-/]*)debug", arg): debug = True - elif re.match("^([-/]*)force", a): + elif re.match("^([-/]*)force", arg): force = True - elif re.match("^([-/]*)show-configuration", a): + elif re.match("^([-/]*)show-configuration", arg): cmd = "show-configuration" - elif re.match("^([-/]*)(help|usage|\\?)", a): + elif re.match("^([-/]*)(help|usage|\\?)", arg): cmd = "help" + elif re.match("^([-/]*)collect-logs", arg): + cmd = "collect-logs" + elif re.match("^([-/]*)full", arg): + log_collector_full_mode = True else: cmd = "help" break - return cmd, force, verbose, debug, conf_file_path + return cmd, force, verbose, debug, conf_file_path, log_collector_full_mode def version(): @@ -250,6 +275,7 @@ def version(): PY_VERSION_MICRO)) print("Goal state agent: {0}".format(GOAL_STATE_AGENT_VERSION)) + def usage(): """ Return agent usage message @@ -258,11 +284,12 @@ def usage(): s += ("usage: {0} [-verbose] [-force] [-help] " # pylint: disable=C0103 "-configuration-path:" "-deprovision[+user]|-register-service|-version|-daemon|-start|" - "-run-exthandlers|-show-configuration]" + "-run-exthandlers|-show-configuration|-collect-logs [-full]" "").format(sys.argv[0]) s += "\n" # pylint: disable=C0103 return s + def start(conf_file_path=None): """ Start agent daemon in a background process and set stdout/stderr to @@ -274,5 +301,6 @@ def start(conf_file_path=None): args.append('-configuration-path:{0}'.format(conf_file_path)) subprocess.Popen(args, stdout=devnull, stderr=devnull) + if __name__ == '__main__' : main() diff --git a/azurelinuxagent/common/cgroupapi.py b/azurelinuxagent/common/cgroupapi.py index a51bdaeea2..6d8c5c9a3b 100644 --- a/azurelinuxagent/common/cgroupapi.py +++ b/azurelinuxagent/common/cgroupapi.py @@ -41,6 +41,7 @@ VM_AGENT_CGROUP_NAME = "walinuxagent.service" EXTENSIONS_ROOT_CGROUP_NAME = "walinuxagent.extensions" UNIT_FILES_FILE_SYSTEM_PATH = "/etc/systemd/system" +SYSTEMD_RUN_PATH = "/run/systemd/system/" class CGroupsApi(object): @@ -94,15 +95,15 @@ def create(): """ Factory method to create the correct API for the current platform """ - return SystemdCgroupsApi() if CGroupsApi._is_systemd() else FileSystemCgroupsApi() + return SystemdCgroupsApi() if CGroupsApi.is_systemd() else FileSystemCgroupsApi() @staticmethod - def _is_systemd(): + def is_systemd(): """ Determine if systemd is managing system services; the implementation follows the same strategy as, for example, sd_booted() in libsystemd, or /usr/sbin/service """ - return os.path.exists('/run/systemd/system/') + return os.path.exists(SYSTEMD_RUN_PATH) @staticmethod def _foreach_controller(operation, message): diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 766b25bffc..102c3c7db2 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -97,6 +97,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "OS.CheckRdmaDriver": False, "Logs.Verbose": False, "Logs.Console": True, + "Logs.Collect": False, "Extensions.Enabled": True, "Provisioning.AllowResetSysUser": False, "Provisioning.RegenerateSshHostKeyPair": False, @@ -149,7 +150,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Provisioning.PasswordCryptSaltLength": 10, "HttpProxy.Port": None, "ResourceDisk.SwapSizeMB": 0, - "Autoupdate.Frequency": 3600 + "Autoupdate.Frequency": 3600, + "Logs.CollectPeriod": 3600 } @@ -223,6 +225,14 @@ def get_logs_console(conf=__conf__): return conf.get_switch("Logs.Console", True) +def get_collect_logs(conf=__conf__): + return conf.get_switch("Logs.Collect", False) + + +def get_collect_logs_period(conf=__conf__): + return conf.get_int("Logs.CollectPeriod", 3600) + + def get_lib_dir(conf=__conf__): return conf.get("Lib.Dir", "/var/lib/waagent") @@ -374,9 +384,11 @@ def get_password_crypt_salt_len(conf=__conf__): def get_monitor_hostname(conf=__conf__): return conf.get_switch("Provisioning.MonitorHostName", False) + def get_monitor_hostname_period(conf=__conf__): return conf.get_int("Provisioning.MonitorHostNamePeriod", 30) + def get_httpproxy_host(conf=__conf__): return conf.get("HttpProxy.Host", None) @@ -395,10 +407,12 @@ def get_resourcedisk_format(conf=__conf__): def get_resourcedisk_enable_swap(conf=__conf__): return conf.get_switch("ResourceDisk.EnableSwap", False) - + + def get_resourcedisk_enable_swap_encryption(conf=__conf__): return conf.get_switch("ResourceDisk.EnableSwapEncryption", False) + def get_resourcedisk_mountpoint(conf=__conf__): return conf.get("ResourceDisk.MountPoint", "/mnt/resource") diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 43305f0153..e513fa5c18 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -101,6 +101,7 @@ class WALAEventOperation: # pylint: disable=R0903,no-init InitializeHostPlugin = "InitializeHostPlugin" InvokeCommandUsingSystemd = "InvokeCommandUsingSystemd" Log = "Log" + LogCollection = "LogCollection" OSInfo = "OSInfo" Partition = "Partition" PluginSettingsVersionMismatch = "PluginSettingsVersionMismatch" diff --git a/azurelinuxagent/common/logcollector.py b/azurelinuxagent/common/logcollector.py index 5ece11eca1..0f43ae65a6 100644 --- a/azurelinuxagent/common/logcollector.py +++ b/azurelinuxagent/common/logcollector.py @@ -18,18 +18,20 @@ # import glob -from heapq import heappush, heappop import logging import os import subprocess import time import zipfile - -# Please note: be careful when adding agent dependencies in this module. -# This module uses its own logger and logs to its own file, not to the agent log. +from datetime import datetime +from heapq import heappush, heappop from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logcollector_manifests import MANIFEST_NORMAL, MANIFEST_FULL + +# Please note: be careful when adding agent dependencies in this module. +# This module uses its own logger and logs to its own file, not to the agent log. _EXTENSION_LOG_DIR = get_ext_log_dir() _AGENT_LIB_DIR = get_lib_dir() @@ -38,8 +40,8 @@ _LOG_COLLECTOR_DIR = os.path.join(_AGENT_LIB_DIR, "logcollector") _TRUNCATED_FILES_DIR = os.path.join(_LOG_COLLECTOR_DIR, "truncated") -_OUTPUT_RESULTS_FILE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "results.txt") -_COMPRESSED_ARCHIVE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "logs.zip") +OUTPUT_RESULTS_FILE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "results.txt") +COMPRESSED_ARCHIVE_PATH = os.path.join(_LOG_COLLECTOR_DIR, "logs.zip") _MUST_COLLECT_FILES = [ _AGENT_LOG, @@ -65,8 +67,9 @@ class LogCollector(object): # pylint: disable=R0903 _TRUNCATED_FILE_PREFIX = "truncated_" - def __init__(self, manifest_file_path): - self._manifest_file_path = manifest_file_path + def __init__(self, is_full_mode=False): + self._is_full_mode = is_full_mode + self._manifest = MANIFEST_FULL if is_full_mode else MANIFEST_NORMAL self._must_collect_files = self._expand_must_collect_files() self._create_base_dirs() self._set_logger() @@ -88,7 +91,7 @@ def _create_base_dirs(): @staticmethod def _set_logger(): - _f_handler = logging.FileHandler(_OUTPUT_RESULTS_FILE_PATH, encoding="utf-8") + _f_handler = logging.FileHandler(OUTPUT_RESULTS_FILE_PATH, encoding="utf-8") _f_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s', datefmt=u'%Y-%m-%dT%H:%M:%SZ') _f_format.converter = time.gmtime @@ -143,14 +146,8 @@ def _expand_must_collect_files(): return manifest - def _read_manifest_file(self): - with open(self._manifest_file_path, "rb") as in_file: - data = in_file.read() - if data is None: # pylint: disable=R1705 - return None - else: - data = ustr(data, encoding="utf-8") - return data.splitlines() + def _read_manifest(self): + return self._manifest.splitlines() @staticmethod def _process_ll_command(folder): @@ -172,7 +169,8 @@ def _convert_file_name_to_archive_name(file_name): # File name is the name of the file on disk, whereas archive name is the name of that same file in the archive. # For non-truncated files: /var/log/waagent.log on disk becomes var/log/waagent.log in archive # (leading separator is removed by the archive). - # For truncated files: /var/truncated/var/log/syslog.1 on disk becomes truncated_var_log_syslog.1 in archive. + # For truncated files: /var/lib/waagent/logcollector/truncated/var/log/syslog.1 on disk becomes + # truncated_var_log_syslog.1 in the archive. if file_name.startswith(_TRUNCATED_FILES_DIR): # pylint: disable=R1705 original_file_path = file_name[len(_TRUNCATED_FILES_DIR):].lstrip(os.path.sep) archive_file_name = LogCollector._TRUNCATED_FILE_PREFIX + original_file_path.replace(os.path.sep, "_") @@ -210,7 +208,7 @@ def _expand_parameters(manifest_data): def _process_manifest_file(self): files_to_collect = set() - data = self._read_manifest_file() + data = self._read_manifest() manifest_entries = LogCollector._expand_parameters(data) for entry in manifest_entries: @@ -268,11 +266,11 @@ def _truncate_large_file(file_path): _LOGGER.error("Failed to truncate large file: %s", ustr(e)) return None - def _get_file_priority(self, file): # pylint: disable=redefined-builtin + def _get_file_priority(self, file_entry): # The sooner the file appears in the must collect list, the bigger its priority. # Priority is higher the lower the number (0 is highest priority). try: - return self._must_collect_files.index(file) + return self._must_collect_files.index(file_entry) except ValueError: # Doesn't matter, file is not in the must collect list, assign a low priority return 999999999 @@ -281,9 +279,9 @@ def _get_priority_files_list(self, file_list): # Given a list of files to collect, determine if they show up in the must collect list and build a priority # queue. The queue will determine the order in which the files are collected, highest priority files first. priority_file_queue = [] - for file in file_list: # pylint: disable=redefined-builtin - priority = self._get_file_priority(file) - heappush(priority_file_queue, (priority, file)) + for file_entry in file_list: + priority = self._get_file_priority(file_entry) + heappush(priority_file_queue, (priority, file_entry)) return priority_file_queue @@ -304,16 +302,16 @@ def _get_final_list_for_archive(self, priority_file_queue): if os.path.getsize(file_path) <= _FILE_SIZE_LIMIT: final_files_to_collect.append(file_path) - _LOGGER.info("Adding file %s, size %db", file_path, file_size) + _LOGGER.info("Adding file %s, size %s b", file_path, file_size) else: truncated_file_path = self._truncate_large_file(file_path) if truncated_file_path: - _LOGGER.info("Adding truncated file %s, size %db", truncated_file_path, file_size) + _LOGGER.info("Adding truncated file %s, size %s b", truncated_file_path, file_size) final_files_to_collect.append(truncated_file_path) total_uncompressed_size += file_size - _LOGGER.info("Uncompressed archive size is %db", total_uncompressed_size) + _LOGGER.info("Uncompressed archive size is %s b", total_uncompressed_size) return final_files_to_collect @@ -328,37 +326,45 @@ def _create_list_of_files_to_collect(self): files_to_collect = self._get_final_list_for_archive(prioritized_file_paths) return files_to_collect - def collect_logs(self): + def collect_logs_and_get_archive(self): """ - Public method that collects necessary log files in a tarball that is updated each time this method is invoked. - The tarball is then compressed into a zip. - :return: Returns True if the log collection succeeded + Public method that collects necessary log files in a compressed zip archive. + :return: Returns the path of the collected compressed archive """ files_to_collect = [] try: # Clear previous run's output and create base directories if they don't exist already. self._create_base_dirs() - LogCollector._reset_file(_OUTPUT_RESULTS_FILE_PATH) + LogCollector._reset_file(OUTPUT_RESULTS_FILE_PATH) + start_time = datetime.utcnow() + _LOGGER.info("Starting log collection at %s", start_time.strftime("%Y-%m-%dT%H:%M:%SZ")) + _LOGGER.info("Using log collection mode %s", "full" if self._is_full_mode else "normal") files_to_collect = self._create_list_of_files_to_collect() _LOGGER.info("### Creating compressed archive ###") - with zipfile.ZipFile(_COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED) as compressed_archive: - for file in files_to_collect: # pylint: disable=redefined-builtin - archive_file_name = LogCollector._convert_file_name_to_archive_name(file) - compressed_archive.write(file, arcname=archive_file_name) + with zipfile.ZipFile(COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED) as compressed_archive: + for file_to_collect in files_to_collect: + archive_file_name = LogCollector._convert_file_name_to_archive_name(file_to_collect) + compressed_archive.write(file_to_collect.encode("utf-8"), arcname=archive_file_name) + + compressed_archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH) + _LOGGER.info("Successfully compressed files. Compressed archive size is %s b", compressed_archive_size) + + end_time = datetime.utcnow() + duration = end_time - start_time + elapsed_ms = int(((duration.days * 24 * 60 * 60 + duration.seconds) * 1000) + (duration.microseconds / 1000.0)) + _LOGGER.info("Finishing log collection at %s", end_time.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) + _LOGGER.info("Elapsed time: %s ms", elapsed_ms) - compressed_archive_size = os.path.getsize(_COMPRESSED_ARCHIVE_PATH) - _LOGGER.info("Successfully compressed files. " - "Compressed archive size is %db", compressed_archive_size) - compressed_archive.write(_OUTPUT_RESULTS_FILE_PATH, arcname="results.txt") + compressed_archive.write(OUTPUT_RESULTS_FILE_PATH.encode("utf-8"), arcname="results.txt") - return True + return COMPRESSED_ARCHIVE_PATH except Exception as e: # pylint: disable=C0103 msg = "Failed to collect logs: {0}".format(ustr(e)) _LOGGER.error(msg) - return False + raise finally: self._remove_uncollected_truncated_files(files_to_collect) diff --git a/config/logcollector_manifest_full b/azurelinuxagent/common/logcollector_manifests.py similarity index 55% rename from config/logcollector_manifest_full rename to azurelinuxagent/common/logcollector_manifests.py index ca01063cf8..ffa3d165e3 100644 --- a/config/logcollector_manifest_full +++ b/azurelinuxagent/common/logcollector_manifests.py @@ -1,4 +1,58 @@ -echo,### Probing Directories ### +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ +# + +MANIFEST_NORMAL = """echo,### Probing Directories ### +ll,/var/log +ll,$LIB_DIR + +echo,### Gathering Configuration Files ### +copy,/etc/*-release +copy,/etc/HOSTNAME +copy,/etc/hostname +copy,/etc/waagent.conf +echo, + +echo,### Gathering Log Files ### +copy,$AGENT_LOG* +copy,/var/log/dmesg* +copy,/var/log/syslog* +copy,/var/log/auth* +copy,$LOG_DIR/*/* +copy,$LOG_DIR/*/*/* +copy,$LOG_DIR/custom-script/handler.log +echo, + +echo,### Gathering Extension Files ### +copy,$LIB_DIR/*.xml +copy,$LIB_DIR/waagent_status.json +copy,$LIB_DIR/*/status/*.status +copy,$LIB_DIR/*/config/*.settings +copy,$LIB_DIR/*/config/HandlerState +copy,$LIB_DIR/*/config/HandlerStatus +copy,$LIB_DIR/*.agentsManifest +copy,$LIB_DIR/error.json +copy,$LIB_DIR/Incarnation +copy,$LIB_DIR/history/*.zip +echo, +""" + +MANIFEST_FULL = """echo,### Probing Directories ### ll,/var/log ll,$LIB_DIR ll,/etc/udev/rules.d @@ -70,4 +124,5 @@ echo, echo,### Gathering Disk Info ### -diskinfo, \ No newline at end of file +diskinfo, +""" \ No newline at end of file diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index b609c4dbd4..c943353902 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -230,13 +230,12 @@ def put_vm_log(self, content): url = URI_FORMAT_PUT_LOG.format(self.endpoint, HOST_PLUGIN_PORT) response = restutil.http_put(url, data=content, - headers=self._build_log_headers()) + headers=self._build_log_headers(), + redact_data=True) if restutil.request_failed(response): # pylint: disable=R1720 error_response = restutil.read_response_error(response) raise HttpError("HostGAPlugin: Upload VM logs failed: {0}".format(error_response)) - else: - logger.info("HostGAPlugin: Upload VM logs succeeded") return response diff --git a/azurelinuxagent/common/utils/restutil.py b/azurelinuxagent/common/utils/restutil.py index 0579e288ac..eb28dacd3e 100644 --- a/azurelinuxagent/common/utils/restutil.py +++ b/azurelinuxagent/common/utils/restutil.py @@ -279,8 +279,8 @@ def redact_sas_tokens_in_urls(url): return SAS_TOKEN_RETRIEVAL_REGEX.sub(r"\1" + REDACTED_TEXT + r"\3", url) -def _http_request(method, host, rel_uri, port=None, data=None, secure=False, # pylint: disable=R0913 - headers=None, proxy_host=None, proxy_port=None): +def _http_request(method, host, rel_uri, port=None, data=None, secure=False, # pylint: disable=R0913,R0914 + headers=None, proxy_host=None, proxy_port=None, redact_data=False): headers = {} if headers is None else headers headers['Connection'] = 'close' @@ -312,11 +312,15 @@ def _http_request(method, host, rel_uri, port=None, data=None, secure=False, # p conn_port, timeout=10) + payload = data + if redact_data: + payload = "[REDACTED]" + # Logger requires the data to be a ustr to log properly, ensuring that the data string that we log is always ustr. logger.verbose("HTTP connection [{0}] [{1}] [{2}] [{3}]", method, redact_sas_tokens_in_urls(url), - textutil.str_to_encoded_ustr(data), + textutil.str_to_encoded_ustr(payload), headers) conn.request(method=method, url=url, body=data, headers=headers) @@ -324,11 +328,12 @@ def _http_request(method, host, rel_uri, port=None, data=None, secure=False, # p def http_request(method, # pylint: disable=R0913,R0912,R0914,W0102 - url, data, headers=None, - use_proxy=False, - max_retry=DEFAULT_RETRIES, - retry_codes=RETRY_CODES, - retry_delay=DELAY_IN_SECONDS): + url, data, headers=None, + use_proxy=False, + max_retry=DEFAULT_RETRIES, + retry_codes=RETRY_CODES, + retry_delay=DELAY_IN_SECONDS, + redact_data=False): global SECURE_WARNING_EMITTED # pylint: disable=W0603 @@ -401,7 +406,8 @@ def http_request(method, # pylint: disable=R0913,R0912,R0914,W0102 secure=secure, headers=headers, proxy_host=proxy_host, - proxy_port=proxy_port) + proxy_port=proxy_port, + redact_data=redact_data) logger.verbose("[HTTP Response] Status Code {0}", resp.status) if request_failed(resp): @@ -497,14 +503,16 @@ def http_put(url, # pylint: disable=R0913,W0102 use_proxy=False, max_retry=DEFAULT_RETRIES, retry_codes=RETRY_CODES, - retry_delay=DELAY_IN_SECONDS): + retry_delay=DELAY_IN_SECONDS, + redact_data=False): return http_request("PUT", url, data, headers=headers, use_proxy=use_proxy, max_retry=max_retry, retry_codes=retry_codes, - retry_delay=retry_delay) + retry_delay=retry_delay, + redact_data=redact_data) def http_delete(url, # pylint: disable=R0913,W0102 diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index 6649d02760..ba54010f8f 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -17,10 +17,18 @@ # Requires Python 2.6+ and Openssl 1.0+ # +import platform import subprocess + import azurelinuxagent.common.logger as logger from azurelinuxagent.common.future import ustr + +def get_python_cmd(): + major_version = platform.python_version_tuple()[0] + return "python" if int(major_version) <= 2 else "python{0}".format(major_version) + + if not hasattr(subprocess, 'check_output'): def check_output(*popenargs, **kwargs): r"""Backport from subprocess module from python 2.7""" @@ -59,12 +67,14 @@ def __str__(self): """ # pylint: enable=W0105 + def has_command(cmd): """ Return True if the given command is on the path """ return not run(cmd, False) + def run(cmd, chk_err=True, expected_errors=[]): # pylint: disable=W0102 """ Note: Deprecating in favour of `azurelinuxagent.common.utils.shellutil.run_command` function. @@ -150,7 +160,8 @@ def format_command(cmd): # PIPE, an existing file descriptor (a positive integer), an existing file object, and None stdin = subprocess.PIPE if cmd_input else None try: - # Starting Python 3.4+, you need to encode the string, i.e. you need to pass Bytes to the input rather than string to process.communicate() + # Starting Python 3.4+, you need to encode the string, i.e. you need to pass Bytes to the input rather than + # string to process.communicate() process_input = cmd_input.encode() if cmd_input else None process = subprocess.Popen(command, stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) diff --git a/azurelinuxagent/common/utils/textutil.py b/azurelinuxagent/common/utils/textutil.py index c8cb3b8a87..ba3073bef5 100644 --- a/azurelinuxagent/common/utils/textutil.py +++ b/azurelinuxagent/common/utils/textutil.py @@ -427,5 +427,6 @@ def str_to_encoded_ustr(s, encoding='utf-8'): # pylint: disable=C0103 except Exception: # If some issues in decoding, just return the string return ustr(s) + # For Py2, explicitly convert the string to unicode with the specified encoding return ustr(s, encoding=encoding) diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py new file mode 100644 index 0000000000..b2a34f0f8e --- /dev/null +++ b/azurelinuxagent/ga/collect_logs.py @@ -0,0 +1,224 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ +# + +import datetime +import os +import sys +import threading +import time + +import azurelinuxagent.common.conf as conf +from azurelinuxagent.common import logger +from azurelinuxagent.common.cgroupapi import CGroupsApi +from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logcollector import COMPRESSED_ARCHIVE_PATH +from azurelinuxagent.common.protocol.util import get_protocol_util +from azurelinuxagent.common.utils import shellutil +from azurelinuxagent.common.utils.shellutil import CommandError +from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, AGENT_NAME, CURRENT_VERSION +from azurelinuxagent.ga.periodic_operation import PeriodicOperation + + +def get_collect_logs_handler(): + return CollectLogsHandler() + + +def is_log_collection_allowed(): + # There are three conditions that need to be met in order to allow periodic log collection: + # 1) It should be enabled in the configuration. + # 2) The system must be using systemd to manage services. Needed for resource limiting of the log collection. + # 3) The python version must be greater than 2.6 in order to support the ZipFile library used when collecting. + conf_enabled = conf.get_collect_logs() + systemd_present = CGroupsApi.is_systemd() + supported_python = PY_VERSION_MINOR >= 7 if PY_VERSION_MAJOR == 2 else PY_VERSION_MAJOR == 3 + is_allowed = conf_enabled and systemd_present and supported_python + + msg = "Checking if log collection is allowed at this time [{0}]. All three conditions must be met: " \ + "configuration enabled [{1}], systemd present [{2}], python supported: [{3}]".format(is_allowed, + conf_enabled, + systemd_present, + supported_python) + logger.info(msg) + add_event( + name=AGENT_NAME, + version=CURRENT_VERSION, + op=WALAEventOperation.LogCollection, + is_success=is_allowed, + message=msg, + log_event=False) + + return is_allowed + + +class CollectLogsHandler(object): + """ + Periodically collects and uploads logs from the VM to the host. + """ + + _THREAD_NAME = "CollectLogsHandler" + + @staticmethod + def get_thread_name(): + return CollectLogsHandler._THREAD_NAME + + def __init__(self): + self.protocol = None + self.protocol_util = None + self.event_thread = None + self.should_run = True + self.last_state = None + + self._periodic_operations = [ + PeriodicOperation("collect_and_send_logs", self.collect_and_send_logs, conf.get_collect_logs_period()) + ] + + def run(self): + self.start(init_data=True) + + def is_alive(self): + return self.event_thread.is_alive() + + def start(self, init_data=False): + self.event_thread = threading.Thread(target=self.daemon, args=(init_data,)) + self.event_thread.setDaemon(True) + self.event_thread.setName(self.get_thread_name()) + self.event_thread.start() + + def join(self): + self.event_thread.join() + + def stopped(self): + return not self.should_run + + def stop(self): + self.should_run = False + if self.is_alive(): + self.join() + + def init_protocols(self): + # The initialization of ProtocolUtil for the log collection thread should be done within the thread itself + # rather than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each + # thread would now have its own ProtocolUtil object as per the SingletonPerThread model. + self.protocol_util = get_protocol_util() + self.protocol = self.protocol_util.get_protocol() + + def daemon(self, init_data=False): + try: + if init_data: + self.init_protocols() + + while not self.stopped(): + try: + for op in self._periodic_operations: # pylint: disable=C0103 + op.run() + except Exception as e: # pylint: disable=C0103 + logger.error("An error occurred in the log collection thread main loop; " + "will skip the current iteration.\n{0}", ustr(e)) + finally: + PeriodicOperation.sleep_until_next_operation(self._periodic_operations) + except Exception as e: # pylint: disable=C0103 + logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e)) + + def collect_and_send_logs(self): + if self._collect_logs(): + self._send_logs() + + @staticmethod + def _get_resource_limits(): + # Define CPU limit (as percentage of CPU time) and memory limit (absolute value in megabytes). + cpu_limit = "5%" + memory_limit = "30M" # K for kb, M for mb + return cpu_limit, memory_limit + + @staticmethod + def _collect_logs(): + logger.info("Starting log collection...") + + # Invoke the command line tool in the agent to collect logs, with resource limits on CPU and memory (RAM). + scope_name = "collect-logs-{0}.scope".format(ustr(int(time.time() * 1000000))) + systemd_cmd = ["systemd-run", "--unit={0}".format(scope_name), "--scope"] + + # More info on resource limits properties in systemd here: + # https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups + cpu_limit, memory_limit = CollectLogsHandler._get_resource_limits() + resource_limits = ["--property=CPUAccounting=1", "--property=CPUQuota={0}".format(cpu_limit), + "--property=MemoryAccounting=1", "--property=MemoryLimit={0}".format(memory_limit)] + + # The log tool is invoked from the current agent's egg with the command line option + collect_logs_cmd = [sys.executable, "-u", sys.argv[0], "-collect-logs"] + final_command = systemd_cmd + resource_limits + collect_logs_cmd + + start_time = datetime.datetime.utcnow() + success = False + msg = None + try: + shellutil.run_command(final_command, log_error=True) + duration = elapsed_milliseconds(start_time) + archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH) + + msg = "Successfully collected logs. Archive size: {0} b, elapsed time: {1} ms.".format(archive_size, + duration) + logger.info(msg) + success = True + + return True + except Exception as e: # pylint: disable=C0103 + duration = elapsed_milliseconds(start_time) + + if isinstance(e, CommandError): + exception_message = ustr("[stderr] %s", e.stderr) # pylint: disable=no-member + else: + exception_message = ustr(e) + + msg = "Failed to collect logs. Elapsed time: {0} ms. Error: {1}".format(duration, exception_message) + # No need to log to the local log since we ran run_command with logging errors as enabled + + return False + finally: + add_event( + name=AGENT_NAME, + version=CURRENT_VERSION, + op=WALAEventOperation.LogCollection, + is_success=success, + message=msg, + log_event=False) + + def _send_logs(self): + msg = None + success = False + try: + with open(COMPRESSED_ARCHIVE_PATH, "rb") as fh: # pylint: disable=C0103 + archive_content = fh.read() + self.protocol.upload_logs(archive_content) + msg = "Successfully uploaded logs." + logger.info(msg) + + success = True + except Exception as e: # pylint: disable=C0103 + msg = "Failed to upload logs. Error: {0}".format(ustr(e)) + logger.warn(msg) + finally: + add_event( + name=AGENT_NAME, + version=CURRENT_VERSION, + op=WALAEventOperation.LogCollection, + is_success=success, + message=msg, + log_event=False) diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 5bac78d2c9..f4f1394db7 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -42,6 +42,7 @@ MAXIMUM_CACHED_FILES = 50 + def get_env_handler(): return EnvHandler() diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 98b5926945..39bd15e547 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -19,7 +19,6 @@ import glob import json import os -import platform import random import re import shutil @@ -49,9 +48,11 @@ from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.utils.shellutil import get_python_cmd from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, AGENT_DIR_PATTERN, CURRENT_AGENT,\ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, is_current_agent_installed, PY_VERSION_MAJOR, PY_VERSION_MINOR, \ PY_VERSION_MICRO +from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed from azurelinuxagent.ga.env import get_env_handler from azurelinuxagent.ga.extension_telemetry import get_extension_telemetry_handler @@ -92,13 +93,7 @@ def get_update_handler(): return UpdateHandler() -def get_python_cmd(): - major_version = platform.python_version_tuple()[0] - return "python" if int(major_version) <= 2 else "python{0}".format(major_version) - - class UpdateHandler(object): # pylint: disable=R0902 - TELEMETRY_HEARTBEAT_PERIOD = timedelta(minutes=30) def __init__(self): @@ -279,6 +274,9 @@ def run(self, debug=False): # pylint: disable=R0912,R0914 get_env_handler() ] + if is_log_collection_allowed(): + all_thread_handlers.append(get_collect_logs_handler()) + if is_extension_telemetry_pipeline_enabled(): # Reuse the same protocol_util as the UpdateHandler class to avoid new initializations all_thread_handlers.append(get_extension_telemetry_handler(self.protocol_util)) diff --git a/config/debian/waagent.conf b/config/debian/waagent.conf index 5677224f0c..30647d50a3 100644 --- a/config/debian/waagent.conf +++ b/config/debian/waagent.conf @@ -63,6 +63,12 @@ Logs.Verbose=n # Enable Console logging, default is y # Logs.Console=y +# Enable periodic log collection, default is n +Logs.Collect=n + +# How frequently to collect logs, default is each hour +Logs.CollectPeriod=3600 + # Is FIPS enabled OS.EnableFIPS=n diff --git a/config/logcollector_manifest_normal b/config/logcollector_manifest_normal deleted file mode 100644 index c24eff6438..0000000000 --- a/config/logcollector_manifest_normal +++ /dev/null @@ -1,33 +0,0 @@ -echo,### Probing Directories ### -ll,/var/log -ll,$LIB_DIR - -echo,### Gathering Configuration Files ### -copy,/etc/*-release -copy,/etc/HOSTNAME -copy,/etc/hostname -copy,/etc/waagent.conf -echo, - -echo,### Gathering Log Files ### -copy,$AGENT_LOG* -copy,/var/log/dmesg* -copy,/var/log/syslog* -copy,/var/log/auth* -copy,$LOG_DIR/*/* -copy,$LOG_DIR/*/*/* -copy,$LOG_DIR/custom-script/handler.log -echo, - -echo,### Gathering Extension Files ### -copy,$LIB_DIR/*.xml -copy,$LIB_DIR/waagent_status.json -copy,$LIB_DIR/*/status/*.status -copy,$LIB_DIR/*/config/*.settings -copy,$LIB_DIR/*/config/HandlerState -copy,$LIB_DIR/*/config/HandlerStatus -copy,$LIB_DIR/*.agentsManifest -copy,$LIB_DIR/error.json -copy,$LIB_DIR/Incarnation -copy,$LIB_DIR/history/*.zip -echo, \ No newline at end of file diff --git a/config/suse/waagent.conf b/config/suse/waagent.conf index b6815abe7e..85fd2a3e98 100644 --- a/config/suse/waagent.conf +++ b/config/suse/waagent.conf @@ -66,6 +66,12 @@ Logs.Verbose=n # Enable Console logging, default is y # Logs.Console=y +# Enable periodic log collection, default is n +Logs.Collect=n + +# How frequently to collect logs, default is each hour +Logs.CollectPeriod=3600 + # Is FIPS enabled OS.EnableFIPS=n diff --git a/config/ubuntu/waagent.conf b/config/ubuntu/waagent.conf index 6b7b3a26aa..22d48a8ef9 100644 --- a/config/ubuntu/waagent.conf +++ b/config/ubuntu/waagent.conf @@ -66,6 +66,12 @@ Logs.Verbose=n # Enable Console logging, default is y # Logs.Console=y +# Enable periodic log collection, default is n +Logs.Collect=n + +# How frequently to collect logs, default is each hour +Logs.CollectPeriod=3600 + # Is FIPS enabled OS.EnableFIPS=n diff --git a/config/waagent.conf b/config/waagent.conf index cb933758ed..2118c73e13 100644 --- a/config/waagent.conf +++ b/config/waagent.conf @@ -72,6 +72,12 @@ Logs.Verbose=n # Enable Console logging, default is y # Logs.Console=y +# Enable periodic log collection, default is n +Logs.Collect=n + +# How frequently to collect logs, default is each hour +Logs.CollectPeriod=3600 + # Is FIPS enabled OS.EnableFIPS=n diff --git a/setup.py b/setup.py index d19e60099d..f0f95a9acf 100755 --- a/setup.py +++ b/setup.py @@ -263,3 +263,4 @@ def run(self): 'install': install } ) + diff --git a/tests/common/mock_cgroup_commands.py b/tests/common/mock_cgroup_commands.py index 41d2c5bdf5..baa9d0a2de 100644 --- a/tests/common/mock_cgroup_commands.py +++ b/tests/common/mock_cgroup_commands.py @@ -121,7 +121,7 @@ def mock_path_exists(path): with patch("azurelinuxagent.common.cgroupapi.os.path.exists", side_effect=mock_path_exists): with patch("azurelinuxagent.common.cgroupapi.fileutil.read_file", side_effect=mock_read_file): with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=True): - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd', return_value=True): + with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.is_systemd', return_value=True): patcher.commands = _default_commands[:] patcher.files = _default_files[:] patcher.add_command = add_command diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index 7f89110663..3755221344 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -24,7 +24,8 @@ from nose.plugins.attrib import attr -from azurelinuxagent.common.cgroupapi import CGroupsApi, FileSystemCgroupsApi, SystemdCgroupsApi, VM_AGENT_CGROUP_NAME +from azurelinuxagent.common.cgroupapi import CGroupsApi, FileSystemCgroupsApi, SystemdCgroupsApi, VM_AGENT_CGROUP_NAME, \ + SYSTEMD_RUN_PATH from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes from azurelinuxagent.common.future import ustr @@ -73,13 +74,13 @@ def test_cgroups_should_be_supported_only_on_ubuntu_16_and_later(self): self.assertEqual(CGroupsApi.cgroups_supported(), supported, "cgroups_supported() failed on {0}".format(distro)) def test_create_should_return_a_SystemdCgroupsApi_on_systemd_platforms(self): # pylint: disable=invalid-name - with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=True): + with patch("azurelinuxagent.common.cgroupapi.CGroupsApi.is_systemd", return_value=True): api = CGroupsApi.create() self.assertTrue(type(api) == SystemdCgroupsApi) # pylint: disable=unidiomatic-typecheck def test_create_should_return_a_FileSystemCgroupsApi_on_non_systemd_platforms(self): # pylint: disable=invalid-name - with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False): + with patch("azurelinuxagent.common.cgroupapi.CGroupsApi.is_systemd", return_value=False): api = CGroupsApi.create() self.assertTrue(type(api) == FileSystemCgroupsApi) # pylint: disable=unidiomatic-typecheck @@ -88,7 +89,7 @@ def test_is_systemd_should_return_true_when_systemd_manages_current_process(self path_exists = os.path.exists def mock_path_exists(path): - if path == "/run/systemd/system/": + if path == SYSTEMD_RUN_PATH: mock_path_exists.path_tested = True return True return path_exists(path) @@ -96,7 +97,7 @@ def mock_path_exists(path): mock_path_exists.path_tested = False with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists): - is_systemd = CGroupsApi._is_systemd() # pylint: disable=protected-access + is_systemd = CGroupsApi.is_systemd() # pylint: disable=protected-access self.assertTrue(is_systemd) @@ -106,7 +107,7 @@ def test_is_systemd_should_return_false_when_systemd_does_not_manage_current_pro path_exists = os.path.exists def mock_path_exists(path): - if path == "/run/systemd/system/": + if path == SYSTEMD_RUN_PATH: mock_path_exists.path_tested = True return False return path_exists(path) @@ -114,7 +115,7 @@ def mock_path_exists(path): mock_path_exists.path_tested = False with patch("azurelinuxagent.common.cgroupapi.os.path.exists", mock_path_exists): - is_systemd = CGroupsApi._is_systemd() # pylint: disable=protected-access + is_systemd = CGroupsApi.is_systemd() # pylint: disable=protected-access self.assertFalse(is_systemd) diff --git a/tests/common/test_logcollector.py b/tests/common/test_logcollector.py index cb315df8ac..fb1c0292e0 100644 --- a/tests/common/test_logcollector.py +++ b/tests/common/test_logcollector.py @@ -19,14 +19,13 @@ import os import shutil -import zipfile import tempfile +import zipfile -from azurelinuxagent.common.utils.fileutil import rm_dirs, mkdir, rm_files, write_file from azurelinuxagent.common.logcollector import LogCollector +from azurelinuxagent.common.utils.fileutil import rm_dirs, mkdir, rm_files from tests.tools import AgentTestCase, is_python_version_26, patch, skip_if_predicate_true - SMALL_FILE_SIZE = 1 * 1024 * 1024 # 1 MB LARGE_FILE_SIZE = 5 * 1024 * 1024 # 5 MB @@ -40,12 +39,15 @@ def setUpClass(cls): prefix = "{0}_".format(cls.__class__.__name__) cls.tmp_dir = tempfile.mkdtemp(prefix=prefix) + cls.root_collect_dir = os.path.join(cls.tmp_dir, "files_to_collect") + mkdir(cls.root_collect_dir) cls._mock_constants() @classmethod def _mock_constants(cls): - cls.manifest_path = os.path.join(cls.tmp_dir, "logcollector_manifest") + cls.mock_manifest = patch("azurelinuxagent.common.logcollector.MANIFEST_NORMAL", cls._build_manifest()) + cls.mock_manifest.start() cls.log_collector_dir = os.path.join(cls.tmp_dir, "logcollector") cls.mock_log_collector_dir = patch("azurelinuxagent.common.logcollector._LOG_COLLECTOR_DIR", @@ -58,17 +60,18 @@ def _mock_constants(cls): cls.mock_truncated_files_dir.start() cls.output_results_file_path = os.path.join(cls.log_collector_dir, "results.txt") - cls.mock_output_results_file_path = patch("azurelinuxagent.common.logcollector._OUTPUT_RESULTS_FILE_PATH", + cls.mock_output_results_file_path = patch("azurelinuxagent.common.logcollector.OUTPUT_RESULTS_FILE_PATH", cls.output_results_file_path) cls.mock_output_results_file_path.start() cls.compressed_archive_path = os.path.join(cls.log_collector_dir, "logs.zip") - cls.mock_compressed_archive_path = patch("azurelinuxagent.common.logcollector._COMPRESSED_ARCHIVE_PATH", + cls.mock_compressed_archive_path = patch("azurelinuxagent.common.logcollector.COMPRESSED_ARCHIVE_PATH", cls.compressed_archive_path) cls.mock_compressed_archive_path.start() @classmethod def tearDownClass(cls): + cls.mock_manifest.stop() cls.mock_log_collector_dir.stop() cls.mock_truncated_files_dir.stop() cls.mock_output_results_file_path.stop() @@ -81,7 +84,6 @@ def tearDownClass(cls): def setUp(self): AgentTestCase.setUp(self) self._build_test_data() - self._build_manifest(self.manifest_path) def tearDown(self): rm_dirs(self.root_collect_dir) @@ -93,9 +95,6 @@ def _build_test_data(cls): """ Build a dummy file structure which will be used as a foundation for the log collector tests """ - cls.root_collect_dir = os.path.join(cls.tmp_dir, "files_to_collect") - mkdir(cls.root_collect_dir) - cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log"), SMALL_FILE_SIZE) # small text file cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log.1"), @@ -112,7 +111,7 @@ def _build_test_data(cls): SMALL_FILE_SIZE) @classmethod - def _build_manifest(cls, manifest_file): + def _build_manifest(cls): """ Files listed in the manifest will be collected, others will be ignored """ @@ -123,9 +122,11 @@ def _build_manifest(cls, manifest_file): os.path.join(cls.root_collect_dir, "non_existing_file"), ] - with open(manifest_file, "w") as fh: # pylint: disable=invalid-name - for file in files: # pylint: disable=redefined-builtin - fh.write("copy,{0}\n".format(file)) + manifest = "" + for file_entry in files: + manifest += "copy,{0}\n".format(file_entry) + + return manifest @staticmethod def _create_file_of_specific_size(file_path, file_size, binary=False): @@ -183,18 +184,16 @@ def test_log_collector_parses_commands_in_manifest(self): file_to_collect = os.path.join(self.root_collect_dir, "waagent.log") folder_to_list = self.root_collect_dir - manifest_content = """ + manifest = """ echo,### Test header ### unknown command ll,{0} copy,{1} diskinfo,""".format(folder_to_list, file_to_collect) - manifest_file_path = os.path.join(self.tmp_dir, "manifest") - write_file(manifest_file_path, manifest_content) - - lc = LogCollector(manifest_file_path) # pylint: disable=invalid-name - archive = lc.collect_logs() + with patch("azurelinuxagent.common.logcollector.MANIFEST_NORMAL", manifest): + log_collector = LogCollector() + archive = log_collector.collect_logs_and_get_archive() with open(self.output_results_file_path, "r") as fh: # pylint: disable=invalid-name results = fh.readlines() @@ -212,11 +211,30 @@ def test_log_collector_parses_commands_in_manifest(self): no_files = self._get_number_of_files_in_archive() self.assertEqual(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files)) + def test_log_collector_uses_full_manifest_when_full_mode_enabled(self): + file_to_collect = os.path.join(self.root_collect_dir, "less_important_file") + + manifest = """ +echo,### Test header ### +copy,{0} +""".format(file_to_collect) + + with patch("azurelinuxagent.common.logcollector.MANIFEST_FULL", manifest): + log_collector = LogCollector(is_full_mode=True) + archive = log_collector.collect_logs_and_get_archive() + + self._assert_archive_created(archive) + self._assert_files_are_in_archive(expected_files=[file_to_collect]) + + no_files = self._get_number_of_files_in_archive() + self.assertEqual(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files)) + def test_log_collector_should_collect_all_files(self): # All files in the manifest should be collected, since none of them are over the individual file size limit, # and combined they do not cross the archive size threshold. - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - archive = lc.collect_logs() + + log_collector = LogCollector() + archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -236,8 +254,8 @@ def test_log_collector_should_collect_all_files(self): def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self): # Set the size limit so that some files are too large to collect in full. with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - archive = lc.collect_logs() + log_collector = LogCollector() + archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -269,8 +287,8 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - archive = lc.collect_logs() + log_collector = LogCollector() + archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -296,7 +314,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): - second_archive = lc.collect_logs() + second_archive = log_collector.collect_logs_and_get_archive() expected_files = [ os.path.join(self.root_collect_dir, "waagent.log"), @@ -319,8 +337,8 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_deleted(self): # Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen. - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - first_archive = lc.collect_logs() + log_collector = LogCollector() + first_archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(first_archive) # Everything should be in the archive @@ -349,7 +367,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d LARGE_FILE_SIZE) rm_files(os.path.join(self.root_collect_dir, "waagent.log.1")) - second_archive = lc.collect_logs() + second_archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(second_archive) expected_files = [ @@ -389,8 +407,8 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - archive = lc.collect_logs() + log_collector = LogCollector() + archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -410,8 +428,8 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - lc = LogCollector(self.manifest_path) # pylint: disable=invalid-name - second_archive = lc.collect_logs() + log_collector = LogCollector() + second_archive = log_collector.collect_logs_and_get_archive() expected_files = [ os.path.join(self.root_collect_dir, "waagent.log"), diff --git a/tests/data/test_waagent.conf b/tests/data/test_waagent.conf index 3e16c45a1a..7b2f4fa794 100644 --- a/tests/data/test_waagent.conf +++ b/tests/data/test_waagent.conf @@ -67,6 +67,12 @@ ResourceDisk.MountOptions=None # Enable verbose logging (y|n) Logs.Verbose=n +# Enable periodic log collection, default is n +Logs.Collect=n + +# How frequently to collect logs, default is each hour +Logs.CollectPeriod=3600 + # Is FIPS enabled OS.EnableFIPS=y#Another EOL comment that should be ignored diff --git a/tests/ga/test_collect_logs.py b/tests/ga/test_collect_logs.py new file mode 100644 index 0000000000..f396b3b24d --- /dev/null +++ b/tests/ga/test_collect_logs.py @@ -0,0 +1,188 @@ +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ +# +import contextlib +import os + +from azurelinuxagent.common import logger +from azurelinuxagent.common.cgroupapi import SYSTEMD_RUN_PATH +from azurelinuxagent.common.logger import Logger +from azurelinuxagent.common.protocol.util import ProtocolUtil +from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, CollectLogsHandler, is_log_collection_allowed +from azurelinuxagent.ga.periodic_operation import PeriodicOperation +from tests.protocol.mocks import mock_wire_protocol, HttpRequestPredicates, MockHttpResponse +from tests.protocol.mockwiredata import DATA_FILE +from tests.tools import Mock, MagicMock, patch, AgentTestCase, clear_singleton_instances, skip_if_predicate_true, \ + is_python_version_26 + + +@contextlib.contextmanager +def _create_collect_logs_handler(iterations=1, systemd_present=True): + """ + Creates an instance of CollectLogsHandler that + * Uses a mock_wire_protocol for network requests, + * Runs its main loop only the number of times given in the 'iterations' parameter, and + * Does not sleep at the end of each iteration + + The returned CollectLogsHandler is augmented with 2 methods: + * get_mock_wire_protocol() - returns the mock protocol + * run_and_wait() - invokes run() and wait() on the CollectLogsHandler + + """ + + original_file_exists = os.path.exists + + def mock_file_exists(filepath): + if filepath == SYSTEMD_RUN_PATH: + return systemd_present + return original_file_exists(filepath) + + with mock_wire_protocol(DATA_FILE) as protocol: + protocol_util = MagicMock() + protocol_util.get_protocol = Mock(return_value=protocol) + with patch("azurelinuxagent.ga.collect_logs.get_protocol_util", return_value=protocol_util): + with patch("azurelinuxagent.ga.collect_logs.CollectLogsHandler.stopped", side_effect=[False] * iterations + [True]): + with patch("time.sleep"): + with patch("azurelinuxagent.ga.collect_logs.os.path.exists", side_effect=mock_file_exists): + with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=True): + def run_and_wait(): + collect_logs_handler.run() + collect_logs_handler.join() + + collect_logs_handler = get_collect_logs_handler() + collect_logs_handler.get_mock_wire_protocol = lambda: protocol + collect_logs_handler.run_and_wait = run_and_wait + yield collect_logs_handler + + +@skip_if_predicate_true(is_python_version_26, "Disabled on Python 2.6") +class TestCollectLogs(AgentTestCase, HttpRequestPredicates): + def setUp(self): + AgentTestCase.setUp(self) + prefix = "UnitTest" + logger.DEFAULT_LOGGER = Logger(prefix=prefix) + + self.archive_path = os.path.join(self.tmp_dir, "logs.zip") + self.mock_archive_path = patch("azurelinuxagent.ga.collect_logs.COMPRESSED_ARCHIVE_PATH", self.archive_path) + self.mock_archive_path.start() + + # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not + # reuse a previous state + clear_singleton_instances(ProtocolUtil) + + def tearDown(self): + if os.path.exists(self.archive_path): + os.remove(self.archive_path) + self.mock_archive_path.stop() + AgentTestCase.tearDown(self) + + def _create_dummy_archive(self, size=1024): + with open(self.archive_path, "wb") as f: # pylint: disable=C0103 + f.truncate(size) + + def test_it_should_invoke_all_periodic_operations(self): + invoked_operations = [] + + with _create_collect_logs_handler() as collect_logs_handler: + def mock_run(self): + invoked_operations.append(self._name) + + with patch.object(PeriodicOperation, "run", side_effect=mock_run, spec=CollectLogsHandler.run): + collect_logs_handler.run_and_wait() + + expected_operations = ["collect_and_send_logs"] + + self.assertEqual(invoked_operations.sort(), expected_operations.sort(), + "The collect logs thread did not invoke the expected operations") + + def test_it_should_only_collect_logs_if_conditions_are_met(self): + # In order to collect logs, three conditions have to be met: + # 1) the flag must be set to true in the conf file + # 2) systemd must be managing services + # 3) python version 2.7+ which is automatically true for these tests since they are disabled on py2.6 + + # systemd not present, config flag false + with _create_collect_logs_handler(systemd_present=False): + with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=False): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # systemd present, config flag false + with _create_collect_logs_handler(): + with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=False): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # systemd not present, config flag true + with _create_collect_logs_handler(systemd_present=False): + with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=True): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # systemd present, config flag true + with _create_collect_logs_handler(): + self.assertEqual(True, is_log_collection_allowed(), "Log collection should have been enabled") + + def test_log_collection_is_invoked_with_resource_limits(self): + with _create_collect_logs_handler() as collect_logs_handler: + with patch("azurelinuxagent.ga.collect_logs.shellutil.run_command") as patch_run_command: + collect_logs_handler.run_and_wait() + + args, _ = patch_run_command.call_args + self.assertIn("systemd-run", args[0], "The log collector should have been invoked with systemd-run") + self.assertIn("--property=CPUAccounting=1", args[0], "The log collector should have been invoked with CPUAccounting turned on") + self.assertIn("--property=MemoryAccounting=1", args[0], "The log collector should have been invoked with MemoryAccounting turned on") + self.assertIn("--property=CPUQuota=5%", args[0], "The log collector should have been invoked with a CPU limit") + self.assertIn("--property=MemoryLimit=30M", args[0], "The log collector should have been invoked with a memory limit") + + def test_it_uploads_logs_when_collection_is_successful(self): + archive_size = 42 + + def mock_run_command(*_, **__): + return self._create_dummy_archive(size=archive_size) + + with _create_collect_logs_handler() as collect_logs_handler: + with patch("azurelinuxagent.ga.collect_logs.shellutil.run_command", side_effect=mock_run_command): + def http_put_handler(url, content, **__): + if self.is_host_plugin_put_logs_request(url): + http_put_handler.counter += 1 + http_put_handler.archive = content + return MockHttpResponse(status=200) + return None + + http_put_handler.counter = 0 + http_put_handler.archive = b"" + protocol = collect_logs_handler.get_mock_wire_protocol() + protocol.set_http_handlers(http_put_handler=http_put_handler) + + collect_logs_handler.run_and_wait() + self.assertEqual(http_put_handler.counter, 1, "The PUT API to upload logs should have been called once") + self.assertTrue(os.path.exists(self.archive_path), "The archive file should exist on disk") + self.assertEqual(archive_size, len(http_put_handler.archive), "The archive file should have {0} bytes, not {1}".format(archive_size, len(http_put_handler.archive))) + + def test_it_does_not_upload_logs_when_collection_is_unsuccessful(self): + with _create_collect_logs_handler() as collect_logs_handler: + with patch("azurelinuxagent.ga.collect_logs.shellutil.run_command", side_effect=Exception("test exception")): + def http_put_handler(url, _, **__): + if self.is_host_plugin_put_logs_request(url): + http_put_handler.counter += 1 + return MockHttpResponse(status=200) + return None + + http_put_handler.counter = 0 + protocol = collect_logs_handler.get_mock_wire_protocol() + protocol.set_http_handlers(http_put_handler=http_put_handler) + + collect_logs_handler.run_and_wait() + self.assertFalse(os.path.exists(self.archive_path), "The archive file should not exist on disk") + self.assertEqual(http_put_handler.counter, 0, "The PUT API to upload logs shouldn't have been called") diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 60629e99ca..0fbdbe95ba 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -32,12 +32,13 @@ from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.common.utils import fileutil, restutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.utils.shellutil import get_python_cmd from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION from azurelinuxagent.ga.exthandlers import ExtHandlerInstance, HandlerEnvironment from azurelinuxagent.ga.update import GuestAgent, GuestAgentError, MAX_FAILURE, AGENT_MANIFEST_FILE, \ get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ - CHILD_LAUNCH_RESTART_MAX, get_python_cmd, CHILD_HEALTH_INTERVAL, UpdateHandler + CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, UpdateHandler from tests.protocol.mocks import mock_wire_protocol from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT from tests.tools import AgentTestCase, call, data_dir, DEFAULT, patch, load_bin_data, load_data, Mock, MagicMock, \ @@ -1256,25 +1257,28 @@ def iterator(*args, **kwargs): # pylint: disable=useless-return,unused-argument with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler') as mock_ra_handler: with patch('azurelinuxagent.ga.update.get_monitor_handler') as mock_monitor: with patch('azurelinuxagent.ga.update.get_env_handler') as mock_env: - with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('time.sleep', side_effect=iterator) as mock_sleep: # pylint: disable=redefined-outer-name - with patch('sys.exit') as mock_exit: - if isinstance(os.getppid, MagicMock): - self.update_handler.run() - else: - with patch('os.getppid', return_value=42): - self.update_handler.run() - - self.assertEqual(1, mock_handler.call_count) - self.assertEqual(mock_handler.return_value.method_calls, calls) - self.assertEqual(1, mock_ra_handler.call_count) - self.assertEqual(mock_ra_handler.return_value.method_calls, calls) - self.assertEqual(invocations, mock_sleep.call_count) - if invocations > 0: - self.assertEqual(sleep_interval, mock_sleep.call_args[0]) - self.assertEqual(1, mock_monitor.call_count) - self.assertEqual(1, mock_env.call_count) - self.assertEqual(1, mock_exit.call_count) + with patch('azurelinuxagent.ga.update.get_collect_logs_handler') as mock_collect_logs: + with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): + with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): + with patch('time.sleep', side_effect=iterator) as mock_sleep: # pylint: disable=redefined-outer-name + with patch('sys.exit') as mock_exit: + if isinstance(os.getppid, MagicMock): + self.update_handler.run() + else: + with patch('os.getppid', return_value=42): + self.update_handler.run() + + self.assertEqual(1, mock_handler.call_count) + self.assertEqual(mock_handler.return_value.method_calls, calls) + self.assertEqual(1, mock_ra_handler.call_count) + self.assertEqual(mock_ra_handler.return_value.method_calls, calls) + self.assertEqual(invocations, mock_sleep.call_count) + if invocations > 0: + self.assertEqual(sleep_interval, mock_sleep.call_args[0]) + self.assertEqual(1, mock_monitor.call_count) + self.assertEqual(1, mock_env.call_count) + self.assertEqual(1, mock_collect_logs.call_count) + self.assertEqual(1, mock_exit.call_count) def test_run(self): self._test_run() @@ -1591,14 +1595,14 @@ def test_it_should_delete_extension_events_directory_if_extension_telemetry_pipe self.assertFalse(os.path.exists(ext_dir), "Extension directory {0} still exists!".format(ext_dir)) # pylint: disable=bad-indentation def test_it_should_retain_extension_events_directories_if_extension_telemetry_pipeline_enabled(self): - - # Rerun update handler again with extension telemetry pipeline enabled to ensure we dont delete events directories - with self._setup_test_for_ext_event_dirs_retention() as (update_handler, expected_events_dirs): # pylint: disable=bad-indentation - update_handler.run(debug=True) # pylint: disable=bad-indentation - for ext_dir in expected_events_dirs: # pylint: disable=bad-indentation - self.assertTrue(os.path.exists(ext_dir), "Extension directory {0} should exist!".format(ext_dir)) # pylint: disable=bad-indentation + # Rerun update handler again with extension telemetry pipeline enabled to ensure we dont delete events directories + with self._setup_test_for_ext_event_dirs_retention() as (update_handler, expected_events_dirs): + update_handler.run(debug=True) + for ext_dir in expected_events_dirs: + self.assertTrue(os.path.exists(ext_dir), "Extension directory {0} should exist!".format(ext_dir)) +@patch('azurelinuxagent.ga.update.get_collect_logs_handler') @patch('azurelinuxagent.ga.update.get_monitor_handler') @patch('azurelinuxagent.ga.update.get_env_handler') class MonitorThreadTest(AgentTestCase): @@ -1626,9 +1630,10 @@ def iterator(*args, **kwargs): # pylint: disable=useless-return,unused-argument with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff - with patch('time.sleep', side_effect=iterator): - with patch('sys.exit'): - self.update_handler.run() + with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): + with patch('time.sleep', side_effect=iterator): + with patch('sys.exit'): + self.update_handler.run() def _setup_mock_thread_and_start_test_run(self, mock_thread, is_alive=True, invocations=0): self.assertTrue(self.update_handler.running) @@ -1642,7 +1647,7 @@ def _setup_mock_thread_and_start_test_run(self, mock_thread, is_alive=True, invo self._test_run(invocations=invocations) return thread - def test_start_threads(self, mock_env, mock_monitor): + def test_start_threads(self, mock_env, mock_monitor, mock_collect_logs): self.assertTrue(self.update_handler.running) mock_monitor_thread = MagicMock() @@ -1653,48 +1658,54 @@ def test_start_threads(self, mock_env, mock_monitor): mock_env_thread.run = MagicMock() mock_env.return_value = mock_env_thread + mock_collect_logs_thread = MagicMock() + mock_collect_logs_thread.run = MagicMock() + mock_collect_logs.return_value = mock_collect_logs_thread + self._test_run(invocations=0) self.assertEqual(1, mock_monitor.call_count) self.assertEqual(1, mock_monitor_thread.run.call_count) self.assertEqual(1, mock_env.call_count) self.assertEqual(1, mock_env_thread.run.call_count) + self.assertEqual(1, mock_collect_logs.call_count) + self.assertEqual(1, mock_collect_logs_thread.run.call_count) - def test_check_if_monitor_thread_is_alive(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_check_if_monitor_thread_is_alive(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_monitor_thread = self._setup_mock_thread_and_start_test_run(mock_monitor, is_alive=True, invocations=0) self.assertEqual(1, mock_monitor.call_count) self.assertEqual(1, mock_monitor_thread.run.call_count) self.assertEqual(1, mock_monitor_thread.is_alive.call_count) self.assertEqual(0, mock_monitor_thread.start.call_count) - def test_check_if_env_thread_is_alive(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_check_if_env_thread_is_alive(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_env_thread = self._setup_mock_thread_and_start_test_run(mock_env, is_alive=True, invocations=1) self.assertEqual(1, mock_env.call_count) self.assertEqual(1, mock_env_thread.run.call_count) self.assertEqual(1, mock_env_thread.is_alive.call_count) self.assertEqual(0, mock_env_thread.start.call_count) - def test_restart_monitor_thread_if_not_alive(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_restart_monitor_thread_if_not_alive(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_monitor_thread = self._setup_mock_thread_and_start_test_run(mock_monitor, is_alive=False, invocations=1) self.assertEqual(1, mock_monitor.call_count) self.assertEqual(1, mock_monitor_thread.run.call_count) self.assertEqual(1, mock_monitor_thread.is_alive.call_count) self.assertEqual(1, mock_monitor_thread.start.call_count) - def test_restart_env_thread_if_not_alive(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_restart_env_thread_if_not_alive(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_env_thread = self._setup_mock_thread_and_start_test_run(mock_env, is_alive=False, invocations=1) self.assertEqual(1, mock_env.call_count) self.assertEqual(1, mock_env_thread.run.call_count) self.assertEqual(1, mock_env_thread.is_alive.call_count) self.assertEqual(1, mock_env_thread.start.call_count) - def test_restart_monitor_thread(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_restart_monitor_thread(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_monitor_thread = self._setup_mock_thread_and_start_test_run(mock_monitor, is_alive=False, invocations=0) self.assertEqual(True, mock_monitor.called) self.assertEqual(True, mock_monitor_thread.run.called) self.assertEqual(True, mock_monitor_thread.is_alive.called) self.assertEqual(True, mock_monitor_thread.start.called) - def test_restart_env_thread(self, mock_env, mock_monitor): # pylint: disable=unused-argument + def test_restart_env_thread(self, mock_env, mock_monitor, mock_collect_logs): # pylint: disable=unused-argument mock_env_thread = self._setup_mock_thread_and_start_test_run(mock_env, is_alive=False, invocations=0) self.assertEqual(True, mock_env.called) self.assertEqual(True, mock_env_thread.run.called) diff --git a/tests/test_agent.py b/tests/test_agent.py index 904a0f8fb7..ad7f6a6317 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -38,6 +38,8 @@ HttpProxy.Host = None HttpProxy.Port = None Lib.Dir = /var/lib/waagent +Logs.Collect = False +Logs.CollectPeriod = 3600 Logs.Console = True Logs.Verbose = False OS.AllowHTTP = False @@ -82,13 +84,13 @@ class TestAgent(AgentTestCase): def test_accepts_configuration_path(self): conf_path = os.path.join(data_dir, "test_waagent.conf") - c, f, v, d, cfp = parse_args(["-configuration-path:" + conf_path]) # pylint: disable=unused-variable,invalid-name + c, f, v, d, cfp, lcm = parse_args(["-configuration-path:" + conf_path]) # pylint: disable=unused-variable,invalid-name self.assertEqual(cfp, conf_path) @patch("os.path.exists", return_value=True) def test_checks_configuration_path(self, mock_exists): conf_path = "/foo/bar-baz/something.conf" - c, f, v, d, cfp = parse_args(["-configuration-path:"+conf_path]) # pylint: disable=unused-variable,invalid-name + c, f, v, d, cfp, lcm = parse_args(["-configuration-path:"+conf_path]) # pylint: disable=unused-variable,invalid-name self.assertEqual(cfp, conf_path) self.assertEqual(mock_exists.call_count, 1) @@ -97,18 +99,16 @@ def test_checks_configuration_path(self, mock_exists): @patch("sys.exit", side_effect=Exception) def test_rejects_missing_configuration_path(self, mock_exit, mock_exists, mock_stderr): # pylint: disable=unused-argument try: - c, f, v, d, cfp = parse_args(["-configuration-path:/foo/bar.conf"]) # pylint: disable=unused-variable,invalid-name - self.assertTrue(False) # pylint: disable=redundant-unittest-assert + c, f, v, d, cfp, lcm = parse_args(["-configuration-path:/foo/bar.conf"]) # pylint: disable=unused-variable,invalid-name except Exception: self.assertEqual(mock_exit.call_count, 1) def test_configuration_path_defaults_to_none(self): - c, f, v, d, cfp = parse_args([]) # pylint: disable=unused-variable,invalid-name + c, f, v, d, cfp, lcm = parse_args([]) # pylint: disable=unused-variable,invalid-name self.assertEqual(cfp, None) def test_agent_accepts_configuration_path(self): - Agent(False, - conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) self.assertTrue(conf.get_fips_enabled()) @patch("azurelinuxagent.common.conf.load_conf_from_file") @@ -165,7 +165,7 @@ def test_agent_logs_if_extension_log_directory_is_a_file(self, mock_dir, mock_lo self.assertTrue(os.path.isfile(ext_log_dir)) self.assertFalse(os.path.isdir(ext_log_dir)) agent = Agent(False, # pylint: disable=unused-variable - conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + conf_file_path=os.path.join(data_dir, "test_waagent.conf")) self.assertTrue(os.path.isfile(ext_log_dir)) self.assertFalse(os.path.isdir(ext_log_dir)) self.assertEqual(1, mock_log.call_count) @@ -179,6 +179,38 @@ def test_agent_get_configuration(self): actual_configuration.append("{0} = {1}".format(k, configuration[k])) self.assertListEqual(EXPECTED_CONFIGURATION, actual_configuration) + def test_checks_log_collector_mode(self): + # Specify full mode + c, f, v, d, cfp, lcm = parse_args(["-collect-logs", "-full"]) # pylint: disable=unused-variable,invalid-name + self.assertEqual(c, "collect-logs") + self.assertEqual(lcm, True) + + # Defaults to None if mode not specified + c, f, v, d, cfp, lcm = parse_args(["-collect-logs"]) # pylint: disable=unused-variable,invalid-name + self.assertEqual(c, "collect-logs") + self.assertEqual(lcm, False) + + @patch("sys.stderr") + @patch("sys.exit", side_effect=Exception) + def test_rejects_invalid_log_collector_mode(self, mock_exit, mock_stderr): # pylint: disable=unused-argument + try: + c, f, v, d, cfp, lcm = parse_args(["-collect-logs", "-notvalid"]) # pylint: disable=unused-variable,invalid-name + except Exception: + self.assertEqual(mock_exit.call_count, 1) + + @patch("os.path.exists", return_value=True) + @patch("azurelinuxagent.agent.LogCollector") + def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): # pylint: disable=unused-argument + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + + agent.collect_logs(is_full_mode=True) + full_mode = mock_log_collector.call_args_list[0][0][0] + self.assertTrue(full_mode) + + agent.collect_logs(is_full_mode=False) + full_mode = mock_log_collector.call_args_list[1][0][0] + self.assertFalse(full_mode) + def test_agent_usage_message(self): message = usage() @@ -194,6 +226,7 @@ def test_agent_usage_message(self): self.assertTrue("-start" in message) self.assertTrue("-run-exthandlers" in message) self.assertTrue("-show-configuration" in message) + self.assertTrue("-collect-logs" in message) # sanity check self.assertFalse("-not-a-valid-option" in message) diff --git a/tests/tools.py b/tests/tools.py index aaad50de4f..a0a59820b8 100644 --- a/tests/tools.py +++ b/tests/tools.py @@ -34,6 +34,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.event as event import azurelinuxagent.common.logger as logger +from azurelinuxagent.common.cgroupapi import SYSTEMD_RUN_PATH from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator # pylint: disable=unused-import from azurelinuxagent.common.osutil.factory import _get_osutil # pylint: disable=unused-import from azurelinuxagent.common.osutil.ubuntu import Ubuntu14OSUtil, Ubuntu16OSUtil # pylint: disable=unused-import @@ -120,7 +121,7 @@ def running_under_travis(): def is_systemd_present(): - return os.path.exists("/run/systemd/system") + return os.path.exists(SYSTEMD_RUN_PATH) def i_am_root():