def get_agent_log_file():
    """
    Returns the path of the agent's main log file.
    """
    agent_log_path = "/var/log/waagent.log"
    return agent_log_path
class LogCollector(object):
    """
    Collects the files listed in a manifest into a size-capped zip archive.

    The manifest is a text file with one "<command>,<value>" entry per line (see _process_manifest_file).
    Files matched by the module-level must-collect patterns are added first. A file larger than the per-file
    size limit is replaced by a truncated copy, and collection stops once the uncompressed size limit of the
    archive would be exceeded. Progress and errors are written to a results file through the module's own
    logger; the results file is added to the archive as well.
    """

    _TRUNCATED_FILE_PREFIX = "truncated_"

    def __init__(self, manifest_file_path):
        """
        :param manifest_file_path: Path of the manifest file describing what to collect
        """
        self._manifest_file_path = manifest_file_path
        # The must-collect patterns are expanded once, at construction time; the position of a file in this
        # list is its priority (see _get_file_priority).
        self._must_collect_files = self._expand_must_collect_files()
        self._create_base_dirs()
        self._set_logger()

    @staticmethod
    def _mkdir(dirname):
        # Create the directory (and any missing parents) unless it already exists.
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    @staticmethod
    def _reset_file(filepath):
        # Opening in "wb" mode truncates the file to zero length (creating it if needed).
        with open(filepath, "wb") as out_file:
            out_file.write("".encode("utf-8"))

    @staticmethod
    def _create_base_dirs():
        LogCollector._mkdir(_LOG_COLLECTOR_DIR)
        LogCollector._mkdir(_TRUNCATED_FILES_DIR)

    @staticmethod
    def _set_logger():
        """
        Points the module logger at the results file, using UTC timestamps.
        """
        # The logger is shared by every LogCollector instance. Drop (and close) file handlers installed by
        # earlier instances, otherwise each new instance would add one more handler and every message would
        # be written to the results file multiple times while the old file handles leak.
        for handler in list(_LOGGER.handlers):
            if isinstance(handler, logging.FileHandler):
                _LOGGER.removeHandler(handler)
                handler.close()

        _f_handler = logging.FileHandler(_OUTPUT_RESULTS_FILE_PATH, encoding="utf-8")
        _f_format = logging.Formatter(fmt='%(asctime)s %(levelname)s %(message)s',
                                      datefmt=u'%Y-%m-%dT%H:%M:%SZ')
        _f_format.converter = time.gmtime
        _f_handler.setFormatter(_f_format)
        _LOGGER.addHandler(_f_handler)
        _LOGGER.setLevel(logging.INFO)

    @staticmethod
    def _run_shell_command(command, stdout=subprocess.PIPE, log_output=False):
        """
        Runs a shell command in a subprocess, logs any errors to the log file, enables changing the stdout stream,
        and logs the output of the command to the log file if indicated by the `log_output` parameter.
        :param command: Shell command to run
        :param stdout: Where to write the output of the command
        :param log_output: If true, log the command output to the log file
        """
        def format_command(cmd):
            return " ".join(cmd) if isinstance(cmd, list) else cmd

        def _encode_command_output(output):
            # communicate() returns None for a stream that was not captured through a pipe (e.g. when the
            # caller redirected stdout to a file); there is nothing to decode in that case.
            if output is None:
                return ""
            return ustr(output, encoding="utf-8", errors="backslashreplace")

        try:
            process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=stdout, stderr=subprocess.PIPE,
                                       shell=False)
            process_stdout, process_stderr = process.communicate()
            return_code = process.returncode
        except Exception as e:
            error_msg = u"Command [{0}] raised unexpected exception: [{1}]".format(format_command(command), ustr(e))
            _LOGGER.error(error_msg)
            return

        if return_code != 0:
            encoded_stdout = _encode_command_output(process_stdout)
            encoded_stderr = _encode_command_output(process_stderr)
            error_msg = "Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]".format(
                format_command(command), return_code, encoded_stdout, encoded_stderr)
            _LOGGER.error(error_msg)
            return

        if log_output:
            msg = "Output of command [{0}]:\n{1}".format(format_command(command),
                                                         _encode_command_output(process_stdout))
            _LOGGER.info(msg)

    @staticmethod
    def _expand_must_collect_files():
        # Expand the glob patterns from the _MUST_COLLECT_FILES list to existing file paths on disk. The order
        # of the patterns is preserved; matches of a single pattern are sorted.
        manifest = []
        for path in _MUST_COLLECT_FILES:
            manifest.extend(sorted(glob.glob(path)))

        return manifest

    def _read_manifest_file(self):
        # Returns the manifest as a list of lines (decoded as UTF-8, without line terminators).
        with open(self._manifest_file_path, "rb") as in_file:
            data = in_file.read()

        return ustr(data, encoding="utf-8").splitlines()

    @staticmethod
    def _process_ll_command(folder):
        LogCollector._run_shell_command(["ls", "-alF", folder], log_output=True)

    @staticmethod
    def _process_echo_command(message):
        _LOGGER.info(message)

    @staticmethod
    def _process_copy_command(path):
        # Expand the (possibly wildcarded) path, log every match and return the matches.
        file_paths = glob.glob(path)
        for file_path in file_paths:
            _LOGGER.info(file_path)
        return file_paths

    @staticmethod
    def _convert_file_name_to_archive_name(file_name):
        # File name is the name of the file on disk, whereas archive name is the name of that same file in the archive.
        # For non-truncated files: /var/log/waagent.log on disk becomes var/log/waagent.log in archive
        # (leading separator is removed).
        # For truncated files: the part of the path below the truncated files directory is prefixed with
        # "truncated_" and any remaining separators are replaced by "_", e.g. <truncated dir>/_var_log_syslog.1
        # on disk becomes truncated__var_log_syslog.1 in archive.
        if file_name.startswith(_TRUNCATED_FILES_DIR):
            original_file_path = file_name[len(_TRUNCATED_FILES_DIR):].lstrip(os.path.sep)
            archive_file_name = LogCollector._TRUNCATED_FILE_PREFIX + original_file_path.replace(os.path.sep, "_")
            return archive_file_name
        else:
            return file_name.lstrip(os.path.sep)

    @staticmethod
    def _remove_uncollected_truncated_files(files_to_collect):
        # After log collection is completed, see if there are any old truncated files which were not collected
        # and remove them since they probably won't be collected in the future. This is possible when the
        # original file got deleted, so there is no need to keep its truncated version anymore.
        collected = set(files_to_collect)

        # This runs from the `finally` clause of collect_logs; it is a best-effort cleanup, so a failure here
        # is logged instead of being raised (an exception would replace the result of collect_logs).
        try:
            for file_name in os.listdir(_TRUNCATED_FILES_DIR):
                full_path = os.path.join(_TRUNCATED_FILES_DIR, file_name)
                if full_path not in collected and os.path.isfile(full_path):
                    os.remove(full_path)
        except OSError as e:
            _LOGGER.warning("Failed to clean up truncated files: {0}".format(ustr(e)))

    @staticmethod
    def _expand_parameters(manifest_data):
        # Replace the $LIB_DIR, $LOG_DIR and $AGENT_LOG placeholders in every manifest line.
        _LOGGER.info("Using {0} as $LIB_DIR".format(_AGENT_LIB_DIR))
        _LOGGER.info("Using {0} as $LOG_DIR".format(_EXTENSION_LOG_DIR))
        _LOGGER.info("Using {0} as $AGENT_LOG".format(_AGENT_LOG))

        new_manifest = []
        for line in manifest_data:
            new_line = line.replace("$LIB_DIR", _AGENT_LIB_DIR)
            new_line = new_line.replace("$LOG_DIR", _EXTENSION_LOG_DIR)
            new_line = new_line.replace("$AGENT_LOG", _AGENT_LOG)
            new_manifest.append(new_line)

        return new_manifest

    def _process_manifest_file(self):
        """
        Executes the manifest entries and returns the set of existing files selected by its copy commands.
        """
        files_to_collect = set()
        data = self._read_manifest_file()
        manifest_entries = LogCollector._expand_parameters(data)

        for entry in manifest_entries:
            # The entry can be one of the four flavours:
            # 1) ll,/etc/udev/rules.d -- list out contents of the folder and store to results file
            # 2) echo,### Gathering Configuration Files ### -- print message to results file
            # 3) copy,/var/lib/waagent/provisioned -- add file to list of files to be collected
            # 4) diskinfo, -- ignore commands from manifest other than ll, echo, and copy for now

            # Split on the first comma only: the value (an echo message or a path) may itself contain commas.
            contents = entry.split(",", 1)
            if len(contents) != 2:
                # If it's not a comment or an empty line, it's a malformed entry
                if not entry.startswith("#") and len(entry.strip()) > 0:
                    _LOGGER.error("Couldn't parse \"{0}\"".format(entry))
                continue

            command, value = contents

            if command == "ll":
                self._process_ll_command(value)
            elif command == "echo":
                self._process_echo_command(value)
            elif command == "copy":
                files_to_collect.update(self._process_copy_command(value))

        return files_to_collect

    @staticmethod
    def _truncate_large_file(file_path):
        """
        Creates (or reuses) a copy of the file that holds only its last _FILE_SIZE_LIMIT bytes.
        :param file_path: Path of the file that is over the size limit
        :return: Path of the truncated copy, or None if the file is discarded or the truncation failed
        """
        # Truncate large file to size limit (keep freshest entries of the file), copy file to a temporary location
        # and update file path in list of files to collect
        try:
            # Binary files cannot be truncated, don't include large binary files
            ext = os.path.splitext(file_path)[1]
            if ext in [".gz", ".zip", ".xz"]:
                _LOGGER.warning("Discarding large binary file {0}".format(file_path))
                return None

            truncated_file_path = os.path.join(_TRUNCATED_FILES_DIR, file_path.replace(os.path.sep, "_"))
            if os.path.exists(truncated_file_path):
                original_file_mtime = os.path.getmtime(file_path)
                truncated_file_mtime = os.path.getmtime(truncated_file_path)

                # If the original file hasn't been updated since the truncated file, it means there were no changes
                # and we don't need to truncate it again.
                if original_file_mtime < truncated_file_mtime:
                    return truncated_file_path

            # Get the last N bytes of the file
            with open(truncated_file_path, "w+") as fh:
                LogCollector._run_shell_command(["tail", "-c", str(_FILE_SIZE_LIMIT), file_path], stdout=fh)

            return truncated_file_path
        except (OSError, IOError) as e:
            # On Python 2, open() reports failures as IOError, which is not a subclass of OSError there.
            _LOGGER.error("Failed to truncate large file: {0}".format(ustr(e)))
            return None

    def _get_file_priority(self, file):
        # The sooner the file appears in the must collect list, the bigger its priority.
        # Priority is higher the lower the number (0 is highest priority).
        try:
            return self._must_collect_files.index(file)
        except ValueError:
            # Doesn't matter, file is not in the must collect list, assign a low priority
            return 999999999

    def _get_priority_files_list(self, file_list):
        # Given a list of files to collect, determine if they show up in the must collect list and build a priority
        # queue. The queue will determine the order in which the files are collected, highest priority files first.
        priority_file_queue = []
        for file_path in file_list:
            priority = self._get_file_priority(file_path)
            heappush(priority_file_queue, (priority, file_path))

        return priority_file_queue

    def _get_final_list_for_archive(self, priority_file_queue):
        """
        Picks, in priority order, the files that fit in the archive.
        :param priority_file_queue: Heap of (priority, file_path) tuples; it is consumed by this method
        :return: List of paths to add to the archive (truncated copies replace files over the file size limit)
        """
        # Given a priority queue of files to collect, add one by one while the archive size is under the size limit.
        # If a single file is over the file size limit, truncate it before adding it to the archive.
        _LOGGER.info("### Preparing list of files to add to archive ###")
        total_uncompressed_size = 0
        final_files_to_collect = []

        while priority_file_queue:
            file_path = heappop(priority_file_queue)[1]  # (priority, file_path)

            try:
                actual_file_size = os.path.getsize(file_path)
            except OSError as e:
                # The file can disappear (e.g. log rotation) between expanding the manifest and this point;
                # skip it instead of failing the whole collection.
                _LOGGER.warning("Skipping file {0}, cannot read its size: {1}".format(file_path, ustr(e)))
                continue

            file_size = min(actual_file_size, _FILE_SIZE_LIMIT)

            if total_uncompressed_size + file_size > _UNCOMPRESSED_ARCHIVE_SIZE_LIMIT:
                _LOGGER.warning("Archive too big, done with adding files.")
                break

            if actual_file_size <= _FILE_SIZE_LIMIT:
                final_files_to_collect.append(file_path)
                _LOGGER.info("Adding file {0}, size {1}b".format(file_path, file_size))
            else:
                truncated_file_path = self._truncate_large_file(file_path)
                if not truncated_file_path:
                    # Nothing is added for this file, so it must not count against the archive size limit.
                    continue
                _LOGGER.info("Adding truncated file {0}, size {1}b".format(truncated_file_path, file_size))
                final_files_to_collect.append(truncated_file_path)

            total_uncompressed_size += file_size

        _LOGGER.info("Uncompressed archive size is {0}b".format(total_uncompressed_size))

        return final_files_to_collect

    def _create_list_of_files_to_collect(self):
        # The final list of files to be collected by zip is created in three steps:
        # 1) Parse given manifest file, expanding wildcards and keeping a list of files that exist on disk
        # 2) Assign those files a priority depending on whether they are in the must collect file list.
        # 3) In priority order, add files to the final list to be collected, for as long as the size of the
        #    archive stays under the size limit.
        parsed_file_paths = self._process_manifest_file()
        prioritized_file_paths = self._get_priority_files_list(parsed_file_paths)
        files_to_collect = self._get_final_list_for_archive(prioritized_file_paths)
        return files_to_collect

    def collect_logs(self):
        """
        Public method that collects the necessary log files into a compressed zip archive. The archive is
        created from scratch each time this method is invoked.
        :return: Returns True if the log collection succeeded, False otherwise
        """
        files_to_collect = []

        try:
            # Clear previous run's output and create base directories if they don't exist already.
            self._create_base_dirs()
            LogCollector._reset_file(_OUTPUT_RESULTS_FILE_PATH)

            files_to_collect = self._create_list_of_files_to_collect()
            _LOGGER.info("### Creating compressed archive ###")

            with zipfile.ZipFile(_COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED) as compressed_archive:
                for file_path in files_to_collect:
                    archive_file_name = LogCollector._convert_file_name_to_archive_name(file_path)
                    compressed_archive.write(file_path, arcname=archive_file_name)

                # NOTE: the size is read while the archive is still open and before the results file is added,
                # so the logged value is a lower bound of the final archive size. It is logged here so that
                # the message is part of the results file stored in the archive.
                compressed_archive_size = os.path.getsize(_COMPRESSED_ARCHIVE_PATH)
                _LOGGER.info("Successfully compressed files. "
                             "Compressed archive size is {0}b".format(compressed_archive_size))
                compressed_archive.write(_OUTPUT_RESULTS_FILE_PATH, arcname="results.txt")

            return True
        except Exception as e:
            msg = "Failed to collect logs: {0}".format(ustr(e))
            _LOGGER.error(msg)

            return False
        finally:
            self._remove_uncollected_truncated_files(files_to_collect)
+copy,/var/lib/dhcp/dhclient.eth0.leases +copy,/var/lib/dhclient/dhclient-eth0.leases +copy,/var/lib/wicked/lease-eth0-dhcp-ipv4.xml +echo, + +echo,### Gathering Log Files ### +copy,$AGENT_LOG* +copy,/var/log/syslog* +copy,/var/log/rsyslog* +copy,/var/log/messages* +copy,/var/log/kern* +copy,/var/log/dmesg* +copy,/var/log/dpkg* +copy,/var/log/yum* +copy,/var/log/cloud-init* +copy,/var/log/boot* +copy,/var/log/auth* +copy,/var/log/secure* +copy,$LOG_DIR/*/* +copy,$LOG_DIR/*/*/* +copy,$LOG_DIR/custom-script/handler.log +copy,$LOG_DIR/run-command/handler.log +echo, + +echo,### Gathering Extension Files ### +copy,$LIB_DIR/ExtensionsConfig.*.xml +copy,$LIB_DIR/*/status/*.status +copy,$LIB_DIR/*/config/*.settings +copy,$LIB_DIR/*/config/HandlerState +copy,$LIB_DIR/*/config/HandlerStatus +copy,$LIB_DIR/GoalState.*.xml +copy,$LIB_DIR/HostingEnvironmentConfig.xml +copy,$LIB_DIR/*.manifest.xml +copy,$LIB_DIR/SharedConfig.xml +copy,$LIB_DIR/ManagedIdentity-*.json +copy,$LIB_DIR/error.json +copy,$LIB_DIR/Incarnation +copy,$LIB_DIR/waagent_status.json +copy,$LIB_DIR/history/*.zip +echo, + +echo,### Gathering Disk Info ### +diskinfo, \ No newline at end of file diff --git a/config/logcollector_manifest_normal b/config/logcollector_manifest_normal new file mode 100644 index 0000000000..c24eff6438 --- /dev/null +++ b/config/logcollector_manifest_normal @@ -0,0 +1,33 @@ +echo,### Probing Directories ### +ll,/var/log +ll,$LIB_DIR + +echo,### Gathering Configuration Files ### +copy,/etc/*-release +copy,/etc/HOSTNAME +copy,/etc/hostname +copy,/etc/waagent.conf +echo, + +echo,### Gathering Log Files ### +copy,$AGENT_LOG* +copy,/var/log/dmesg* +copy,/var/log/syslog* +copy,/var/log/auth* +copy,$LOG_DIR/*/* +copy,$LOG_DIR/*/*/* +copy,$LOG_DIR/custom-script/handler.log +echo, + +echo,### Gathering Extension Files ### +copy,$LIB_DIR/*.xml +copy,$LIB_DIR/waagent_status.json +copy,$LIB_DIR/*/status/*.status +copy,$LIB_DIR/*/config/*.settings +copy,$LIB_DIR/*/config/HandlerState 
+copy,$LIB_DIR/*/config/HandlerStatus +copy,$LIB_DIR/*.agentsManifest +copy,$LIB_DIR/error.json +copy,$LIB_DIR/Incarnation +copy,$LIB_DIR/history/*.zip +echo, \ No newline at end of file diff --git a/tests/common/test_logcollector.py b/tests/common/test_logcollector.py new file mode 100644 index 0000000000..1c0f10476e --- /dev/null +++ b/tests/common/test_logcollector.py @@ -0,0 +1,432 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ +# + +import os +import shutil +import zipfile +import tempfile + +from azurelinuxagent.common.utils.fileutil import rm_dirs, mkdir, rm_files, write_file +from azurelinuxagent.common.logcollector import LogCollector +from tests.tools import AgentTestCase, is_python_version_26, patch, skip_if_predicate_true + + +SMALL_FILE_SIZE = 1 * 1024 * 1024 # 1 MB +LARGE_FILE_SIZE = 5 * 1024 * 1024 # 5 MB + + +@skip_if_predicate_true(is_python_version_26, "Disabled on Python 2.6") +class TestLogCollector(AgentTestCase): + + @classmethod + def setUpClass(cls): + AgentTestCase.setUpClass() + + prefix = "{0}_".format(cls.__class__.__name__) + cls.tmp_dir = tempfile.mkdtemp(prefix=prefix) + + cls._mock_constants() + + @classmethod + def _mock_constants(cls): + cls.manifest_path = os.path.join(cls.tmp_dir, "logcollector_manifest") + + cls.log_collector_dir = os.path.join(cls.tmp_dir, "logcollector") + cls.mock_log_collector_dir = patch("azurelinuxagent.common.logcollector._LOG_COLLECTOR_DIR", + cls.log_collector_dir) + cls.mock_log_collector_dir.start() + + cls.truncated_files_dir = os.path.join(cls.tmp_dir, "truncated") + cls.mock_truncated_files_dir = patch("azurelinuxagent.common.logcollector._TRUNCATED_FILES_DIR", + cls.truncated_files_dir) + cls.mock_truncated_files_dir.start() + + cls.output_results_file_path = os.path.join(cls.log_collector_dir, "results.txt") + cls.mock_output_results_file_path = patch("azurelinuxagent.common.logcollector._OUTPUT_RESULTS_FILE_PATH", + cls.output_results_file_path) + cls.mock_output_results_file_path.start() + + cls.compressed_archive_path = os.path.join(cls.log_collector_dir, "logs.zip") + cls.mock_compressed_archive_path = patch("azurelinuxagent.common.logcollector._COMPRESSED_ARCHIVE_PATH", + cls.compressed_archive_path) + cls.mock_compressed_archive_path.start() + + @classmethod + def tearDownClass(cls): + cls.mock_log_collector_dir.stop() + cls.mock_truncated_files_dir.stop() + 
cls.mock_output_results_file_path.stop() + cls.mock_compressed_archive_path.stop() + + shutil.rmtree(cls.tmp_dir) + + AgentTestCase.tearDownClass() + + def setUp(self): + AgentTestCase.setUp(self) + self._build_test_data() + self._build_manifest(self.manifest_path) + + def tearDown(self): + rm_dirs(self.root_collect_dir) + rm_files(self.compressed_archive_path) + AgentTestCase.tearDown(self) + + @classmethod + def _build_test_data(cls): + """ + Build a dummy file structure which will be used as a foundation for the log collector tests + """ + cls.root_collect_dir = os.path.join(cls.tmp_dir, "files_to_collect") + mkdir(cls.root_collect_dir) + + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log"), + SMALL_FILE_SIZE) # small text file + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log.1"), + LARGE_FILE_SIZE) # large text file + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log.2.gz"), + SMALL_FILE_SIZE, binary=True) # small binary file + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "waagent.log.3.gz"), + LARGE_FILE_SIZE, binary=True) # large binary file + + mkdir(os.path.join(cls.root_collect_dir, "another_dir")) + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "less_important_file"), + SMALL_FILE_SIZE) + cls._create_file_of_specific_size(os.path.join(cls.root_collect_dir, "another_dir", "least_important_file"), + SMALL_FILE_SIZE) + + @classmethod + def _build_manifest(cls, manifest_file): + """ + Files listed in the manifest will be collected, others will be ignored + """ + files = [ + os.path.join(cls.root_collect_dir, "waagent*"), + os.path.join(cls.root_collect_dir, "less_important_file*"), + os.path.join(cls.root_collect_dir, "another_dir", "least_important_file"), + os.path.join(cls.root_collect_dir, "non_existing_file"), + ] + + with open(manifest_file, "w") as fh: + for file in files: + 
fh.write("copy,{0}\n".format(file)) + + @staticmethod + def _create_file_of_specific_size(file_path, file_size, binary=False): + binary_descriptor = "b" if binary else "" + data = b'0' if binary else '0' + + with open(file_path, "w{0}".format(binary_descriptor)) as fh: + fh.seek(file_size - 1) + fh.write(data) + + @staticmethod + def _truncated_path(normal_path): + return "truncated_" + normal_path.replace(os.path.sep, "_") + + def _assert_files_are_in_archive(self, expected_files): + with zipfile.ZipFile(self.compressed_archive_path, "r") as archive: + archive_files = archive.namelist() + + for file in expected_files: + if file.lstrip(os.path.sep) not in archive_files: + self.fail("File {0} was supposed to be collected, but is not present in the archive!".format(file)) + + # Assert that results file is always present + if "results.txt" not in archive_files: + self.fail("File results.txt was supposed to be collected, but is not present in the archive!") + + self.assertTrue(True) + + def _assert_files_are_not_in_archive(self, unexpected_files): + with zipfile.ZipFile(self.compressed_archive_path, "r") as archive: + archive_files = archive.namelist() + + for file in unexpected_files: + if file.lstrip(os.path.sep) in archive_files: + self.fail("File {0} wasn't supposed to be collected, but is present in the archive!".format(file)) + + self.assertTrue(True) + + def _assert_archive_created(self, archive): + with open(self.output_results_file_path, "r") as out: + error_message = out.readlines()[-1] + self.assertTrue(archive, "Failed to collect logs, error message: {0}".format(error_message)) + + def _get_uncompressed_file_size(self, file): + with zipfile.ZipFile(self.compressed_archive_path, "r") as archive: + return archive.getinfo(file.lstrip(os.path.sep)).file_size + + def _get_number_of_files_in_archive(self): + with zipfile.ZipFile(self.compressed_archive_path, "r") as archive: + # Exclude results file + return len(archive.namelist())-1 + + def 
test_log_collector_parses_commands_in_manifest(self): + # Ensure familiar commands are parsed and unknowns are ignored (like diskinfo and malformed entries) + file_to_collect = os.path.join(self.root_collect_dir, "waagent.log") + folder_to_list = self.root_collect_dir + + manifest_content = """ +echo,### Test header ### +unknown command +ll,{0} +copy,{1} +diskinfo,""".format(folder_to_list, file_to_collect) + + manifest_file_path = os.path.join(self.tmp_dir, "manifest") + write_file(manifest_file_path, manifest_content) + + lc = LogCollector(manifest_file_path) + archive = lc.collect_logs() + + with open(self.output_results_file_path, "r") as fh: + results = fh.readlines() + + # Assert echo was parsed + self.assertTrue(any([line.endswith("### Test header ###\n") for line in results])) + # Assert unknown command was reported + self.assertTrue(any([line.endswith("ERROR Couldn\'t parse \"unknown command\"\n") for line in results])) + # Assert ll was parsed + self.assertTrue(any(["ls -alF {0}".format(folder_to_list) in line for line in results])) + # Assert copy was parsed + self._assert_archive_created(archive) + self._assert_files_are_in_archive(expected_files=[file_to_collect]) + + no_files = self._get_number_of_files_in_archive() + self.assertEquals(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files)) + + def test_log_collector_should_collect_all_files(self): + # All files in the manifest should be collected, since none of them are over the individual file size limit, + # and combined they do not cross the archive size threshold. 
+ lc = LogCollector(self.manifest_path) + archive = lc.collect_logs() + + self._assert_archive_created(archive) + + expected_files = [ + os.path.join(self.root_collect_dir, "waagent.log"), + os.path.join(self.root_collect_dir, "waagent.log.1"), + os.path.join(self.root_collect_dir, "waagent.log.2.gz"), + os.path.join(self.root_collect_dir, "waagent.log.3.gz"), + os.path.join(self.root_collect_dir, "less_important_file"), + os.path.join(self.root_collect_dir, "another_dir", "least_important_file") + ] + self._assert_files_are_in_archive(expected_files) + + no_files = self._get_number_of_files_in_archive() + self.assertEquals(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files)) + + def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self): + # Set the size limit so that some files are too large to collect in full. + with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): + lc = LogCollector(self.manifest_path) + archive = lc.collect_logs() + + self._assert_archive_created(archive) + + expected_files = [ + os.path.join(self.root_collect_dir, "waagent.log"), + self._truncated_path(os.path.join(self.root_collect_dir, "waagent.log.1")), # this file should be truncated + os.path.join(self.root_collect_dir, "waagent.log.2.gz"), + os.path.join(self.root_collect_dir, "less_important_file"), + os.path.join(self.root_collect_dir, "another_dir", "least_important_file") + ] + unexpected_files = [ + os.path.join(self.root_collect_dir, "waagent.log.3.gz") # binary files cannot be truncated, ignore it + ] + self._assert_files_are_in_archive(expected_files) + self._assert_files_are_not_in_archive(unexpected_files) + + no_files = self._get_number_of_files_in_archive() + self.assertEquals(5, no_files, "Expected 5 files in archive, found {0}!".format(no_files)) + + def test_log_collector_should_prioritize_important_files_if_archive_too_big(self): + # Set the archive size limit so that not all files can 
be collected. In that case, files will be added to the
+        # archive according to their priority.
+
+        # Specify files that have priority. The list is ordered, where the first entry has the highest priority.
+        must_collect_files = [
+            os.path.join(self.root_collect_dir, "waagent*"),
+            os.path.join(self.root_collect_dir, "less_important_file*")
+        ]
+
+        with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024):
+            with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files):
+                lc = LogCollector(self.manifest_path)
+                archive = lc.collect_logs()
+
+        self._assert_archive_created(archive)
+
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            os.path.join(self.root_collect_dir, "waagent.log.1"),
+            os.path.join(self.root_collect_dir, "waagent.log.2.gz")
+        ]
+        unexpected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log.3.gz"),
+            os.path.join(self.root_collect_dir, "less_important_file"),
+            os.path.join(self.root_collect_dir, "another_dir", "least_important_file")
+        ]
+        self._assert_files_are_in_archive(expected_files)
+        self._assert_files_are_not_in_archive(unexpected_files)
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(3, no_files, "Expected 3 files in archive, found {0}!".format(no_files))
+
+        # Second collection, if a file got deleted, delete it from the archive and add next file on the priority list
+        # if there is enough space.
+        rm_files(os.path.join(self.root_collect_dir, "waagent.log.3.gz"))
+
+        with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024):
+            with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files):
+                second_archive = lc.collect_logs()
+
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            os.path.join(self.root_collect_dir, "waagent.log.1"),
+            os.path.join(self.root_collect_dir, "waagent.log.2.gz"),
+            os.path.join(self.root_collect_dir, "less_important_file"),
+            os.path.join(self.root_collect_dir, "another_dir", "least_important_file")
+        ]
+        unexpected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log.3.gz")
+        ]
+        self._assert_files_are_in_archive(expected_files)
+        self._assert_files_are_not_in_archive(unexpected_files)
+
+        self._assert_archive_created(second_archive)
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(5, no_files, "Expected 5 files in archive, found {0}!".format(no_files))
+
+    def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_deleted(self):
+        # Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it
+        # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen.
+        lc = LogCollector(self.manifest_path)
+        first_archive = lc.collect_logs()
+        self._assert_archive_created(first_archive)
+
+        # Everything should be in the archive
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            os.path.join(self.root_collect_dir, "waagent.log.1"),
+            os.path.join(self.root_collect_dir, "waagent.log.2.gz"),
+            os.path.join(self.root_collect_dir, "waagent.log.3.gz"),
+            os.path.join(self.root_collect_dir, "less_important_file"),
+            os.path.join(self.root_collect_dir, "another_dir", "least_important_file")
+        ]
+        self._assert_files_are_in_archive(expected_files)
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files))
+
+        # Update a file and its last modified time to ensure the last modified time and last collection time are not
+        # the same in this test
+        file_to_update = os.path.join(self.root_collect_dir, "waagent.log")
+        self._create_file_of_specific_size(file_to_update, LARGE_FILE_SIZE)  # update existing file
+        new_time = os.path.getmtime(file_to_update) + 5
+        os.utime(file_to_update, (new_time, new_time))
+
+        # Create a new file (that is covered by the manifest and will be collected) and delete a file
+        self._create_file_of_specific_size(os.path.join(self.root_collect_dir, "less_important_file.1"),
+                                           LARGE_FILE_SIZE)
+        rm_files(os.path.join(self.root_collect_dir, "waagent.log.1"))
+
+        second_archive = lc.collect_logs()
+        self._assert_archive_created(second_archive)
+
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            os.path.join(self.root_collect_dir, "waagent.log.2.gz"),
+            os.path.join(self.root_collect_dir, "waagent.log.3.gz"),
+            os.path.join(self.root_collect_dir, "less_important_file"),
+            os.path.join(self.root_collect_dir, "less_important_file.1"),
+            os.path.join(self.root_collect_dir, "another_dir", "least_important_file")
+        ]
+        unexpected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log.1")
+        ]
+        self._assert_files_are_in_archive(expected_files)
+        self._assert_files_are_not_in_archive(unexpected_files)
+
+        updated_file = os.path.join(self.root_collect_dir, "waagent.log")
+        new_file_size = self._get_uncompressed_file_size(updated_file)
+        self.assertEqual(LARGE_FILE_SIZE, new_file_size, "File {0} hasn't been updated! Size in archive is {1}, but "
+                         "should be {2}.".format(updated_file, new_file_size, LARGE_FILE_SIZE))
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files))
+
+    def test_log_collector_should_clean_up_uncollected_truncated_files(self):
+        # Make sure that truncated files that are no longer needed are cleaned up. If an existing truncated file
+        # from a previous run is not collected in the current run, it should be deleted to free up space.
+
+        # Specify files that have priority. The list is ordered, where the first entry has the highest priority.
+        must_collect_files = [
+            os.path.join(self.root_collect_dir, "waagent*")
+        ]
+
+        # Set the archive size limit so that not all files can be collected. In that case, files will be added to the
+        # archive according to their priority.
+        # Set the size limit so that only two files can be collected, of which one needs to be truncated.
+        with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE):
+            with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files):
+                with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
+                    lc = LogCollector(self.manifest_path)
+                    archive = lc.collect_logs()
+
+        self._assert_archive_created(archive)
+
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            self._truncated_path(os.path.join(self.root_collect_dir, "waagent.log.1")),  # this file should be truncated
+        ]
+        self._assert_files_are_in_archive(expected_files)
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(2, no_files, "Expected 2 files in archive, found {0}!".format(no_files))
+
+        # Remove the original file so it is not collected anymore. In the next collection, the truncated file should be
+        # removed both from the archive and from the filesystem.
+        rm_files(os.path.join(self.root_collect_dir, "waagent.log.1"))
+
+        with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE):
+            with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files):
+                with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
+                    lc = LogCollector(self.manifest_path)
+                    second_archive = lc.collect_logs()
+
+        expected_files = [
+            os.path.join(self.root_collect_dir, "waagent.log"),
+            os.path.join(self.root_collect_dir, "waagent.log.2.gz"),
+        ]
+        unexpected_files = [
+            self._truncated_path(os.path.join(self.root_collect_dir, "waagent.log.1"))
+        ]
+        self._assert_files_are_in_archive(expected_files)
+        self._assert_files_are_not_in_archive(unexpected_files)
+
+        self._assert_archive_created(second_archive)
+
+        no_files = self._get_number_of_files_in_archive()
+        self.assertEqual(2, no_files, "Expected 2 files in archive, found {0}!".format(no_files))
+
+        truncated_files = os.listdir(self.truncated_files_dir)
+        self.assertEqual(0, len(truncated_files), "Uncollected truncated file waagent.log.1 should have been deleted!")
diff --git a/tests/tools.py b/tests/tools.py
index 7a8d0b7092..d66079aac2 100644
--- a/tests/tools.py
+++ b/tests/tools.py
@@ -168,6 +168,10 @@ def i_am_root():
     return os.geteuid() == 0
 
 
+def is_python_version_26():
+    return sys.version_info[0] == 2 and sys.version_info[1] == 6
+
+
 class AgentTestCase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/tests/utils/test_crypt_util.py b/tests/utils/test_crypt_util.py
index c923c176cb..b447ac8025 100644
--- a/tests/utils/test_crypt_util.py
+++ b/tests/utils/test_crypt_util.py
@@ -16,17 +16,12 @@
 #
 
 import os
-import sys
 import unittest
 
 import azurelinuxagent.common.conf as conf
 from azurelinuxagent.common.exception import CryptError
 from azurelinuxagent.common.utils.cryptutil import CryptUtil
-from tests.tools import AgentTestCase, data_dir, load_data, skip_if_predicate_true
-
-
-def is_python_version_26():
-    return sys.version_info[0] == 2 and sys.version_info[1] == 6
+from tests.tools import AgentTestCase, data_dir, load_data, is_python_version_26, skip_if_predicate_true
 
 
 class TestCryptoUtilOperations(AgentTestCase):