From 5bad0b4b19c907386b80ec18ad1423cdb7f3a050 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 9 Oct 2023 11:14:30 -0700 Subject: [PATCH] skip cgorup monitor (#2939) --- azurelinuxagent/agent.py | 30 ++++++++++++++++++++++-------- azurelinuxagent/ga/collect_logs.py | 10 +++++----- azurelinuxagent/ga/logcollector.py | 15 +-------------- tests/ga/test_logcollector.py | 16 ++++++++-------- tests/test_agent.py | 8 ++++---- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index f565f2975f..2811e215ed 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -29,6 +29,7 @@ import sys import threading from azurelinuxagent.ga import logcollector, cgroupconfigurator +from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi import azurelinuxagent.common.conf as conf @@ -204,11 +205,10 @@ def collect_logs(self, is_full_mode): logger.info("Running log collector mode normal") # Check the cgroups unit - cpu_cgroup_path, memory_cgroup_path, log_collector_monitor = None, None, None - if CollectLogsHandler.should_validate_cgroups(): - cgroups_api = SystemdCgroupsApi() - cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self") - + log_collector_monitor = None + cgroups_api = SystemdCgroupsApi() + cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self") + if CollectLogsHandler.is_enabled_monitor_cgroups_check(): cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path) memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path) @@ -221,10 +221,24 @@ def collect_logs(self, is_full_mode): sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) + def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path): + cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) + msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) + logger.info(msg) + cpu_cgroup.initialize_cpu_usage() + memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path) + msg = "Started tracking memory cgroup {0}".format(memory_cgroup) + logger.info(msg) + return [cpu_cgroup, memory_cgroup] + try: - log_collector = LogCollector(is_full_mode, cpu_cgroup_path, memory_cgroup_path) - log_collector_monitor = get_log_collector_monitor_handler(log_collector.cgroups) - log_collector_monitor.run() + log_collector = LogCollector(is_full_mode) + # Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector. + # If Log collector start by any other means, then it will not be monitored. + if CollectLogsHandler.is_enabled_monitor_cgroups_check(): + tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path) + log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups) + log_collector_monitor.run() archive = log_collector.collect_logs_and_get_archive() logger.info("Log collection successfully completed. Archive can be found at {0} " "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH)) diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index 244d45b1e4..4987d865e9 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -83,16 +83,16 @@ def get_thread_name(): return CollectLogsHandler._THREAD_NAME @staticmethod - def enable_cgroups_validation(): + def enable_monitor_cgroups_check(): os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] = "1" @staticmethod - def disable_cgroups_validation(): + def disable_monitor_cgroups_check(): if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ: del os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] @staticmethod - def should_validate_cgroups(): + def is_enabled_monitor_cgroups_check(): if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ: return os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] == "1" return False @@ -147,7 +147,7 @@ def daemon(self): time.sleep(_INITIAL_LOG_COLLECTION_DELAY) try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() if self.protocol_util is None or self.protocol is None: self.init_protocols() @@ -162,7 +162,7 @@ def daemon(self): except Exception as e: logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e)) finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() def collect_and_send_logs(self): if self._collect_logs(): diff --git a/azurelinuxagent/ga/logcollector.py b/azurelinuxagent/ga/logcollector.py index f2947e9373..393dd3c2ef 100644 --- a/azurelinuxagent/ga/logcollector.py +++ b/azurelinuxagent/ga/logcollector.py @@ -26,7 +26,6 @@ from datetime import datetime from heapq import heappush, heappop -from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_LOG_COLLECTOR, MemoryCgroup from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters from azurelinuxagent.common.future import ustr @@ -71,14 +70,13 @@ class LogCollector(object): _TRUNCATED_FILE_PREFIX = "truncated_" - def __init__(self, is_full_mode=False, cpu_cgroup_path=None, memory_cgroup_path=None): + def __init__(self, is_full_mode=False): self._is_full_mode = is_full_mode self._manifest = MANIFEST_FULL if is_full_mode else MANIFEST_NORMAL self._must_collect_files = self._expand_must_collect_files() self._create_base_dirs() self._set_logger() self._initialize_telemetry() - self.cgroups = self._set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path) @staticmethod def _mkdir(dirname): @@ -105,17 +103,6 @@ def _set_logger(): _LOGGER.addHandler(_f_handler) _LOGGER.setLevel(logging.INFO) - @staticmethod - def _set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path): - cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) - msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) - _LOGGER.info(msg) - cpu_cgroup.initialize_cpu_usage() - memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path) - msg = "Started tracking memory cgroup {0}".format(memory_cgroup) - _LOGGER.info(msg) - return [cpu_cgroup, memory_cgroup] - @staticmethod def _initialize_telemetry(): protocol = get_protocol_util().get_protocol(init_goal_state=False) diff --git a/tests/ga/test_logcollector.py b/tests/ga/test_logcollector.py index 0fefedea23..cedf894b09 100644 --- a/tests/ga/test_logcollector.py +++ b/tests/ga/test_logcollector.py @@ -212,7 +212,7 @@ def test_log_collector_parses_commands_in_manifest(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() with open(self.output_results_file_path, "r") as fh: @@ -241,7 +241,7 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(is_full_mode=True, cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector(is_full_mode=True) archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -255,7 +255,7 @@ def test_log_collector_should_collect_all_files(self): # and combined they do not cross the archive size threshold. with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -277,7 +277,7 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_ # Set the size limit so that some files are too large to collect in full. with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -311,7 +311,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -362,7 +362,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d # Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen. with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() first_archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(first_archive) @@ -433,7 +433,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -455,7 +455,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() second_archive = log_collector.collect_logs_and_get_archive() expected_files = [ diff --git a/tests/test_agent.py b/tests/test_agent.py index a2509ad851..414faa7266 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -233,7 +233,7 @@ def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): @patch("azurelinuxagent.agent.LogCollector") def test_calls_collect_logs_on_valid_cgroups(self, mock_log_collector): try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() def mock_cgroup_paths(*args, **kwargs): @@ -248,12 +248,12 @@ def mock_cgroup_paths(*args, **kwargs): mock_log_collector.assert_called_once() finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() @patch("azurelinuxagent.agent.LogCollector") def test_doesnt_call_collect_logs_on_invalid_cgroups(self, mock_log_collector): try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() def mock_cgroup_paths(*args, **kwargs): @@ -272,7 +272,7 @@ def mock_cgroup_paths(*args, **kwargs): mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) self.assertEqual(exit_error, re) finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() def test_it_should_parse_setup_firewall_properly(self):