Skip to content

Commit

Permalink
skip cgorup monitor (#2939)
Browse files Browse the repository at this point in the history
  • Loading branch information
nagworld9 authored Oct 9, 2023
1 parent 6dd91e5 commit 5bad0b4
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 39 deletions.
30 changes: 22 additions & 8 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import sys
import threading
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi

import azurelinuxagent.common.conf as conf
Expand Down Expand Up @@ -204,11 +205,10 @@ def collect_logs(self, is_full_mode):
logger.info("Running log collector mode normal")

# Check the cgroups unit
cpu_cgroup_path, memory_cgroup_path, log_collector_monitor = None, None, None
if CollectLogsHandler.should_validate_cgroups():
cgroups_api = SystemdCgroupsApi()
cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self")

log_collector_monitor = None
cgroups_api = SystemdCgroupsApi()
cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self")
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)

Expand All @@ -221,10 +221,24 @@ def collect_logs(self, is_full_mode):

sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]

try:
log_collector = LogCollector(is_full_mode, cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(log_collector.cgroups)
log_collector_monitor.run()
log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
# If Log collector start by any other means, then it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "
"and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))
Expand Down
10 changes: 5 additions & 5 deletions azurelinuxagent/ga/collect_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,16 @@ def get_thread_name():
return CollectLogsHandler._THREAD_NAME

@staticmethod
def enable_cgroups_validation():
def enable_monitor_cgroups_check():
os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] = "1"

@staticmethod
def disable_cgroups_validation():
def disable_monitor_cgroups_check():
if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
del os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE]

@staticmethod
def should_validate_cgroups():
def is_enabled_monitor_cgroups_check():
if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ:
return os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] == "1"
return False
Expand Down Expand Up @@ -147,7 +147,7 @@ def daemon(self):
time.sleep(_INITIAL_LOG_COLLECTION_DELAY)

try:
CollectLogsHandler.enable_cgroups_validation()
CollectLogsHandler.enable_monitor_cgroups_check()
if self.protocol_util is None or self.protocol is None:
self.init_protocols()

Expand All @@ -162,7 +162,7 @@ def daemon(self):
except Exception as e:
logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e))
finally:
CollectLogsHandler.disable_cgroups_validation()
CollectLogsHandler.disable_monitor_cgroups_check()

def collect_and_send_logs(self):
if self._collect_logs():
Expand Down
15 changes: 1 addition & 14 deletions azurelinuxagent/ga/logcollector.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from datetime import datetime
from heapq import heappush, heappop

from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_LOG_COLLECTOR, MemoryCgroup
from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file
from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters
from azurelinuxagent.common.future import ustr
Expand Down Expand Up @@ -71,14 +70,13 @@ class LogCollector(object):

_TRUNCATED_FILE_PREFIX = "truncated_"

def __init__(self, is_full_mode=False, cpu_cgroup_path=None, memory_cgroup_path=None):
def __init__(self, is_full_mode=False):
self._is_full_mode = is_full_mode
self._manifest = MANIFEST_FULL if is_full_mode else MANIFEST_NORMAL
self._must_collect_files = self._expand_must_collect_files()
self._create_base_dirs()
self._set_logger()
self._initialize_telemetry()
self.cgroups = self._set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path)

@staticmethod
def _mkdir(dirname):
Expand All @@ -105,17 +103,6 @@ def _set_logger():
_LOGGER.addHandler(_f_handler)
_LOGGER.setLevel(logging.INFO)

@staticmethod
def _set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
_LOGGER.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
_LOGGER.info(msg)
return [cpu_cgroup, memory_cgroup]

@staticmethod
def _initialize_telemetry():
protocol = get_protocol_util().get_protocol(init_goal_state=False)
Expand Down
16 changes: 8 additions & 8 deletions tests/ga/test_logcollector.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def test_log_collector_parses_commands_in_manifest(self):

with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
archive = log_collector.collect_logs_and_get_archive()

with open(self.output_results_file_path, "r") as fh:
Expand Down Expand Up @@ -241,7 +241,7 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self):

with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(is_full_mode=True, cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector(is_full_mode=True)
archive = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
Expand All @@ -255,7 +255,7 @@ def test_log_collector_should_collect_all_files(self):
# and combined they do not cross the archive size threshold.

with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
archive = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
Expand All @@ -277,7 +277,7 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_
# Set the size limit so that some files are too large to collect in full.
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
archive = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
Expand Down Expand Up @@ -311,7 +311,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self
with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
archive = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
Expand Down Expand Up @@ -362,7 +362,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d
# Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it
# needs to be updated in the archive, deleted if removed from disk, and added if not previously seen.
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
first_archive = log_collector.collect_logs_and_get_archive()
self._assert_archive_created(first_archive)

Expand Down Expand Up @@ -433,7 +433,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
archive = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
Expand All @@ -455,7 +455,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path")
log_collector = LogCollector()
second_archive = log_collector.collect_logs_and_get_archive()

expected_files = [
Expand Down
8 changes: 4 additions & 4 deletions tests/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args):
@patch("azurelinuxagent.agent.LogCollector")
def test_calls_collect_logs_on_valid_cgroups(self, mock_log_collector):
try:
CollectLogsHandler.enable_cgroups_validation()
CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock()

def mock_cgroup_paths(*args, **kwargs):
Expand All @@ -248,12 +248,12 @@ def mock_cgroup_paths(*args, **kwargs):

mock_log_collector.assert_called_once()
finally:
CollectLogsHandler.disable_cgroups_validation()
CollectLogsHandler.disable_monitor_cgroups_check()

@patch("azurelinuxagent.agent.LogCollector")
def test_doesnt_call_collect_logs_on_invalid_cgroups(self, mock_log_collector):
try:
CollectLogsHandler.enable_cgroups_validation()
CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock()

def mock_cgroup_paths(*args, **kwargs):
Expand All @@ -272,7 +272,7 @@ def mock_cgroup_paths(*args, **kwargs):
mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE)
self.assertEqual(exit_error, re)
finally:
CollectLogsHandler.disable_cgroups_validation()
CollectLogsHandler.disable_monitor_cgroups_check()

def test_it_should_parse_setup_firewall_properly(self):

Expand Down

0 comments on commit 5bad0b4

Please sign in to comment.