Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor cgroup controllers #3135

Merged
merged 9 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 15 additions & 29 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException

import azurelinuxagent.common.conf as conf
Expand Down Expand Up @@ -208,8 +208,7 @@ def collect_logs(self, is_full_mode):

# Check the cgroups unit
log_collector_monitor = None
cpu_cgroup_path = None
memory_cgroup_path = None
tracked_metrics = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
Expand All @@ -220,40 +219,27 @@ def collect_logs(self, is_full_mode):
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self")
cpu_slice_matches = False
memory_slice_matches = False
if cpu_cgroup_path is not None:
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
if memory_cgroup_path is not None:
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)

if not cpu_slice_matches or not memory_slice_matches:
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
if not cpu_slice_matches:
log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False)
if not memory_slice_matches:
log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False)
log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_metrics = log_collector_cgroup.get_controller_metrics()

if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers()))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

try:
log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
# Running log collector resource monitoring only if agent starts the log collector.
# If Log collector start by any other means, then it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
for metric in tracked_metrics:
if isinstance(metric, CpuMetrics):
metric.initialize_cpu_usage()
break
log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "
Expand Down
Loading
Loading