Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove ama extension services cgroups tracking code #3210

Merged
merged 5 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 1 addition & 22 deletions azurelinuxagent/common/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"ResourceDisk.MountPoint": "/mnt/resource",
"ResourceDisk.MountOptions": None,
"ResourceDisk.Filesystem": "ext3",
"AutoUpdate.GAFamily": "Prod",
"Debug.CgroupMonitorExpiryTime": "2022-03-31",
"Debug.CgroupMonitorExtensionName": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
"AutoUpdate.GAFamily": "Prod"
}


Expand Down Expand Up @@ -616,25 +614,6 @@ def get_enable_agent_memory_usage_check(conf=__conf__):
"""
return conf.get_switch("Debug.EnableAgentMemoryUsageCheck", False)


def get_cgroup_monitor_expiry_time(conf=__conf__):
    """
    Return the configured expiry date after which cgroups monitoring for pilot
    extensions is disabled.

    The value is read from "Debug.CgroupMonitorExpiryTime"; the second argument
    passed to conf.get ("2022-03-31") is the fallback value.

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    option_name = "Debug.CgroupMonitorExpiryTime"
    fallback_date = "2022-03-31"
    return conf.get(option_name, fallback_date)


def get_cgroup_monitor_extension_name(conf=__conf__):
    """
    Return the configured name of the extension used for cgroups monitoring.

    The value is read from "Debug.CgroupMonitorExtensionName"; the second
    argument passed to conf.get is the Azure Monitor Linux Agent extension name.

    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    option_name = "Debug.CgroupMonitorExtensionName"
    fallback_name = "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent"
    return conf.get(option_name, fallback_name)


def get_enable_fast_track(conf=__conf__):
"""
If True, the agent use FastTrack when retrieving goal states
Expand Down
44 changes: 4 additions & 40 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,7 +1325,7 @@ def set_extension_resource_limits(self):
extension_name = self.get_full_name()
# set up the resource limits for extension operations and its services.
man = self.load_manifest()
resource_limits = man.get_resource_limits(extension_name, self.ext_handler.version)
resource_limits = man.get_resource_limits()
if not CGroupConfigurator.get_instance().is_extension_resource_limits_setup_completed(extension_name,
cpu_quota=resource_limits.get_extension_slice_cpu_quota()):
CGroupConfigurator.get_instance().setup_extension_slice(
Expand Down Expand Up @@ -1395,7 +1395,7 @@ def _enable_extension(self, extension, uninstall_exit_code):
self.__set_extension_state(extension, ExtensionState.Enabled)

# start tracking the extension services cgroup.
resource_limits = man.get_resource_limits(self.get_full_name(), self.ext_handler.version)
resource_limits = man.get_resource_limits()
CGroupConfigurator.get_instance().start_tracking_extension_services_cgroups(
resource_limits.get_service_list())

Expand Down Expand Up @@ -1462,7 +1462,7 @@ def uninstall(self, extension=None):
man = self.load_manifest()

# stop tracking extension services cgroup.
resource_limits = man.get_resource_limits(self.get_full_name(), self.ext_handler.version)
resource_limits = man.get_resource_limits()
CGroupConfigurator.get_instance().stop_tracking_extension_services_cgroups(
resource_limits.get_service_list())
CGroupConfigurator.get_instance().remove_extension_services_drop_in_files(
Expand Down Expand Up @@ -2132,14 +2132,6 @@ def get_env_file(self):
def get_log_dir(self):
return os.path.join(conf.get_ext_log_dir(), self.ext_handler.name)

@staticmethod
def is_azuremonitorlinuxagent(extension_name):
    """
    Tell whether extension_name is the extension selected for cgroups monitoring
    and the monitoring period has not expired yet.

    Returns True only when extension_name starts with the name returned by
    conf.get_cgroup_monitor_extension_name() and the current UTC time is earlier
    than the date returned by conf.get_cgroup_monitor_expiry_time() (parsed as
    "%Y-%m-%d"). Returns False otherwise, including when the configured expiry
    date cannot be parsed.
    """
    monitored_name = conf.get_cgroup_monitor_extension_name()

    # The configured name is a literal prefix, not a pattern: escape it so the dots in
    # names such as "Microsoft.Azure.Monitor..." do not match arbitrary characters.
    if re.match(r"\A" + re.escape(monitored_name), extension_name) is None:
        return False

    try:
        expiry = datetime.datetime.strptime(conf.get_cgroup_monitor_expiry_time(), "%Y-%m-%d")
    except (TypeError, ValueError):
        # A malformed expiry date in the configuration should not break extension
        # handling; treat it the same as an expired monitoring period.
        return False

    return datetime.datetime.utcnow() < expiry

@staticmethod
def _read_status_file(ext_status_file):
err_count = 0
Expand Down Expand Up @@ -2258,35 +2250,7 @@ def supports_multiple_extensions(self):
value = self.data['handlerManifest'].get('supportsMultipleExtensions', False)
return self._parse_boolean_value(value, default_val=False)

def get_resource_limits(self, extension_name, str_version):
"""
Return the ResourceLimits to use for the given extension.

While ExtHandlerInstance.is_azuremonitorlinuxagent(extension_name) is True, a
hard-coded placeholder manifest is used for testing and monitoring the monitor
extension's resource usage: its only service is "mdsd.service" for versions
below 1.12 and "azuremonitoragent.service" for 1.12 and later.
NOTE(review): an earlier note said this stops being effective after Nov 30th;
the actual cut-off is whatever is_azuremonitorlinuxagent enforces - confirm.

:param extension_name: name passed to ExtHandlerInstance.is_azuremonitorlinuxagent
:param str_version: version string, compared as a FlexibleVersion against "1.12"
"""
if ExtHandlerInstance.is_azuremonitorlinuxagent(extension_name):
if FlexibleVersion(str_version) < FlexibleVersion("1.12"):
test_man = {
"resourceLimits": {
"services": [
{
"name": "mdsd.service"
}
]
}
}
return ResourceLimits(test_man.get('resourceLimits', None))
else:
test_man = {
"resourceLimits": {
"services": [
{
"name": "azuremonitoragent.service"
}
]
}
}
return ResourceLimits(test_man.get('resourceLimits', None))

# Definition without the placeholder override: the limits are built from the
# 'resourceLimits' entry of self.data (None when the entry is absent).
def get_resource_limits(self):
return ResourceLimits(self.data.get('resourceLimits', None))

def report_invalid_boolean_properties(self, ext_name):
Expand Down
2 changes: 0 additions & 2 deletions tests/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
Debug.CgroupDisableOnProcessCheckFailure = True
Debug.CgroupDisableOnQuotaCheckFailure = True
Debug.CgroupLogMetrics = False
Debug.CgroupMonitorExpiryTime = 2022-03-31
Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent
Debug.EnableAgentMemoryUsageCheck = False
Debug.EnableCgroupV2ResourceLimiting = False
Debug.EnableExtensionPolicy = False
Expand Down
14 changes: 0 additions & 14 deletions tests_e2e/tests/ext_cgroups/install_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime, timedelta
from pathlib import Path

from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext
Expand All @@ -35,7 +34,6 @@ def __init__(self, context: AgentVmTestContext):
self._ssh_client = self._context.create_ssh_client()

def run(self):
self._prepare_agent()
# Install the GATest extension to test service cgroups
self._install_gatest_extension()
# Install the Azure Monitor Agent to test long running process cgroup
Expand All @@ -45,18 +43,6 @@ def run(self):
# Install the CSE extension to test extension cgroup
self._install_cse()

def _prepare_agent(self):
    """
    Set Debug.CgroupMonitorExpiryTime on the test VM to a date two days in the future.

    The agent needs the extension info and its services info in the handler manifest
    to monitor and limit resource usage. As part of pilot testing, the agent
    hard-coded the azuremonitoragent service name so it could be monitored in
    production for some time without a manifest update from the extension side,
    giving the extension owners a sense of their resource usage. That was done for a
    few months and is no longer monitored in production. This test moves the expiry
    date into the future so that the test agent starts tracking the cgroups used by
    the service.
    """
    log.info("=====Executing update-waagent-conf remote script to update monitoring deadline flag for tracking azuremonitoragent service")
    expiry_time = (datetime.utcnow() + timedelta(days=2)).date().strftime("%Y-%m-%d")
    command = f"update-waagent-conf Debug.CgroupMonitorExpiryTime={expiry_time}"
    command_output = self._ssh_client.run_command(command, use_sudo=True)
    log.info(command_output)
    log.info("Updated agent cgroups config(CgroupMonitorExpiryTime)")

def _install_ama(self):
ama_extension = VirtualMachineExtensionClient(
self._context.vm, VmExtensionIds.AzureMonitorLinuxAgent,
Expand Down
6 changes: 2 additions & 4 deletions tests_e2e/tests/lib/cgroup_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@
CGROUP_TRACKED_PATTERN = re.compile(r'Started tracking cgroup ([^\s]+)\s+\[(?P<path>[^\s]+)\]')

GATESTEXT_FULL_NAME = "Microsoft.Azure.Extensions.Edp.GATestExtGo"
GATESTEXT_SERVICE = "gatestext.service"
GATESTEXT_SERVICE = "gatestext"
AZUREMONITOREXT_FULL_NAME = "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent"
AZUREMONITORAGENT_SERVICE = "azuremonitoragent.service"
MDSD_SERVICE = "mdsd.service"

AZUREMONITORAGENT_SERVICE = "azuremonitoragent"

def verify_if_distro_supports_cgroup():
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.cgroup_helpers import verify_if_distro_supports_cgroup, \
verify_agent_cgroup_assigned_correctly, BASE_CGROUP, EXT_CONTROLLERS, get_unit_cgroup_mount_path, \
GATESTEXT_SERVICE, AZUREMONITORAGENT_SERVICE, MDSD_SERVICE, check_agent_quota_disabled, \
GATESTEXT_SERVICE, AZUREMONITORAGENT_SERVICE, check_agent_quota_disabled, \
check_cgroup_disabled_with_unknown_process, CGROUP_TRACKED_PATTERN, AZUREMONITOREXT_FULL_NAME, GATESTEXT_FULL_NAME, \
print_cgroups
from tests_e2e.tests.lib.logging import log
Expand Down Expand Up @@ -118,10 +118,6 @@ def verify_extension_service_cgroup_created_on_file_system():
# Azure Monitor Extension Service
azuremonitoragent_cgroup_mount_path = get_unit_cgroup_mount_path(AZUREMONITORAGENT_SERVICE)
azuremonitoragent_service_name = AZUREMONITORAGENT_SERVICE
# Old versions of the AMA extension have a different service name
if azuremonitoragent_cgroup_mount_path is None:
azuremonitoragent_cgroup_mount_path = get_unit_cgroup_mount_path(MDSD_SERVICE)
azuremonitoragent_service_name = MDSD_SERVICE
verify_extension_service_cgroup_created(azuremonitoragent_service_name, azuremonitoragent_cgroup_mount_path)

log.info('Verified all extension service cgroup paths created in file system .\n')
Expand Down Expand Up @@ -177,7 +173,7 @@ def verify_ext_cgroups_tracked():
azuremonitoragent_cgroups_tracked = True
elif name.startswith(GATESTEXT_SERVICE):
gatestext_service_cgroups_tracked = True
elif name.startswith(AZUREMONITORAGENT_SERVICE) or name.startswith(MDSD_SERVICE):
elif name.startswith(AZUREMONITORAGENT_SERVICE):
azuremonitoragent_service_cgroups_tracked = True
cgroups_added_for_telemetry.append((name, path))

Expand Down
Loading