From 656f863dc668259671e8725a6cc22c849e842f61 Mon Sep 17 00:00:00 2001 From: narrieta Date: Wed, 18 Oct 2023 09:28:50 -0700 Subject: [PATCH 01/30] update tests --- .../tests/agent_bvt/extension_operations.py | 9 ++--- tests_e2e/tests/agent_bvt/run_command.py | 4 +-- tests_e2e/tests/agent_bvt/vm_access.py | 14 ++++---- .../tests/agent_cgroups/agent_cgroups.py | 10 +++--- .../agent_ext_workflow/extension_workflow.py | 13 +++----- .../tests/agent_firewall/agent_firewall.py | 10 +++--- .../agent_not_provisioned.py | 7 ++-- .../tests/agent_publish/agent_publish.py | 12 +++---- tests_e2e/tests/agent_status/agent_status.py | 8 ++--- tests_e2e/tests/agent_update/rsm_update.py | 33 ++++++++----------- tests_e2e/tests/ext_cgroups/ext_cgroups.py | 10 +++--- .../tests/ext_cgroups/install_extensions.py | 6 ++-- .../ext_telemetry_pipeline.py | 10 +++--- .../extensions_disabled.py | 4 +-- tests_e2e/tests/fips/fips.py | 4 +-- .../keyvault_certificates.py | 4 +-- .../multi_config_ext/multi_config_ext.py | 4 +-- .../check_fallback_to_hgap.py | 6 ++-- .../check_no_outbound_connections.py | 4 +-- tests_e2e/tests/samples/error_remote_test.py | 6 ++-- tests_e2e/tests/samples/error_test.py | 4 +-- tests_e2e/tests/samples/fail_remote_test.py | 6 ++-- tests_e2e/tests/samples/fail_test.py | 4 +-- tests_e2e/tests/samples/pass_remote_test.py | 6 ++-- tests_e2e/tests/samples/pass_test.py | 4 +-- 25 files changed, 96 insertions(+), 106 deletions(-) diff --git a/tests_e2e/tests/agent_bvt/extension_operations.py b/tests_e2e/tests/agent_bvt/extension_operations.py index e5c607c1d1..88714f1ced 100755 --- a/tests_e2e/tests/agent_bvt/extension_operations.py +++ b/tests_e2e/tests/agent_bvt/extension_operations.py @@ -31,19 +31,16 @@ from azure.core.exceptions import ResourceNotFoundError -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier from 
tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionOperationsBvt(AgentTest): +class ExtensionOperationsBvt(AgentVmTest): def run(self): - ssh_client: SshClient = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + ssh_client: SshClient = self._context.create_ssh_client() is_arm64: bool = ssh_client.get_architecture() == "aarch64" diff --git a/tests_e2e/tests/agent_bvt/run_command.py b/tests_e2e/tests/agent_bvt/run_command.py index 494458eab4..b6532a21e2 100755 --- a/tests_e2e/tests/agent_bvt/run_command.py +++ b/tests_e2e/tests/agent_bvt/run_command.py @@ -31,14 +31,14 @@ from assertpy import assert_that, soft_assertions from typing import Callable, Dict -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class RunCommandBvt(AgentTest): +class RunCommandBvt(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension diff --git a/tests_e2e/tests/agent_bvt/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py index 9b52ac2453..c82433431f 100755 --- a/tests_e2e/tests/agent_bvt/vm_access.py +++ b/tests_e2e/tests/agent_bvt/vm_access.py @@ -28,7 +28,7 @@ from assertpy import assert_that from pathlib import Path -from tests_e2e.tests.lib.agent_test import AgentTest, TestSkipped +from tests_e2e.tests.lib.agent_test import AgentVmTest, TestSkipped from tests_e2e.tests.lib.identifiers import VmExtensionIds from 
tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient @@ -36,10 +36,10 @@ from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class VmAccessBvt(AgentTest): +class VmAccessBvt(AgentVmTest): def run(self): - ssh: SshClient = self._context.create_ssh_client() - if not VmExtensionIds.VmAccess.supports_distro(ssh.run_command("uname -a")): + ssh_client: SshClient = self._context.create_ssh_client() + if not VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): raise TestSkipped("Currently VMAccess is not supported on this distro") # Try to use a unique username for each test run (note that we truncate to 32 chars to @@ -52,8 +52,8 @@ def run(self): private_key_file: Path = self._context.working_directory/f"{username}_rsa" public_key_file: Path = self._context.working_directory/f"{username}_rsa.pub" log.info("Generating SSH key as %s", private_key_file) - ssh = SshClient(ip_address=self._context.vm_ip_address, username=username, private_key_file=private_key_file) - ssh.generate_ssh_key(private_key_file) + ssh_client = SshClient(ip_address=self._context.ip_address, username=username, identity_file=private_key_file) + ssh_client.generate_ssh_key(private_key_file) with public_key_file.open() as f: public_key = f.read() @@ -70,7 +70,7 @@ def run(self): # Verify the user was added correctly by starting an SSH session to the VM log.info("Verifying SSH connection to the test VM") - stdout = ssh.run_command("echo -n $USER") + stdout = ssh_client.run_command("echo -n $USER") assert_that(stdout).described_as("Output from SSH command").is_equal_to(username) log.info("SSH command output ($USER): %s", stdout) diff --git a/tests_e2e/tests/agent_cgroups/agent_cgroups.py b/tests_e2e/tests/agent_cgroups/agent_cgroups.py index c0394f6c62..449c5c3629 100644 --- a/tests_e2e/tests/agent_cgroups/agent_cgroups.py +++ b/tests_e2e/tests/agent_cgroups/agent_cgroups.py @@ -16,17 +16,17 @@ # See 
the License for the specific language governing permissions and # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class AgentCgroups(AgentTest): +class AgentCgroups(AgentVmTest): """ This test verifies that the agent is running in the expected cgroups. """ - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() @@ -35,7 +35,7 @@ def run(self): log.info("Restarting agent service to make sure service starts with new configuration that was setup by the cgroupconfigurator") self._ssh_client.run_command("agent-service restart", use_sudo=True) log.info("=====Validating agent cgroups=====") - self._run_remote_test("agent_cgroups-check_cgroups_agent.py") + self._run_remote_test(self._ssh_client, "agent_cgroups-check_cgroups_agent.py") log.info("Successfully Verified that agent present in correct cgroups") diff --git a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py index 8c08ea7d3c..edf179f39d 100644 --- a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -23,15 +23,15 @@ from random import choice import uuid -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from 
tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionWorkflow(AgentTest): +class ExtensionWorkflow(AgentVmTest): """ This scenario tests if the correct extension workflow sequence is being executed from the agent. It installs the GuestAgentDcrTestExtension on the test VM and makes requests to install, enable, update, and delete the extension @@ -59,12 +59,9 @@ class ExtensionWorkflow(AgentTest): - Match the operation sequence as per the test and make sure they are in the correct chronological order - Restart the agent and verify if the correct operation sequence is followed """ - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) - self._ssh_client = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + self._ssh_client = self.create_ssh_client() # This class represents the GuestAgentDcrTestExtension running on the test VM class GuestAgentDcrTestExtension: diff --git a/tests_e2e/tests/agent_firewall/agent_firewall.py b/tests_e2e/tests/agent_firewall/agent_firewall.py index 804443a470..c5b789dea7 100644 --- a/tests_e2e/tests/agent_firewall/agent_firewall.py +++ b/tests_e2e/tests/agent_firewall/agent_firewall.py @@ -16,23 +16,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class AgentFirewall(AgentTest): +class AgentFirewall(AgentVmTest): """ This test verifies the agent firewall rules are added properly. It checks each firewall rule is present and working as expected. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() def run(self): log.info("Checking iptable rules added by the agent") - self._run_remote_test(f"agent_firewall-verify_all_firewall_rules.py --user {self._context.username}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_firewall-verify_all_firewall_rules.py --user {self._context.username}", use_sudo=True) log.info("Successfully verified all rules present and working as expected.") diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py index 490fba3b8d..ed8dc7caec 100755 --- a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -21,7 +21,7 @@ from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError @@ -30,7 +30,7 @@ from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class AgentNotProvisioned(AgentTest): +class AgentNotProvisioned(AgentVmTest): """ When osProfile.linuxConfiguration.provisionVMAgent is set to 'false', this test verifies that the agent is disabled and that extension operations are not allowed. @@ -66,8 +66,7 @@ def run(self): # Validate that the agent is not reporting status. # log.info("Verifying that the Agent status is 'Not Ready' (i.e. 
it is not reporting status).") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - instance_view: VirtualMachineInstanceView = vm.get_instance_view() + instance_view: VirtualMachineInstanceView = self._context.vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) assert_that(instance_view.vm_agent.statuses[0].code).described_as("The VM Agent should not be available").is_equal_to('ProvisioningState/Unavailable') diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index eaddc74ede..f065131f2e 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -19,20 +19,20 @@ import uuid from typing import Any, Dict, List -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class AgentPublishTest(AgentTest): +class AgentPublishTest(AgentVmTest): """ This script verifies if the agent update performed in the vm. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client: SshClient = self._context.create_ssh_client() @@ -57,12 +57,12 @@ def _get_agent_info(self) -> None: def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags") - self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) + self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Updated agent-update DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: log.info("Verifying for agent update status") - self._run_remote_test("agent_publish-check_update.py") + self._run_remote_test(self._ssh_client, "agent_publish-check_update.py") log.info('Successfully checked the agent update') def _check_cse(self) -> None: diff --git a/tests_e2e/tests/agent_status/agent_status.py b/tests_e2e/tests/agent_status/agent_status.py index b9caef8f32..612434b8c4 100644 --- a/tests_e2e/tests/agent_status/agent_status.py +++ b/tests_e2e/tests/agent_status/agent_status.py @@ -28,8 +28,8 @@ from time import sleep import json -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient @@ -38,8 +38,8 @@ class RetryableAgentStatusException(BaseException): pass -class AgentStatus(AgentTest): - def __init__(self, context: AgentTestContext): +class AgentStatus(AgentVmTest): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() diff --git a/tests_e2e/tests/agent_update/rsm_update.py 
b/tests_e2e/tests/agent_update/rsm_update.py index d31e8ce3e6..06c31a8116 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -33,23 +33,19 @@ from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false -from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class RsmUpdateBvt(AgentTest): +class RsmUpdateBvt(AgentVmTest): - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) - self._ssh_client = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + self._ssh_client = self._context.create_ssh_client() self._installed_agent_version = "9.9.9.9" self._downgrade_version = "9.9.9.9" @@ -131,7 +127,7 @@ def run(self) -> None: def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we send the rsm update request log.info('Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') - self._run_remote_test(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) log.info('Verified latest GS contain requested version after rsm update requested') def _prepare_agent(self, daemon_version="1.0.0.0", 
update_config=True) -> None: @@ -141,11 +137,11 @@ def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ log.info('Executing modify_agent_version remote script to update agent installed version to lower than requested version') - self._run_remote_test(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) log.info('Successfully updated agent installed version') if update_config: log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') - self._run_remote_test("update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) + self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Successfully updated agent update config') @staticmethod @@ -175,23 +171,22 @@ def _request_rsm_update(self, requested_version: str) -> None: This method is to simulate the rsm request. 
First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api """ - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - if not self._verify_agent_update_flag_enabled(vm): + if not self._verify_agent_update_flag_enabled(self._context.vm): # enable the flag log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") - self._enable_agent_update_flag(vm) + self._enable_agent_update_flag(self._context.vm) log.info("Updated the enableVMAgentPlatformUpdates flag to True") else: log.info("Already enableVMAgentPlatformUpdates flag set to True") - cloud: Cloud = AZURE_CLOUDS[self._context.vm.cloud] + cloud: Cloud = AZURE_CLOUDS[self._context.cloud] credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) token = credential.get_token(cloud.endpoints.resource_manager + "/.default") headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} # Later this api call will be replaced by azure-python-sdk wrapper base_url = cloud.endpoints.resource_manager url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ - "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, self._context.vm.resource_group, self._context.vm.name) + "UpgradeVMAgent?api-version=2022-08-01".format(self._context.subscription, self._context.resource_group, self._context.name) data = { "target": "Microsoft.OSTCLinuxAgent.Test", "targetVersion": requested_version @@ -240,7 +235,7 @@ def _verify_agent_reported_supported_feature_flag(self): """ log.info("Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") - self._run_remote_test("agent_update-verify_versioning_supported_feature.py", use_sudo=True) + self._run_remote_test(self._ssh_client, "agent_update-verify_versioning_supported_feature.py", 
use_sudo=True) log.info("Successfully verified that Agent reported VersioningGovernance supported feature flag") def _verify_agent_reported_update_status(self, version: str): @@ -249,7 +244,7 @@ def _verify_agent_reported_update_status(self, version: str): """ log.info("Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format(version)) - self._run_remote_test(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) log.info("Successfully Agent reported update status for version {0}".format(version)) def _retrieve_installed_agent_version(self): diff --git a/tests_e2e/tests/ext_cgroups/ext_cgroups.py b/tests_e2e/tests/ext_cgroups/ext_cgroups.py index 33092ca41e..94a0c97258 100644 --- a/tests_e2e/tests/ext_cgroups/ext_cgroups.py +++ b/tests_e2e/tests/ext_cgroups/ext_cgroups.py @@ -17,17 +17,17 @@ # limitations under the License. # from tests_e2e.tests.ext_cgroups.install_extensions import InstallExtensions -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class ExtCgroups(AgentTest): +class ExtCgroups(AgentVmTest): """ This test verifies the installed extensions assigned correctly in their cgroups. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() @@ -35,7 +35,7 @@ def run(self): log.info("=====Installing extensions to validate ext cgroups scenario") InstallExtensions(self._context).run() log.info("=====Executing remote script check_cgroups_extensions.py to validate extension cgroups") - self._run_remote_test("ext_cgroups-check_cgroups_extensions.py", use_sudo=True) + self._run_remote_test(self._ssh_client, "ext_cgroups-check_cgroups_extensions.py", use_sudo=True) log.info("Successfully verified that extensions present in correct cgroup") diff --git a/tests_e2e/tests/ext_cgroups/install_extensions.py b/tests_e2e/tests/ext_cgroups/install_extensions.py index 6617730ed0..2295cdffb3 100644 --- a/tests_e2e/tests/ext_cgroups/install_extensions.py +++ b/tests_e2e/tests/ext_cgroups/install_extensions.py @@ -19,7 +19,7 @@ from datetime import datetime, timedelta from pathlib import Path -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient @@ -30,7 +30,7 @@ class InstallExtensions: This test installs the multiple extensions in order to verify extensions cgroups in the next test. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): self._context = context self._ssh_client = self._context.create_ssh_client() @@ -67,7 +67,7 @@ def _install_ama(self): def _install_vmaccess(self): # fetch the public key - public_key_file: Path = Path(self._context.private_key_file).with_suffix(".pub") + public_key_file: Path = Path(self._context.identity_file).with_suffix(".pub") with public_key_file.open() as f: public_key = f.read() # Invoke the extension diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index de051485ad..e1ffd5fba5 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -29,14 +29,14 @@ from azurelinuxagent.common.conf import get_etp_collection_period -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtTelemetryPipeline(AgentTest): +class ExtTelemetryPipeline(AgentVmTest): def run(self): ssh_client: SshClient = self._context.create_ssh_client() @@ -77,7 +77,8 @@ def run(self): log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) - self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + self._run_remote_test(self._ssh_client, + f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events}", use_sudo=True) log.info("") @@ -86,7 +87,8 @@ def run(self): # Add invalid events for each extension and check that the TelemetryEventsCollector drops them 
log.info("") log.info("Add bad extension events and check they are reported...") - self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + self._run_remote_test(self._ssh_client, + f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events} " f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 27c62427ab..084b29e4c0 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -31,7 +31,7 @@ from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient @@ -39,7 +39,7 @@ from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionsDisabled(AgentTest): +class ExtensionsDisabled(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, settings: Any): self.extension = extension diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py index 9f490de4ca..a64f6fec7a 100755 --- a/tests_e2e/tests/fips/fips.py +++ b/tests_e2e/tests/fips/fips.py @@ -20,7 +20,7 @@ import uuid from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient @@ -29,7 +29,7 @@ from tests_e2e.tests.lib.identifiers import VmExtensionIds -class Fips(AgentTest): +class Fips(AgentVmTest): """ Enables FIPS on the 
test VM, which is Mariner 2 VM, and verifies that extensions with protected settings are handled correctly under FIPS. """ diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py index 676d7ed249..01049bdf8b 100755 --- a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -22,14 +22,14 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class KeyvaultCertificates(AgentTest): +class KeyvaultCertificates(AgentVmTest): def run(self): test_certificates = { 'C49A06B3044BD1778081366929B53EBF154133B3': { diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py index a42ca8900c..a79d68f4fa 100644 --- a/tests_e2e/tests/multi_config_ext/multi_config_ext.py +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -27,14 +27,14 @@ from assertpy import fail from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class MultiConfigExt(AgentTest): +class MultiConfigExt(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension diff --git 
a/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py index b767dc93cd..48827dbe14 100755 --- a/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py +++ b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py @@ -18,12 +18,12 @@ # from assertpy import assert_that -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -class NoOutboundConnections(AgentTest): +class CheckFallbackToHGAP(AgentVmTest): """ Check the agent log to verify that the default channel was changed to HostGAPlugin before executing any extensions. """ @@ -47,5 +47,5 @@ def run(self): if __name__ == "__main__": - NoOutboundConnections.run_from_command_line() + CheckFallbackToHGAP.run_from_command_line() diff --git a/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py index 66cc707d26..985e77b70f 100755 --- a/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py +++ b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py @@ -18,13 +18,13 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -class CheckNoOutboundConnections(AgentTest): +class CheckNoOutboundConnections(AgentVmTest): """ Verifies that there is no outbound connectivity on the test VM. 
""" diff --git a/tests_e2e/tests/samples/error_remote_test.py b/tests_e2e/tests/samples/error_remote_test.py index 29612f4246..6b52e46cd0 100755 --- a/tests_e2e/tests/samples/error_remote_test.py +++ b/tests_e2e/tests/samples/error_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class ErrorRemoteTest(AgentTest): +class ErrorRemoteTest(AgentVmTest): """ A trivial remote test that fails """ def run(self): - self._run_remote_test("samples-error_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-error_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/error_test.py b/tests_e2e/tests/samples/error_test.py index 4c24080687..e2d584c6e1 100755 --- a/tests_e2e/tests/samples/error_test.py +++ b/tests_e2e/tests/samples/error_test.py @@ -17,10 +17,10 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class ErrorTest(AgentTest): +class ErrorTest(AgentVmTest): """ A trivial test that errors out """ diff --git a/tests_e2e/tests/samples/fail_remote_test.py b/tests_e2e/tests/samples/fail_remote_test.py index f0a50495ad..7a05b67a99 100755 --- a/tests_e2e/tests/samples/fail_remote_test.py +++ b/tests_e2e/tests/samples/fail_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. 
# -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class FailRemoteTest(AgentTest): +class FailRemoteTest(AgentVmTest): """ A trivial remote test that fails """ def run(self): - self._run_remote_test("samples-fail_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-fail_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/fail_test.py b/tests_e2e/tests/samples/fail_test.py index fcebd99183..dfdecb52fb 100755 --- a/tests_e2e/tests/samples/fail_test.py +++ b/tests_e2e/tests/samples/fail_test.py @@ -18,10 +18,10 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class FailTest(AgentTest): +class FailTest(AgentVmTest): """ A trivial test that fails """ diff --git a/tests_e2e/tests/samples/pass_remote_test.py b/tests_e2e/tests/samples/pass_remote_test.py index 94e0cb604c..609ef4d4c7 100755 --- a/tests_e2e/tests/samples/pass_remote_test.py +++ b/tests_e2e/tests/samples/pass_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class PassRemoteTest(AgentTest): +class PassRemoteTest(AgentVmTest): """ A trivial remote test that succeeds """ def run(self): - self._run_remote_test("samples-pass_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-pass_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/pass_test.py b/tests_e2e/tests/samples/pass_test.py index 580db2dc08..d7c85a3552 100755 --- a/tests_e2e/tests/samples/pass_test.py +++ b/tests_e2e/tests/samples/pass_test.py @@ -17,11 +17,11 @@ # limitations under the License. 
# -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log -class PassTest(AgentTest): +class PassTest(AgentVmTest): """ A trivial test that passes. """ From 5633e537b7d287631f34d9fe4b6ef11bd73c6eca Mon Sep 17 00:00:00 2001 From: narrieta Date: Thu, 19 Oct 2023 10:31:43 -0700 Subject: [PATCH 02/30] cleanup --- .../orchestrator/lib/agent_test_loader.py | 14 +- .../sample_runbooks/existing_vm.yml | 149 -------------- tests_e2e/test_suites/vmss.yml | 8 + tests_e2e/tests/lib/agent_test.py | 39 +++- tests_e2e/tests/lib/agent_test_context.py | 186 +++++++----------- .../{azure_client.py => azure_sdk_client.py} | 19 +- tests_e2e/tests/lib/identifiers.py | 31 +-- tests_e2e/tests/lib/resource_group_client.py | 74 +++++++ tests_e2e/tests/lib/ssh_client.py | 20 +- tests_e2e/tests/lib/virtual_machine_client.py | 106 +++++----- .../lib/virtual_machine_extension_client.py | 26 +-- .../lib/virtual_machine_scale_set_client.py | 95 +++++++++ tests_e2e/tests/samples/vmss_test.py | 37 ++++ 13 files changed, 417 insertions(+), 387 deletions(-) delete mode 100644 tests_e2e/orchestrator/sample_runbooks/existing_vm.yml create mode 100644 tests_e2e/test_suites/vmss.yml rename tests_e2e/tests/lib/{azure_client.py => azure_sdk_client.py} (67%) create mode 100644 tests_e2e/tests/lib/resource_group_client.py create mode 100644 tests_e2e/tests/lib/virtual_machine_scale_set_client.py create mode 100755 tests_e2e/tests/samples/vmss_test.py diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index fbd6cfe8f8..eff661712d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Type import tests_e2e -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentTest, AgentVmTest, AgentVmssTest 
class TestInfo(object): @@ -31,7 +31,7 @@ class TestInfo(object): Description of a test """ # The class that implements the test - test_class: Type[AgentTest] + test_class: Type[AgentVmTest] # If True, an error in the test blocks the execution of the test suite (defaults to False) blocks_suite: bool @@ -57,6 +57,8 @@ class TestSuiteInfo(object): locations: List[str] # Whether this suite must run on its own test VM owns_vm: bool + # If True, the suite must run on a scale set (instead of a single VM) + executes_on_scale_set: bool # Whether to install the test Agent on the test VM install_test_agent: bool # Customization for the ARM template used when creating the test VM @@ -222,6 +224,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: This is useful for suites that modify the test VMs in such a way that the setup may cause problems in other test suites (for example, some tests targeted to the HGAP block internet access in order to force the agent to use the HGAP). + * executes_on_scale_set - [Optional; boolean] True indicates that the test runs on a scale set. * install_test_agent - [Optional; boolean] By default the setup process installs the test Agent on the test VMs; set this property to False to skip the installation. * template - [Optional; string] If given, the ARM template for the test VM is customized using the given Python module. 
@@ -266,6 +269,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: test_suite_info.locations = locations test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] + test_suite_info.executes_on_scale_set = "executes_on_scale_set" in test_suite and test_suite["executes_on_scale_set"] test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"] test_suite_info.template = test_suite.get("template", "") @@ -281,7 +285,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: return test_suite_info @staticmethod - def _load_test_class(relative_path: str) -> Type[AgentTest]: + def _load_test_class(relative_path: str) -> Type[AgentVmTest]: """ Loads an AgentTest from its source code file, which is given as a path relative to WALinuxAgent/tests_e2e/tests. """ @@ -289,8 +293,8 @@ def _load_test_class(relative_path: str) -> Type[AgentTest]: spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.{relative_path.replace('/', '.').replace('.py', '')}", str(full_path)) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - # return all the classes in the module that are subclasses of AgentTest but are not AgentTest itself. - matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentTest] + # return all the classes in the module that are subclasses of AgentTest but are not AgentVmTest or AgentVmssTest themselves. 
+ matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentVmTest and v != AgentVmssTest] if len(matches) != 1: raise Exception(f"Error in {full_path} (each test file must contain exactly one class derived from AgentTest)") return matches[0] diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml deleted file mode 100644 index 8ef5baba28..0000000000 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ /dev/null @@ -1,149 +0,0 @@ -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# Executes the test suites on an existing VM -# -name: ExistingVM - -testcase: - - criteria: - area: waagent - -extension: - - "../lib" - -variable: - # - # These variables identify the existing VM, and the user for SSH connections - # - - name: cloud - value: "AzureCloud" - is_case_visible: true - - name: subscription_id - value: "" - - name: resource_group_name - value: "" - - name: vm_name - value: "" - - name: location - value: "" - - - name: user - value: "" - - name: identity_file - value: "" - is_secret: true - - # - # The test suites to execute - # - - name: test_suites - value: "agent_bvt" - - # - # These variables define parameters for the AgentTestSuite; see the test wiki for details. 
- # - # NOTE: c_test_suites, generated by the AgentTestSuitesCombinator, is also a parameter - # for the AgentTestSuite - # - # Root directory for log files (optional) - - name: log_path - value: "" - is_case_visible: true - - # Whether to collect logs from the test VM - - name: collect_logs - value: "failed" - is_case_visible: true - - # Whether to skip setup of the test VM - - name: skip_setup - value: false - is_case_visible: true - - # - # The values for these variables are generated by the AgentTestSuitesCombinator. See - # tests_e2e/orchestrator/runbook.yml for details. - # - - name: c_env_name - value: "" - is_case_visible: true - - name: c_vm_name - value: "" - - name: c_marketplace_image_information_location - value: "" - - name: c_shared_resource_group_location - value: "" - - name: c_location - value: "" - is_case_visible: true - - name: c_test_suites - value: [] - is_case_visible: true - - name: c_vm_tags - value: {} - - # - # Set these variables to use an SSH proxy when executing the runbook - # - - name: proxy - value: False - - name: proxy_host - value: "" - - name: proxy_user - value: "foo" - - name: proxy_identity_file - value: "" - is_secret: true - -platform: - - type: azure - admin_username: $(user) - admin_private_key_file: $(identity_file) - azure: - cloud: $(cloud) - marketplace_image_information_location: $(c_marketplace_image_information_location) - shared_resource_group_location: $(c_shared_resource_group_location) - resource_group_name: $(resource_group_name) - deploy: false - subscription_id: $(subscription_id) - vm_tags: $(c_vm_tags) - requirement: - azure: - name: $(c_vm_name) - location: $(c_location) - -combinator: - type: agent_test_suites - test_suites: $(test_suites) - cloud: $(cloud) - location: $(location) - vm_name: $(vm_name) - -notifier: - - type: env_stats - - type: agent.junit - -dev: - enabled: $(proxy) - mock_tcp_ping: $(proxy) - jump_boxes: - - private_key_file: $(proxy_identity_file) - address: $(proxy_host) - username: 
$(proxy_user) - password: "dummy" diff --git a/tests_e2e/test_suites/vmss.yml b/tests_e2e/test_suites/vmss.yml new file mode 100644 index 0000000000..d9ca6be01f --- /dev/null +++ b/tests_e2e/test_suites/vmss.yml @@ -0,0 +1,8 @@ +# +# Sample test for scale sets +# +name: "VMSS" +tests: + - "samples/vmss_test.py" +executes_on_scale_set: true +images: "ubuntu_2004" diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index 2eac007afd..b64488dcc5 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -23,7 +23,7 @@ from assertpy import fail from typing import Any, Dict, List -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import FAIL_EXIT_CODE from tests_e2e.tests.lib.shell import CommandError @@ -45,28 +45,37 @@ class RemoteTestError(CommandError): class AgentTest(ABC): """ - Defines the interface for agent tests, which are simply constructed from an AgentTestContext and expose a single method, - run(), to execute the test. + Abstract base class for Agent tests """ def __init__(self, context: AgentTestContext): - self._context = context + self._context: AgentTestContext = context @abstractmethod def run(self): + """ + Test must define this method, which is used to execute the test. + """ pass def get_ignore_error_rules(self) -> List[Dict[str, Any]]: - # Tests can override this method to return a list with rules to ignore errors in the agent log (see agent_log.py for sample rules). + """ + Tests can override this method to return a list with rules to ignore errors in the agent log (see agent_log.py for sample rules). 
+ """ return [] @classmethod def run_from_command_line(cls): """ Convenience method to execute the test when it is being invoked directly from the command line (as opposed as - being invoked from a test framework or library. + being invoked from a test framework or library.) """ try: - cls(AgentTestContext.from_args()).run() + if issubclass(cls, AgentVmTest): + cls(AgentVmTestContext.from_args()).run() + elif issubclass(cls, AgentVmssTest): + cls(AgentVmssTestContext.from_args()).run() + else: + raise Exception(f"Class {cls.__name__} is not a valid test class") except SystemExit: # Bad arguments pass except AssertionError as e: @@ -78,12 +87,11 @@ def run_from_command_line(cls): sys.exit(0) - def _run_remote_test(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: + def _run_remote_test(self, ssh_client: SshClient, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ Derived classes can use this method to execute a remote test (a test that runs over SSH). 
""" try: - ssh_client: SshClient = self._context.create_ssh_client() output = ssh_client.run_command(command=command, use_sudo=use_sudo, attempts=attempts, attempt_delay=attempt_delay) log.info("*** PASSED: [%s]\n%s", command, self._indent(output)) except CommandError as error: @@ -94,3 +102,16 @@ def _run_remote_test(self, command: str, use_sudo: bool = False, attempts: int = @staticmethod def _indent(text: str, indent: str = " " * 8): return "\n".join(f"{indent}{line}" for line in text.splitlines()) + + +class AgentVmTest(AgentTest, ABC): + """ + Base class for Agent tests that run on a single VM + """ + + +class AgentVmssTest(AgentTest, ABC): + """ + Base class for Agent tests that run on a scale set + """ + diff --git a/tests_e2e/tests/lib/agent_test_context.py b/tests_e2e/tests/lib/agent_test_context.py index e791542894..c807247b8a 100644 --- a/tests_e2e/tests/lib/agent_test_context.py +++ b/tests_e2e/tests/lib/agent_test_context.py @@ -17,161 +17,107 @@ import argparse import os +from abc import ABC from pathlib import Path -import tests_e2e -from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient from tests_e2e.tests.lib.ssh_client import SshClient -class AgentTestContext: +class AgentTestContext(ABC): """ - Execution context for agent tests. Defines the test VM, working directories and connection info for the tests. - - NOTE: The context is shared by all tests in the same runbook execution. Tests within the same test suite - are executed sequentially, but multiple test suites may be executed concurrently depending on the - concurrency level of the runbook. + Base class for the execution context of agent tests; includes the working directories and SSH info for the tests. 
""" - class Paths: - DEFAULT_TEST_SOURCE_DIRECTORY = Path(tests_e2e.__path__[0]) - - def __init__( - self, - working_directory: Path, - remote_working_directory: Path, - test_source_directory: Path = DEFAULT_TEST_SOURCE_DIRECTORY - ): - self._test_source_directory: Path = test_source_directory - self._working_directory: Path = working_directory - self._remote_working_directory: Path = remote_working_directory - - class Connection: - DEFAULT_SSH_PORT = 22 - - def __init__( - self, - ip_address: str, - username: str, - private_key_file: Path, - ssh_port: int = DEFAULT_SSH_PORT - ): - self._ip_address: str = ip_address - self._username: str = username - self._private_key_file: Path = private_key_file - self._ssh_port: int = ssh_port - - def __init__(self, vm: VmIdentifier, paths: Paths, connection: Connection): - self._vm: VmIdentifier = vm - self._paths = paths - self._connection = connection - - @property - def vm(self) -> VmIdentifier: - """ - The test VM (the VM on which the tested Agent is running) - """ - return self._vm + DEFAULT_SSH_PORT = 22 - @property - def vm_ip_address(self) -> str: - """ - The IP address of the test VM - """ - return self._connection._ip_address + def __init__(self, working_directory: Path, username: str, identity_file: Path, ssh_port: int): + self.working_directory: Path = working_directory + self.username: str = username + self.identity_file: Path = identity_file + self.ssh_port: int = ssh_port - @property - def test_source_directory(self) -> Path: - """ - Root directory for the source code of the tests. Used to build paths to specific scripts. + @staticmethod + def _create_argument_parser() -> argparse.ArgumentParser: """ - return self._paths._test_source_directory - - @property - def working_directory(self) -> Path: + Creates an ArgumentParser that includes the arguments common to the concrete classes derived from AgentTestContext """ - Tests can create temporary files under this directory. 
+ parser = argparse.ArgumentParser() + parser.add_argument('-c', '--cloud', dest="cloud", required=False, choices=['AzureCloud', 'AzureChinaCloud', 'AzureUSGovernment'], default="AzureCloud") + parser.add_argument('-g', '--group', required=True) + parser.add_argument('-l', '--location', required=True) + parser.add_argument('-s', '--subscription', required=True) - """ - return self._paths._working_directory + parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(Path().home() / "tmp")) - @property - def remote_working_directory(self) -> Path: - """ - Tests can create temporary files under this directory on the test VM. - """ - return self._paths._remote_working_directory + parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) + parser.add_argument('-k', '--identity-file', dest="identity_file", required=False, default=str(Path.home() / ".ssh" / "id_rsa")) + parser.add_argument('-p', '--ssh-port', dest="ssh_port", required=False, default=AgentTestContext.DEFAULT_SSH_PORT) - @property - def username(self) -> str: - """ - The username to use for SSH connections - """ - return self._connection._username + return parser - @property - def private_key_file(self) -> Path: - """ - The file containing the private SSH key for the username - """ - return self._connection._private_key_file - @property - def ssh_port(self) -> int: - """ - Port for SSH connections - """ - return self._connection._ssh_port +class AgentVmTestContext(AgentTestContext): + """ + Execution context for agent tests targeted to individual VMs. 
+ """ + def __init__(self, working_directory: Path, vm: VirtualMachineClient, ip_address: str, username: str, identity_file: Path, ssh_port: int = AgentTestContext.DEFAULT_SSH_PORT): + super().__init__(working_directory, username, identity_file, ssh_port) + self.vm: VirtualMachineClient = vm + self.ip_address: str = ip_address def create_ssh_client(self) -> SshClient: """ Convenience method to create an SSH client using the connection info from the context. """ return SshClient( - ip_address=self.vm_ip_address, + ip_address=self.ip_address, username=self.username, - private_key_file=self.private_key_file, + identity_file=self.identity_file, port=self.ssh_port) @staticmethod def from_args(): """ - Creates an AgentTestContext from the command line arguments. + Creates an AgentVmTestContext from the command line arguments. """ - parser = argparse.ArgumentParser() - parser.add_argument('-c', '--cloud', dest="cloud", required=False, choices=['AzureCloud', 'AzureChinaCloud', 'AzureUSGovernment'], default="AzureCloud") - parser.add_argument('-g', '--group', required=True) - parser.add_argument('-l', '--location', required=True) - parser.add_argument('-s', '--subscription', required=True) + parser = AgentTestContext._create_argument_parser() parser.add_argument('-vm', '--vm', required=True) + parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default - parser.add_argument('-rw', '--remote-working-directory', dest="remote_working_directory", required=False, default=str(Path('/home')/os.getenv("USER"))) - parser.add_argument('-t', '--test-source-directory', dest="test_source_directory", required=False, default=str(AgentTestContext.Paths.DEFAULT_TEST_SOURCE_DIRECTORY)) - parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(Path().home()/"tmp")) + args = parser.parse_args() - parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default - 
parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) - parser.add_argument('-k', '--private-key-file', dest="private_key_file", required=False, default=str(Path.home()/".ssh"/"id_rsa")) - parser.add_argument('-p', '--ssh-port', dest="ssh_port", required=False, default=AgentTestContext.Connection.DEFAULT_SSH_PORT) + working_directory: Path = Path(args.working_directory) + if not working_directory.exists(): + working_directory.mkdir(exist_ok=True) + + vm = VirtualMachineClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vm) + ip_address = args.ip_address if args.ip_address is not None else args.vm + return AgentVmTestContext(working_directory=working_directory, vm=vm, ip_address=ip_address, username=args.username, identity_file=Path(args.identity_file), ssh_port=args.ssh_port) + + +class AgentVmssTestContext(AgentTestContext): + """ + Execution context for agent tests targeted to VM Scale Sets. + """ + def __init__(self, working_directory: Path, vmss: VirtualMachineScaleSetClient, username: str, identity_file: Path, ssh_port: int = AgentTestContext.DEFAULT_SSH_PORT): + super().__init__(working_directory, username, identity_file, ssh_port) + self.vmss: VirtualMachineScaleSetClient = vmss + + @staticmethod + def from_args(): + """ + Creates an AgentVmssTestContext from the command line arguments. 
+ """ + parser = AgentTestContext._create_argument_parser() + parser.add_argument('-vmss', '--vmss', required=True) args = parser.parse_args() - working_directory = Path(args.working_directory) + working_directory: Path = Path(args.working_directory) if not working_directory.exists(): working_directory.mkdir(exist_ok=True) - return AgentTestContext( - vm=VmIdentifier( - cloud=args.cloud, - location=args.location, - subscription=args.subscription, - resource_group=args.group, - name=args.vm), - paths=AgentTestContext.Paths( - working_directory=Path(working_directory), - remote_working_directory=Path(args.remote_working_directory), - test_source_directory=Path(args.test_source_directory)), - connection=AgentTestContext.Connection( - ip_address=args.ip_address if args.ip_address is not None else args.vm, - username=args.username, - private_key_file=Path(args.private_key_file), - ssh_port=args.ssh_port)) + vmss = VirtualMachineScaleSetClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vmss) + return AgentVmssTestContext(working_directory=working_directory, vmss=vmss, username=args.username, identity_file=Path(args.identity_file), ssh_port=args.ssh_port) + diff --git a/tests_e2e/tests/lib/azure_client.py b/tests_e2e/tests/lib/azure_sdk_client.py similarity index 67% rename from tests_e2e/tests/lib/azure_client.py rename to tests_e2e/tests/lib/azure_sdk_client.py index 3e01762e8b..078c6d1d62 100644 --- a/tests_e2e/tests/lib/azure_client.py +++ b/tests_e2e/tests/lib/azure_sdk_client.py @@ -17,18 +17,32 @@ from typing import Any, Callable +from azure.identity import DefaultAzureCredential from azure.core.polling import LROPoller +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry -class AzureClient: +class AzureSdkClient: """ - Utilities for classes using the Azure SDK. 
+ Base class for classes implementing clients of the Azure SDK. """ _DEFAULT_TIMEOUT = 10 * 60 # (in seconds) + @staticmethod + def create(client_type: type, cloud: str, subscription_id: str): + """ + Creates an SDK client of the given 'client_type' + """ + azure_cloud = AZURE_CLOUDS[cloud] + return client_type( + base_url=azure_cloud.endpoints.resource_manager, + credential=DefaultAzureCredential(authority=azure_cloud.endpoints.active_directory), + credential_scopes=[azure_cloud.endpoints.resource_manager + "/.default"], + subscription_id=subscription_id) + @staticmethod def _execute_async_operation(operation: Callable[[], LROPoller], operation_name: str, timeout: int) -> Any: """ @@ -42,3 +56,4 @@ def _execute_async_operation(operation: Callable[[], LROPoller], operation_name: raise TimeoutError(f"[{operation_name}] did not complete within {timeout} seconds") log.info("[%s] completed", operation_name) return poller.result() + diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 45af22745f..26113e445c 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -18,31 +18,16 @@ from typing import Dict, List -class VmIdentifier(object): - def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): - """ - Represents the information that identifies a VM to the ARM APIs - """ - self.cloud: str = cloud - self.location = location - self.subscription: str = subscription - self.resource_group: str = resource_group - self.name: str = name - - def __str__(self): - return f"{self.resource_group}:{self.name}" - - class VmExtensionIdentifier(object): - def __init__(self, publisher: str, ext_type: str, version: str): - """ - Represents the information that identifies an extension to the ARM APIs + """ + Represents the information that identifies an extension to the ARM APIs - publisher - e.g. Microsoft.Azure.Extensions - type - e.g. CustomScript - version - e.g. 
2.1, 2.* - name - arbitrary name for the extension ARM resource - """ + publisher - e.g. Microsoft.Azure.Extensions + type - e.g. CustomScript + version - e.g. 2.1, 2.* + name - arbitrary name for the extension ARM resource + """ + def __init__(self, publisher: str, ext_type: str, version: str): self.publisher: str = publisher self.type: str = ext_type self.version: str = version diff --git a/tests_e2e/tests/lib/resource_group_client.py b/tests_e2e/tests/lib/resource_group_client.py new file mode 100644 index 0000000000..d48a74d065 --- /dev/null +++ b/tests_e2e/tests/lib/resource_group_client.py @@ -0,0 +1,74 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to create a resource group and deploy an arm template to it +# +from typing import Dict, Any + +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.resource import ResourceManagementClient +from azure.mgmt.resource.resources.models import DeploymentProperties, DeploymentMode + +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.logging import log + + +class ResourceGroupClient(AzureSdkClient): + """ + Provides operations on resource groups (create, template deployment, etc). 
+ """ + def __init__(self, cloud: str, subscription: str, name: str, location: str = ""): + super().__init__() + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.name: str = name + self._compute_client = AzureSdkClient.create(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create(ResourceManagementClient, cloud, subscription) + + def create(self) -> None: + """ + Creates a resource group + """ + log.info("Creating resource group %s", self) + self._resource_client.resource_groups.create_or_update(self.name, {"location": self.location}) + + def deploy_template(self, template: Dict[str, Any], parameters: Dict[str, Any] = None): + """ + Deploys an ARM template to the resource group + """ + if parameters: + properties = DeploymentProperties(template=template, parameters=parameters, mode=DeploymentMode.incremental) + else: + properties = DeploymentProperties(template=template, mode=DeploymentMode.incremental) + + log.info("Deploying template to resource group %s...", self) + self._execute_async_operation( + operation=lambda: self._resource_client.deployments.begin_create_or_update(self.name, 'TestDeployment', {'properties': properties}), + operation_name=f"Deploy template to resource group {self}", + timeout=AzureSdkClient._DEFAULT_TIMEOUT) + + def delete(self) -> None: + """ + Deletes the resource group + """ + log.info("Deleting resource group %s (no wait)", self) + self._resource_client.resource_groups.begin_delete(self.name) # Do not wait for the deletion to complete + + def __str__(self): + return f"{self.name}" diff --git a/tests_e2e/tests/lib/ssh_client.py b/tests_e2e/tests/lib/ssh_client.py index 3e0d7269c3..ae7600c110 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -28,11 +28,11 @@ class SshClient(object): - def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22): - self._ip_address: str = ip_address 
- self._username: str = username - self._private_key_file: Path = private_key_file - self._port: int = port + def __init__(self, ip_address: str, username: str, identity_file: Path, port: int = 22): + self.ip_address: str = ip_address + self.username: str = username + self.identity_file: Path = identity_file + self.port: int = port def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> str: """ @@ -42,13 +42,13 @@ def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTE if re.match(r"^\s*sudo\s*", command): raise Exception("Do not include 'sudo' in the 'command' argument, use the 'use_sudo' parameter instead") - destination = f"ssh://{self._username}@{self._ip_address}:{self._port}" + destination = f"ssh://{self.username}@{self.ip_address}:{self.port}" # Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there. # Note, too, that when using sudo we need to carry over the value of PATH to the sudo session sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else '' command = [ - "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, + "ssh", "-o", "StrictHostKeyChecking=no", "-i", self.identity_file, destination, f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}" ] @@ -79,11 +79,11 @@ def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool, attempts: int, attempt_delay: int) -> None: if remote_source: - source = f"{self._username}@{self._ip_address}:{source}" + source = f"{self.username}@{self.ip_address}:{source}" if remote_target: - target = f"{self._username}@{self._ip_address}:{target}" + target = f"{self.username}@{self.ip_address}:{target}" - command = ["scp", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file] + command = 
["scp", "-o", "StrictHostKeyChecking=no", "-i", self.identity_file] if recursive: command.append("-r") command.extend([str(source), str(target)]) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index dd739fe535..b82032b1b7 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -24,59 +24,65 @@ import time from typing import Any, Dict, List -from azure.identity import DefaultAzureCredential from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineInstanceView, VirtualMachine +from azure.mgmt.network import NetworkManagementClient +from azure.mgmt.network.models import NetworkInterface, PublicIPAddress from azure.mgmt.resource import ResourceManagementClient -from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.azure_client import AzureClient -from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -class VirtualMachineClient(AzureClient): +class VirtualMachineClient(AzureSdkClient): """ - Provides operations on virtual machine (get instance view, update, restart, etc). + Provides operations on virtual machines (get instance view, update, restart, etc). 
""" - def __init__(self, vm: VmIdentifier): + def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): super().__init__() - self._identifier: VmIdentifier = vm - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - self._resource_client = ResourceManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.resource_group: str = resource_group + self.name: str = name + self._compute_client = AzureSdkClient.create(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create(ResourceManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create(NetworkManagementClient, cloud, subscription) + + def get_ip_address(self) -> str: + """ + Retrieves the public IP address of the virtual machine + """ + vm_model = self.get_model() + nic: NetworkInterface = self._network_client.network_interfaces.get( + resource_group_name=self.resource_group, + network_interface_name=vm_model.network_profile.network_interfaces[0].id.split('/')[-1]) # the name of the interface is the last component of the id + public_ip: PublicIPAddress = self._network_client.public_ip_addresses.get( + resource_group_name=self.resource_group, + public_ip_address_name=nic.ip_configurations[0].public_ip_address.id.split('/')[-1]) # the name of the ip address is the last component of the id + return public_ip.ip_address def get_model(self) -> VirtualMachine: """ Retrieves the 
model of the virtual machine. """ - log.info("Retrieving VM model for %s", self._identifier) + log.info("Retrieving VM model for %s", self) return execute_with_retry( lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) + resource_group_name=self.resource_group, + vm_name=self.name)) def get_instance_view(self) -> VirtualMachineInstanceView: """ Retrieves the instance view of the virtual machine """ - log.info("Retrieving instance view for %s", self._identifier) + log.info("Retrieving instance view for %s", self) return execute_with_retry(lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name, + resource_group_name=self.resource_group, + vm_name=self.name, expand="instanceView" ).instance_view) @@ -84,37 +90,37 @@ def get_extensions(self) -> List[VirtualMachineExtension]: """ Retrieves the extensions installed on the virtual machine """ - log.info("Retrieving extensions for %s", self._identifier) + log.info("Retrieving extensions for %s", self) return execute_with_retry( lambda: self._compute_client.virtual_machine_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) + resource_group_name=self.resource_group, + vm_name=self.name)) - def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def update(self, properties: Dict[str, Any], timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Updates a set of properties on the virtual machine """ # location is a required by begin_create_or_update, always add it properties_copy = properties.copy() - properties_copy["location"] = self._identifier.location + properties_copy["location"] = self.location - log.info("Updating %s with properties: %s", self._identifier, properties_copy) + log.info("Updating %s with properties: %s", self, properties_copy) 
self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_create_or_update( - self._identifier.resource_group, - self._identifier.name, + self.resource_group, + self.name, properties_copy), - operation_name=f"Update {self._identifier}", + operation_name=f"Update {self}", timeout=timeout) - def reapply(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def reapply(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Reapplies the goal state on the virtual machine """ self._execute_async_operation( - lambda: self._compute_client.virtual_machines.begin_reapply(self._identifier.resource_group, self._identifier.name), - operation_name=f"Reapply {self._identifier}", + lambda: self._compute_client.virtual_machines.begin_reapply(self.resource_group, self.name), + operation_name=f"Reapply {self}", timeout=timeout) def restart( @@ -122,7 +128,7 @@ def restart( wait_for_boot, ssh_client: SshClient = None, boot_timeout: datetime.timedelta = datetime.timedelta(minutes=5), - timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Restarts (reboots) the virtual machine. 
@@ -138,9 +144,9 @@ def restart( self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name), - operation_name=f"Restart {self._identifier}", + resource_group_name=self.resource_group, + vm_name=self.name), + operation_name=f"Restart {self}", timeout=timeout) if not wait_for_boot: @@ -148,7 +154,7 @@ def restart( start = datetime.datetime.utcnow() while datetime.datetime.utcnow() < start + boot_timeout: - log.info("Waiting for VM %s to boot", self._identifier) + log.info("Waiting for VM %s to boot", self) time.sleep(15) # Note that we always sleep at least 1 time, to give the reboot time to start instance_view = self.get_instance_view() power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code] @@ -164,19 +170,15 @@ def restart( log.info("Uptime: %s", uptime) boot_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=float(uptime)) if boot_time > before_restart: - log.info("VM %s completed boot and is running. Boot time: %s", self._identifier, boot_time) + log.info("VM %s completed boot and is running. Boot time: %s", self, boot_time) return log.info("The VM has not rebooted yet. Restart time: %s. 
Boot time: %s", before_restart, boot_time) except CommandError as e: if e.exit_code == 255 and "Connection refused" in str(e): - log.info("VM %s is not yet accepting SSH connections", self._identifier) + log.info("VM %s is not yet accepting SSH connections", self) else: raise - raise Exception(f"VM {self._identifier} did not boot after {boot_timeout}") + raise Exception(f"VM {self} did not boot after {boot_timeout}") def __str__(self): - return f"{self._identifier}" - - - - + return f"{self.resource_group}:{self.name}" diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index 6697d594a6..6d35756d16 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -26,32 +26,24 @@ from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineExtensionInstanceView -from azure.identity import DefaultAzureCredential -from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.azure_client import AzureClient -from tests_e2e.tests.lib.identifiers import VmIdentifier, VmExtensionIdentifier +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.identifiers import VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class VirtualMachineExtensionClient(AzureClient): +class VirtualMachineExtensionClient(AzureSdkClient): """ Client for operations virtual machine extensions. 
""" - def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_name: str = None): + def __init__(self, vm: VirtualMachineClient, extension: VmExtensionIdentifier, resource_name: str = None): super().__init__() - self._vm: VmIdentifier = vm + self._vm: VirtualMachineClient = vm self._identifier = extension self._resource_name = resource_name or extension.type - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client: ComputeManagementClient = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self._compute_client: ComputeManagementClient = AzureSdkClient.create(ComputeManagementClient, self._vm.cloud, self._vm.subscription) def get_instance_view(self) -> VirtualMachineExtensionInstanceView: """ @@ -73,7 +65,7 @@ def enable( auto_upgrade_minor_version: bool = True, force_update: bool = False, force_update_tag: str = None, - timeout: int = AzureClient._DEFAULT_TIMEOUT + timeout: int = AzureSdkClient._DEFAULT_TIMEOUT ) -> None: """ Performs an enable operation on the extension. 
@@ -116,7 +108,7 @@ def enable( log.info("Provisioning state: %s", result.provisioning_state) - def delete(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def delete(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Performs a delete operation on the extension """ diff --git a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py new file mode 100644 index 0000000000..5fd77bd03f --- /dev/null +++ b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py @@ -0,0 +1,95 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to execute operations on virtual machines scale sets (list instances, delete, etc). 
+# + +import re + +from typing import List + +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.compute.models import VirtualMachineScaleSetVM +from azure.mgmt.network import NetworkManagementClient + +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.logging import log + + +class VmssInstanceIpAddress(object): + """ + IP address of a virtual machine scale set instance + """ + def __init__(self, instance_name: str, ip_address: str): + self.instance_name: str = instance_name + self.ip_address: str = ip_address + + def __str__(self): + return f"{self.instance_name}:{self.ip_address}" + + +class VirtualMachineScaleSetClient(AzureSdkClient): + """ + Provides operations on virtual machine scale sets. + """ + def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): + super().__init__() + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.resource_group: str = resource_group + self.name: str = name + self._compute_client = AzureSdkClient.create(ComputeManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create(NetworkManagementClient, cloud, subscription) + + def list_vms(self) -> List[VirtualMachineScaleSetVM]: + """ + Returns the VM instances of the virtual machine scale set + """ + return list(self._compute_client.virtual_machine_scale_set_vms.list(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name)) + + def get_instances_ip_address(self) -> List[VmssInstanceIpAddress]: + """ + Returns a list containing the IP addresses of scale set instances + """ + log.info("Retrieving instances of scale set %s", self) + ip_addresses = self._network_client.public_ip_addresses.list_virtual_machine_scale_set_public_ip_addresses(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name) + ip_addresses = list(ip_addresses) + + def parse_instance(resource_id: 
str) -> str: + # the resource_id looks like /subscriptions/{subs}/resourceGroups/{rg}/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss}/virtualMachines/{instance}/networkInterfaces/{netiace}/ipConfigurations/ipconfig1/publicIPAddresses/{name} + match = re.search(r'virtualMachines/(?P<instance>[0-9]+)/networkInterfaces', resource_id) + if match is None: + raise Exception(f"Unable to parse instance from IP address ID:{resource_id}") + return match.group('instance') + + return [VmssInstanceIpAddress(instance_name=f"{self.name}_{parse_instance(a.id)}", ip_address=a.ip_address) for a in ip_addresses if a.ip_address is not None] + + def delete_extension(self, extension: str, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: + """ + Deletes the given extension + """ + log.info("Deleting extension %s from %s", extension, self) + self._execute_async_operation( + operation=lambda: self._compute_client.virtual_machine_scale_set_extensions.begin_delete(resource_group_name=self.resource_group, vm_scale_set_name=self.name, vmss_extension_name=extension), + operation_name=f"Delete {extension} from {self}", + timeout=timeout) + + def __str__(self): + return f"{self.resource_group}:{self.name}" + diff --git a/tests_e2e/tests/samples/vmss_test.py b/tests_e2e/tests/samples/vmss_test.py new file mode 100755 index 0000000000..0f50dad8f4 --- /dev/null +++ b/tests_e2e/tests/samples/vmss_test.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from tests_e2e.tests.lib.agent_test import AgentVmssTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient + + +class VmssTest(AgentVmssTest): + """ + Sample test for scale sets + """ + def run(self): + for address in self._context.vmss.get_instances_ip_address(): + ssh_client: SshClient = SshClient(ip_address=address.ip_address, username=self._context.username, identity_file=self._context.identity_file) + log.info("%s: Hostname: %s", address.instance_name, ssh_client.run_command("hostname").strip()) + log.info("* PASSED *") + + +if __name__ == "__main__": + VmssTest.run_from_command_line() From 029f5aee1f5d61f4ed0bca5af4f1a447139237b0 Mon Sep 17 00:00:00 2001 From: narrieta Date: Thu, 19 Oct 2023 12:54:09 -0700 Subject: [PATCH 03/30] . --- tests_e2e/orchestrator/templates/vmss.json | 253 ++++++++++++++++++ .../disable_agent_provisioning.py | 7 +- .../tests/lib/add_network_security_group.py | 136 ++++++---- tests_e2e/tests/lib/update_arm_template.py | 5 +- .../deny_outbound_connections.py | 5 +- 5 files changed, 349 insertions(+), 57 deletions(-) create mode 100644 tests_e2e/orchestrator/templates/vmss.json diff --git a/tests_e2e/orchestrator/templates/vmss.json b/tests_e2e/orchestrator/templates/vmss.json new file mode 100644 index 0000000000..293edf80c1 --- /dev/null +++ b/tests_e2e/orchestrator/templates/vmss.json @@ -0,0 +1,253 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "username": { + "type": "string" + }, + "sshPublicKey": { + "type": "string" + }, + "vmName": { + "type": "string" + }, + "scenarioPrefix": { + "type": "string", + "defaultValue": "e2e-test" + }, + "publisher": { + "type": "string" + }, + "offer": { + "type": "string" + }, + "sku": { + "type": "string" + }, + "version": { + 
"type": "string" + } + }, + "variables": { + "nicName": "[concat(parameters('scenarioPrefix'),'Nic')]", + "vnetAddressPrefix": "10.130.0.0/16", + "subnetName": "[concat(parameters('scenarioPrefix'),'Subnet')]", + "subnetPrefix": "10.130.0.0/24", + "publicIPAddressName": "[concat(parameters('scenarioPrefix'),'PublicIp')]", + "lbIpName": "[concat(parameters('scenarioPrefix'),'PublicLbIp')]", + "virtualNetworkName": "[concat(parameters('scenarioPrefix'),'Vnet')]", + "lbName": "[concat(parameters('scenarioPrefix'),'lb')]", + "lbIpId": "[resourceId('Microsoft.Network/publicIPAddresses', variables('lbIpName'))]", + "bepoolName": "[concat(variables('lbName'), 'bepool')]", + "natpoolName": "[concat(variables('lbName'), 'natpool')]", + "feIpConfigName": "[concat(variables('lbName'), 'fepool', 'IpConfig')]", + "sshProbeName": "[concat(variables('lbName'), 'probe')]", + "vnetID": "[resourceId('Microsoft.Network/virtualNetworks',variables('virtualNetworkName'))]", + "subnetRef": "[concat(variables('vnetID'),'/subnets/',variables('subnetName'))]", + "lbId": "[resourceId('Microsoft.Network/loadBalancers', variables('lbName'))]", + "bepoolID": "[concat(variables('lbId'), '/backendAddressPools/', variables('bepoolName'))]", + "natpoolID": "[concat(variables('lbId'), '/inboundNatPools/', variables('natpoolName'))]", + "feIpConfigId": "[concat(variables('lbId'), '/frontendIPConfigurations/', variables('feIpConfigName'))]", + "sshProbeId": "[concat(variables('lbId'), '/probes/', variables('sshProbeName'))]", + "sshKeyPath": "[concat('/home/', parameters('username'), '/.ssh/authorized_keys')]" + }, + "resources": [ + { + "apiVersion": "2023-06-01", + "type": "Microsoft.Network/virtualNetworks", + "name": "[variables('virtualNetworkName')]", + "location": "[resourceGroup().location]", + "properties": { + "addressSpace": { + "addressPrefixes": [ + "[variables('vnetAddressPrefix')]" + ] + }, + "subnets": [ + { + "name": "[variables('subnetName')]", + "properties": { + "addressPrefix": 
"[variables('subnetPrefix')]" + } + } + ] + } + }, + { + "type": "Microsoft.Network/publicIPAddresses", + "name": "[variables('lbIpName')]", + "location": "[resourceGroup().location]", + "apiVersion": "2023-06-01", + "properties": { + "publicIPAllocationMethod": "Dynamic", + "dnsSettings": { + "domainNameLabel": "[parameters('vmName')]" + } + } + }, + { + "type": "Microsoft.Network/loadBalancers", + "name": "[variables('lbName')]", + "location": "[resourceGroup().location]", + "apiVersion": "2020-06-01", + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/publicIPAddresses/', variables('lbIpName'))]" + ], + "properties": { + "frontendIPConfigurations": [ + { + "name": "[variables('feIpConfigName')]", + "properties": { + "PublicIpAddress": { + "id": "[variables('lbIpId')]" + } + } + } + ], + "backendAddressPools": [ + { + "name": "[variables('bepoolName')]" + } + ], + "inboundNatPools": [ + { + "name": "[variables('natpoolName')]", + "properties": { + "FrontendIPConfiguration": { + "Id": "[variables('feIpConfigId')]" + }, + "BackendPort": 22, + "Protocol": "tcp", + "FrontendPortRangeStart": 3500, + "FrontendPortRangeEnd": 4500 + } + } + ], + "loadBalancingRules": [ + { + "name": "ProbeRule", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('feIpConfigId')]" + }, + "backendAddressPool": { + "id": "[variables('bepoolID')]" + }, + "protocol": "Tcp", + "frontendPort": 80, + "backendPort": 80, + "idleTimeoutInMinutes": 5, + "probe": { + "id": "[variables('sshProbeId')]" + } + } + } + ], + "probes": [ + { + "name": "[variables('sshProbeName')]", + "properties": { + "protocol": "tcp", + "port": 22, + "intervalInSeconds": 5, + "numberOfProbes": 2 + } + } + ] + } + }, + { + "apiVersion": "2023-03-01", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "name": "[parameters('vmName')]", + "location": "[resourceGroup().location]", + "dependsOn": [ + 
"[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/loadBalancers/', variables('lbName'))]" + ], + "sku": { + "name": "Standard_D2s_v3", + "tier": "Standard", + "capacity": 3 + }, + "properties": { + "orchestrationMode": "Uniform", + "overprovision": false, + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + }, + "osProfile": { + "computerNamePrefix": "[parameters('vmName')]", + "adminUsername": "[parameters('username')]", + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "ssh": { + "publicKeys": [ + { + "path": "[variables('sshKeyPath')]", + "keyData": "[parameters('sshPublicKey')]" + } + ] + } + } + }, + "storageProfile": { + "osDisk": { + "osType": "Linux", + "createOption": "FromImage", + "caching": "ReadWrite", + "managedDisk": { + "storageAccountType": "Premium_LRS" + }, + "diskSizeGB": 64 + }, + "imageReference": { + "publisher": "[parameters('publisher')]", + "offer": "[parameters('offer')]", + "sku": "[parameters('sku')]", + "version": "[parameters('version')]" + } + }, + "diagnosticsProfile": { + "bootDiagnostics": { + "enabled": true + } + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[variables('nicName')]", + "properties": { + "primary": true, + "ipConfigurations": [ + { + "name": "ipconfig1", + "properties": { + "primary": true, + "publicIPAddressConfiguration": { + "name": "[variables('publicIPAddressName')]", + "properties": { + "idleTimeoutInMinutes": 15 + } + }, + "subnet": { + "id": "[variables('subnetRef')]" + } + } + } + ] + } + } + ] + } + }, + "upgradePolicy": { + "mode": "Automatic" + }, + "platformFaultDomainCount": 1 + } + } + ] +} diff --git a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py index 8de9e55967..6f0a562cd2 100755 --- a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py +++ 
b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py @@ -22,11 +22,14 @@ from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate -class DenyOutboundConnections(UpdateArmTemplate): +class DisableAgentProvisioning(UpdateArmTemplate): """ Updates the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: + if not is_lisa_template: + raise Exception('This test can only customize LISA ARM templates.') + # # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is # under the 'lisa' namespace. diff --git a/tests_e2e/tests/lib/add_network_security_group.py b/tests_e2e/tests/lib/add_network_security_group.py index 28cf69b59f..8bf781f0c4 100644 --- a/tests_e2e/tests/lib/add_network_security_group.py +++ b/tests_e2e/tests/lib/add_network_security_group.py @@ -32,14 +32,14 @@ class AddNetworkSecurityGroup(UpdateArmTemplate): """ Updates the ARM template to add a network security group allowing SSH access from the current machine. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: resources: List[Dict[str, Any]] = template["resources"] # Append the NSG to the list of resources network_security_group = json.loads(f"""{{ "type": "Microsoft.Network/networkSecurityGroups", "name": "{NETWORK_SECURITY_GROUP}", - "location": "[parameters('location')]", + "location": "[resourceGroup().location]", "apiVersion": "2020-05-01", "properties": {{ "securityRules": [] @@ -66,72 +66,104 @@ def update(self, template: Dict[str, Any]) -> None: except Exception as e: log.warning("******** Waagent: Failed to create Allow security rule for SSH, skipping rule: %s", e) - - # - # Add reference to the NSG to the properties of the subnets. 
- # - # The subnets are a copy property of the virtual network in LISA's ARM template: # - # { - # "condition": "[empty(parameters('virtual_network_resource_group'))]", - # "apiVersion": "2020-05-01", - # "type": "Microsoft.Network/virtualNetworks", - # "name": "[parameters('virtual_network_name')]", - # "location": "[parameters('location')]", - # "properties": { - # "addressSpace": { - # "addressPrefixes": [ - # "10.0.0.0/16" - # ] - # }, - # "copy": [ - # { - # "name": "subnets", - # "count": "[parameters('subnet_count')]", - # "input": { - # "name": "[concat(parameters('subnet_prefix'), copyIndex('subnets'))]", - # "properties": { - # "addressPrefix": "[concat('10.0.', copyIndex('subnets'), '.0/24')]" - # } - # } - # } - # ] - # } - # } + # Add a dependency on the NSG to the virtual network # network_resource = self._get_resource(resources, "Microsoft.Network/virtualNetworks") - - # Add a dependency on the NSG - nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" network_resource_dependencies = network_resource.get("dependsOn") + nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" if network_resource_dependencies is None: network_resource["dependsOn"] = [nsg_reference] else: network_resource_dependencies.append(nsg_reference) - subnets_copy = network_resource["properties"].get("copy") if network_resource.get("properties") is not None else None - if subnets_copy is None: - raise Exception("Cannot find the copy property of the virtual network in the ARM template") - - subnets = [i for i in subnets_copy if "name" in i and i["name"] == 'subnets'] - if len(subnets) == 0: - raise Exception("Cannot find the subnets of the virtual network in the ARM template") - - subnets_input = subnets[0].get("input") - if subnets_input is None: - raise Exception("Cannot find the input property of the subnets in the ARM template") - + # + # Add a reference to the NSG to the properties 
of the subnets. + # nsg_reference = json.loads(f"""{{ "networkSecurityGroup": {{ "id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" }} }}""") - subnets_properties = subnets_input.get("properties") - if subnets_properties is None: - subnets_input["properties"] = nsg_reference + if is_lisa_template: + # The subnets are a copy property of the virtual network in LISA's ARM template: + # + # { + # "condition": "[empty(parameters('virtual_network_resource_group'))]", + # "apiVersion": "2020-05-01", + # "type": "Microsoft.Network/virtualNetworks", + # "name": "[parameters('virtual_network_name')]", + # "location": "[parameters('location')]", + # "properties": { + # "addressSpace": { + # "addressPrefixes": [ + # "10.0.0.0/16" + # ] + # }, + # "copy": [ + # { + # "name": "subnets", + # "count": "[parameters('subnet_count')]", + # "input": { + # "name": "[concat(parameters('subnet_prefix'), copyIndex('subnets'))]", + # "properties": { + # "addressPrefix": "[concat('10.0.', copyIndex('subnets'), '.0/24')]" + # } + # } + # } + # ] + # } + # } + # + subnets_copy = network_resource["properties"].get("copy") if network_resource.get("properties") is not None else None + if subnets_copy is None: + raise Exception("Cannot find the copy property of the virtual network in the ARM template") + + subnets = [i for i in subnets_copy if "name" in i and i["name"] == 'subnets'] + if len(subnets) == 0: + raise Exception("Cannot find the subnets of the virtual network in the ARM template") + + subnets_input = subnets[0].get("input") + if subnets_input is None: + raise Exception("Cannot find the input property of the subnets in the ARM template") + + subnets_properties = subnets_input.get("properties") + if subnets_properties is None: + subnets_input["properties"] = nsg_reference + else: + subnets_properties.update(nsg_reference) else: - subnets_properties.update(nsg_reference) + # { + # "apiVersion": "2023-06-01", + # "type": 
"Microsoft.Network/virtualNetworks", + # "name": "[variables('virtualNetworkName')]", + # "location": "[resourceGroup().location]", + # "properties": { + # "addressSpace": { + # "addressPrefixes": [ + # "[variables('vnetAddressPrefix')]" + # ] + # }, + # "subnets": [ + # { + # "name": "[variables('subnetName')]", + # "properties": { + # "addressPrefix": "[variables('subnetPrefix')]", + # } + # } + # ] + # } + # } + subnets = network_resource["properties"].get("subnets") if network_resource.get("properties") is not None else None + if subnets is None: + raise Exception("Cannot find the subnets property of the virtual network in the ARM template") + + subnets_properties = subnets[0].get("properties") + if subnets_properties is None: + subnets["properties"] = nsg_reference + else: + subnets_properties.update(nsg_reference) @property def _my_ip_address(self) -> str: diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index 9637525f35..c50f7b74c7 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -22,12 +22,13 @@ class UpdateArmTemplate(ABC): @abstractmethod - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: """ Derived classes implement this method to customize the ARM template used to create the test VMs. The 'template' parameter is a dictionary created from the template's JSON document, as parsed by json.loads(). - The original JSON document is currently at https://github.com/microsoft/lisa/blob/main/lisa/sut_orchestrator/azure/arm_template.json + If the 'is_lisa_template' parameter is True, the template was created by LISA. 
The original JSON document is located at + https://github.com/microsoft/lisa/blob/main/lisa/sut_orchestrator/azure/arm_template.json """ @staticmethod diff --git a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py index 114999d5b4..838082d345 100755 --- a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py +++ b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py @@ -29,7 +29,10 @@ class DenyOutboundConnections(UpdateArmTemplate): """ Updates the ARM template to add a security rule that denies all outbound connections. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: + if not is_lisa_template: + raise Exception('This test can only customize LISA ARM templates.') + resources = template["resources"] nsg = self._get_resource_by_name(resources, NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups") properties = nsg.get("properties") From ae3ad4a22ade911d0958cf1b753784c67000b6ae Mon Sep 17 00:00:00 2001 From: narrieta Date: Thu, 19 Oct 2023 21:17:09 -0700 Subject: [PATCH 04/30] . 
--- tests_e2e/orchestrator/lib/agent_test_loader.py | 6 +++++- tests_e2e/orchestrator/lib/update_arm_template_hook.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index eff661712d..a1ac6c2a46 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -269,10 +269,14 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: test_suite_info.locations = locations test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] - test_suite_info.executes_on_scale_set = "executes_on_scale_set" in test_suite and test_suite["executes_on_scale_set"] test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"] + test_suite_info.executes_on_scale_set = "executes_on_scale_set" in test_suite and test_suite["executes_on_scale_set"] test_suite_info.template = test_suite.get("template", "") + # TODO: Add support for custom templates + if test_suite_info.executes_on_scale_set and test_suite_info.template != '': + raise Exception(f"Currently custom templates are not supported on scale sets. [Test suite: {test_suite_info.name}]") + skip_on_clouds = test_suite.get("skip_on_clouds") if skip_on_clouds is not None: if isinstance(skip_on_clouds, str): diff --git a/tests_e2e/orchestrator/lib/update_arm_template_hook.py b/tests_e2e/orchestrator/lib/update_arm_template_hook.py index 2ff910a9a7..fee943de14 100644 --- a/tests_e2e/orchestrator/lib/update_arm_template_hook.py +++ b/tests_e2e/orchestrator/lib/update_arm_template_hook.py @@ -46,7 +46,7 @@ def azure_update_arm_template(self, template: Any, environment: Environment) -> # Add the network security group for the test VM. This group includes a rule allowing SSH access from the current machine. 
# log.info("******** Waagent: Adding network security rule to the ARM template") - AddNetworkSecurityGroup().update(template) + AddNetworkSecurityGroup().update(template, is_lisa_template=True) # # Apply any template customizations provided by the tests. @@ -60,7 +60,7 @@ def azure_update_arm_template(self, template: Any, environment: Environment) -> for t in test_templates.split(","): update_arm_template = self._get_update_arm_template(t) - update_arm_template().update(template) + update_arm_template().update(template, is_lisa_template=True) _SOURCE_CODE_ROOT: Path = Path(tests_e2e.__path__[0]) From 705796dadd81739b7c0048eed061f0dbdf19f6f2 Mon Sep 17 00:00:00 2001 From: narrieta Date: Fri, 20 Oct 2023 07:34:03 -0700 Subject: [PATCH 05/30] . --- .../orchestrator/lib/agent_test_suite.py | 744 ++++++++++-------- .../lib/agent_test_suite_combinator.py | 337 ++++++-- tests_e2e/orchestrator/runbook.yml | 75 +- 3 files changed, 738 insertions(+), 418 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 4a14b1f665..9428d0c435 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -23,7 +23,7 @@ from pathlib import Path from threading import current_thread, RLock -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple # Disable those warnings, since 'lisa' is an external, non-standard, dependency # E0401: Unable to import 'lisa' (import-error) @@ -31,7 +31,6 @@ from lisa import ( # pylint: disable=E0401 Environment, Logger, - Node, notifier, simple_requirement, TestCaseMetadata, @@ -40,18 +39,26 @@ ) from lisa.environment import EnvironmentStatus # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 +from lisa.node import Node # pylint: disable=E0401 from lisa.sut_orchestrator.azure.common import get_node_context # pylint: disable=E0401 +from 
lisa.sut_orchestrator.ready import ReadyPlatform # pylint: disable=E0401 import makepkg from azurelinuxagent.common.version import AGENT_VERSION + +from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient + +import tests_e2e from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError -from tests_e2e.tests.lib.agent_test_context import AgentTestContext -from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.logging import set_current_thread_log from tests_e2e.tests.lib.agent_log import AgentLogRecord +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient from tests_e2e.tests.lib.shell import run_command, CommandError from tests_e2e.tests.lib.ssh_client import SshClient @@ -102,76 +109,81 @@ class CollectLogs(object): No = 'no' # Never collect logs +# +# Possible values for the keep_environment parameter +# +class KeepEnvironment(object): + Always = 'always' # Do not delete resources created by the test suite + Failed = 'failed' # Skip delete only on test failures + No = 'no' # Always delete resources created by the test suite + + +class _TestNode(object): + def __init__(self, name: str, ip_address: str): + self.name = name + self.ip_address = ip_address + + def __str__(self): + return f"{self.name}:{self.ip_address}" + + @TestSuiteMetadata(area="waagent", category="", description="") class AgentTestSuite(LisaTestSuite): """ Manages the setup of test VMs and execution of Agent test suites. 
This class acts as the interface with the LISA framework, which will invoke the execute() method when a runbook is executed. """ - - class _Context(AgentTestContext): - def __init__(self, vm: VmIdentifier, paths: AgentTestContext.Paths, connection: AgentTestContext.Connection): - super().__init__(vm=vm, paths=paths, connection=connection) - # These are initialized by AgentTestSuite._set_context(). - self.log_path: Path = None - self.lisa_log: Logger = None - self.node: Node = None - self.runbook_name: str = None - self.environment_name: str = None - self.is_vhd: bool = None - self.test_suites: List[AgentTestSuite] = None - self.collect_logs: str = None - self.skip_setup: bool = None - self.ssh_client: SshClient = None - def __init__(self, metadata: TestSuiteMetadata) -> None: super().__init__(metadata) - # The context is initialized by _set_context() via the call to execute() - self.__context: AgentTestSuite._Context = None - - def _initialize(self, node: Node, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): - connection_info = node.connection_info - node_context = get_node_context(node) - - self.__context = self._Context( - vm=VmIdentifier( - cloud=self._get_required_parameter(variables, "cloud"), - location=self._get_required_parameter(variables, "c_location"), - subscription=node.features._platform.subscription_id, - resource_group=node_context.resource_group_name, - name=node_context.vm_name), - paths=AgentTestContext.Paths( - working_directory=self._get_working_directory(lisa_working_path), - remote_working_directory=Path('/home')/connection_info['username']), - connection=AgentTestContext.Connection( - ip_address=connection_info['address'], - username=connection_info['username'], - private_key_file=connection_info['private_key_file'], - ssh_port=connection_info['port'])) - - self.__context.log_path = self._get_log_path(variables, lisa_log_path) - self.__context.lisa_log = lisa_log - self.__context.node = node - 
self.__context.is_vhd = self._get_optional_parameter(variables, "c_vhd") != "" - self.__context.environment_name = f"{node.os.name}-vhd" if self.__context.is_vhd else self._get_required_parameter(variables, "c_env_name") - self.__context.test_suites = self._get_required_parameter(variables, "c_test_suites") - self.__context.collect_logs = self._get_required_parameter(variables, "collect_logs") - self.__context.skip_setup = self._get_required_parameter(variables, "skip_setup") - self.__context.ssh_client = SshClient(ip_address=self.__context.vm_ip_address, username=self.__context.username, private_key_file=self.__context.private_key_file) - - @staticmethod - def _get_required_parameter(variables: Dict[str, Any], name: str) -> Any: - value = variables.get(name) - if value is None: - raise Exception(f"The runbook is missing required parameter '{name}'") - return value - - @staticmethod - def _get_optional_parameter(variables: Dict[str, Any], name: str, default_value: Any = "") -> Any: - value = variables.get(name) - if value is None: - return default_value - return value + self._test_source_directory: Path + self._working_directory: Path + self._test_agent_package_path: Path + self._test_tools_tarball_path: Path + self._log_path: Path + self._lisa_log: Logger + self._runbook_name: str + self._environment_name: str + self._subscription_id: str + self._cloud: str + self._location: str + self._marketplace_image: str + self._is_vhd: bool + self._user: str + self._identity_file: str + self._test_suites: List[AgentTestSuite] + self._skip_setup: bool + self._collect_logs: str + self._keep_environment: str + self._existing_resource_group: str + self._existing_vmss: str + self._resource_groups_to_delete: List[ResourceGroupClient] + + def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): + self._test_source_directory = Path(tests_e2e.__path__[0]) + self._working_directory = 
self._get_working_directory(lisa_working_path) + self._test_agent_package_path = self._working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + self._test_tools_tarball_path = self._working_directory/"waagent-tools.tar" + self._pypy_x64_path = Path("/tmp/pypy3.7-x64.tar.bz2") + self._pypy_arm64_path = Path("/tmp/pypy3.7-arm64.tar.bz2") + self._log_path = self._get_log_path(variables, lisa_log_path) + self._lisa_log = lisa_log + self._runbook_name = variables["name"] + self._environment_name = variables["c_env_name"] + self._subscription_id = variables["subscription_id"] + self._cloud = variables["cloud"] + self._location = variables["c_location"] + self._marketplace_image = variables["c_marketplace_image"] + self._is_vhd = variables["c_vhd"] != "" + self._user = variables["user"] + self._identity_file = variables["identity_file"] + self._test_suites = variables["c_test_suites"] + self._skip_setup = variables["skip_setup"] + self._keep_environment = variables["keep_environment"] + self._collect_logs = variables["collect_logs"] + # If an existing VMSS was passed in the command line, these variables will contain its name and resource group, otherwise they will be empty + self._existing_resource_group = variables["resource_group_name"] + self._existing_vmss = variables["vmss_name"] + self._resource_groups_to_delete = [] @staticmethod def _get_log_path(variables: Dict[str, Any], lisa_log_path: str) -> Path: @@ -188,18 +200,12 @@ def _get_log_path(variables: Dict[str, Any], lisa_log_path: str) -> Path: def _get_working_directory(lisa_working_path: str) -> Path: # LISA's "working_path" has a value similar to # "<--working_path>/20230322/20230322-194430-287/tests/20230322-194451-333-agent_test_suite - # where "<--working_path>" is the value given to the --working_path command line argument. Create the working for + # where "<--working_path>" is the value given to the --working_path command line argument. 
Create the working directory for # the AgentTestSuite as # "<--working_path>/20230322/20230322-194430-287/waagent # This directory will be unique for each execution of the runbook ("20230322-194430" is the timestamp and "287" is a # unique ID per execution) - return Path(lisa_working_path).parent.parent / "waagent" - - @property - def context(self): - if self.__context is None: - raise Exception("The context for the AgentTestSuite has not been initialized") - return self.__context + return Path(lisa_working_path).parent.parent/"waagent" # # Test suites within the same runbook may be executed concurrently, and setup needs to be done only once. @@ -217,13 +223,13 @@ def _create_working_directory(self) -> None: self._working_directory_lock.acquire() try: - if not self.context.working_directory.exists(): - log.info("Creating working directory: %s", self.context.working_directory) - self.context.working_directory.mkdir(parents=True) + if not self._working_directory.exists(): + log.info("Creating working directory: %s", self._working_directory) + self._working_directory.mkdir(parents=True) finally: self._working_directory_lock.release() - def _setup(self) -> None: + def _setup_test_run(self) -> None: """ Prepares the test suite for execution (currently, it just builds the agent package) @@ -232,17 +238,54 @@ def _setup(self) -> None: self._setup_lock.acquire() try: - log.info("") - log.info("**************************************** [Build] ****************************************") - log.info("") - completed: Path = self.context.working_directory/"completed" + completed: Path = self._working_directory / "completed" if completed.exists(): log.info("Found %s. 
Build has already been done, skipping.", completed) return - self.context.lisa_log.info("Building test agent") - self._build_agent_package() + log.info("") + log.info("********************************** [Preparing Test Run] **********************************") + log.info("") + + self._lisa_log.info("Building agent package to %s", self._test_agent_package_path) + log.info("Building agent package to %s", self._test_agent_package_path) + makepkg.run(agent_family="Test", output_directory=str(self._working_directory), log=log) + if not self._test_agent_package_path.exists(): # the target path is created by makepkg, ensure we are using the correct value + raise Exception(f"The test Agent package was not created at the expected path {self._test_agent_package_path}") + + # + # Ensure that Pypy (both x64 and ARM) has been downloaded to the local machine; it is pre-downloaded to /tmp on + # the container image used for Azure Pipelines runs, but for developer runs it may need to be downloaded. + # + for pypy in [self._pypy_x64_path, self._pypy_arm64_path]: + if pypy.exists(): + log.info("Found Pypy at %s", pypy) + else: + pypy_download = f"https://dcrdata.blob.core.windows.net/python/{pypy.name}" + log.info("Downloading %s to %s", pypy_download, pypy) + run_command(["wget", pypy_download, "-O", pypy]) + + # + # Create a tarball with the tools we need to copy to the test node. 
The tarball includes two directories: + # + # * bin - Executables file (Bash and Python scripts) + # * lib - Library files (Python modules) + # + log.info("Creating %s with the tools needed on the test node", self._test_tools_tarball_path) + log.info("Adding orchestrator/scripts") + command = "cd {0} ; tar cf {1} --transform='s,^,bin/,' *".format(self._test_source_directory/"orchestrator"/"scripts", self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Adding tests/scripts") + command = "cd {0} ; tar rf {1} --transform='s,^,bin/,' *".format(self._test_source_directory/"tests"/"scripts", self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Adding tests/lib") + command = "cd {0} ; tar rf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self._test_source_directory.parent, self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Contents of %s:\n%s", self._test_tools_tarball_path, run_command(['tar', 'tvf', str(self._test_tools_tarball_path)])) log.info("Completed setup, creating %s", completed) completed.touch() @@ -250,163 +293,113 @@ def _setup(self) -> None: finally: self._setup_lock.release() - def _build_agent_package(self) -> None: + def _clean_up(self, success: bool) -> None: """ - Builds the agent package and returns the path to the package. + Cleans up any items created by the test suite run. """ - log.info("Building agent package to %s", self.context.working_directory) - - makepkg.run(agent_family="Test", output_directory=str(self.context.working_directory), log=log) - - package_path: Path = self._get_agent_package_path() - if not package_path.exists(): - raise Exception(f"Can't find the agent package at {package_path}") - - log.info("Built agent package as %s", package_path) - - def _get_agent_package_path(self) -> Path: - """ - Returns the path to the agent package. 
- """ - return self.context.working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + if len(self._resource_groups_to_delete) > 0: + if self._keep_environment == KeepEnvironment.Always: + log.info("Won't delete resource groups %s, per the test suite configuration.", self._resource_groups_to_delete) + elif self._keep_environment == KeepEnvironment.No or self._keep_environment == KeepEnvironment.Failed and not success: + for resource_group in self._resource_groups_to_delete: + try: + self._lisa_log.info("Deleting resource group %s", resource_group) + resource_group.delete() + except Exception as error: # pylint: disable=broad-except + log.warning("Error deleting resource group %s: %s", resource_group, error) - def _clean_up(self) -> None: + def _setup_test_nodes(self, test_nodes: List[_TestNode]) -> None: """ - Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use. + Prepares the provided remote nodes for executing the test suite (installs tools and the test agent, etc) """ + install_test_agent = self._test_suites[0].install_test_agent # All suites in the environment have the same value for install_test_agent - def _setup_node(self, install_test_agent: bool) -> None: - """ - Prepares the remote node for executing the test suite (installs tools and the test agent, etc) - """ - self.context.lisa_log.info("Setting up test node") - log.info("") - log.info("************************************** [Node Setup] **************************************") log.info("") - log.info("Test Node: %s", self.context.vm.name) - log.info("IP Address: %s", self.context.vm_ip_address) - log.info("Resource Group: %s", self.context.vm.resource_group) + log.info("************************************ [Test Nodes Setup] ************************************") log.info("") + for node in test_nodes: + self._lisa_log.info(f"Setting up test node {node}") + log.info("Test Node: %s", node.name) + log.info("IP Address: %s", node.ip_address) + 
log.info("") - # - # Ensure that the correct version (x84 vs ARM64) Pypy has been downloaded; it is pre-downloaded to /tmp on the container image - # used for Azure Pipelines runs, but for developer runs it may need to be downloaded. - # - if self.context.ssh_client.get_architecture() == "aarch64": - pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2") - pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-arm64.tar.bz2" - else: - pypy_path = Path("/tmp/pypy3.7-x64.tar.bz2") - pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-x64.tar.bz2" - if pypy_path.exists(): - log.info("Found Pypy at %s", pypy_path) - else: - log.info("Downloading %s to %s", pypy_download, pypy_path) - run_command(["wget", pypy_download, "-O", pypy_path]) - - # - # Cleanup the test node (useful for developer runs) - # - log.info('Preparing the test node for setup') - # Note that removing lib requires sudo, since a Python cache may have been created by tests using sudo - self.context.ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True) - - # - # Copy Pypy and the test Agent to the test node - # - target_path = Path("~")/"tmp" - self.context.ssh_client.run_command(f"mkdir {target_path}") - log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(pypy_path, target_path) - agent_package_path: Path = self._get_agent_package_path() - log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(agent_package_path, target_path) - - # tar commands sometimes fail with 'tar: Unexpected EOF in archive' error. Retry tarball creation, copy, and - # extraction if we hit this error - tar_retries = 3 - while tar_retries > 0: - try: - # - # Create a tarball with the files we need to copy to the test node. 
The tarball includes two directories: - # - # * bin - Executables file (Bash and Python scripts) - # * lib - Library files (Python modules) - # - # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. - # - # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. - # - tarball_path: Path = Path("/tmp/waagent.tar") - log.info("Creating %s with the files need on the test node", tarball_path) - log.info("Adding orchestrator/scripts") - command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/scripts") - command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/lib") - command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)])) - - # - # Copy the tarball to the test node - # - log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(tarball_path, target_path) - - # - # Extract the tarball and execute the install scripts - # - log.info('Installing tools on the test node') - command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools" - log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) - - # Tarball creation and extraction was successful - no need to retry - tar_retries = 0 - - except CommandError as error: - if "tar: 
Unexpected EOF in archive" in error.stderr: - tar_retries -= 1 - # Log the error with traceback to see which tar operation failed - log.info(f"Tarball creation or extraction failed: \n{error}") - # Retry tar operations - if tar_retries > 0: - log.info("Retrying tarball creation and extraction...") - else: - raise Exception(f"Unexpected error when creating or extracting tarball during node setup: {error}") - - if self.context.is_vhd: - log.info("Using a VHD; will not install the Test Agent.") - elif not install_test_agent: - log.info("Will not install the Test Agent per the test suite configuration.") - else: - log.info("Installing the Test Agent on the test node") - command = f"install-agent --package ~/tmp/{agent_package_path.name} --version {AGENT_VERSION}" - log.info("%s\n%s", command, self.context.ssh_client.run_command(command, use_sudo=True)) - - log.info("Completed test node setup") - - def _collect_node_logs(self) -> None: + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + + # + # Cleanup the test node (useful for developer runs) + # + log.info('Preparing the test node for setup') + # Note that removing lib requires sudo, since a Python cache may have been created by tests using sudo + ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True) + + # + # Copy Pypy, the test Agent, and the test tools to the test node + # + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + if ssh_client.get_architecture() == "aarch64": + pypy_path = self._pypy_arm64_path + else: + pypy_path = self._pypy_x64_path + target_path = Path("~")/"tmp" + ssh_client.run_command(f"mkdir {target_path}") + log.info("Copying %s to %s:%s", pypy_path, node.name, target_path) + ssh_client.copy_to_node(pypy_path, target_path) + log.info("Copying %s to %s:%s", self._test_agent_package_path, node.name, target_path) + 
ssh_client.copy_to_node(self._test_agent_package_path, target_path) + log.info("Copying %s to %s:%s", self._test_tools_tarball_path, node.name, target_path) + ssh_client.copy_to_node(self._test_tools_tarball_path, target_path) + + # + # Extract the tarball with the test tools. The tarball includes two directories: + # + # * bin - Executables file (Bash and Python scripts) + # * lib - Library files (Python modules) + # + # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. + # + # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. + # + log.info('Installing tools on the test node') + command = f"tar xvf {target_path/self._test_tools_tarball_path.name} && ~/bin/install-tools" + log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) + + if self._is_vhd: + log.info("Using a VHD; will not install the Test Agent.") + elif not install_test_agent: + log.info("Will not install the Test Agent per the test suite configuration.") + else: + log.info("Installing the Test Agent on the test node") + command = f"install-agent --package ~/tmp/{self._test_agent_package_path.name} --version {AGENT_VERSION}" + log.info("%s\n%s", command, ssh_client.run_command(command, use_sudo=True)) + + log.info("Completed test node setup") + + def _collect_logs_from_test_nodes(self, test_nodes: List[_TestNode]) -> None: """ - Collects the test logs from the remote machine and copies them to the local machine + Collects the test logs from the provided remote nodes and copies them to the local machine """ - try: - # Collect the logs on the test machine into a compressed tarball - self.context.lisa_log.info("Collecting logs on test node") - log.info("Collecting logs on test node") - stdout = self.context.ssh_client.run_command("collect-logs", use_sudo=True) - log.info(stdout) - - # Copy the tarball to the local logs directory - remote_path = 
"/tmp/waagent-logs.tgz" - local_path = self.context.log_path/'{0}.tgz'.format(self.context.environment_name) - log.info("Copying %s:%s to %s", self.context.node.name, remote_path, local_path) - self.context.ssh_client.copy_from_node(remote_path, local_path) - - except: # pylint: disable=bare-except - log.exception("Failed to collect logs from the test machine") + for node in test_nodes: + node_name = node.name + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + try: + # Collect the logs on the test machine into a compressed tarball + self._lisa_log.info("Collecting logs on test node %s", node_name) + log.info("Collecting logs on test node %s", node_name) + stdout = ssh_client.run_command("collect-logs", use_sudo=True) + log.info(stdout) + + # Copy the tarball to the local logs directory + tgz_name = self._environment_name + if len(test_nodes) > 1: + # Append instance of scale set to the end of tarball name + tgz_name += '_' + node_name.split('_')[-1] + remote_path = "/tmp/waagent-logs.tgz" + local_path = self._log_path / '{0}.tgz'.format(tgz_name) + log.info("Copying %s:%s to %s", node_name, remote_path, local_path) + ssh_client.copy_from_node(remote_path, local_path) + + except: # pylint: disable=bare-except + log.exception("Failed to collect logs from the test machine") # NOTES: # @@ -418,23 +411,25 @@ def _collect_node_logs(self) -> None: # # W0621: Redefining name 'log' from outer scope (line 53) (redefined-outer-name) @TestCaseMetadata(description="", priority=0, requirement=simple_requirement(environment_status=EnvironmentStatus.Deployed)) - def main(self, node: Node, environment: Environment, variables: Dict[str, Any], working_path: str, log_path: str, log: Logger): # pylint: disable=redefined-outer-name + def main(self, environment: Environment, variables: Dict[str, Any], working_path: str, log_path: str, log: Logger): # pylint: disable=redefined-outer-name """ Entry point from LISA """ - 
self._initialize(node, variables, working_path, log_path, log) + self._initialize(environment, variables, working_path, log_path, log) self._execute(environment, variables) - def _execute(self, environment: Environment, variables: Dict[str, Any]): + def _execute(self, environment: Environment, variables: Dict[str, Any]) -> None: """ Executes each of the AgentTests included in the "c_test_suites" variable (which is generated by the AgentTestSuitesCombinator). """ + unexpected_error = False + test_suite_success = True + # Set the thread name to the name of the environment. The thread name is added to each item in LISA's log. - with _set_thread_name(self.context.environment_name): - log_path: Path = self.context.log_path/f"env-{self.context.environment_name}.log" + with _set_thread_name(self._environment_name): + log_path: Path = self._log_path / f"env-{self._environment_name}.log" with set_current_thread_log(log_path): start_time: datetime.datetime = datetime.datetime.now() - success = True try: # Log the environment's name and the variables received from the runbook (note that we need to expand the names of the test suites) @@ -443,64 +438,59 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]): for name, value in variables.items(): log.info(" %s: %s", name, value if name != 'c_test_suites' else [t.name for t in value]) - test_suite_success = True + self._create_working_directory() - try: - self._create_working_directory() + if not self._skip_setup: + self._setup_test_run() - if not self.context.skip_setup: - self._setup() + try: + test_context, test_nodes = self._create_test_context(environment, variables) - if not self.context.skip_setup: - # pylint seems to think self.context.test_suites is not iterable. Suppressing this warning here and a few lines below, since - # its type is List[AgentTestSuite]. 
- # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) - install_test_agent = all([suite.install_test_agent for suite in self.context.test_suites]) # pylint: disable=E1133 + if not self._skip_setup: try: - self._setup_node(install_test_agent) + self._setup_test_nodes(test_nodes) except: test_suite_success = False raise - for suite in self.context.test_suites: # pylint: disable=E1133 + for suite in self._test_suites: log.info("Executing test suite %s", suite.name) - self.context.lisa_log.info("Executing Test Suite %s", suite.name) - test_suite_success = self._execute_test_suite(suite) and test_suite_success + self._lisa_log.info("Executing Test Suite %s", suite.name) + test_suite_success = self._execute_test_suite(suite, test_context, test_nodes) and test_suite_success finally: - collect = self.context.collect_logs - if collect == CollectLogs.Always or collect == CollectLogs.Failed and not test_suite_success: - self._collect_node_logs() + if self._collect_logs == CollectLogs.Always or self._collect_logs == CollectLogs.Failed and not test_suite_success: + self._collect_logs_from_test_nodes(test_nodes) except Exception as e: # pylint: disable=bare-except # Report the error and raise an exception to let LISA know that the test errored out. 
- success = False + unexpected_error = True log.exception("UNEXPECTED ERROR.") self._report_test_result( - self.context.environment_name, + self._environment_name, "Unexpected Error", TestStatus.FAILED, start_time, message="UNEXPECTED ERROR.", add_exception_stack_trace=True) - raise Exception(f"[{self.context.environment_name}] Unexpected error in AgentTestSuite: {e}") + raise Exception(f"[{self._environment_name}] Unexpected error in AgentTestSuite: {e}") finally: - self._clean_up() - if not success: + self._clean_up(test_suite_success and not unexpected_error) + if unexpected_error: self._mark_log_as_failed() - def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext, test_nodes: List[_TestNode]) -> bool: """ Executes the given test suite and returns True if all the tests in the suite succeeded. """ suite_name = suite.name - suite_full_name = f"{suite_name}-{self.context.environment_name}" + suite_full_name = f"{suite_name}-{self._environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() with _set_thread_name(suite_full_name): # The thread name is added to the LISA log - log_path: Path = self.context.log_path/f"{suite_full_name}.log" + log_path: Path = self._log_path / f"{suite_full_name}.log" with set_current_thread_log(log_path): suite_success: bool = True @@ -517,16 +507,16 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_start_time: datetime.datetime = datetime.datetime.now() log.info("******** Executing %s", test.name) - self.context.lisa_log.info("Executing test %s", test_full_name) + self._lisa_log.info("Executing test %s", test_full_name) test_success: bool = True - test_instance = test.test_class(self.context) + test_instance = test.test_class(test_context) try: test_instance.run() summary.append(f"[Passed] {test.name}") log.info("******** [Passed] %s", test.name) - self.context.lisa_log.info("[Passed] %s", test_full_name) + 
self._lisa_log.info("[Passed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -535,7 +525,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: except TestSkipped as e: summary.append(f"[Skipped] {test.name}") log.info("******** [Skipped] %s: %s", test.name, e) - self.context.lisa_log.info("******** [Skipped] %s", test_full_name) + self._lisa_log.info("******** [Skipped] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -546,7 +536,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_success = False summary.append(f"[Failed] {test.name}") log.error("******** [Failed] %s: %s", test.name, e) - self.context.lisa_log.error("******** [Failed] %s", test_full_name) + self._lisa_log.error("******** [Failed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -558,7 +548,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: summary.append(f"[Failed] {test.name}") message = f"UNEXPECTED ERROR IN [{e.command}] {e.stderr}\n{e.stdout}" log.error("******** [Failed] %s: %s", test.name, message) - self.context.lisa_log.error("******** [Failed] %s", test_full_name) + self._lisa_log.error("******** [Failed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -569,7 +559,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_success = False summary.append(f"[Error] {test.name}") log.exception("UNEXPECTED ERROR IN %s", test.name) - self.context.lisa_log.exception("UNEXPECTED ERROR IN %s", test_full_name) + self._lisa_log.exception("UNEXPECTED ERROR IN %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -608,58 +598,117 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log(ignore_error_rules) + suite_success = suite_success and self._check_agent_log_on_test_nodes(test_nodes, 
ignore_error_rules) return suite_success - def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]]) -> bool: + def _check_agent_log_on_test_nodes(self, test_nodes: List[_TestNode], ignore_error_rules: List[Dict[str, Any]]) -> bool: """ - Checks the agent log for errors; returns true on success (no errors int the log) + Checks the agent log on the remote nodes for errors; returns true on success (no errors in the logs) """ - start_time: datetime.datetime = datetime.datetime.now() + success: bool = True - try: - self.context.lisa_log.info("Checking agent log on the test node") - log.info("Checking agent log on the test node") - - output = self.context.ssh_client.run_command("check-agent-log.py -j") - errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) - - # Individual tests may have rules to ignore known errors; filter those out - if len(ignore_error_rules) > 0: - new = [] - for e in errors: - if not AgentLog.matches_ignore_rule(e, ignore_error_rules): - new.append(e) - errors = new - - if len(errors) == 0: - # If no errors, we are done; don't create a log or test result. 
- log.info("There are no errors in the agent log") - return True - - message = f"Detected {len(errors)} error(s) in the agent log" - self.context.lisa_log.error(message) - log.error("%s:\n\n%s\n", message, '\n'.join(['\t\t' + e.text.replace('\n', '\n\t\t') for e in errors])) - self._mark_log_as_failed() - - self._report_test_result( - self.context.environment_name, - "CheckAgentLog", - TestStatus.FAILED, - start_time, - message=message + ' - First few errors:\n' + '\n'.join([e.text for e in errors[0:3]])) - except: # pylint: disable=bare-except - log.exception("Error checking agent log") - self._report_test_result( - self.context.environment_name, - "CheckAgentLog", - TestStatus.FAILED, - start_time, - "Error checking agent log", - add_exception_stack_trace=True) - - return False + for node in test_nodes: + node_name = node.name + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + + test_result_name = self._environment_name + if len(test_nodes) > 1: + # If there are multiple test nodes, as in a scale set, append the name of the node to the name of the result + test_result_name += '_' + node_name.split('_')[-1] + + start_time: datetime.datetime = datetime.datetime.now() + + try: + self._lisa_log.info("Checking agent log on the test node %s", node_name) + log.info("Checking agent log on the test node %s", node_name) + + output = ssh_client.run_command("check-agent-log.py -j") + errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) + + # Individual tests may have rules to ignore known errors; filter those out + if len(ignore_error_rules) > 0: + new = [] + for e in errors: + if not AgentLog.matches_ignore_rule(e, ignore_error_rules): + new.append(e) + errors = new + + if len(errors) == 0: + # If no errors, we are done; don't create a log or test result. 
+ log.info("There are no errors in the agent log") + else: + message = f"Detected {len(errors)} error(s) in the agent log on {node_name}" + self._lisa_log.error(message) + log.error("%s:\n\n%s\n", message, '\n'.join(['\t\t' + e.text.replace('\n', '\n\t\t') for e in errors])) + self._mark_log_as_failed() + success = False + + self._report_test_result( + test_result_name, + "CheckAgentLog", + TestStatus.FAILED, + start_time, + message=message + ' - First few errors:\n' + '\n'.join([e.text for e in errors[0:3]])) + except: # pylint: disable=bare-except + log.exception("Error checking agent log on %s", node_name) + success = False + self._report_test_result( + test_result_name, + "CheckAgentLog", + TestStatus.FAILED, + start_time, + "Error checking agent log", + add_exception_stack_trace=True) + + return success + + def _create_test_context(self, environment: Environment, variables: Dict[str, Any]) -> Tuple[AgentTestContext, List[_TestNode]]: + """ + """ + # Note that all the test suites in the environment have the same value for executes_on_scale_set + if self._test_suites[0].executes_on_scale_set: + log.info("Creating test context for scale set") + if self._existing_vmss != "": + scale_set_client = VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=self._existing_resource_group, name=self._existing_vmss) + log.info("Using existing scale set %s", scale_set_client) + else: + log.info("Creating scale set") + scale_set_client = self._create_scale_set() + + test_context = AgentVmssTestContext(working_directory=self._working_directory, vmss=scale_set_client, username=self._user, identity_file=self._identity_file) + ip_addresses = scale_set_client.get_instances_ip_address() + log.info("Scale set instances: %s", [str(i) for i in ip_addresses]) + nodes = [_TestNode(i.instance_name, i.ip_address) for i in ip_addresses] + else: + self._lisa_log.info("Creating test context for virtual machine") + test_node: 
Node = environment.nodes[0] + connection_info = test_node.connection_info + + if isinstance(test_node.features._platform, ReadyPlatform): + # A "ready" platform indicates that the tests are running on an existing VM and the vm and resource group names + # were passed as arguments in the command line + resource_group_name = variables["resource_group_name"] + node_name = variables["vm_name"] + else: + # Else the test VM was created by LISA and we need to get the vm and resource group names from the node context + node_context = get_node_context(test_node) + resource_group_name = node_context.resource_group_name + node_name = node_context.vm_name + + vm = VirtualMachineClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=node_name) + + test_context = AgentVmTestContext( + working_directory=self._working_directory, + vm=vm, + ip_address=connection_info['address'], + username=connection_info['username'], + identity_file=connection_info['private_key_file'], + ssh_port=connection_info['port']) + + nodes = [_TestNode(test_context.vm.name, test_context.ip_address)] + + return test_context, nodes @staticmethod def _mark_log_as_failed(): @@ -703,4 +752,71 @@ def _report_test_result( notifier.notify(msg) + _resource_group_counter: int = 0 # Used to generate unique resource group names + _resource_group_counter_lock: RLock = RLock() + + def _create_scale_set(self) -> VirtualMachineScaleSetClient: + """ + """ + self._resource_group_counter_lock.acquire() + try: + unique_id = self._resource_group_counter + self._resource_group_counter += 1 + finally: + self._resource_group_counter_lock.release() + + # We use a naming convention similar to LISA's, to facilitate automatic cleanup and to identify resources created by automation + timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + resource_group_name = f"lisa-{self._runbook_name}-{timestamp}-e{unique_id}" + scale_set_name = 
f"{resource_group_name}-n0" + + resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=resource_group_name) + self._lisa_log.info("Creating resource group %s", resource_group) + resource_group.create() + self._resource_groups_to_delete.append(resource_group) + + self._lisa_log.info("Creating scale set %s", scale_set_name) + log.info("Creating scale set %s", scale_set_name) + template, parameters = self._get_scale_set_deployment_template(scale_set_name) + resource_group.deploy_template(template, parameters) + + return VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=scale_set_name) + + def _get_scale_set_deployment_template(self, scale_set_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """ + Returns the deployment template for scale sets and its parameters + """ + def read_file(path: str) -> str: + with open(path, "r") as file_: + return file_.read().strip() + + publisher, offer, sku, version = self._marketplace_image.replace(":", " ").split(' ') + + template: Dict[str, Any] = json.loads(read_file(str(self._test_source_directory/"orchestrator"/"templates/vmss.json"))) + + # Scale sets for some images need to be deployed with 'plan' property + plan_required_images = ["almalinux", "kinvolk", "erockyenterprisesoftwarefoundationinc1653071250513"] + if publisher in plan_required_images: + resources: List[Dict[str, Any]] = template.get('resources') + for resource in resources: + if resource.get('type') == "Microsoft.Compute/virtualMachineScaleSets": + resource["plan"] = { + "name": "[parameters('sku')]", + "product": "[parameters('offer')]", + "publisher": "[parameters('publisher')]" + } + + AddNetworkSecurityGroup().update(template, is_lisa_template=False) + + return template, { + "username": {"value": self._user}, + "sshPublicKey": {"value": read_file(f"{self._identity_file}.pub")}, + "vmName": 
{"value": scale_set_name}, + "publisher": {"value": publisher}, + "offer": {"value": offer}, + "sku": {"value": sku}, + "version": {"value": version} + } + + diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index be72cc4c70..7ad5e2f3ce 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -1,9 +1,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import datetime import logging import random import re +import traceback import urllib.parse +import uuid from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Type @@ -14,11 +17,14 @@ # Disable those warnings, since 'lisa' is an external, non-standard, dependency # E0401: Unable to import 'lisa' (import-error) # etc -from lisa import schema # pylint: disable=E0401 +from lisa import notifier, schema # pylint: disable=E0401 from lisa.combinator import Combinator # pylint: disable=E0401 +from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 from lisa.util import field_metadata # pylint: disable=E0401 from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient @dataclass_json() @@ -30,17 +36,35 @@ class AgentTestSuitesCombinatorSchema(schema.Combinator): cloud: str = field( default_factory=str, metadata=field_metadata(required=True) ) + subscription_id: str = field( + default_factory=str, metadata=field_metadata(required=True) + ) location: str = field( default_factory=str, metadata=field_metadata(required=True) ) image: str = field( - default_factory=str, metadata=field_metadata(required=False) + default_factory=str, metadata=field_metadata(required=True) ) vm_size: 
str = field( - default_factory=str, metadata=field_metadata(required=False) + default_factory=str, metadata=field_metadata(required=True) + ) + resource_group_name: str = field( + default_factory=str, metadata=field_metadata(required=True) ) vm_name: str = field( - default_factory=str, metadata=field_metadata(required=False) + default_factory=str, metadata=field_metadata(required=True) + ) + vmss_name: str = field( + default_factory=str, metadata=field_metadata(required=True) + ) + keep_environment: str = field( + default_factory=str, metadata=field_metadata(required=True) + ) + user: str = field( + default_factory=str, metadata=field_metadata(required=True) + ) + identity_file: str = field( + default_factory=str, metadata=field_metadata(required=True) ) @@ -73,16 +97,21 @@ def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: if self.runbook.cloud not in self._DEFAULT_LOCATIONS: raise Exception(f"Invalid cloud: {self.runbook.cloud}") + if self.runbook.vm_name != '' and self.runbook.vmss_name != '': + raise Exception("Invalid runbook parameters: 'vm_name' and 'vmss_name' are mutually exclusive.") if self.runbook.vm_name != '' and (self.runbook.image != '' or self.runbook.vm_size != ''): raise Exception("Invalid runbook parameters: When 'vm_name' is specified, 'image' and 'vm_size' should not be specified.") + if self.runbook.vmss_name != '' and (self.runbook.image != '' or self.runbook.vm_size != ''): + raise Exception("Invalid runbook parameters: When 'vmss_name' is specified, 'image' and 'vm_size' should not be specified.") if self.runbook.vm_name != '': - self._environments = self.create_environment_for_existing_vm() + self._environments = [self.create_existing_vm_environment()] + elif self.runbook.vmss_name != '': + self._environments = [self.create_existing_vmss_environment()] else: self._environments = self.create_environment_list() self._index = 0 - @classmethod def type_name(cls) -> str: return "agent_test_suites" @@ -116,24 +145,6 @@ def 
_next(self) -> Optional[Dict[str, Any]]: "AzureUSGovernment": "usgovarizona", } - def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: - loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) - - environment: Dict[str, Any] = { - "c_env_name": self.runbook.vm_name, - "c_vm_name": self.runbook.vm_name, - "c_location": self.runbook.location, - "c_test_suites": loader.test_suites, - } - - log: logging.Logger = logging.getLogger("lisa") - log.info("******** Waagent: Settings for existing VM *****") - log.info("") - log.info("Settings for %s:\n%s\n", environment['c_env_name'], self._get_env_settings(environment)) - log.info("") - - return [environment] - def create_environment_list(self) -> List[Dict[str, Any]]: """ Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs) that need to be @@ -142,6 +153,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: Note that if the runbook provides an 'image', 'location', or 'vm_size', those values override any values provided in the configuration of the test suites. 
""" + log: logging.Logger = logging.getLogger("lisa") environments: List[Dict[str, Any]] = [] shared_environments: Dict[str, Dict[str, Any]] = {} # environments shared by multiple test suites @@ -150,76 +162,83 @@ def create_environment_list(self) -> List[Dict[str, Any]]: runbook_images = self._get_runbook_images(loader) skip_test_suites: List[str] = [] - for suite_info in loader.test_suites: - if self.runbook.cloud in suite_info.skip_on_clouds: - skip_test_suites.append(suite_info.name) + for test_suite_info in loader.test_suites: + if self.runbook.cloud in test_suite_info.skip_on_clouds: + skip_test_suites.append(test_suite_info.name) continue if len(runbook_images) > 0: images_info: List[VmImageInfo] = runbook_images else: - images_info: List[VmImageInfo] = self._get_test_suite_images(suite_info, loader) + images_info: List[VmImageInfo] = self._get_test_suite_images(test_suite_info, loader) for image in images_info: # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter if self._is_vhd(image.urn): - c_marketplace_image = "" - c_vhd = image.urn - image_name = "vhd" + marketplace_image = "" + vhd = image.urn + image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd") else: - c_marketplace_image = image.urn - c_vhd = "" + marketplace_image = image.urn + vhd = "" image_name = self._get_image_name(image.urn) - c_location: str = self._get_location(suite_info, image) - if c_location is None: + location: str = self._get_location(test_suite_info, image) + if location is None: continue - c_vm_size = self._get_vm_size(image) - - # Note: Disabling "W0640: Cell variable 'foo' defined in loop (cell-var-from-loop)". This is a false positive, the closure is OK - # to use, since create_environment() is called within the same iteration of the loop. 
- # pylint: disable=W0640 - def create_environment(c_env_name: str) -> Dict[str, Any]: - c_vm_tags = {} - if suite_info.template != '': - c_vm_tags["templates"] = suite_info.template - return { - "c_marketplace_image": c_marketplace_image, - "c_location": c_location, - "c_vm_size": c_vm_size, - "c_vhd": c_vhd, - "c_test_suites": [suite_info], - "c_env_name": c_env_name, - "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], - "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], - "c_vm_tags": c_vm_tags - } - # pylint: enable=W0640 - - if suite_info.owns_vm: - # create an environment for exclusive use by this suite - environments.append(create_environment(f"{image_name}-{suite_info.name}")) + vm_size = self._get_vm_size(image) + + if test_suite_info.owns_vm or not test_suite_info.install_test_agent: + # create a VM environment for exclusive use by this suite + # TODO: Allow test suites that set 'install_test_agent' to False to share environments (we need to ensure that + # all the suites in the shared environment have the same value for 'install_test_agent') + env = self.create_vm_environment( + c_env_name=f"{image_name}-{test_suite_info.name}", + marketplace_image=marketplace_image, + vhd=vhd, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + environments.append(env) else: # add this suite to the shared environments - key: str = f"{image_name}-{c_location}" - env = shared_environments.get(key) + env_name: str = f"{image_name}-vmss-{location}" if test_suite_info.executes_on_scale_set else f"{image_name}-{location}" + env = shared_environments.get(env_name) if env is not None: - env["c_test_suites"].append(suite_info) - if suite_info.template != '': - vm_tags = env["c_vm_tags"] - if "templates" in vm_tags: - vm_tags["templates"] += ", " + suite_info.template - else: - vm_tags["templates"] = suite_info.template + 
env["c_test_suites"].append(test_suite_info) else: - shared_environments[key] = create_environment(key) + if test_suite_info.executes_on_scale_set: + # TODO: Add support for VHDs + if vhd != "": + raise Exception("VHDS are currently not supported on scale sets.") + env = self.create_vmss_environment( + env_name=env_name, + marketplace_image=marketplace_image, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + else: + env = self.create_vm_environment( + c_env_name=env_name, + marketplace_image=marketplace_image, + vhd=vhd, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + shared_environments[env_name] = env + + if test_suite_info.template != '': + vm_tags = env["c_vm_tags"] + if "templates" not in vm_tags: + vm_tags["templates"] = test_suite_info.template + else: + vm_tags["templates"] += ", " + test_suite_info.template environments.extend(shared_environments.values()) if len(environments) == 0: raise Exception("No VM images were found to execute the test suites.") - log: logging.Logger = logging.getLogger("lisa") if len(skip_test_suites) > 0: log.info("") log.info("Test suites skipped on %s:\n\n\t%s\n", self.runbook.cloud, '\n\t'.join(skip_test_suites)) @@ -233,6 +252,146 @@ def create_environment(c_env_name: str) -> Dict[str, Any]: return environments + def create_existing_vm_environment(self) -> Dict[str, Any]: + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) + + vm = VirtualMachineClient( + cloud=self.runbook.cloud, + location=self.runbook.location, + subscription=self.runbook.subscription_id, + resource_group=self.runbook.resource_group_name, + name=self.runbook.vm_name) + ip_address = vm.get_ip_address() + + return { + "c_env_name": self.runbook.vm_name, + "c_platform": [ + { + "type": "ready" + } + ], + "c_environment": { + "environments": [ + { + "nodes": [ + { + "type": "remote", + "public_address": ip_address, + "public_port": 22, + "username": self.runbook.user, + 
"private_key_file": self.runbook.identity_file + } + ], + } + ] + }, + "c_location": self.runbook.location, + "c_test_suites": loader.test_suites, + } + + def create_existing_vmss_environment(self) -> Dict[str, Any]: + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) + + vmss = VirtualMachineScaleSetClient( + cloud=self.runbook.cloud, + location=self.runbook.location, + subscription=self.runbook.subscription_id, + resource_group=self.runbook.resource_group_name, + name=self.runbook.vmss_name) + + ip_addresses = vmss.get_instances_ip_address() + + return { + "c_env_name": self.runbook.vmss_name, + "c_environment": { + "environments": [ + { + "nodes": [ + { + "type": "remote", + "public_address": i.ip_address, + "public_port": 22, + "username": self.runbook.user, + "private_key_file": self.runbook.identity_file + } for i in ip_addresses + ], + } + ] + }, + "c_platform": [ + { + "type": "ready" + } + ], + "c_location": self.runbook.location, + "c_test_suites": loader.test_suites, + } + + def create_vm_environment(self, c_env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + vm_tags = {} + if test_suite_info.template != '': + vm_tags["templates"] = test_suite_info.template + return { + "c_env_name": c_env_name, + "c_platform": [ + { + "type": "azure", + "admin_username": self.runbook.user, + "admin_private_key_file": self.runbook.identity_file, + "keep_environment": self.runbook.keep_environment, + "azure": { + "deploy": True, + "cloud": self.runbook.cloud, + "marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], + "shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], + "subscription_id": self.runbook.subscription_id, + "wait_delete": False, + "vm_tags": vm_tags + }, + "requirement": { + "core_count": { + "min": 2 + }, + "azure": { + "marketplace": marketplace_image, + "vhd": vhd, 
+ "location": location, + "vm_size": vm_size + } + } + } + ], + "c_environment": None, + "c_location": location, + "c_vhd": vhd, + "c_test_suites": [test_suite_info], + "vm_tags": vm_tags + } + + @staticmethod + def create_vmss_environment(env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + return { + "c_env_name": env_name, + "c_platform": [ + { + "type": "ready" + } + ], + "c_environment": { + "environments": [ + { + "nodes": [ + {"type": "local"} + ], + } + ] + }, + "c_test_suites": [test_suite_info], + "c_marketplace_image": marketplace_image, + "c_location": location, + "c_vm_size": vm_size + } + def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: """ Returns the images specified in the runbook, or an empty list if none are specified. @@ -322,6 +481,7 @@ def _get_vm_size(self, image: VmImageInfo) -> str: # Otherwise, set the size to empty and LISA will select an appropriate size. return "" + @staticmethod def _get_image_name(urn: str) -> str: """ @@ -349,3 +509,38 @@ def _is_vhd(vhd: str) -> bool: # VHDs are given as URIs to storage; do some basic validation, not intending to be exhaustive. parsed = urllib.parse.urlparse(vhd) return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != "" + + @staticmethod + def _report_test_result( + suite_name: str, + test_name: str, + status: TestStatus, + start_time: datetime.datetime, + message: str = "", + add_exception_stack_trace: bool = False + ) -> None: + """ + Reports a test result to the junit notifier + """ + # The junit notifier requires an initial RUNNING message in order to register the test in its internal cache. 
+ msg: TestResultMessage = TestResultMessage() + msg.type = "AgentTestResultMessage" + msg.id_ = str(uuid.uuid4()) + msg.status = TestStatus.RUNNING + msg.suite_full_name = suite_name + msg.suite_name = msg.suite_full_name + msg.full_name = test_name + msg.name = msg.full_name + msg.elapsed = 0 + + notifier.notify(msg) + + # Now send the actual result. The notifier pipeline makes a deep copy of the message so it is OK to re-use the + # same object and just update a few fields. If using a different object, be sure that the "id_" is the same. + msg.status = status + msg.message = message + if add_exception_stack_trace: + msg.stacktrace = traceback.format_exc() + msg.elapsed = (datetime.datetime.now() - start_time).total_seconds() + + notifier.notify(msg) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 3492e9c80c..ff1e051849 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -8,23 +8,27 @@ extension: - "./lib" variable: - - name: name - value: "WALinuxAgent" # - # These variables define parameters handled by LISA. + # These variables define parameters handled by LISA; the variables marked with 'is_case_visible' + # are also referenced by the AgentTestSuite. # + - name: name + value: "WALinuxAgent" + is_case_visible: true - name: subscription_id value: "" - - name: user - value: "waagent" - - name: identity_file + is_case_visible: true + + - name: resource_group_name value: "" - is_secret: true - - name: admin_password + is_case_visible: true + - name: vm_name value: "" - is_secret: true - - name: keep_environment - value: "no" + is_case_visible: true + - name: vmss_name + value: "" + is_case_visible: true + # # These variables define parameters for the AgentTestSuite; see the test wiki for details. 
# @@ -61,6 +65,15 @@ variable: value: "" - name: vm_size value: "" + - name: user + value: "waagent" + is_case_visible: true + - name: identity_file + value: "" + is_case_visible: true + - name: keep_environment + value: "no" + is_case_visible: true # # The values for these variables are generated by the AgentTestSuitesCombinator combinator. They are @@ -79,6 +92,11 @@ variable: is_case_visible: true - name: c_marketplace_image value: "" + is_case_visible: true + - name: c_environment + value: {} + - name: c_platform + value: [] - name: c_marketplace_image_information_location value: "" - name: c_shared_resource_group_location @@ -96,6 +114,9 @@ variable: is_case_visible: true - name: c_vm_tags value: {} + - name: c_location + value: "" + is_case_visible: true # # Set these variables to use an SSH proxy when executing the runbook @@ -110,36 +131,24 @@ variable: value: "" is_secret: true -platform: - - type: azure - admin_username: $(user) - admin_private_key_file: $(identity_file) - admin_password: $(admin_password) - keep_environment: $(keep_environment) - azure: - deploy: True - cloud: $(cloud) - marketplace_image_information_location: $(c_marketplace_image_information_location) - shared_resource_group_location: $(c_shared_resource_group_location) - subscription_id: $(subscription_id) - wait_delete: false - vm_tags: $(c_vm_tags) - requirement: - core_count: - min: 2 - azure: - marketplace: $(c_marketplace_image) - vhd: $(c_vhd) - location: $(c_location) - vm_size: $(c_vm_size) +environment: $(c_environment) + +platform: $(c_platform) combinator: type: agent_test_suites test_suites: $(test_suites) cloud: $(cloud) + subscription_id: $(subscription_id) image: $(image) location: $(location) vm_size: $(vm_size) + keep_environment: $(keep_environment) + user: $(user) + identity_file: $(identity_file) + resource_group_name: $(resource_group_name) + vm_name: $(vm_name) + vmss_name: $(vmss_name) concurrency: 32 From eb4c49020f09c15bdd1871ea2f829aba7da5f312 Mon Sep 17 
00:00:00 2001 From: narrieta Date: Fri, 20 Oct 2023 16:57:27 -0700 Subject: [PATCH 06/30] . --- .../orchestrator/lib/agent_test_suite.py | 143 ++++++------ .../lib/agent_test_suite_combinator.py | 216 +++++++++--------- tests_e2e/orchestrator/runbook.yml | 176 ++++++++------ tests_e2e/tests/lib/logging.py | 15 ++ .../lib/virtual_machine_scale_set_client.py | 3 +- 5 files changed, 313 insertions(+), 240 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 9428d0c435..b1a2e9e764 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -22,7 +22,7 @@ import uuid from pathlib import Path -from threading import current_thread, RLock +from threading import RLock from typing import Any, Dict, List, Tuple # Disable those warnings, since 'lisa' is an external, non-standard, dependency @@ -55,11 +55,10 @@ from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext -from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.logging import set_current_thread_log +from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log from tests_e2e.tests.lib.agent_log import AgentLogRecord from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient -from tests_e2e.tests.lib.shell import run_command, CommandError +from tests_e2e.tests.lib.shell import run_command from tests_e2e.tests.lib.ssh_client import SshClient @@ -87,19 +86,6 @@ def _initialize_lisa_logger(): _initialize_lisa_logger() -# -# Helper to change the current thread name temporarily -# -@contextlib.contextmanager -def _set_thread_name(name: str): - initial_name = current_thread().name - current_thread().name = name - try: - yield - finally: - current_thread().name = 
initial_name - - # # Possible values for the collect_logs parameter # @@ -135,54 +121,79 @@ class AgentTestSuite(LisaTestSuite): """ def __init__(self, metadata: TestSuiteMetadata) -> None: super().__init__(metadata) - self._test_source_directory: Path - self._working_directory: Path - self._test_agent_package_path: Path - self._test_tools_tarball_path: Path - self._log_path: Path - self._lisa_log: Logger - self._runbook_name: str - self._environment_name: str - self._subscription_id: str - self._cloud: str - self._location: str - self._marketplace_image: str - self._is_vhd: bool + self._working_directory: Path # Root directory for temporary files + self._log_path: Path # Root directory for log files + self._test_agent_package_path: Path # Path to the package for the test Agent + self._test_source_directory: Path # Root directory of the source code for the end-to-end tests + self._test_tools_tarball_path: Path # Path to the tarball with the tools needed on the test node + + self._runbook_name: str # name of the runbook execution, used as prefix on ARM resources created by the AgentTestSuite + + self._lisa_log: Logger # Main log for the LISA run + + self._lisa_environment_name: str # Name assigned by LISA to the test environment, useful for correlation with LISA logs + self._environment_name: str # Name assigned by the AgentTestSuiteCombinator to the test environment + + self._test_suites: List[AgentTestSuite] # Test suites to execute in the environment + + self._cloud: str # Azure cloud where test VMs are located + self._subscription_id: str # Azure subscription where test VMs are located + self._location: str # Azure location (region) where test VMs are located + self._image: str # Image used to create the test VMs; it can be empty if LISA chose the size, or when using an existing VM + + self._is_vhd: bool # True when the test VMs were created by LISA from a VHD; this is usually used to validate a new VHD and the test Agent is not installed + + # username and 
public SSH key for the admin account used to connect to the test VMs self._user: str self._identity_file: str - self._test_suites: List[AgentTestSuite] - self._skip_setup: bool - self._collect_logs: str - self._keep_environment: str - self._existing_resource_group: str - self._existing_vmss: str - self._resource_groups_to_delete: List[ResourceGroupClient] + + self._skip_setup: bool # If True, skip the setup of the test VMs + self._collect_logs: str # Whether to collect logs from the test VMs (one of 'always', 'failed', or 'no') + self._keep_environment: str # Whether to skip deletion of the resources created by the test suite (one of 'always', 'failed', or 'no') + + # Resource group, VM, and VMSS names passed as arguments to the runbook. They are non-empty only when executing the runbook on an existing VM/VMSS + self._resource_group_name_arg: str + self._vm_name_arg: str + self._vmss_name_arg: str + + self._resource_groups_to_delete: List[ResourceGroupClient] # Resource groups created by the AgentTestSuite, to be deleted at the end of the run def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): - self._test_source_directory = Path(tests_e2e.__path__[0]) self._working_directory = self._get_working_directory(lisa_working_path) + self._log_path = self._get_log_path(variables, lisa_log_path) self._test_agent_package_path = self._working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + self._test_source_directory = Path(tests_e2e.__path__[0]) self._test_tools_tarball_path = self._working_directory/"waagent-tools.tar" self._pypy_x64_path = Path("/tmp/pypy3.7-x64.tar.bz2") self._pypy_arm64_path = Path("/tmp/pypy3.7-arm64.tar.bz2") - self._log_path = self._get_log_path(variables, lisa_log_path) - self._lisa_log = lisa_log + self._runbook_name = variables["name"] + + self._lisa_log = lisa_log + + self._lisa_environment_name = environment.name self._environment_name = 
variables["c_env_name"] - self._subscription_id = variables["subscription_id"] + + self._test_suites = variables["c_test_suites"] + self._cloud = variables["cloud"] + self._subscription_id = variables["subscription_id"] self._location = variables["c_location"] - self._marketplace_image = variables["c_marketplace_image"] - self._is_vhd = variables["c_vhd"] != "" + self._image = variables["c_image"] + + self._is_vhd = variables["c_is_vhd"] + self._user = variables["user"] self._identity_file = variables["identity_file"] - self._test_suites = variables["c_test_suites"] + self._skip_setup = variables["skip_setup"] self._keep_environment = variables["keep_environment"] self._collect_logs = variables["collect_logs"] - # If an existing VMSS was passed in the command line, these variables will contain its name and resource group, otherwise they will be empty - self._existing_resource_group = variables["resource_group_name"] - self._existing_vmss = variables["vmss_name"] + + self._resource_group_name_arg = variables["resource_group_name"] + self._vm_name_arg = variables["vm_name"] + self._vmss_name_arg = variables["vmss_name"] + self._resource_groups_to_delete = [] @staticmethod @@ -416,27 +427,22 @@ def main(self, environment: Environment, variables: Dict[str, Any], working_path Entry point from LISA """ self._initialize(environment, variables, working_path, log_path, log) - self._execute(environment, variables) + self._execute(environment) - def _execute(self, environment: Environment, variables: Dict[str, Any]) -> None: - """ - Executes each of the AgentTests included in the "c_test_suites" variable (which is generated by the AgentTestSuitesCombinator). - """ + def _execute(self, environment: Environment) -> None: unexpected_error = False test_suite_success = True # Set the thread name to the name of the environment. The thread name is added to each item in LISA's log. 
- with _set_thread_name(self._environment_name): + with set_thread_name(self._environment_name): log_path: Path = self._log_path / f"env-{self._environment_name}.log" with set_current_thread_log(log_path): start_time: datetime.datetime = datetime.datetime.now() try: # Log the environment's name and the variables received from the runbook (note that we need to expand the names of the test suites) - log.info("LISA Environment (for correlation with the LISA log): %s", environment.name) - log.info("Runbook variables:") - for name, value in variables.items(): - log.info(" %s: %s", name, value if name != 'c_test_suites' else [t.name for t in value]) + log.info("LISA Environment (for correlation with the LISA log): %s", self._lisa_environment_name) + log.info("Test suites: %s", [t.name for t in self._test_suites]) self._create_working_directory() @@ -444,7 +450,7 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]) -> None: self._setup_test_run() try: - test_context, test_nodes = self._create_test_context(environment, variables) + test_context, test_nodes = self._create_test_context(environment) if not self._skip_setup: try: @@ -489,7 +495,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte suite_full_name = f"{suite_name}-{self._environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() - with _set_thread_name(suite_full_name): # The thread name is added to the LISA log + with set_thread_name(suite_full_name): # The thread name is added to the LISA log log_path: Path = self._log_path / f"{suite_full_name}.log" with set_current_thread_log(log_path): suite_success: bool = True @@ -663,14 +669,14 @@ def _check_agent_log_on_test_nodes(self, test_nodes: List[_TestNode], ignore_err return success - def _create_test_context(self, environment: Environment, variables: Dict[str, Any]) -> Tuple[AgentTestContext, List[_TestNode]]: + def _create_test_context(self, environment: Environment) -> 
Tuple[AgentTestContext, List[_TestNode]]: """ """ - # Note that all the test suites in the environment have the same value for executes_on_scale_set + # Note that all the test suites in the environment have the same value for executes_on_scale_set so we can use the first one if self._test_suites[0].executes_on_scale_set: log.info("Creating test context for scale set") - if self._existing_vmss != "": - scale_set_client = VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=self._existing_resource_group, name=self._existing_vmss) + if self._vmss_name_arg != "": # If an existing scale set was passed as argument to the runbook + scale_set_client = VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=self._resource_group_name_arg, name=self._vmss_name_arg) log.info("Using existing scale set %s", scale_set_client) else: log.info("Creating scale set") @@ -688,8 +694,8 @@ def _create_test_context(self, environment: Environment, variables: Dict[str, An if isinstance(test_node.features._platform, ReadyPlatform): # A "ready" platform indicates that the tests are running on an existing VM and the vm and resource group names # were passed as arguments in the command line - resource_group_name = variables["resource_group_name"] - node_name = variables["vm_name"] + resource_group_name = self._resource_group_name_arg + node_name = self._vm_name_arg else: # Else the test VM was created by LISA and we need to get the vm and resource group names from the node context node_context = get_node_context(test_node) @@ -702,9 +708,8 @@ def _create_test_context(self, environment: Environment, variables: Dict[str, An working_directory=self._working_directory, vm=vm, ip_address=connection_info['address'], - username=connection_info['username'], - identity_file=connection_info['private_key_file'], - ssh_port=connection_info['port']) + username=self._user, 
+ identity_file=self._identity_file) nodes = [_TestNode(test_context.vm.name, test_context.ip_address)] @@ -790,7 +795,7 @@ def read_file(path: str) -> str: with open(path, "r") as file_: return file_.read().strip() - publisher, offer, sku, version = self._marketplace_image.replace(":", " ").split(' ') + publisher, offer, sku, version = self._image.replace(":", " ").split(' ') template: Dict[str, Any] = json.loads(read_file(str(self._test_source_directory/"orchestrator"/"templates/vmss.json"))) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 7ad5e2f3ce..d7e4781aae 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import datetime +import json import logging import random import re @@ -23,6 +24,7 @@ from lisa.util import field_metadata # pylint: disable=E0401 from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo +from tests_e2e.tests.lib.logging import set_thread_name from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient @@ -30,67 +32,34 @@ @dataclass_json() @dataclass class AgentTestSuitesCombinatorSchema(schema.Combinator): - test_suites: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - cloud: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - subscription_id: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - location: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - image: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - vm_size: str = field( - default_factory=str, 
metadata=field_metadata(required=True) - ) - resource_group_name: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - vm_name: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - vmss_name: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - keep_environment: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - user: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - identity_file: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) + """ + Defines parameters passed to the combinator from the runbook. + + The runbook is a static document and always passes all these parameters to the combinator, so they are all + marked as required. Optional parameters can pass an empty value to indicate that they are not specified. + """ + cloud: str = field(default_factory=str, metadata=field_metadata(required=True)) + identity_file: str = field(default_factory=str, metadata=field_metadata(required=True)) + image: str = field(default_factory=str, metadata=field_metadata(required=True)) + keep_environment: str = field(default_factory=str, metadata=field_metadata(required=True)) + location: str = field(default_factory=str, metadata=field_metadata(required=True)) + resource_group_name: str = field(default_factory=str, metadata=field_metadata(required=True)) + subscription_id: str = field(default_factory=str, metadata=field_metadata(required=True)) + test_suites: str = field(default_factory=str, metadata=field_metadata(required=True)) + user: str = field(default_factory=str, metadata=field_metadata(required=True)) + vm_name: str = field(default_factory=str, metadata=field_metadata(required=True)) + vm_size: str = field(default_factory=str, metadata=field_metadata(required=True)) + vmss_name: str = field(default_factory=str, metadata=field_metadata(required=True)) class AgentTestSuitesCombinator(Combinator): """ - 
The "agent_test_suites" combinator returns a list of variables that specify the environments (i.e. test VMs) that the agent - test suites must be executed on: - - * c_env_name: Unique name for the environment, e.g. "0001-com-ubuntu-server-focal-20_04-lts-westus2" - * c_marketplace_image: e.g. "Canonical UbuntuServer 18.04-LTS latest", - * c_location: e.g. "westus2", - * c_vm_size: e.g. "Standard_D2pls_v5" - * c_vhd: e.g "https://rhel.blob.core.windows.net/images/RHEL_8_Standard-8.3.202006170423.vhd?se=..." - * c_test_suites: e.g. [AgentBvt, FastTrack] - - (c_marketplace_image, c_location, c_vm_size) and vhd are mutually exclusive and define the environment (i.e. the test VM) - in which the test will be executed. c_test_suites defines the test suites that should be executed in that - environment. - - The 'vm_name' runbook parameter can be used to execute the test suites on an existing VM. In that case, the combinator - generates a single item with these variables: - - * c_env_name: Name for the environment, same as vm_name - * c_vm_name: Name of the test VM - * c_location: Location of the test VM e.g. "westus2", - * c_test_suites: e.g. [AgentBvt, FastTrack] + The "agent_test_suites" combinator returns a list of variables that specify the test environments (i.e. test VMs) that the + test suites must be executed on. These variables are prefixed with "c_" to distinguish them from the command line arguments + of the runbook. See the runbook definition for details on each of those variables. + + The combinator can generate environments for VMs created and managed by LISA, Scale Sets created and managed by the AgentTestSuite, + or existing VMs or Scale Sets. 
""" def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: super().__init__(runbook) @@ -99,18 +68,29 @@ def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: if self.runbook.vm_name != '' and self.runbook.vmss_name != '': raise Exception("Invalid runbook parameters: 'vm_name' and 'vmss_name' are mutually exclusive.") - if self.runbook.vm_name != '' and (self.runbook.image != '' or self.runbook.vm_size != ''): - raise Exception("Invalid runbook parameters: When 'vm_name' is specified, 'image' and 'vm_size' should not be specified.") - if self.runbook.vmss_name != '' and (self.runbook.image != '' or self.runbook.vm_size != ''): - raise Exception("Invalid runbook parameters: When 'vmss_name' is specified, 'image' and 'vm_size' should not be specified.") if self.runbook.vm_name != '': - self._environments = [self.create_existing_vm_environment()] - elif self.runbook.vmss_name != '': - self._environments = [self.create_existing_vmss_environment()] - else: - self._environments = self.create_environment_list() - self._index = 0 + if self.runbook.image != '' or self.runbook.vm_size != '': + raise Exception("Invalid runbook parameters: The 'vm_name' parameter indicates an existing VM, 'image' and 'vm_size' should not be specified.") + if self.runbook.resource_group_name == '': + raise Exception("Invalid runbook parameters: The 'vm_name' parameter indicates an existing VM, a 'resource_group_name' must be specified.") + + if self.runbook.vmss_name != '': + if self.runbook.image != '' or self.runbook.vm_size != '': + raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, 'image' and 'vm_size' should not be specified.") + if self.runbook.resource_group_name == '': + raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, a 'resource_group_name' must be specified.") + + self._log: logging.Logger = logging.getLogger("lisa") + + with 
set_thread_name("AgentTestSuitesCombinator"): + if self.runbook.vm_name != '': + self._environments = [self.create_existing_vm_environment()] + elif self.runbook.vmss_name != '': + self._environments = [self.create_existing_vmss_environment()] + else: + self._environments = self.create_environment_list() + self._index = 0 @classmethod def type_name(cls) -> str: @@ -147,13 +127,12 @@ def _next(self) -> Optional[Dict[str, Any]]: def create_environment_list(self) -> List[Dict[str, Any]]: """ - Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs) that need to be + Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs or scale sets) that need to be created in order to execute these suites. Note that if the runbook provides an 'image', 'location', or 'vm_size', those values override any values provided in the configuration of the test suites. """ - log: logging.Logger = logging.getLogger("lisa") environments: List[Dict[str, Any]] = [] shared_environments: Dict[str, Dict[str, Any]] = {} # environments shared by multiple test suites @@ -193,7 +172,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: # TODO: Allow test suites that set 'install_test_agent' to False to share environments (we need to ensure that # all the suites in the shared environment have the same value for 'install_test_agent') env = self.create_vm_environment( - c_env_name=f"{image_name}-{test_suite_info.name}", + env_name=f"{image_name}-{test_suite_info.name}", marketplace_image=marketplace_image, vhd=vhd, location=location, @@ -219,7 +198,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: test_suite_info=test_suite_info) else: env = self.create_vm_environment( - c_env_name=env_name, + env_name=env_name, marketplace_image=marketplace_image, vhd=vhd, location=location, @@ -228,27 +207,26 @@ def create_environment_list(self) -> List[Dict[str, Any]]: shared_environments[env_name] = 
env if test_suite_info.template != '': - vm_tags = env["c_vm_tags"] - if "templates" not in vm_tags: - vm_tags["templates"] = test_suite_info.template - else: - vm_tags["templates"] += ", " + test_suite_info.template + vm_tags = env.get("vm_tags") + if vm_tags is not None: + if "templates" not in vm_tags: + vm_tags["templates"] = test_suite_info.template + else: + vm_tags["templates"] += ", " + test_suite_info.template environments.extend(shared_environments.values()) if len(environments) == 0: raise Exception("No VM images were found to execute the test suites.") + # Log a summary of each environment and the suites that will be executed on it + format_suites = lambda suites: ", ".join([s.name for s in suites]) + summary = [f"{e['c_env_name']}: [{format_suites(e['c_test_suites'])}]" for e in environments] + summary.sort() + self._log.info("Executing tests on %d environments\n\n%s\n", len(environments), '\n'.join([f"\t{s}" for s in summary])) + if len(skip_test_suites) > 0: - log.info("") - log.info("Test suites skipped on %s:\n\n\t%s\n", self.runbook.cloud, '\n\t'.join(skip_test_suites)) - log.info("") - log.info("******** Waagent: Test Environments *****") - log.info("") - log.info("Will execute tests on %d environments:\n\n\t%s\n", len(environments), '\n\t'.join([env['c_env_name'] for env in environments])) - for env in environments: - log.info("Settings for %s:\n%s\n", env['c_env_name'], self._get_env_settings(env)) - log.info("") + self._log.info("Skipping test suites %s", skip_test_suites) return environments @@ -263,7 +241,7 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: name=self.runbook.vm_name) ip_address = vm.get_ip_address() - return { + environment = { "c_env_name": self.runbook.vm_name, "c_platform": [ { @@ -289,6 +267,10 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: "c_test_suites": loader.test_suites, } + self._log.info("Created environment %s for an existing VM *** Scale set: %s IP_Addresses: %s", 
environment['c_env_name'], self.runbook.vm_name, ip_address) + + return environment + def create_existing_vmss_environment(self) -> Dict[str, Any]: loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) @@ -301,7 +283,7 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: ip_addresses = vmss.get_instances_ip_address() - return { + environment = { "c_env_name": self.runbook.vmss_name, "c_environment": { "environments": [ @@ -327,12 +309,23 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: "c_test_suites": loader.test_suites, } - def create_vm_environment(self, c_env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + self._log.info( + "Created environment %s for an existing scale set *** Scale set: %s IP_Addresses: %s", environment['c_env_name'], self.runbook.vmss_name, [i.ip_address for i in ip_addresses]) + + return environment + + def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + # + # Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template + # hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "template" and pass the + # names of the custom templates in its value. The hook can then retrieve the value from the Platform object (see wiki for more details). + # We also use a dummy item, "vm_tags" in the environment dictionary in order to concatenate templates from multiple test suites when they + # share the same test environment. 
+ # vm_tags = {} if test_suite_info.template != '': vm_tags["templates"] = test_suite_info.template - return { - "c_env_name": c_env_name, + environment = { "c_platform": [ { "type": "azure", @@ -361,22 +354,34 @@ def create_vm_environment(self, c_env_name: str, marketplace_image: str, vhd: st } } ], + "c_environment": None, - "c_location": location, - "c_vhd": vhd, + + "c_env_name": env_name, "c_test_suites": [test_suite_info], + "c_location": location, + "c_image": marketplace_image, + "c_is_vhd": vhd != "", "vm_tags": vm_tags } - @staticmethod - def create_vmss_environment(env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: - return { - "c_env_name": env_name, + self._log.info( + "Created environment %s for a new VM *** Image: %s Location: %s VM_Size: %s", + environment['c_env_name'], + vhd if vhd != "" else marketplace_image.replace(" ", ":"), + location, + vm_size if vm_size != "" else "default") + + return environment + + def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + environment = { "c_platform": [ { "type": "ready" } ], + "c_environment": { "environments": [ { @@ -386,12 +391,24 @@ def create_vmss_environment(env_name: str, marketplace_image: str, location: str } ] }, + + "c_env_name": env_name, "c_test_suites": [test_suite_info], - "c_marketplace_image": marketplace_image, "c_location": location, + "c_image": marketplace_image, + "c_is_vhd": False, "c_vm_size": vm_size } + self._log.info( + "Created environment %s for a new VMSS *** Image: %s Location: %s VM_Size: %s", + environment['c_env_name'], + marketplace_image.replace(" ", ":"), + location, + vm_size if vm_size != "" else "default") + + return environment + def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: """ Returns the images specified in the runbook, or an empty list if none are specified. 
@@ -492,11 +509,6 @@ def _get_image_name(urn: str) -> str: raise Exception(f"Invalid URN: {urn}") return f"{match.group('offer')}-{match.group('sku')}" - @staticmethod - def _get_env_settings(environment: Dict[str, Any]): - suite_names = [s.name for s in environment['c_test_suites']] - return '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else suite_names}" for name, value in environment.items()]) - _URN = re.compile(r"(?P<publisher>[^\s:]+)[\s:](?P<offer>[^\s:]+)[\s:](?P<sku>[^\s:]+)[\s:](?P<version>[^\s:]+)") @staticmethod diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index ff1e051849..a076264036 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -9,16 +9,66 @@ extension: variable: # - # These variables define parameters handled by LISA; the variables marked with 'is_case_visible' - # are also referenced by the AgentTestSuite. + # The test environments are generated dynamically by the AgentTestSuitesCombinator using the 'platform' and 'environment' variables. + # Most of the variables below are parameters for the combinator and/or the AgentTestSuite (marked as 'is_case_visible'), but a few of + # them, such as the runbook name and the SSH proxy variables, are handled by LISA. + # + # Many of these variables are optional, depending on the scenario. An empty value indicates that the variable has not been specified. + # + + # + # The name of the runbook; it is added as a prefix ("lisa-") to ARM resources created by the test run. + # + # Set the name to your email alias when doing developer runs. 
# - name: name value: "WALinuxAgent" is_case_visible: true + + # + # Test suites to execute + # + - name: test_suites + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + + # + # Parameters used to create test VMs + # - name: subscription_id value: "" is_case_visible: true + - name: cloud + value: "AzureCloud" + is_case_visible: true + - name: location + value: "" + - name: image + value: "" + - name: vm_size + value: "" + + # + # Whether to skip deletion of the test VMs after the test run completes. + # + # Possible values: always, no, failed + # + - name: keep_environment + value: "no" + is_case_visible: true + # + # Username and SSH public key for the admin user on the test VMs + # + - name: user + value: "waagent" + is_case_visible: true + - name: identity_file + value: "" + is_case_visible: true + + # + # Set the resource group and vm, or the group and the vmss, to execute the test run on an existing VM or VMSS. + # - name: resource_group_name value: "" is_case_visible: true @@ -30,106 +80,96 @@ variable: is_case_visible: true # - # These variables define parameters for the AgentTestSuite; see the test wiki for details. + # Directory for test logs # - # NOTE: c_test_suites, generated by the AgentTestSuitesCombinator, is also a parameter - # for the AgentTestSuite - # - # Root directory for log files (optional) - name: log_path value: "" is_case_visible: true + # # Whether to collect logs from the test VM + # + # Possible values: always, no, failed + # - name: collect_logs value: "failed" is_case_visible: true - # Whether to skip setup of the test VM + # + # Whether to skip setup of the test VMs. This is useful in developer runs when using existing VMs to save initialization time. 
+ # - name: skip_setup value: false is_case_visible: true # - # These variables are parameters for the AgentTestSuitesCombinator + # These variables are handled by LISA to use an SSH proxy when executing the runbook # - # The test suites to execute - - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" - - name: cloud - value: "AzureCloud" - is_case_visible: true - - name: image - value: "" - - name: location - value: "" - - name: vm_size + - name: proxy + value: False + - name: proxy_host value: "" - - name: user - value: "waagent" - is_case_visible: true - - name: identity_file + - name: proxy_user + value: "foo" + - name: proxy_identity_file value: "" - is_case_visible: true - - name: keep_environment - value: "no" - is_case_visible: true + is_secret: true # - # The values for these variables are generated by the AgentTestSuitesCombinator combinator. They are + # The variables below are generated by the AgentTestSuitesCombinator combinator. They are # prefixed with "c_" to distinguish them from the rest of the variables, whose value can be set from # the command line. # - # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be - # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. + # - # 'c_vm_tags' is a special case: it is used by the azure_update_arm_template hook. This hook does not - # have access to the runbook variables, so instead we use a dummy VM tag named "template" to pass the - # name of the custom ARM template that the hook needs to use (see wiki for more details). 
+ # The combinator generates the test environments using these two variables, which are passed to LISA # - - name: c_env_name - value: "" - is_case_visible: true - - name: c_marketplace_image - value: "" - is_case_visible: true - name: c_environment value: {} - name: c_platform value: [] - - name: c_marketplace_image_information_location - value: "" - - name: c_shared_resource_group_location - value: "" - - name: c_vm_size - value: "" - - name: c_location - value: "" - is_case_visible: true - - name: c_vhd + + # + # Name of the test environment, used mainly for logging purposes + # + - name: c_env_name value: "" is_case_visible: true + + # + # Test suites assigned for execution in the current test environment. + # + # The combinator splits the test suites specified in the 'test_suites' variable in subsets and assigns each subset + # to a test environment. The AgentTestSuite uses 'c_test_suites' to execute the suites assigned to the current environment. + # - name: c_test_suites value: [] is_case_visible: true - - name: c_vm_tags - value: {} + + # + # These parameters are used by the AgentTestSuite to create the test scale sets. + # + # Note that there are 3 other variables named 'image', 'vm_size' and 'location', which can be passed + # from the command line. The combinator generates the values for these parameters using test metadata, + # but they can be overridden with these command line variables. The final values are passed to the + # AgentTestSuite in the corresponding 'c_*' variables. 
+ # + - name: c_image + value: "" + is_case_visible: true + - name: c_vm_size + value: "" + is_case_visible: true - name: c_location value: "" is_case_visible: true # - # Set these variables to use an SSH proxy when executing the runbook + # True if the image is a VHD (instead of a URN) # - - name: proxy - value: False - - name: proxy_host - value: "" - - name: proxy_user - value: "foo" - - name: proxy_identity_file - value: "" - is_secret: true + - name: c_is_vhd + value: false + is_case_visible: true environment: $(c_environment) @@ -137,17 +177,17 @@ platform: $(c_platform) combinator: type: agent_test_suites - test_suites: $(test_suites) cloud: $(cloud) - subscription_id: $(subscription_id) + identity_file: $(identity_file) image: $(image) - location: $(location) - vm_size: $(vm_size) keep_environment: $(keep_environment) - user: $(user) - identity_file: $(identity_file) + location: $(location) resource_group_name: $(resource_group_name) + subscription_id: $(subscription_id) + test_suites: $(test_suites) + user: $(user) vm_name: $(vm_name) + vm_size: $(vm_size) vmss_name: $(vmss_name) concurrency: 32 diff --git a/tests_e2e/tests/lib/logging.py b/tests_e2e/tests/lib/logging.py index a6cf6566b1..e713dce9d3 100644 --- a/tests_e2e/tests/lib/logging.py +++ b/tests_e2e/tests/lib/logging.py @@ -155,3 +155,18 @@ def set_current_thread_log(log_file: Path): log.close_current_thread_log() if initial_value is not None: log.set_current_thread_log(initial_value) + + +@contextlib.contextmanager +def set_thread_name(name: str): + """ + Context Manager to change the name of the current thread temporarily + """ + initial_name = current_thread().name + current_thread().name = name + try: + yield + finally: + current_thread().name = initial_name + + diff --git a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py index 5fd77bd03f..f4b92ac923 100644 --- a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py +++ 
b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py @@ -61,13 +61,14 @@ def list_vms(self) -> List[VirtualMachineScaleSetVM]: """ Returns the VM instances of the virtual machine scale set """ + log.info("Retrieving instances of scale set %s", self) return list(self._compute_client.virtual_machine_scale_set_vms.list(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name)) def get_instances_ip_address(self) -> List[VmssInstanceIpAddress]: """ Returns a list containing the IP addresses of scale set instances """ - log.info("Retrieving instances of scale set %s", self) + log.info("Retrieving IP addresses of scale set %s", self) ip_addresses = self._network_client.public_ip_addresses.list_virtual_machine_scale_set_public_ip_addresses(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name) ip_addresses = list(ip_addresses) From 7dcc600a0334b5d79ebd8abb1952a4ac53d54816 Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 23 Oct 2023 07:34:54 -0700 Subject: [PATCH 07/30] . 
--- test-requirements.txt | 1 + .../orchestrator/lib/agent_test_suite.py | 7 ++-- .../lib/agent_test_suite_combinator.py | 36 +++---------------- .../agent_ext_workflow/extension_workflow.py | 2 +- .../agent_not_provisioned.py | 1 - tests_e2e/tests/agent_status/agent_status.py | 2 +- .../ext_telemetry_pipeline.py | 4 +-- .../extensions_disabled.py | 2 +- tests_e2e/tests/fips/fips.py | 2 +- .../keyvault_certificates.py | 2 +- tests_e2e/tests/lib/agent_test.py | 1 - tests_e2e/tests/lib/azure_sdk_client.py | 2 +- tests_e2e/tests/lib/resource_group_client.py | 6 ++-- tests_e2e/tests/lib/virtual_machine_client.py | 6 ++-- .../lib/virtual_machine_extension_client.py | 2 +- .../lib/virtual_machine_scale_set_client.py | 4 +-- .../multi_config_ext/multi_config_ext.py | 2 +- 17 files changed, 28 insertions(+), 54 deletions(-) diff --git a/test-requirements.txt b/test-requirements.txt index 89a2bb2c5d..2b9467870e 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -17,6 +17,7 @@ assertpy azure-core azure-identity azure-mgmt-compute>=22.1.0 +azure-mgmt-network>=19.3.0 azure-mgmt-resource>=15.0.0 msrestazure pytz diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index b1a2e9e764..a333858905 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import contextlib import datetime import json import logging @@ -123,6 +122,8 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: super().__init__(metadata) self._working_directory: Path # Root directory for temporary files self._log_path: Path # Root directory for log files + self._pypy_x64_path: Path # Path to the Pypy x64 download + self._pypy_arm64_path: Path # Path to the Pypy ARM64 download self._test_agent_package_path: Path # Path to the package for the test Agent self._test_source_directory: Path # Root directory of the source code for the end-to-end tests self._test_tools_tarball_path: Path # Path to the tarball with the tools needed on the test node @@ -671,6 +672,7 @@ def _check_agent_log_on_test_nodes(self, test_nodes: List[_TestNode], ignore_err def _create_test_context(self, environment: Environment) -> Tuple[AgentTestContext, List[_TestNode]]: """ + Creates the context for the test suite run. Returns a tuple containing the test context and the list of test nodes """ # Note that all the test suites in the environment have the same value for executes_on_scale_set so we can use the first one if self._test_suites[0].executes_on_scale_set: @@ -762,6 +764,7 @@ def _report_test_result( def _create_scale_set(self) -> VirtualMachineScaleSetClient: """ + Creates a scale set for the test suite run """ self._resource_group_counter_lock.acquire() try: @@ -777,7 +780,7 @@ def _create_scale_set(self) -> VirtualMachineScaleSetClient: resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=resource_group_name) self._lisa_log.info("Creating resource group %s", resource_group) - resource_group.create() + resource_group.create_client() self._resource_groups_to_delete.append(resource_group) self._lisa_log.info("Creating scale set %s", scale_set_name) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 
d7e4781aae..934a82993d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import datetime -import json import logging import random import re @@ -241,7 +240,7 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: name=self.runbook.vm_name) ip_address = vm.get_ip_address() - environment = { + return { "c_env_name": self.runbook.vm_name, "c_platform": [ { @@ -267,10 +266,6 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: "c_test_suites": loader.test_suites, } - self._log.info("Created environment %s for an existing VM *** Scale set: %s IP_Addresses: %s", environment['c_env_name'], self.runbook.vm_name, ip_address) - - return environment - def create_existing_vmss_environment(self) -> Dict[str, Any]: loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) @@ -283,7 +278,7 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: ip_addresses = vmss.get_instances_ip_address() - environment = { + return { "c_env_name": self.runbook.vmss_name, "c_environment": { "environments": [ @@ -309,11 +304,6 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: "c_test_suites": loader.test_suites, } - self._log.info( - "Created environment %s for an existing scale set *** Scale set: %s IP_Addresses: %s", environment['c_env_name'], self.runbook.vmss_name, [i.ip_address for i in ip_addresses]) - - return environment - def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: # # Custom ARM templates (to create the test VMs) require special handling. 
These templates are processed by the azure_update_arm_template @@ -325,7 +315,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, vm_tags = {} if test_suite_info.template != '': vm_tags["templates"] = test_suite_info.template - environment = { + return { "c_platform": [ { "type": "azure", @@ -365,17 +355,8 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "vm_tags": vm_tags } - self._log.info( - "Created environment %s for a new VM *** Image: %s Location: %s VM_Size: %s", - environment['c_env_name'], - vhd if vhd != "" else marketplace_image.replace(" ", ":"), - location, - vm_size if vm_size != "" else "default") - - return environment - def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: - environment = { + return { "c_platform": [ { "type": "ready" @@ -400,15 +381,6 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio "c_vm_size": vm_size } - self._log.info( - "Created environment %s for a new VMSS *** Image: %s Location: %s VM_Size: %s", - environment['c_env_name'], - marketplace_image.replace(" ", ":"), - location, - vm_size if vm_size != "" else "default") - - return environment - def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: """ Returns the images specified in the runbook, or an empty list if none are specified. 
diff --git a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py index edf179f39d..c262dac7ac 100644 --- a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -61,7 +61,7 @@ class ExtensionWorkflow(AgentVmTest): """ def __init__(self, context: AgentVmTestContext): super().__init__(context) - self._ssh_client = self.create_ssh_client() + self._ssh_client = context.create_ssh_client() # This class represents the GuestAgentDcrTestExtension running on the test VM class GuestAgentDcrTestExtension: diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py index ed8dc7caec..214affa61b 100755 --- a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -26,7 +26,6 @@ from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient diff --git a/tests_e2e/tests/agent_status/agent_status.py b/tests_e2e/tests/agent_status/agent_status.py index 612434b8c4..5173f630de 100644 --- a/tests_e2e/tests/agent_status/agent_status.py +++ b/tests_e2e/tests/agent_status/agent_status.py @@ -139,7 +139,7 @@ def run(self): log.info("") log.info("*******Verifying the agent status updates 3 times*******") - vm = VirtualMachineClient(self._context.vm) + vm = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) timeout = datetime.now() + timedelta(minutes=6) instance_view_exception = None diff --git 
a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index e1ffd5fba5..bcd6c40387 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -77,7 +77,7 @@ def run(self): log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) - self._run_remote_test(self._ssh_client, + self._run_remote_test(ssh_client, f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events}", use_sudo=True) @@ -87,7 +87,7 @@ def run(self): # Add invalid events for each extension and check that the TelemetryEventsCollector drops them log.info("") log.info("Add bad extension events and check they are reported...") - self._run_remote_test(self._ssh_client, + self._run_remote_test(ssh_client, f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events} " diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 084b29e4c0..6ca4f723a3 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -109,7 +109,7 @@ def run(self): # # Validate that the agent continued reporting status even if it is not processing extensions # - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) log.info("") instance_view: VirtualMachineInstanceView = vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) diff --git a/tests_e2e/tests/fips/fips.py 
b/tests_e2e/tests/fips/fips.py index a64f6fec7a..5ecc659cf9 100755 --- a/tests_e2e/tests/fips/fips.py +++ b/tests_e2e/tests/fips/fips.py @@ -45,7 +45,7 @@ def run(self): raise Exception(f"Failed to enable FIPS: {e}") log.info("Restarting test VM") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) vm.restart(wait_for_boot=True, ssh_client=ssh_client) try: diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py index 01049bdf8b..d88da1db17 100755 --- a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -59,7 +59,7 @@ def run(self): else: log.info("Some test certificates had already been downloaded to the test VM (they have been deleted now):\n%s", existing_certificates) - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) osprofile = { "location": self._context.vm.location, diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index b64488dcc5..4f8544dd44 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -55,7 +55,6 @@ def run(self): """ Test must define this method, which is used to execute the test. 
""" - pass def get_ignore_error_rules(self) -> List[Dict[str, Any]]: """ diff --git a/tests_e2e/tests/lib/azure_sdk_client.py b/tests_e2e/tests/lib/azure_sdk_client.py index 078c6d1d62..f76d83ca72 100644 --- a/tests_e2e/tests/lib/azure_sdk_client.py +++ b/tests_e2e/tests/lib/azure_sdk_client.py @@ -32,7 +32,7 @@ class AzureSdkClient: _DEFAULT_TIMEOUT = 10 * 60 # (in seconds) @staticmethod - def create(client_type: type, cloud: str, subscription_id: str): + def create_client(client_type: type, cloud: str, subscription_id: str): """ Creates an SDK client of the given 'client_type' """ diff --git a/tests_e2e/tests/lib/resource_group_client.py b/tests_e2e/tests/lib/resource_group_client.py index d48a74d065..4445f6a019 100644 --- a/tests_e2e/tests/lib/resource_group_client.py +++ b/tests_e2e/tests/lib/resource_group_client.py @@ -38,10 +38,10 @@ def __init__(self, cloud: str, subscription: str, name: str, location: str = "") self.location = location self.subscription: str = subscription self.name: str = name - self._compute_client = AzureSdkClient.create(ComputeManagementClient, cloud, subscription) - self._resource_client = AzureSdkClient.create(ResourceManagementClient, cloud, subscription) + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create_client(ResourceManagementClient, cloud, subscription) - def create(self) -> None: + def create_client(self) -> None: """ Creates a resource group """ diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index b82032b1b7..37dcfaef13 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -48,9 +48,9 @@ def __init__(self, cloud: str, location: str, subscription: str, resource_group: self.subscription: str = subscription self.resource_group: str = resource_group self.name: str = name - self._compute_client = 
AzureSdkClient.create(ComputeManagementClient, cloud, subscription) - self._resource_client = AzureSdkClient.create(ResourceManagementClient, cloud, subscription) - self._network_client = AzureSdkClient.create(NetworkManagementClient, cloud, subscription) + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create_client(ResourceManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create_client(NetworkManagementClient, cloud, subscription) def get_ip_address(self) -> str: """ diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index 6d35756d16..b05e8d16da 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -43,7 +43,7 @@ def __init__(self, vm: VirtualMachineClient, extension: VmExtensionIdentifier, r self._vm: VirtualMachineClient = vm self._identifier = extension self._resource_name = resource_name or extension.type - self._compute_client: ComputeManagementClient = AzureSdkClient.create(ComputeManagementClient, self._vm.cloud, self._vm.subscription) + self._compute_client: ComputeManagementClient = AzureSdkClient.create_client(ComputeManagementClient, self._vm.cloud, self._vm.subscription) def get_instance_view(self) -> VirtualMachineExtensionInstanceView: """ diff --git a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py index f4b92ac923..c8e5cf323f 100644 --- a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py +++ b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py @@ -54,8 +54,8 @@ def __init__(self, cloud: str, location: str, subscription: str, resource_group: self.subscription: str = subscription self.resource_group: str = resource_group self.name: str = name - self._compute_client = 
AzureSdkClient.create(ComputeManagementClient, cloud, subscription) - self._network_client = AzureSdkClient.create(NetworkManagementClient, cloud, subscription) + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create_client(NetworkManagementClient, cloud, subscription) def list_vms(self) -> List[VirtualMachineScaleSetVM]: """ diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py index a79d68f4fa..dd24a7cf50 100644 --- a/tests_e2e/tests/multi_config_ext/multi_config_ext.py +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -68,7 +68,7 @@ def delete_extensions(self, test_cases: Dict[str, TestCase]): test_case.extension.delete() log.info("") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) instance_view: VirtualMachineInstanceView = vm.get_instance_view() if instance_view.extensions is not None: for ext in instance_view.extensions: From d49bd310e79fd329de72b5cf8f0c03224ac07634 Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 23 Oct 2023 08:00:48 -0700 Subject: [PATCH 08/30] . 
--- tests_e2e/orchestrator/lib/agent_test_suite.py | 4 ++-- .../orchestrator/lib/agent_test_suite_combinator.py | 2 +- tests_e2e/tests/agent_status/agent_status.py | 7 ++++++- .../tests/extensions_disabled/extensions_disabled.py | 7 ++++++- tests_e2e/tests/fips/fips.py | 7 ++++++- .../keyvault_certificates/keyvault_certificates.py | 7 ++++++- tests_e2e/tests/lib/agent_test_context.py | 2 +- tests_e2e/tests/lib/resource_group_client.py | 2 +- tests_e2e/tests/multi_config_ext/multi_config_ext.py | 10 +++++++++- 9 files changed, 38 insertions(+), 10 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index a333858905..3adedd16fd 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -704,7 +704,7 @@ def _create_test_context(self, environment: Environment) -> Tuple[AgentTestConte resource_group_name = node_context.resource_group_name node_name = node_context.vm_name - vm = VirtualMachineClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=node_name) + vm: VirtualMachineClient = VirtualMachineClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=node_name) test_context = AgentVmTestContext( working_directory=self._working_directory, @@ -780,7 +780,7 @@ def _create_scale_set(self) -> VirtualMachineScaleSetClient: resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=resource_group_name) self._lisa_log.info("Creating resource group %s", resource_group) - resource_group.create_client() + resource_group.create() self._resource_groups_to_delete.append(resource_group) self._lisa_log.info("Creating scale set %s", scale_set_name) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py 
b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 934a82993d..f8f74777b0 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -232,7 +232,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: def create_existing_vm_environment(self) -> Dict[str, Any]: loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) - vm = VirtualMachineClient( + vm: VirtualMachineClient = VirtualMachineClient( cloud=self.runbook.cloud, location=self.runbook.location, subscription=self.runbook.subscription_id, diff --git a/tests_e2e/tests/agent_status/agent_status.py b/tests_e2e/tests/agent_status/agent_status.py index 5173f630de..7da09a856c 100644 --- a/tests_e2e/tests/agent_status/agent_status.py +++ b/tests_e2e/tests/agent_status/agent_status.py @@ -139,7 +139,12 @@ def run(self): log.info("") log.info("*******Verifying the agent status updates 3 times*******") - vm = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) timeout = datetime.now() + timedelta(minutes=6) instance_view_exception = None diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 6ca4f723a3..707a787c96 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -109,7 +109,12 @@ def run(self): # # Validate that the agent continued reporting status even if it is not processing extensions # - vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, 
location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) log.info("") instance_view: VirtualMachineInstanceView = vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py index 5ecc659cf9..f96507e768 100755 --- a/tests_e2e/tests/fips/fips.py +++ b/tests_e2e/tests/fips/fips.py @@ -45,7 +45,12 @@ def run(self): raise Exception(f"Failed to enable FIPS: {e}") log.info("Restarting test VM") - vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) vm.restart(wait_for_boot=True, ssh_client=ssh_client) try: diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py index d88da1db17..e5fa9b3ba4 100755 --- a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -59,7 +59,12 @@ def run(self): else: log.info("Some test certificates had already been downloaded to the test VM (they have been deleted now):\n%s", existing_certificates) - vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) + vm: 
VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) osprofile = { "location": self._context.vm.location, diff --git a/tests_e2e/tests/lib/agent_test_context.py b/tests_e2e/tests/lib/agent_test_context.py index c807247b8a..b818b1298b 100644 --- a/tests_e2e/tests/lib/agent_test_context.py +++ b/tests_e2e/tests/lib/agent_test_context.py @@ -91,7 +91,7 @@ def from_args(): if not working_directory.exists(): working_directory.mkdir(exist_ok=True) - vm = VirtualMachineClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vm) + vm: VirtualMachineClient = VirtualMachineClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vm) ip_address = args.ip_address if args.ip_address is not None else args.vm return AgentVmTestContext(working_directory=working_directory, vm=vm, ip_address=ip_address, username=args.username, identity_file=Path(args.identity_file), ssh_port=args.ssh_port) diff --git a/tests_e2e/tests/lib/resource_group_client.py b/tests_e2e/tests/lib/resource_group_client.py index 4445f6a019..9ca07a2602 100644 --- a/tests_e2e/tests/lib/resource_group_client.py +++ b/tests_e2e/tests/lib/resource_group_client.py @@ -41,7 +41,7 @@ def __init__(self, cloud: str, subscription: str, name: str, location: str = "") self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) self._resource_client = AzureSdkClient.create_client(ResourceManagementClient, cloud, subscription) - def create_client(self) -> None: + def create(self) -> None: """ Creates a resource group """ diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py index dd24a7cf50..4f5c588504 100644 --- 
a/tests_e2e/tests/multi_config_ext/multi_config_ext.py +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -68,8 +68,16 @@ def delete_extensions(self, test_cases: Dict[str, TestCase]): test_case.extension.delete() log.info("") - vm: VirtualMachineClient = VirtualMachineClient(cloud=self._context.vm.cloud, location=self._context.vm.location, resource_group=self._context.vm.resource_group, name=self._context.vm.name) + + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) + instance_view: VirtualMachineInstanceView = vm.get_instance_view() + if instance_view.extensions is not None: for ext in instance_view.extensions: if ext.name in test_cases.keys(): From 31b02714b859654c08aa2b57f1609c29e9f98d3b Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 23 Oct 2023 15:40:25 -0700 Subject: [PATCH 09/30] . 
--- .../orchestrator/lib/agent_test_suite.py | 222 ++++++++++-------- .../lib/agent_test_suite_combinator.py | 3 + tests_e2e/tests/agent_update/rsm_update.py | 4 +- .../lib/virtual_machine_scale_set_client.py | 13 +- 4 files changed, 144 insertions(+), 98 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 3adedd16fd..0904f72678 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -17,6 +17,7 @@ import datetime import json import logging +import re import traceback import uuid @@ -38,9 +39,10 @@ ) from lisa.environment import EnvironmentStatus # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 -from lisa.node import Node # pylint: disable=E0401 +from lisa.node import Node, LocalNode # pylint: disable=E0401 +from lisa.util.constants import RUN_ID # pylint: disable=E0401 from lisa.sut_orchestrator.azure.common import get_node_context # pylint: disable=E0401 -from lisa.sut_orchestrator.ready import ReadyPlatform # pylint: disable=E0401 +from lisa.sut_orchestrator.azure.platform_ import AzurePlatform # pylint: disable=E0401 import makepkg from azurelinuxagent.common.version import AGENT_VERSION @@ -152,12 +154,17 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._collect_logs: str # Whether to collect logs from the test VMs (one of 'always', 'failed', or 'no') self._keep_environment: str # Whether to skip deletion of the resources created by the test suite (one of 'always', 'failed', or 'no') - # Resource group, VM, and VMSS names passed as arguments to the runbook. They are non-empty only when executing the runbook on an existing VM/VMSS - self._resource_group_name_arg: str - self._vm_name_arg: str - self._vmss_name_arg: str + # Resource group and VM/VMSS for the test machines. self._vm_name and self._vmss_name are mutually exclusive, only one of them will be set. 
+ self._resource_group_name: str + self._vm_name: str + self._vm_ip_address: str + self._vmss_name: str - self._resource_groups_to_delete: List[ResourceGroupClient] # Resource groups created by the AgentTestSuite, to be deleted at the end of the run + self._test_nodes: List[_TestNode] # VMs or scale set instances the tests will run on + + # Whether to create and delete a scale set. + self._create_scale_set: bool + self._delete_scale_set: bool def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): self._working_directory = self._get_working_directory(lisa_working_path) @@ -191,11 +198,56 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ self._keep_environment = variables["keep_environment"] self._collect_logs = variables["collect_logs"] - self._resource_group_name_arg = variables["resource_group_name"] - self._vm_name_arg = variables["vm_name"] - self._vmss_name_arg = variables["vmss_name"] - - self._resource_groups_to_delete = [] + # The AgentTestSuiteCombinator can create 4 kinds of platform/environment combinations: + # + # * New VM + # The VM is created by LISA. The platform will be 'azure' and the environment will contain a single 'remote' node. + # + # * Existing VM + # The VM was passed as argument to the runbook. The platform will be 'ready' and the environment will contain a single 'remote' node. + # + # * New VMSS + # The AgentTestSuite will create the scale set before executing the tests. The platform will be 'ready' and the environment will a single 'local' node. + # + # * Existing VMSS + # The VMSS was passed as argument to the runbook. The platform will be 'ready' and the environment will contain a list 'remote' nodes, + # one for each instance of the scale set. + # + + # Note that _vm_name and _vmss_name are mutually exclusive, only one of them will be set. 
+ self._vm_name = None + self._vm_ip_address = None + self._vmss_name = None + self._create_scale_set = False + self._delete_scale_set = False + + if isinstance(environment.nodes[0], LocalNode): + # We need to create a new VMSS. + # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group + # id to "n0" and the scale set name to "n0" since we are creating a single scale set. + self._resource_group_name = f"lisa-{self._runbook_name}-{RUN_ID}-e0" + self._vmss_name = f"{self._resource_group_name}-n0" + self._test_nodes = [] # we'll fill this up when the scale set is created + self._create_scale_set = True + self._delete_scale_set = False # we set it to True once we create the scale set + else: + # Else we are using a VM that was created by LISA, or an existing VM/VMSS + node_context = get_node_context(environment.nodes[0]) + + if isinstance(environment.nodes[0].features._platform, AzurePlatform): # The test VM was created by LISA + self._resource_group_name = node_context.resource_group_name + self._vm_name = node_context.vm_name + self._vm_ip_address = environment.nodes[0].connection_info['address'] + self._test_nodes = [_TestNode(self._vm_name, self._vm_ip_address)] + else: # An existing VM/VMSS was passed as argument to the runbook + self._resource_group_name = variables["resource_group_name"] + if variables["vm_name"] != "": + self._vm_name = variables["vm_name"] + self._vm_ip_address = environment.nodes[0].connection_info['address'] + self._test_nodes = [_TestNode(self._vm_name, self._vm_ip_address)] + else: + self._vmss_name = variables["vmss_name"] + self._test_nodes = [_TestNode(node.name, node.connection_info['address']) for node in environment.nodes.list()] @staticmethod def _get_log_path(variables: Dict[str, Any], lisa_log_path: str) -> Path: @@ -275,6 +327,7 @@ def _setup_test_run(self) -> None: log.info("Found Pypy at %s", pypy) else: pypy_download = 
f"https://dcrdata.blob.core.windows.net/python/{pypy.name}" + self._lisa_log.info("Downloading %s to %s", pypy_download, pypy) log.info("Downloading %s to %s", pypy_download, pypy) run_command(["wget", pypy_download, "-O", pypy]) @@ -284,6 +337,7 @@ def _setup_test_run(self) -> None: # * bin - Executables file (Bash and Python scripts) # * lib - Library files (Python modules) # + self._lisa_log.info("Creating %s with the tools needed on the test node", self._test_tools_tarball_path) log.info("Creating %s with the tools needed on the test node", self._test_tools_tarball_path) log.info("Adding orchestrator/scripts") command = "cd {0} ; tar cf {1} --transform='s,^,bin/,' *".format(self._test_source_directory/"orchestrator"/"scripts", self._test_tools_tarball_path) @@ -309,18 +363,18 @@ def _clean_up(self, success: bool) -> None: """ Cleans up any items created by the test suite run. """ - if len(self._resource_groups_to_delete) > 0: + if self._delete_scale_set: if self._keep_environment == KeepEnvironment.Always: - log.info("Won't delete resource groups %s, per the test suite configuration.", self._resource_groups_to_delete) - elif self._keep_environment == KeepEnvironment.No or self._keep_environment == KeepEnvironment.Failed and not success: - for resource_group in self._resource_groups_to_delete: - try: - self._lisa_log.info("Deleting resource group %s", resource_group) - resource_group.delete() - except Exception as error: # pylint: disable=broad-except - log.warning("Error deleting resource group %s: %s", resource_group, error) + log.info("Won't delete the scale set %s, per the test suite configuration.", self._vmss_name) + elif self._keep_environment == KeepEnvironment.No or self._keep_environment == KeepEnvironment.Failed and success: + try: + self._lisa_log.info("Deleting resource group containing the test VMSS: %s", self._resource_group_name) + resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, 
name=self._resource_group_name) + resource_group.delete() + except Exception as error: # pylint: disable=broad-except + log.warning("Error deleting resource group %s: %s", self._resource_group_name, error) - def _setup_test_nodes(self, test_nodes: List[_TestNode]) -> None: + def _setup_test_nodes(self) -> None: """ Prepares the provided remote nodes for executing the test suite (installs tools and the test agent, etc) """ @@ -329,7 +383,7 @@ def _setup_test_nodes(self, test_nodes: List[_TestNode]) -> None: log.info("") log.info("************************************ [Test Nodes Setup] ************************************") log.info("") - for node in test_nodes: + for node in self._test_nodes: self._lisa_log.info(f"Setting up test node {node}") log.info("Test Node: %s", node.name) log.info("IP Address: %s", node.ip_address) @@ -386,11 +440,11 @@ def _setup_test_nodes(self, test_nodes: List[_TestNode]) -> None: log.info("Completed test node setup") - def _collect_logs_from_test_nodes(self, test_nodes: List[_TestNode]) -> None: + def _collect_logs_from_test_nodes(self) -> None: """ Collects the test logs from the provided remote nodes and copies them to the local machine """ - for node in test_nodes: + for node in self._test_nodes: node_name = node.name ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) try: @@ -428,9 +482,9 @@ def main(self, environment: Environment, variables: Dict[str, Any], working_path Entry point from LISA """ self._initialize(environment, variables, working_path, log_path, log) - self._execute(environment) + self._execute() - def _execute(self, environment: Environment) -> None: + def _execute(self) -> None: unexpected_error = False test_suite_success = True @@ -451,11 +505,11 @@ def _execute(self, environment: Environment) -> None: self._setup_test_run() try: - test_context, test_nodes = self._create_test_context(environment) + test_context = self._create_test_context() if not 
self._skip_setup: try: - self._setup_test_nodes(test_nodes) + self._setup_test_nodes() except: test_suite_success = False raise @@ -463,11 +517,11 @@ def _execute(self, environment: Environment) -> None: for suite in self._test_suites: log.info("Executing test suite %s", suite.name) self._lisa_log.info("Executing Test Suite %s", suite.name) - test_suite_success = self._execute_test_suite(suite, test_context, test_nodes) and test_suite_success + test_suite_success = self._execute_test_suite(suite, test_context) and test_suite_success finally: if self._collect_logs == CollectLogs.Always or self._collect_logs == CollectLogs.Failed and not test_suite_success: - self._collect_logs_from_test_nodes(test_nodes) + self._collect_logs_from_test_nodes() except Exception as e: # pylint: disable=bare-except # Report the error and raise an exception to let LISA know that the test errored out. @@ -488,7 +542,7 @@ def _execute(self, environment: Environment) -> None: if unexpected_error: self._mark_log_as_failed() - def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext, test_nodes: List[_TestNode]) -> bool: + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext) -> bool: """ Executes the given test suite and returns True if all the tests in the suite succeeded. 
""" @@ -605,22 +659,22 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log_on_test_nodes(test_nodes, ignore_error_rules) + suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules) return suite_success - def _check_agent_log_on_test_nodes(self, test_nodes: List[_TestNode], ignore_error_rules: List[Dict[str, Any]]) -> bool: + def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]]) -> bool: """ Checks the agent log on the remote nodes for errors; returns true on success (no errors in the logs) """ success: bool = True - for node in test_nodes: + for node in self._test_nodes: node_name = node.name ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) test_result_name = self._environment_name - if len(test_nodes) > 1: + if len(self._test_nodes) > 1: # If there are multiple test nodes, as in a scale set, append the name of the node to the name of the result test_result_name += '_' + node_name.split('_')[-1] @@ -670,53 +724,48 @@ def _check_agent_log_on_test_nodes(self, test_nodes: List[_TestNode], ignore_err return success - def _create_test_context(self, environment: Environment) -> Tuple[AgentTestContext, List[_TestNode]]: + def _create_test_context(self,) -> AgentTestContext: """ Creates the context for the test suite run. 
Returns a tuple containing the test context and the list of test nodes """ - # Note that all the test suites in the environment have the same value for executes_on_scale_set so we can use the first one - if self._test_suites[0].executes_on_scale_set: + if self._vm_name is not None: + self._lisa_log.info("Creating test context for virtual machine") + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._cloud, + location=self._location, + subscription=self._subscription_id, + resource_group=self._resource_group_name, + name=self._vm_name) + return AgentVmTestContext( + working_directory=self._working_directory, + vm=vm, + ip_address=self._vm_ip_address, + username=self._user, + identity_file=self._identity_file) + else: log.info("Creating test context for scale set") - if self._vmss_name_arg != "": # If an existing scale set was passed as argument to the runbook - scale_set_client = VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=self._resource_group_name_arg, name=self._vmss_name_arg) - log.info("Using existing scale set %s", scale_set_client) + if self._create_scale_set: + self._create_test_scale_set() else: - log.info("Creating scale set") - scale_set_client = self._create_scale_set() + log.info("Using existing scale set %s", self._vmss_name) - test_context = AgentVmssTestContext(working_directory=self._working_directory, vmss=scale_set_client, username=self._user, identity_file=self._identity_file) - ip_addresses = scale_set_client.get_instances_ip_address() - log.info("Scale set instances: %s", [str(i) for i in ip_addresses]) - nodes = [_TestNode(i.instance_name, i.ip_address) for i in ip_addresses] - else: - self._lisa_log.info("Creating test context for virtual machine") - test_node: Node = environment.nodes[0] - connection_info = test_node.connection_info - - if isinstance(test_node.features._platform, ReadyPlatform): - # A "ready" platform indicates that the tests are 
running on an existing VM and the vm and resource group names - # were passed as arguments in the command line - resource_group_name = self._resource_group_name_arg - node_name = self._vm_name_arg - else: - # Else the test VM was created by LISA and we need to get the vm and resource group names from the node context - node_context = get_node_context(test_node) - resource_group_name = node_context.resource_group_name - node_name = node_context.vm_name + scale_set = VirtualMachineScaleSetClient( + cloud=self._cloud, + location=self._location, + subscription=self._subscription_id, + resource_group=self._resource_group_name, + name=self._vmss_name) - vm: VirtualMachineClient = VirtualMachineClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=node_name) + # If we created the scale set, fill up the test nodes + if self._create_scale_set: + self._test_nodes = [_TestNode(name=i.instance_name, ip_address=i.ip_address) for i in scale_set.get_instances_ip_address()] - test_context = AgentVmTestContext( + return AgentVmssTestContext( working_directory=self._working_directory, - vm=vm, - ip_address=connection_info['address'], + vmss=scale_set, username=self._user, identity_file=self._identity_file) - nodes = [_TestNode(test_context.vm.name, test_context.ip_address)] - - return test_context, nodes - @staticmethod def _mark_log_as_failed(): """ @@ -759,37 +808,20 @@ def _report_test_result( notifier.notify(msg) - _resource_group_counter: int = 0 # Used to generate unique resource group names - _resource_group_counter_lock: RLock = RLock() - - def _create_scale_set(self) -> VirtualMachineScaleSetClient: + def _create_test_scale_set(self) -> None: """ Creates a scale set for the test suite run """ - self._resource_group_counter_lock.acquire() - try: - unique_id = self._resource_group_counter - self._resource_group_counter += 1 - finally: - self._resource_group_counter_lock.release() - - # We use a naming 
convention similar to LISA's, to facilitate automatic cleanup and to identify resources created by automation - timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - resource_group_name = f"lisa-{self._runbook_name}-{timestamp}-e{unique_id}" - scale_set_name = f"{resource_group_name}-n0" - - resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=resource_group_name) - self._lisa_log.info("Creating resource group %s", resource_group) + self._lisa_log.info("Creating resource group %s", self._resource_group_name) + resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=self._resource_group_name) resource_group.create() - self._resource_groups_to_delete.append(resource_group) + self._delete_scale_set = True - self._lisa_log.info("Creating scale set %s", scale_set_name) - log.info("Creating scale set %s", scale_set_name) - template, parameters = self._get_scale_set_deployment_template(scale_set_name) + self._lisa_log.info("Creating scale set %s", self._vmss_name) + log.info("Creating scale set %s", self._vmss_name) + template, parameters = self._get_scale_set_deployment_template(self._vmss_name) resource_group.deploy_template(template, parameters) - return VirtualMachineScaleSetClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, resource_group=resource_group_name, name=scale_set_name) - def _get_scale_set_deployment_template(self, scale_set_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ Returns the deployment template for scale sets and its parameters diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index f8f74777b0..a755361238 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -238,6 +238,7 @@ def 
create_existing_vm_environment(self) -> Dict[str, Any]: subscription=self.runbook.subscription_id, resource_group=self.runbook.resource_group_name, name=self.runbook.vm_name) + ip_address = vm.get_ip_address() return { @@ -253,6 +254,7 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: "nodes": [ { "type": "remote", + "name": self.runbook.vm_name, "public_address": ip_address, "public_port": 22, "username": self.runbook.user, @@ -286,6 +288,7 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: "nodes": [ { "type": "remote", + "name": i.instance_name, "public_address": i.ip_address, "public_port": 22, "username": self.runbook.user, diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 06c31a8116..8325599910 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -179,14 +179,14 @@ def _request_rsm_update(self, requested_version: str) -> None: else: log.info("Already enableVMAgentPlatformUpdates flag set to True") - cloud: Cloud = AZURE_CLOUDS[self._context.cloud] + cloud: Cloud = AZURE_CLOUDS[self._context.vm.cloud] credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) token = credential.get_token(cloud.endpoints.resource_manager + "/.default") headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} # Later this api call will be replaced by azure-python-sdk wrapper base_url = cloud.endpoints.resource_manager url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ - "UpgradeVMAgent?api-version=2022-08-01".format(self._context.subscription, self._context.resource_group, self._context.name) + "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, self._context.vm.resource_group, self._context.vm.name) data = { "target": "Microsoft.OSTCLinuxAgent.Test", "targetVersion": requested_version 
diff --git a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py index c8e5cf323f..92738576ca 100644 --- a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py +++ b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py @@ -24,11 +24,12 @@ from typing import List from azure.mgmt.compute import ComputeManagementClient -from azure.mgmt.compute.models import VirtualMachineScaleSetVM +from azure.mgmt.compute.models import VirtualMachineScaleSetVM, VirtualMachineScaleSetInstanceView from azure.mgmt.network import NetworkManagementClient from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry class VmssInstanceIpAddress(object): @@ -91,6 +92,16 @@ def delete_extension(self, extension: str, timeout: int = AzureSdkClient._DEFAUL operation_name=f"Delete {extension} from {self}", timeout=timeout) + def get_instance_view(self) -> VirtualMachineScaleSetInstanceView: + """ + Retrieves the instance view of the virtual machine + """ + log.info("Retrieving instance view for %s", self) + return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_sets.get_instance_view( + resource_group_name=self.resource_group, + vm_scale_set_name=self.name + )) + def __str__(self): return f"{self.resource_group}:{self.name}" From 46c9eefb08422cff3a8e317b76db4cbaf37a23be Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 23 Oct 2023 15:48:10 -0700 Subject: [PATCH 10/30] . 
--- tests_e2e/orchestrator/lib/agent_test_suite.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 0904f72678..9fcf969d75 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -17,7 +17,6 @@ import datetime import json import logging -import re import traceback import uuid @@ -39,7 +38,7 @@ ) from lisa.environment import EnvironmentStatus # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 -from lisa.node import Node, LocalNode # pylint: disable=E0401 +from lisa.node import LocalNode # pylint: disable=E0401 from lisa.util.constants import RUN_ID # pylint: disable=E0401 from lisa.sut_orchestrator.azure.common import get_node_context # pylint: disable=E0401 from lisa.sut_orchestrator.azure.platform_ import AzurePlatform # pylint: disable=E0401 @@ -456,7 +455,7 @@ def _collect_logs_from_test_nodes(self) -> None: # Copy the tarball to the local logs directory tgz_name = self._environment_name - if len(test_nodes) > 1: + if len(self._test_nodes) > 1: # Append instance of scale set to the end of tarball name tgz_name += '_' + node_name.split('_')[-1] remote_path = "/tmp/waagent-logs.tgz" From 2b9de7f689e80cb62c8dd726d07f1cdfc783b96e Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 23 Oct 2023 19:41:44 -0700 Subject: [PATCH 11/30] . 
--- .../lib/agent_test_suite_combinator.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index a755361238..9e84041756 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -205,13 +205,13 @@ def create_environment_list(self) -> List[Dict[str, Any]]: test_suite_info=test_suite_info) shared_environments[env_name] = env - if test_suite_info.template != '': - vm_tags = env.get("vm_tags") - if vm_tags is not None: - if "templates" not in vm_tags: - vm_tags["templates"] = test_suite_info.template - else: - vm_tags["templates"] += ", " + test_suite_info.template + if test_suite_info.template != '': + vm_tags = env.get("vm_tags") + if vm_tags is not None: + if "templates" not in vm_tags: + vm_tags["templates"] = test_suite_info.template + else: + vm_tags["templates"] += "," + test_suite_info.template environments.extend(shared_environments.values()) From 59b509afa756d2e485081b367ec388c964f358dd Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 30 Oct 2023 12:03:41 -0700 Subject: [PATCH 12/30] Add new test cases --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/ext_sequencing.yml | 11 + .../ext_sequencing/ext_seq_test_cases.py | 292 ++++++++++++++++++ .../tests/ext_sequencing/ext_sequencing.py | 224 ++++++++++++++ .../ext_sequencing-get_ext_enable_time.py | 76 +++++ 5 files changed, 604 insertions(+), 1 deletion(-) create mode 100644 tests_e2e/test_suites/ext_sequencing.yml create mode 100644 tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py create mode 100644 tests_e2e/tests/ext_sequencing/ext_sequencing.py create mode 100755 tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index a076264036..223f63a7b0 100644 --- 
a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml new file mode 100644 index 0000000000..55932ac71e --- /dev/null +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -0,0 +1,11 @@ +# +# Adds extensions with multiple dependencies to VMSS using 'provisionAfterExtensions' property and validates they are +# enabled in order of dependencies. +# +name: "ExtSequencing" +tests: + - "ext_sequencing/ext_sequencing.py" +images: "endorsed" +locations: "AzureCloud:eastus2euap" +# This scenario is executed on instances of a scaleset created by the agent test suite. 
+executes_on_scale_set: true \ No newline at end of file diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py new file mode 100644 index 0000000000..b3ffa0b7c2 --- /dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -0,0 +1,292 @@ +def add_one_dependent_ext_without_settings(): + # Dependent extensions without settings should be enabled with dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def add_two_extensions_with_dependencies(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def remove_one_dependent_extension(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + 
"autoUpgradeMinorVersion": True, + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def remove_all_dependencies(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def add_one_dependent_extension(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def add_single_dependencies(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, 
+ } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": {} + } + } + ] + + +def remove_all_dependent_extensions(): + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + } + ] + + +def add_failing_dependent_extension_with_one_dependency(): + # This case tests that extensions dependent on a failing extensions are skipped, but extensions that are not + # dependent on the failing extension still get enabled + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + # script contents are base-64 encoded: + # #!/bin/bash + # + # echo "Exit script with non-zero exit code" + # exit 1 + "script": "IyEvYmluL2Jhc2gKCmVjaG8gIkV4aXQgc2NyaXB0IHdpdGggbm9uLXplcm8gZXhpdCBjb2RlIgpleGl0IDEK" + } + } + } + ] + + +def 
add_failing_dependent_extension_with_two_dependencies(): + # This case tests that all extensions dependent on a failing extensions are skipped + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + # script contents are base-64 encoded: + # #!/bin/bash + # + # echo "Exit script with non-zero exit code" + # exit 2 + "script": "IyEvYmluL2Jhc2gKCmVjaG8gIkV4aXQgc2NyaXB0IHdpdGggbm9uLXplcm8gZXhpdCBjb2RlIgpleGl0IDIK" + } + } + } + ] diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py new file mode 100644 index 0000000000..732eccdf43 --- /dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test adds extensions with multiple dependencies to a VMSS using the 'provisionAfterExtensions' property and +# validates they are enabled in order of dependencies. +# +import copy +import uuid +from datetime import datetime +from typing import List, Dict, Any + +from assertpy import fail, assert_that +from azure.mgmt.compute.models import VirtualMachineScaleSetVMExtensionsSummary + +from tests_e2e.tests.ext_sequencing.ext_seq_test_cases import add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, \ + remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ + add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies +from tests_e2e.tests.lib.agent_test import AgentVmssTest +from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient +from tests_e2e.tests.lib.ssh_client import SshClient + + +class ExtSequencing(AgentVmssTest): + # Cases to test different dependency scenarios + test_cases = [ + add_one_dependent_ext_without_settings, + add_two_extensions_with_dependencies, + remove_one_dependent_extension, + remove_all_dependencies, + add_one_dependent_extension, + add_single_dependencies, + remove_all_dependent_extensions, + add_failing_dependent_extension_with_one_dependency, + add_failing_dependent_extension_with_two_dependencies + ] + + @staticmethod + def get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, List[str]]: + dependency_map = dict() + + for ext in extensions: + ext_name = ext['name'] + provisioned_after = ext['properties'].get('provisionAfterExtensions') + dependency_map[ext_name] = provisioned_after + + return dependency_map + + @staticmethod 
+ def validate_dependent_extensions_fail(dependency_map: Dict[str, List[str]], extensions: List[VirtualMachineScaleSetVMExtensionsSummary]): + failed_extensions = [ext.name for ext in extensions if "failed" in ext.statuses_summary[0].code] + for ext, dependencies in dependency_map.items(): + for dep in dependencies: + if dep in failed_extensions: + assert_that(ext in failed_extensions).described_as("{0} dependent on failing extension {1} should also fail") + + for ext in extensions: + dependencies = dependency_map[ext.name] + assert_that("failed" in ext.statuses_summary[0].code).described_as( + "CustomScript should have failed to enable").is_true() + if "CustomScript" in dependency_map[ext.name]: + assert_that("failed" in ext.statuses_summary[0].code).described_as( + "{0} should have failed to enable as it's dependent on CustomScript".format(ext.name)).is_true() + log.info("Validated that all extensions dependent on a failing extension also failed") + + @staticmethod + def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient) -> List[str]: + # Using VmExtensionIds to get publisher for each ext to be used in remote script + extension_full_names = { + "AzureMonitorLinuxAgent": VmExtensionIds.AzureMonitorLinuxAgent, + "RunCommandLinux": VmExtensionIds.RunCommand, + "CustomScript": VmExtensionIds.CustomScript + } + enabled_times = [] + for ext in extensions: + # Only add extensions which succeeded provisioning + if "succeeded" in ext.statuses_summary[0].code: + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type {extension_full_names[ext.name]}", + use_sudo=True) + enabled_times.append( + { + "name": ext.name, + "enabled_time": datetime.strptime(enabled_time.replace('\n', ''), u'%Y-%m-%d %H:%M:%S') + } + ) + + # sort the extensions based on their enabled datetime + sorted_extensions = sorted(enabled_times, key=lambda ext_: ext_["enabled_time"]) + log.info("") + 
log.info("Extensions sorted by time they were enabled: {0}".format( + ', '.join(["{0}: {1}".format(ext["name"], ext["enabled_time"]) for ext in sorted_extensions]))) + sorted_extension_names = [ext["name"] for ext in sorted_extensions] + return sorted_extension_names + + @staticmethod + def validate_extension_sequencing(dependency_map: Dict[str, List[str]], sorted_extension_names: List[str]): + installed_ext = dict() + + # Iterate through the extensions in the enabled order and validate if their depending extensions are already + # enabled prior to that. + for ext in sorted_extension_names: + # Check if the depending extension are already installed + if ext not in dependency_map: + fail("Unwanted extension found in VMSS Instance view: {0}".format(ext)) + if dependency_map[ext] is not None: + for dep in dependency_map[ext]: + if installed_ext.get(dep) is None: + # The depending extension is not installed prior to the current extension + fail("{0} is not installed prior to {1}".format(dep, ext)) + + # Mark the current extension as installed + installed_ext[ext] = ext + + log.info("Validated extension sequencing") + + def run(self): + # This is the base ARM template that's used for deploying extensions for this scenario + base_extension_template = { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json", + "contentVersion": "1.0.0.0", + "resources": [ + { + "type": "Microsoft.Compute/virtualMachineScaleSets", + "name": f"{self._context.vmss.name}", + "location": "[resourceGroup().location]", + "apiVersion": "2018-06-01", + "properties": { + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + } + } + } + } + ] + } + + for case in self.test_cases: + # Update the settings for each extension in this scenario to make sure they're always unique to force CRP + # to generate a new sequence number each time + test_guid = str(uuid.uuid4()) + deployment_should_fail = "failing" in case.__name__ + extensions = case() + for 
ext in extensions: + # We only want to update the settings if they are empty (so we don't overwrite any failing script + # scenarios) + if "settings" in ext["properties"] and not ext["properties"]["settings"]: + ext["properties"]["settings"].update({ + "commandToExecute": "echo \"{0}: $(date +%Y-%m-%dT%H:%M:%S.%3NZ)\"".format(test_guid) + }) + + # We update the extension template here with extensions that are specific to the scenario that we want to + # test out + log.info("") + log.info("Test case: {0}".format(case.__name__.replace('_', ' '))) + ext_template = copy.deepcopy(base_extension_template) + ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + + # Log the dependency map for the extensions in this test case + dependency_map = self.get_dependency_map(extensions) + log.info("") + log.info("The dependency map of the extensions for this test case is:") + for ext, dependencies in dependency_map.items(): + dependency_list = "-" if not dependencies else ' and '.join(dependencies) + log.info("{0} depends on {1}".format(ext, dependency_list)) + + # Deploy updated extension template to the scale set. + log.info("") + log.info("Deploying extensions with the above dependencies to the scale set...") + rg_client = ResourceGroupClient(self._context.vmss.cloud, self._context.vmss.subscription, + self._context.vmss.resource_group, self._context.vmss.location) + try: + rg_client.deploy_template(template=ext_template) + except Exception as e: + # We only expect to catch an exception during deployment if we are forcing one of the extensions to + # fail. Otherwise, report the failure. 
+ if not deployment_should_fail: + fail("Extension template deployment unexpectedly failed: {0}".format(e)) + + # Get the extensions on the VMSS from the instance view + log.info("") + instance_view_extensions = self._context.vmss.get_instance_view().extensions + + # If deployment failed, assert that all and only dependent extensions failed + if deployment_should_fail: + self.validate_dependent_extensions_fail(dependency_map, instance_view_extensions) + + # Validate that the extensions were enabled in the correct order on each instance of the scale set + for address in self._context.vmss.get_instances_ip_address(): + ssh_client: SshClient = SshClient(ip_address=address.ip_address, username=self._context.username, identity_file=self._context.identity_file) + + log.info("") + log.info("Validate extension sequencing on {0}...".format(address.ip_address)) + + # Sort the VM extensions by the time they were enabled + sorted_extension_names = self.get_sorted_extension_names(instance_view_extensions, ssh_client) + + # Validate that the extensions were enabled in the correct order + self.validate_extension_sequencing(dependency_map, sorted_extension_names) + + log.info("------") + + +if __name__ == "__main__": + ExtSequencing.run_from_command_line() diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py new file mode 100755 index 0000000000..e94f95d360 --- /dev/null +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -0,0 +1,76 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Gets the timestamp for when the provided extension was enabled +# +import argparse +import re +import sys + +from datetime import datetime +from pathlib import Path + + +def main(): + """ + Returns the timestamp of when the provided extension was enabled + """ + parser = argparse.ArgumentParser() + parser.add_argument("--ext_type", dest='ext_type', required=True) + args, _ = parser.parse_known_args() + + # Extension enabled time is in extension CommandExecution.log + command_exec_log_path = Path('/var/log/azure/' + args.ext_type + '/CommandExecution.log') + command_exec_log = open(command_exec_log_path, 'r') + enabled_match = None + for line in command_exec_log.readlines(): + line = line.rstrip() + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + # AMA logs enable succeeded and its timestamp to the agent log: + # 2023/09/26 04:07:33 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.5] Enable,success,0,Enable succeeded + enable_pattern = r'.*(?P\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) \[Microsoft\.Azure\.Monitor\.AzureMonitorLinuxAgent\-.*] .*Enable succeeded.*' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + else: + # For RC and CSE, we can determine when enable succeeded from the stdout of the enable command execution from + # the agent log: + # 2023-09-26T04:07:39.042948Z INFO ExtHandler [Microsoft.CPlat.Core.RunCommandLinux-1.0.5] Command: bin/run-command-shim enable + # [stdout] + # ... 
+ # time=2023-09-26T04:07:37Z version=v1.0.4/git@b3be41d-dirty operation=enable seq=0 event=enabledevent=enabled + enable_pattern = r'time=(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*event=enabled' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + + if not enabled_match: + # Try to get enabled time from extension command execution logs + print("Agent log does not show extension was enabled") + sys.exit(1) + + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + print(datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S')) + else: + print(datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ')) + + sys.exit(0) + + +if __name__ == "__main__": + main() From a8d77eddb672050729fdeda5e2d2cf23f1358648 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 30 Oct 2023 13:33:07 -0700 Subject: [PATCH 13/30] Update scenario to support new tests --- .../tests/ext_sequencing/ext_sequencing.py | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 732eccdf43..cabab56b8f 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -33,7 +33,7 @@ remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies from tests_e2e.tests.lib.agent_test import AgentVmssTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient from tests_e2e.tests.lib.ssh_client import SshClient @@ -54,33 +54,19 @@ class ExtSequencing(AgentVmssTest): ] 
@staticmethod - def get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, List[str]]: - dependency_map = dict() + def get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + dependency_map: Dict[str, Dict[str, Any]] = dict() for ext in extensions: ext_name = ext['name'] provisioned_after = ext['properties'].get('provisionAfterExtensions') - dependency_map[ext_name] = provisioned_after + # We know an extension should fail if a script was provided + ext_settings = ext['properties'].get("settings") + should_fail = True if ext_settings and "script" in ext_settings else False + dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": provisioned_after} return dependency_map - @staticmethod - def validate_dependent_extensions_fail(dependency_map: Dict[str, List[str]], extensions: List[VirtualMachineScaleSetVMExtensionsSummary]): - failed_extensions = [ext.name for ext in extensions if "failed" in ext.statuses_summary[0].code] - for ext, dependencies in dependency_map.items(): - for dep in dependencies: - if dep in failed_extensions: - assert_that(ext in failed_extensions).described_as("{0} dependent on failing extension {1} should also fail") - - for ext in extensions: - dependencies = dependency_map[ext.name] - assert_that("failed" in ext.statuses_summary[0].code).described_as( - "CustomScript should have failed to enable").is_true() - if "CustomScript" in dependency_map[ext.name]: - assert_that("failed" in ext.statuses_summary[0].code).described_as( - "{0} should have failed to enable as it's dependent on CustomScript".format(ext.name)).is_true() - log.info("Validated that all extensions dependent on a failing extension also failed") - @staticmethod def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient) -> List[str]: # Using VmExtensionIds to get publisher for each ext to be used in remote script @@ -91,7 +77,7 @@ def get_sorted_extension_names(extensions: 
List[VirtualMachineScaleSetVMExtensio } enabled_times = [] for ext in extensions: - # Only add extensions which succeeded provisioning + # Only check extensions which succeeded provisioning if "succeeded" in ext.statuses_summary[0].code: enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type {extension_full_names[ext.name]}", use_sudo=True) @@ -111,7 +97,7 @@ def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensio return sorted_extension_names @staticmethod - def validate_extension_sequencing(dependency_map: Dict[str, List[str]], sorted_extension_names: List[str]): + def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str]): installed_ext = dict() # Iterate through the extensions in the enabled order and validate if their depending extensions are already @@ -121,7 +107,8 @@ def validate_extension_sequencing(dependency_map: Dict[str, List[str]], sorted_e if ext not in dependency_map: fail("Unwanted extension found in VMSS Instance view: {0}".format(ext)) if dependency_map[ext] is not None: - for dep in dependency_map[ext]: + dependencies = dependency_map[ext].get('depends_on') + for dep in dependencies: if installed_ext.get(dep) is None: # The depending extension is not installed prior to the current extension fail("{0} is not installed prior to {1}".format(dep, ext)) @@ -129,6 +116,12 @@ def validate_extension_sequencing(dependency_map: Dict[str, List[str]], sorted_e # Mark the current extension as installed installed_ext[ext] = ext + # Validate that only extensions expected to fail, and their dependent extensions, failed + for ext, details in dependency_map.items(): + failing_ext_dependencies = [dep for dep in details['depends_on'] if dependency_map[dep]['should_fail']] + if ext not in installed_ext and not details['should_fail'] and not failing_ext_dependencies: + fail("{0} unexpectedly failed. 
Only extensions that are dependent on a failing extension should fail".format(ext)) + log.info("Validated extension sequencing") def run(self): @@ -179,7 +172,8 @@ def run(self): dependency_map = self.get_dependency_map(extensions) log.info("") log.info("The dependency map of the extensions for this test case is:") - for ext, dependencies in dependency_map.items(): + for ext, details in dependency_map.items(): + dependencies = details.get('depends_on') dependency_list = "-" if not dependencies else ' and '.join(dependencies) log.info("{0} depends on {1}".format(ext, dependency_list)) @@ -200,10 +194,6 @@ def run(self): log.info("") instance_view_extensions = self._context.vmss.get_instance_view().extensions - # If deployment failed, assert that all and only dependent extensions failed - if deployment_should_fail: - self.validate_dependent_extensions_fail(dependency_map, instance_view_extensions) - # Validate that the extensions were enabled in the correct order on each instance of the scale set for address in self._context.vmss.get_instances_ip_address(): ssh_client: SshClient = SshClient(ip_address=address.ip_address, username=self._context.username, identity_file=self._context.identity_file) From b04e06bf3456b0891c9fe17b91feb376f6666036 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 30 Oct 2023 16:28:00 -0700 Subject: [PATCH 14/30] Scenario should support failing extensions and extensions with no settings --- tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py | 12 ++++++++++-- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 5 +++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py index b3ffa0b7c2..c226b5367b 100644 --- a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -29,10 +29,12 @@ def add_two_extensions_with_dependencies(): { "name": "AzureMonitorLinuxAgent", 
"properties": { + "provisionAfterExtensions": [], "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { @@ -69,6 +71,7 @@ def remove_one_dependent_extension(): "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { @@ -94,6 +97,7 @@ def remove_all_dependencies(): "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { @@ -128,7 +132,7 @@ def add_one_dependent_extension(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, + "autoUpgradeMinorVersion": True } }, { @@ -159,10 +163,12 @@ def add_single_dependencies(): { "name": "AzureMonitorLinuxAgent", "properties": { + "provisionAfterExtensions": [], "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { @@ -198,7 +204,7 @@ def remove_all_dependent_extensions(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, + "autoUpgradeMinorVersion": True } } ] @@ -216,6 +222,7 @@ def add_failing_dependent_extension_with_one_dependency(): "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { @@ -259,6 +266,7 @@ def add_failing_dependent_extension_with_two_dependencies(): "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", "autoUpgradeMinorVersion": True, + "settings": {} } }, { diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index cabab56b8f..8af2a3ea97 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -26,7 +26,7 
@@ from datetime import datetime from typing import List, Dict, Any -from assertpy import fail, assert_that +from assertpy import fail from azure.mgmt.compute.models import VirtualMachineScaleSetVMExtensionsSummary from tests_e2e.tests.ext_sequencing.ext_seq_test_cases import add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, \ @@ -60,10 +60,11 @@ def get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, for ext in extensions: ext_name = ext['name'] provisioned_after = ext['properties'].get('provisionAfterExtensions') + depends_on = provisioned_after if provisioned_after else [] # We know an extension should fail if a script was provided ext_settings = ext['properties'].get("settings") should_fail = True if ext_settings and "script" in ext_settings else False - dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": provisioned_after} + dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} return dependency_map From 8e522b2ed59abaf33fbf8a00c91bb587d0f17f04 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 12:04:07 -0700 Subject: [PATCH 15/30] Clean up test --- tests_e2e/test_suites/ext_sequencing.yml | 2 +- .../tests/ext_sequencing/ext_sequencing.py | 72 ++++++++++++++++--- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml index 55932ac71e..995c7a2b2e 100644 --- a/tests_e2e/test_suites/ext_sequencing.yml +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -6,6 +6,6 @@ name: "ExtSequencing" tests: - "ext_sequencing/ext_sequencing.py" images: "endorsed" -locations: "AzureCloud:eastus2euap" +locations: "AzureCloud:eastus2" # This scenario is executed on instances of a scaleset created by the agent test suite. 
executes_on_scale_set: true \ No newline at end of file diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 8af2a3ea97..dc054f0310 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -98,7 +98,7 @@ def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensio return sorted_extension_names @staticmethod - def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str]): + def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): installed_ext = dict() # Iterate through the extensions in the enabled order and validate if their depending extensions are already @@ -112,7 +112,10 @@ def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sor for dep in dependencies: if installed_ext.get(dep) is None: # The depending extension is not installed prior to the current extension - fail("{0} is not installed prior to {1}".format(dep, ext)) + if relax_check: + log.info("{0} is not installed prior to {1}".format(dep, ext)) + else: + fail("{0} is not installed prior to {1}".format(dep, ext)) # Mark the current extension as installed installed_ext[ext] = ext @@ -120,8 +123,13 @@ def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sor # Validate that only extensions expected to fail, and their dependent extensions, failed for ext, details in dependency_map.items(): failing_ext_dependencies = [dep for dep in details['depends_on'] if dependency_map[dep]['should_fail']] - if ext not in installed_ext and not details['should_fail'] and not failing_ext_dependencies: - fail("{0} unexpectedly failed. 
Only extensions that are dependent on a failing extension should fail".format(ext)) + if ext not in installed_ext: + if details['should_fail']: + log.info("Extension {0} failed as expected".format(ext)) + elif failing_ext_dependencies: + log.info("Extension {0} failed as expected because it is dependent on {1}".format(ext, ' and '.join(failing_ext_dependencies))) + else: + fail("{0} unexpectedly failed. Only extensions that are expected to fail or depend on a failing extension should fail".format(ext)) log.info("Validated extension sequencing") @@ -151,7 +159,6 @@ def run(self): # Update the settings for each extension in this scenario to make sure they're always unique to force CRP # to generate a new sequence number each time test_guid = str(uuid.uuid4()) - deployment_should_fail = "failing" in case.__name__ extensions = case() for ext in extensions: # We only want to update the settings if they are empty (so we don't overwrite any failing script @@ -188,7 +195,7 @@ def run(self): except Exception as e: # We only expect to catch an exception during deployment if we are forcing one of the extensions to # fail. Otherwise, report the failure. - if not deployment_should_fail: + if "failing" not in case.__name__: fail("Extension template deployment unexpectedly failed: {0}".format(e)) # Get the extensions on the VMSS from the instance view @@ -205,11 +212,60 @@ def run(self): # Sort the VM extensions by the time they were enabled sorted_extension_names = self.get_sorted_extension_names(instance_view_extensions, ssh_client) - # Validate that the extensions were enabled in the correct order - self.validate_extension_sequencing(dependency_map, sorted_extension_names) + # Validate that the extensions were enabled in the correct order. We relax this check if no settings + # are provided for a dependent extension. 
+ relax_check = True if "settings" in case.__name__ else False + self.validate_extension_sequencing(dependency_map, sorted_extension_names, relax_check) log.info("------") + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # WARNING ExtHandler ExtHandler Missing dependsOnExtension on extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent + # This message appears when an extension doesn't depend on another extension + # + { + 'message': r"Missing dependsOnExtension on extension .*" + }, + # + # WARNING ExtHandler ExtHandler Extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent does not have any settings. Will ignore dependency (dependency level: 1) + # We currently ignore dependencies for extensions without settings + # + { + 'message': r"Extension .* does not have any settings\. Will ignore dependency \(dependency level: \d\)" + }, + # + # 2023-10-31T17:46:59.675959Z WARNING ExtHandler ExtHandler Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions" + }, + # + # 2023-10-31T17:48:13.349214Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. Status was error, duration=0 + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. 
Status was error, duration=0" + }, + # + # 2023-10-31T17:47:07.689083Z WARNING ExtHandler ExtHandler [PERIODIC] This status is being reported by the Guest Agent since no status file was reported by extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent: [ExtensionStatusError] Status file /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11/status/6.status does not exist + # We expect extensions that are dependent on a failing extension to not report status + # + { + 'message': r"\[PERIODIC\] This status is being reported by the Guest Agent since no status file was reported by extension .*: \[ExtensionStatusError\] Status file \/var\/lib\/waagent\/.*\/status\/\d.status does not exist" + }, + # + # 2023-10-31T17:48:11.306835Z WARNING ExtHandler ExtHandler A new goal state was received, but not all the extensions in the previous goal state have completed: [('Microsoft.Azure.Extensions.CustomScript', 'error'), ('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', 'transitioning'), ('Microsoft.CPlat.Core.RunCommandLinux', 'success')] + # This message appears when the previous test scenario had failing extensions due to extension dependencies + # + { + 'message': r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\('.*', '(error|transitioning|success)'\),?)+\]" + } + ] + return ignore_rules + if __name__ == "__main__": ExtSequencing.run_from_command_line() From 106f383936508772163308bbdbc39c0e54f072bb Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 12:09:03 -0700 Subject: [PATCH 16/30] Remove locations from test suite yml --- tests_e2e/test_suites/ext_sequencing.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml index 995c7a2b2e..1976a85025 100644 --- a/tests_e2e/test_suites/ext_sequencing.yml +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -6,6 +6,5 @@ name: "ExtSequencing" tests: - 
"ext_sequencing/ext_sequencing.py" images: "endorsed" -locations: "AzureCloud:eastus2" # This scenario is executed on instances of a scaleset created by the agent test suite. executes_on_scale_set: true \ No newline at end of file From a37e181aea5367239dae0111d27f11f0c347c225 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 12:13:53 -0700 Subject: [PATCH 17/30] Fix deployment issue --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 9209b18f01..a61584de5e 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -232,7 +232,7 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ # We need to create a new VMSS. # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group # id to "e0" and the scale set name to "n0" since we are creating a single scale set. 
- self._resource_group_name = f"lisa-{self._runbook_name}-{RUN_ID}-e0" + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e0" self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True From 4ffa3ee822fe517754639ace1e346f2b3f604fa8 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 15:01:00 -0700 Subject: [PATCH 18/30] Support creating multiple resource groups for vmss in one run --- tests_e2e/orchestrator/lib/agent_test_suite.py | 8 +++++--- tests_e2e/orchestrator/lib/agent_test_suite_combinator.py | 8 +++++++- tests_e2e/orchestrator/runbook.yml | 8 ++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index a61584de5e..ba2daa0fef 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -189,6 +189,8 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ self._lisa_environment_name = environment.name self._environment_name = variables["c_env_name"] + self._vmss_resource_group_count = variables["c_vmss_resource_group_count"] + self._test_suites = variables["c_test_suites"] self._cloud = variables["cloud"] @@ -230,9 +232,9 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ if isinstance(environment.nodes[0], LocalNode): # We need to create a new VMSS. - # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group - # id to "e0" and the scale set name to "n0" since we are creating a single scale set. - self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e0" + # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. 
Note + # that we hardcode the scale set name to "n0" since we are creating a single scale set. + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{self._vmss_resource_group_count}" self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index b3d84a1211..059c6e2836 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -81,6 +81,7 @@ def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, a 'resource_group_name' must be specified.") self._log: logging.Logger = logging.getLogger("lisa") + self._vmss_resource_group_count: int = 0 with set_thread_name("AgentTestSuitesCombinator"): if self.runbook.vm_name != '': @@ -176,10 +177,12 @@ def create_environment_list(self) -> List[Dict[str, Any]]: if test_suite_info.executes_on_scale_set: env = self.create_vmss_environment( env_name=f"{image_name}-vmss-{test_suite_info.name}", + vmss_resource_group_count=self._vmss_resource_group_count, marketplace_image=marketplace_image, location=location, vm_size=vm_size, test_suite_info=test_suite_info) + self._vmss_resource_group_count += 1 else: env = self.create_vm_environment( env_name=f"{image_name}-{test_suite_info.name}", @@ -202,10 +205,12 @@ def create_environment_list(self) -> List[Dict[str, Any]]: raise Exception("VHDS are currently not supported on scale sets.") env = self.create_vmss_environment( env_name=env_name, + vmss_resource_group_count=self._vmss_resource_group_count, marketplace_image=marketplace_image, location=location, vm_size=vm_size, test_suite_info=test_suite_info) + self._vmss_resource_group_count += 1 else: env = 
self.create_vm_environment( env_name=env_name, @@ -369,7 +374,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "vm_tags": vm_tags } - def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + def create_vmss_environment(self, env_name: str, vmss_resource_group_count: int, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: return { "c_platform": [ { @@ -388,6 +393,7 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio }, "c_env_name": env_name, + "c_vmss_resource_group_count": vmss_resource_group_count, "c_test_suites": [test_suite_info], "c_location": location, "c_image": marketplace_image, diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 223f63a7b0..5962131ff1 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -136,6 +136,14 @@ variable: value: "" is_case_visible: true + # + # Count of how many resource groups have been created, used to create a unique resource group name for each vmss test + # environment + # + - name: c_vmss_resource_group_count + value: 0 + is_case_visible: true + # # Test suites assigned for execution in the current test environment. 
# From 59691ff9d68ea4b97f1a4fb077813b4fb1470801 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 15:23:48 -0700 Subject: [PATCH 19/30] AzureMonitorLinuxAgent is not supported on flatcar --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 ++ tests_e2e/tests/ext_sequencing/ext_sequencing.py | 16 ++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index ba2daa0fef..6ab93e85c5 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -139,6 +139,8 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._lisa_environment_name: str # Name assigned by LISA to the test environment, useful for correlation with LISA logs self._environment_name: str # Name assigned by the AgentTestSuiteCombinator to the test environment + self._vmss_resource_group_count: int # Counter to keep track of how many resource groups have been created for vmss suites + self._test_suites: List[AgentTestSuite] # Test suites to execute in the environment self._cloud: str # Azure cloud where test VMs are located diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index dc054f0310..bed445f744 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -32,7 +32,7 @@ from tests_e2e.tests.ext_sequencing.ext_seq_test_cases import add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, \ remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies -from tests_e2e.tests.lib.agent_test import AgentVmssTest +from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped 
from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient @@ -134,6 +134,12 @@ def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sor log.info("Validated extension sequencing") def run(self): + instances_ip_address = self._context.vmss.get_instances_ip_address() + ssh_clients: List[SshClient] = [SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) for instance in instances_ip_address] + + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(ssh_clients[0].run_command("uname -a")): + raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") + # This is the base ARM template that's used for deploying extensions for this scenario base_extension_template = { "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json", @@ -203,11 +209,9 @@ def run(self): instance_view_extensions = self._context.vmss.get_instance_view().extensions # Validate that the extensions were enabled in the correct order on each instance of the scale set - for address in self._context.vmss.get_instances_ip_address(): - ssh_client: SshClient = SshClient(ip_address=address.ip_address, username=self._context.username, identity_file=self._context.identity_file) - + for ssh_client in ssh_clients: log.info("") - log.info("Validate extension sequencing on {0}...".format(address.ip_address)) + log.info("Validate extension sequencing on {0}...".format(ssh_client.ip_address)) # Sort the VM extensions by the time they were enabled sorted_extension_names = self.get_sorted_extension_names(instance_view_extensions, ssh_client) @@ -261,7 +265,7 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: # This message appears when the previous test scenario had failing extensions due to extension dependencies # { - 'message': 
r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\('.*', '(error|transitioning|success)'\),?)+\]" + 'message': r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\(u?'.*', u?'(error|transitioning|success)'\),?)+\]" } ] return ignore_rules From 4fd744dd89f398823425a490edb63909b8dbf9d6 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 15:34:08 -0700 Subject: [PATCH 20/30] AzureMonitor is not supported on flatcar --- tests_e2e/tests/lib/vm_extension_identifier.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests_e2e/tests/lib/vm_extension_identifier.py b/tests_e2e/tests/lib/vm_extension_identifier.py index 26113e445c..fa304cb766 100644 --- a/tests_e2e/tests/lib/vm_extension_identifier.py +++ b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -33,7 +33,8 @@ def __init__(self, publisher: str, ext_type: str, version: str): self.version: str = version unsupported_distros: Dict[str, List[str]] = { - "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"] + "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"], + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar"] } def supports_distro(self, system_info: str) -> bool: From 7f3bb7b87b35f9b97a05cc242d36ae95dd55bdd8 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Tue, 31 Oct 2023 16:03:47 -0700 Subject: [PATCH 21/30] remove agent update --- tests_e2e/orchestrator/runbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 5962131ff1..0946348af6 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, 
agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" # # Parameters used to create test VMs From 2640a7e92b7f6dd084a78b9098c7f03d238569f0 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Wed, 1 Nov 2023 16:14:04 -0700 Subject: [PATCH 22/30] Address PR comments --- .../orchestrator/lib/agent_test_suite.py | 13 ++- .../ext_sequencing/ext_seq_test_cases.py | 89 +++++++++++-------- .../tests/ext_sequencing/ext_sequencing.py | 59 +++++++----- .../ext_sequencing-get_ext_enable_time.py | 2 +- 4 files changed, 99 insertions(+), 64 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 6ab93e85c5..f522ea4059 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -234,9 +234,14 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ if isinstance(environment.nodes[0], LocalNode): # We need to create a new VMSS. - # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note - # that we hardcode the scale set name to "n0" since we are creating a single scale set. - self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{self._vmss_resource_group_count}" + # Use the same naming convention as LISA for the scale set name: lisa---e-n0 + # Note that we hardcode the scale set name to "n0" since we are creating a single scale set. + # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase + # characters in its domain name label. 
+ self._rg_count_lock.acquire() + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{self._rg_count}" + self._rg_count += 1 + self._rg_count_lock.release() self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True @@ -290,6 +295,8 @@ def _get_working_directory(lisa_working_path: str) -> Path: # _working_directory_lock = RLock() _setup_lock = RLock() + _rg_count_lock = RLock() + _rg_count = 0 def _create_working_directory(self) -> None: """ diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py index c226b5367b..5b3ac22ab5 100644 --- a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -18,13 +18,16 @@ def add_one_dependent_ext_without_settings(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def add_two_extensions_with_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence return [ { "name": "AzureMonitorLinuxAgent", @@ -33,8 +36,7 @@ def add_two_extensions_with_dependencies(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -45,7 +47,9 @@ def add_two_extensions_with_dependencies(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -56,13 +60,17 @@ def add_two_extensions_with_dependencies(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def remove_one_dependent_extension(): + # 
Checks that remaining extensions with dependencies are enabled in the correct order after removing a dependent + # extension return [ { "name": "AzureMonitorLinuxAgent", @@ -70,8 +78,7 @@ def remove_one_dependent_extension(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -82,13 +89,16 @@ def remove_one_dependent_extension(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def remove_all_dependencies(): + # Checks that extensions are enabled after adding and removing dependencies return [ { "name": "AzureMonitorLinuxAgent", @@ -96,8 +106,7 @@ def remove_all_dependencies(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -107,7 +116,9 @@ def remove_all_dependencies(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -117,13 +128,16 @@ def remove_all_dependencies(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def add_one_dependent_extension(): + # Checks that a valid enable sequence occurs when only one extension has dependencies return [ { "name": "AzureMonitorLinuxAgent", @@ -142,7 +156,9 @@ def add_one_dependent_extension(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -152,13 +168,17 @@ def add_one_dependent_extension(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - 
"settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def add_single_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence and each extension + # has no more than one dependency return [ { "name": "AzureMonitorLinuxAgent", @@ -167,8 +187,7 @@ def add_single_dependencies(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -179,7 +198,9 @@ def add_single_dependencies(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -190,13 +211,17 @@ def add_single_dependencies(): "type": "CustomScript", "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } } ] def remove_all_dependent_extensions(): + # Checks that remaining extensions with dependencies are enabled in the correct order after removing all dependent + # extension return [ { "name": "AzureMonitorLinuxAgent", @@ -221,8 +246,7 @@ def add_failing_dependent_extension_with_one_dependency(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -232,7 +256,9 @@ def add_failing_dependent_extension_with_one_dependency(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -243,12 +269,7 @@ def add_failing_dependent_extension_with_one_dependency(): "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, "settings": { - # script contents are base-64 encoded: - # #!/bin/bash - # - # echo "Exit script with non-zero exit code" - # exit 1 - "script": 
"IyEvYmluL2Jhc2gKCmVjaG8gIkV4aXQgc2NyaXB0IHdpdGggbm9uLXplcm8gZXhpdCBjb2RlIgpleGl0IDEK" + "commandToExecute": "exit 1" } } } @@ -265,8 +286,7 @@ def add_failing_dependent_extension_with_two_dependencies(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True, - "settings": {} + "autoUpgradeMinorVersion": True } }, { @@ -277,7 +297,9 @@ def add_failing_dependent_extension_with_two_dependencies(): "type": "RunCommandLinux", "typeHandlerVersion": "1.0", "autoUpgradeMinorVersion": True, - "settings": {} + "settings": { + "commandToExecute": "date" + } } }, { @@ -288,12 +310,7 @@ def add_failing_dependent_extension_with_two_dependencies(): "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, "settings": { - # script contents are base-64 encoded: - # #!/bin/bash - # - # echo "Exit script with non-zero exit code" - # exit 2 - "script": "IyEvYmluL2Jhc2gKCmVjaG8gIkV4aXQgc2NyaXB0IHdpdGggbm9uLXplcm8gZXhpdCBjb2RlIgpleGl0IDIK" + "commandToExecute": "exit 2" } } } diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index bed445f744..98d8564973 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -33,6 +33,7 @@ remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VmssInstanceIpAddress from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient @@ -41,35 +42,42 @@ class ExtSequencing(AgentVmssTest): # Cases 
to test different dependency scenarios - test_cases = [ + _test_cases = [ add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, + # remove_one_dependent_extension should only be run after another test case which has RunCommandLinux in the + # model remove_one_dependent_extension, + # remove_all_dependencies should only be run after another test case which has extension dependencies in the + # model remove_all_dependencies, add_one_dependent_extension, add_single_dependencies, + # remove_all_dependent_extensions should only be run after another test case which has dependent extension in + # the model remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies ] @staticmethod - def get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: dependency_map: Dict[str, Dict[str, Any]] = dict() for ext in extensions: ext_name = ext['name'] provisioned_after = ext['properties'].get('provisionAfterExtensions') depends_on = provisioned_after if provisioned_after else [] - # We know an extension should fail if a script was provided + # We know an extension should fail if commandToExecute is exactly "exit 1" ext_settings = ext['properties'].get("settings") - should_fail = True if ext_settings and "script" in ext_settings else False + ext_command = ext['properties']['settings'].get("commandToExecute") if ext_settings else None + should_fail = ext_command == "exit 1" dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} return dependency_map @staticmethod - def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient) -> List[str]: + def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient) -> List[str]: # Using VmExtensionIds 
to get publisher for each ext to be used in remote script extension_full_names = { "AzureMonitorLinuxAgent": VmExtensionIds.AzureMonitorLinuxAgent, @@ -85,7 +93,7 @@ def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensio enabled_times.append( { "name": ext.name, - "enabled_time": datetime.strptime(enabled_time.replace('\n', ''), u'%Y-%m-%d %H:%M:%S') + "enabled_time": datetime.strptime(enabled_time.strip(), u'%Y-%m-%d %H:%M:%S') } ) @@ -98,7 +106,7 @@ def get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensio return sorted_extension_names @staticmethod - def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): + def _validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): installed_ext = dict() # Iterate through the extensions in the enabled order and validate if their depending extensions are already @@ -106,6 +114,9 @@ def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sor for ext in sorted_extension_names: # Check if the depending extension are already installed if ext not in dependency_map: + # There should not be any unexpected extensions on the scale set, even in the case we share the VMSS, + # because we update the scale set model with the extensions. Any extensions that are not in the scale + # set model would be disabled. 
fail("Unwanted extension found in VMSS Instance view: {0}".format(ext)) if dependency_map[ext] is not None: dependencies = dependency_map[ext].get('depends_on') @@ -134,10 +145,12 @@ def validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sor log.info("Validated extension sequencing") def run(self): - instances_ip_address = self._context.vmss.get_instances_ip_address() - ssh_clients: List[SshClient] = [SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) for instance in instances_ip_address] + instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address() + ssh_clients: Dict[str, SshClient] = dict() + for instance in instances_ip_address: + ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) - if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(ssh_clients[0].run_command("uname -a")): + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(ssh_clients.values()[0].run_command("uname -a")): raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") # This is the base ARM template that's used for deploying extensions for this scenario @@ -161,18 +174,15 @@ def run(self): ] } - for case in self.test_cases: - # Update the settings for each extension in this scenario to make sure they're always unique to force CRP + for case in self._test_cases: + # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP # to generate a new sequence number each time test_guid = str(uuid.uuid4()) extensions = case() for ext in extensions: - # We only want to update the settings if they are empty (so we don't overwrite any failing script - # scenarios) - if "settings" in ext["properties"] and not ext["properties"]["settings"]: - ext["properties"]["settings"].update({ - 
"commandToExecute": "echo \"{0}: $(date +%Y-%m-%dT%H:%M:%S.%3NZ)\"".format(test_guid) - }) + ext["properties"].update({ + "forceUpdateTag": test_guid + }) # We update the extension template here with extensions that are specific to the scenario that we want to # test out @@ -183,7 +193,7 @@ def run(self): 'extensions'] = extensions # Log the dependency map for the extensions in this test case - dependency_map = self.get_dependency_map(extensions) + dependency_map = self._get_dependency_map(extensions) log.info("") log.info("The dependency map of the extensions for this test case is:") for ext, details in dependency_map.items(): @@ -209,17 +219,18 @@ def run(self): instance_view_extensions = self._context.vmss.get_instance_view().extensions # Validate that the extensions were enabled in the correct order on each instance of the scale set - for ssh_client in ssh_clients: + for instance_name, ssh_client in ssh_clients.items(): log.info("") - log.info("Validate extension sequencing on {0}...".format(ssh_client.ip_address)) + log.info("Validate extension sequencing on {0}:{1}...".format(instance_name, ssh_client.ip_address)) # Sort the VM extensions by the time they were enabled - sorted_extension_names = self.get_sorted_extension_names(instance_view_extensions, ssh_client) + sorted_extension_names = self._get_sorted_extension_names(instance_view_extensions, ssh_client) # Validate that the extensions were enabled in the correct order. We relax this check if no settings - # are provided for a dependent extension. + # are provided for a dependent extension, since the guest agent currently ignores dependencies in this + # case. 
relax_check = True if "settings" in case.__name__ else False - self.validate_extension_sequencing(dependency_map, sorted_extension_names, relax_check) + self._validate_extension_sequencing(dependency_map, sorted_extension_names, relax_check) log.info("------") diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py index e94f95d360..bb49c936ac 100755 --- a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -61,7 +61,7 @@ def main(): if not enabled_match: # Try to get enabled time from extension command execution logs - print("Agent log does not show extension was enabled") + print("Agent log does not show extension was enabled", file=sys.stderr) sys.exit(1) if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": From ecdb9ad705235269e0fb3bee3901ec311b4904fd Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Wed, 1 Nov 2023 16:20:14 -0700 Subject: [PATCH 23/30] Fix issue with getting random ssh client --- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 98d8564973..96ae829ea9 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -150,7 +150,7 @@ def run(self): for instance in instances_ip_address: ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) - if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(ssh_clients.values()[0].run_command("uname -a")): + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("uname -a")): raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this 
distro") # This is the base ARM template that's used for deploying extensions for this scenario From 7e693a103a0093d972449d9c5eba47010dfd58f3 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 2 Nov 2023 10:11:31 -0700 Subject: [PATCH 24/30] Address PR Comments --- tests_e2e/orchestrator/lib/agent_test_suite.py | 5 +++-- .../tests/ext_sequencing/ext_sequencing.py | 13 +++++++------ .../ext_sequencing-get_ext_enable_time.py | 18 ++++++++++++++---- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index f522ea4059..02d59a9348 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -170,6 +170,9 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._create_scale_set: bool self._delete_scale_set: bool + _rg_count_lock = RLock() + _rg_count = 0 + def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): """ Initializes the AgentTestSuite from the data passed as arguments by LISA. 
@@ -295,8 +298,6 @@ def _get_working_directory(lisa_working_path: str) -> Path: # _working_directory_lock = RLock() _setup_lock = RLock() - _rg_count_lock = RLock() - _rg_count = 0 def _create_working_directory(self) -> None: """ diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 96ae829ea9..8aaf6fcf87 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -71,13 +71,13 @@ def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, # We know an extension should fail if commandToExecute is exactly "exit 1" ext_settings = ext['properties'].get("settings") ext_command = ext['properties']['settings'].get("commandToExecute") if ext_settings else None - should_fail = ext_command == "exit 1" + should_fail = ext_command == "exit 1" or ext_command == "exit 2" dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} return dependency_map @staticmethod - def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient) -> List[str]: + def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient, test_case_start: datetime) -> List[str]: # Using VmExtensionIds to get publisher for each ext to be used in remote script extension_full_names = { "AzureMonitorLinuxAgent": VmExtensionIds.AzureMonitorLinuxAgent, @@ -88,8 +88,7 @@ def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensi for ext in extensions: # Only check extensions which succeeded provisioning if "succeeded" in ext.statuses_summary[0].code: - enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type {extension_full_names[ext.name]}", - use_sudo=True) + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type {extension_full_names[ext.name]} --start_time 
{test_case_start.strftime(u'%Y-%m-%dT%H:%M:%SZ')}", use_sudo=True) enabled_times.append( { "name": ext.name, @@ -175,6 +174,8 @@ def run(self): } for case in self._test_cases: + test_case_start = datetime.now() + # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP # to generate a new sequence number each time test_guid = str(uuid.uuid4()) @@ -211,7 +212,7 @@ def run(self): except Exception as e: # We only expect to catch an exception during deployment if we are forcing one of the extensions to # fail. Otherwise, report the failure. - if "failing" not in case.__name__: + if "failing" not in case.__name__ or "VMExtensionProvisioningError" not in e.message or "Enable failed: failed to execute command" not in e.message: fail("Extension template deployment unexpectedly failed: {0}".format(e)) # Get the extensions on the VMSS from the instance view @@ -224,7 +225,7 @@ def run(self): log.info("Validate extension sequencing on {0}:{1}...".format(instance_name, ssh_client.ip_address)) # Sort the VM extensions by the time they were enabled - sorted_extension_names = self._get_sorted_extension_names(instance_view_extensions, ssh_client) + sorted_extension_names = self._get_sorted_extension_names(instance_view_extensions, ssh_client, test_case_start) # Validate that the extensions were enabled in the correct order. 
We relax this check if no settings # are provided for a dependent extension, since the guest agent currently ignores dependencies in this diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py index bb49c936ac..49eb2475ac 100755 --- a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -32,6 +32,7 @@ def main(): """ parser = argparse.ArgumentParser() parser.add_argument("--ext_type", dest='ext_type', required=True) + parser.add_argument("--start_time", dest='start_time', required=True) args, _ = parser.parse_known_args() # Extension enabled time is in extension CommandExecution.log @@ -41,7 +42,9 @@ def main(): for line in command_exec_log.readlines(): line = line.rstrip() if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": - # AMA logs enable succeeded and its timestamp to the agent log: + # AMA logs enable succeeded and its timestamp to the command execution log: + # 2023-11-01T23:22:53.124603Z INFO ExtHandler [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11] Command: ./shim.sh -enable + # [stdout] # 2023/09/26 04:07:33 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.5] Enable,success,0,Enable succeeded enable_pattern = r'.*(?P\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) \[Microsoft\.Azure\.Monitor\.AzureMonitorLinuxAgent\-.*] .*Enable succeeded.*' match = re.match(enable_pattern, line) @@ -49,7 +52,7 @@ def main(): enabled_match = match else: # For RC and CSE, we can determine when enable succeeded from the stdout of the enable command execution from - # the agent log: + # the command execution log: # 2023-09-26T04:07:39.042948Z INFO ExtHandler [Microsoft.CPlat.Core.RunCommandLinux-1.0.5] Command: bin/run-command-shim enable # [stdout] # ... 
@@ -65,9 +68,16 @@ def main(): sys.exit(1) if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": - print(datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S')) + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S') + else: + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ') + + start_time = datetime.strptime(args.start_time, u'%Y-%m-%dT%H:%M:%SZ') + if enable_time < start_time: + print("Agent log does not show extension was enabled after this test case started", file=sys.stderr) + sys.exit(1) else: - print(datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ')) + print(enable_time) sys.exit(0) From d826622c92b1813e9531462d581037c4f7f3f3cf Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 2 Nov 2023 11:15:18 -0700 Subject: [PATCH 25/30] Address PR Comments --- tests_e2e/orchestrator/lib/agent_test_suite.py | 13 +++++++++---- .../tests/ext_sequencing/ext_seq_test_cases.py | 3 ++- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 8 +++++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 02d59a9348..d6978e6e7f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -170,6 +170,11 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._create_scale_set: bool self._delete_scale_set: bool + # + # Test suites within the same runbook may be executed concurrently, and we need to keep track of how many resource + # groups are being created. We use this lock and counter to allow only 1 thread to increment the resource group + # count. 
+ # _rg_count_lock = RLock() _rg_count = 0 @@ -241,10 +246,10 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ # Note that we hardcode the scale set name to "n0" since we are creating a single scale set. # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase # characters in its domain name label. - self._rg_count_lock.acquire() - self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{self._rg_count}" - self._rg_count += 1 - self._rg_count_lock.release() + AgentTestSuite._rg_count_lock.acquire() + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" + AgentTestSuite._rg_count += 1 + AgentTestSuite._rg_count_lock.release() self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py index 5b3ac22ab5..1425517c9a 100644 --- a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -246,7 +246,8 @@ def add_failing_dependent_extension_with_one_dependency(): "publisher": "Microsoft.Azure.Monitor", "type": "AzureMonitorLinuxAgent", "typeHandlerVersion": "1.5", - "autoUpgradeMinorVersion": True + "autoUpgradeMinorVersion": True, + "settings": {} } }, { diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 8aaf6fcf87..7bd031584c 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -22,6 +22,7 @@ # validates they are enabled in order of dependencies. 
# import copy +import re import uuid from datetime import datetime from typing import List, Dict, Any @@ -212,8 +213,13 @@ def run(self): except Exception as e: # We only expect to catch an exception during deployment if we are forcing one of the extensions to # fail. Otherwise, report the failure. - if "failing" not in case.__name__ or "VMExtensionProvisioningError" not in e.message or "Enable failed: failed to execute command" not in e.message: + deployment_failure_pattern = r"[\s\S]*\"details\": [\s\S]* \"code\": \"(?P.*)\"[\s\S]* \"message\": \"(?P.*)\"[\s\S]*" + msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM. Please see the VM extension instance view for other failures. The first extension failed due to the error: VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." + deployment_failure_match = re.match(deployment_failure_pattern, str(e)) + if "failing" not in case.__name__: fail("Extension template deployment unexpectedly failed: {0}".format(e)) + elif not deployment_failure_match or deployment_failure_match.group("code") != "VMExtensionProvisioningError" or not re.match(msg_pattern, deployment_failure_match.group("msg")): + fail("Extension template deployment failed as expected, but with an unexpected error: {0}".format(e)) # Get the extensions on the VMSS from the instance view log.info("") From 0e2ea4f223be0184f8692f6ed0494b375669b7bd Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 2 Nov 2023 14:49:34 -0700 Subject: [PATCH 26/30] Address PR comments --- tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py | 2 +- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 4 ++-- .../tests/scripts/ext_sequencing-get_ext_enable_time.py | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py index 1425517c9a..d1c942d0af 100644 --- 
a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -311,7 +311,7 @@ def add_failing_dependent_extension_with_two_dependencies(): "typeHandlerVersion": "2.1", "autoUpgradeMinorVersion": True, "settings": { - "commandToExecute": "exit 2" + "commandToExecute": "exit 1" } } } diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 7bd031584c..8175ece8a4 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -72,7 +72,7 @@ def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, # We know an extension should fail if commandToExecute is exactly "exit 1" ext_settings = ext['properties'].get("settings") ext_command = ext['properties']['settings'].get("commandToExecute") if ext_settings else None - should_fail = ext_command == "exit 1" or ext_command == "exit 2" + should_fail = ext_command == "exit 1" dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} return dependency_map @@ -89,7 +89,7 @@ def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensi for ext in extensions: # Only check extensions which succeeded provisioning if "succeeded" in ext.statuses_summary[0].code: - enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type {extension_full_names[ext.name]} --start_time {test_case_start.strftime(u'%Y-%m-%dT%H:%M:%SZ')}", use_sudo=True) + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type '{extension_full_names[ext.name]}' --start_time '{str(test_case_start)}'", use_sudo=True) enabled_times.append( { "name": ext.name, diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py index 49eb2475ac..b9b2c66cb5 100755 --- 
a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -18,6 +18,7 @@ # # Gets the timestamp for when the provided extension was enabled # + import argparse import re import sys @@ -72,9 +73,9 @@ def main(): else: enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ') - start_time = datetime.strptime(args.start_time, u'%Y-%m-%dT%H:%M:%SZ') + start_time = datetime.strptime(args.start_time, u'%Y-%m-%d %H:%M:%S.%f') if enable_time < start_time: - print("Agent log does not show extension was enabled after this test case started", file=sys.stderr) + print("Agent log does not show extension was enabled after this test case started. Last enabled time was {0}. This test case started at {1}".format(enable_time, start_time), file=sys.stderr) sys.exit(1) else: print(enable_time) From 8cf4d19b60c1e368c43cadd8811d2239bb699599 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 2 Nov 2023 15:00:03 -0700 Subject: [PATCH 27/30] Do not keep rg count in runbook --- tests_e2e/orchestrator/lib/agent_test_suite.py | 4 ---- tests_e2e/orchestrator/lib/agent_test_suite_combinator.py | 8 +------- tests_e2e/orchestrator/runbook.yml | 8 -------- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index d6978e6e7f..5cb014dd5d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -139,8 +139,6 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._lisa_environment_name: str # Name assigned by LISA to the test environment, useful for correlation with LISA logs self._environment_name: str # Name assigned by the AgentTestSuiteCombinator to the test environment - self._vmss_resource_group_count: int # Counter to keep track of how many resource groups have been created for vmss suites - self._test_suites: 
List[AgentTestSuite] # Test suites to execute in the environment self._cloud: str # Azure cloud where test VMs are located @@ -199,8 +197,6 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ self._lisa_environment_name = environment.name self._environment_name = variables["c_env_name"] - self._vmss_resource_group_count = variables["c_vmss_resource_group_count"] - self._test_suites = variables["c_test_suites"] self._cloud = variables["cloud"] diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 059c6e2836..b3d84a1211 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -81,7 +81,6 @@ def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, a 'resource_group_name' must be specified.") self._log: logging.Logger = logging.getLogger("lisa") - self._vmss_resource_group_count: int = 0 with set_thread_name("AgentTestSuitesCombinator"): if self.runbook.vm_name != '': @@ -177,12 +176,10 @@ def create_environment_list(self) -> List[Dict[str, Any]]: if test_suite_info.executes_on_scale_set: env = self.create_vmss_environment( env_name=f"{image_name}-vmss-{test_suite_info.name}", - vmss_resource_group_count=self._vmss_resource_group_count, marketplace_image=marketplace_image, location=location, vm_size=vm_size, test_suite_info=test_suite_info) - self._vmss_resource_group_count += 1 else: env = self.create_vm_environment( env_name=f"{image_name}-{test_suite_info.name}", @@ -205,12 +202,10 @@ def create_environment_list(self) -> List[Dict[str, Any]]: raise Exception("VHDS are currently not supported on scale sets.") env = self.create_vmss_environment( env_name=env_name, - vmss_resource_group_count=self._vmss_resource_group_count, marketplace_image=marketplace_image, 
location=location, vm_size=vm_size, test_suite_info=test_suite_info) - self._vmss_resource_group_count += 1 else: env = self.create_vm_environment( env_name=env_name, @@ -374,7 +369,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "vm_tags": vm_tags } - def create_vmss_environment(self, env_name: str, vmss_resource_group_count: int, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: return { "c_platform": [ { @@ -393,7 +388,6 @@ def create_vmss_environment(self, env_name: str, vmss_resource_group_count: int, }, "c_env_name": env_name, - "c_vmss_resource_group_count": vmss_resource_group_count, "c_test_suites": [test_suite_info], "c_location": location, "c_image": marketplace_image, diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 0946348af6..336d22cf67 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -136,14 +136,6 @@ variable: value: "" is_case_visible: true - # - # Count of how many resource groups have been created, used to create a unique resource group name for each vmss test - # environment - # - - name: c_vmss_resource_group_count - value: 0 - is_case_visible: true - # # Test suites assigned for execution in the current test environment. 
# From 6ae88f092c51b316255d103098c86a62595a2800 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Thu, 2 Nov 2023 15:44:22 -0700 Subject: [PATCH 28/30] Use try/finally with lock --- tests_e2e/orchestrator/lib/agent_test_suite.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 5cb014dd5d..c4a0c89b5f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -243,9 +243,11 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase # characters in its domain name label. AgentTestSuite._rg_count_lock.acquire() - self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" - AgentTestSuite._rg_count += 1 - AgentTestSuite._rg_count_lock.release() + try: + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" + AgentTestSuite._rg_count += 1 + finally: + AgentTestSuite._rg_count_lock.release() self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True From c5e79aea014471b78576a3767a6b3f8d775fa728 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 6 Nov 2023 16:09:14 -0800 Subject: [PATCH 29/30] only check logs after scenario startS --- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 8175ece8a4..0a44183b89 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -61,6 +61,8 @@ class ExtSequencing(AgentVmssTest): 
add_failing_dependent_extension_with_two_dependencies ] + _scenario_start = datetime.min + @staticmethod def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: dependency_map: Dict[str, Dict[str, Any]] = dict() @@ -176,6 +178,8 @@ def run(self): for case in self._test_cases: test_case_start = datetime.now() + if self._scenario_start == datetime.min: + self._scenario_start = test_case_start # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP # to generate a new sequence number each time @@ -212,7 +216,8 @@ def run(self): rg_client.deploy_template(template=ext_template) except Exception as e: # We only expect to catch an exception during deployment if we are forcing one of the extensions to - # fail. Otherwise, report the failure. + # fail. We know an extension should fail if "failing" is in the case name. Otherwise, report the + # failure. deployment_failure_pattern = r"[\s\S]*\"details\": [\s\S]* \"code\": \"(?P.*)\"[\s\S]* \"message\": \"(?P.*)\"[\s\S]*" msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM. Please see the VM extension instance view for other failures. The first extension failed due to the error: VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." 
deployment_failure_match = re.match(deployment_failure_pattern, str(e)) @@ -241,6 +246,10 @@ def run(self): log.info("------") + def get_ignore_errors_before_timestamp(self) -> datetime: + # Ignore errors in the agent log before the first test case starts + return self._scenario_start + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: ignore_rules = [ # From 0c91f6de44996abface89fe0f31a3098ab1bf0b6 Mon Sep 17 00:00:00 2001 From: Maddie Ford Date: Mon, 6 Nov 2023 22:08:50 -0800 Subject: [PATCH 30/30] Change to instance member --- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 0a44183b89..3af9e64fe6 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -34,6 +34,7 @@ remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.virtual_machine_scale_set_client import VmssInstanceIpAddress from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log @@ -42,6 +43,11 @@ class ExtSequencing(AgentVmssTest): + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._scenario_start = datetime.min + # Cases to test different dependency scenarios _test_cases = [ add_one_dependent_ext_without_settings, @@ -61,8 +67,6 @@ class ExtSequencing(AgentVmssTest): add_failing_dependent_extension_with_two_dependencies ] - _scenario_start = datetime.min - @staticmethod def 
_get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: dependency_map: Dict[str, Dict[str, Any]] = dict()