Azure · narrieta · May 26, 2023 · May 24, 2023 · May 24, 2023 · May 24, 2023
@@ -105,9 +105,8 @@ def sleep_if_disabled(self):
         agent_disabled_file_path = conf.get_disable_agent_file_path()
         if os.path.exists(agent_disabled_file_path):
             import threading
-            logger.warn("Disabling the guest agent by sleeping forever; "
-                        "to re-enable, remove {0} and restart"
-                        .format(agent_disabled_file_path))
+            logger.warn("Disabling the guest agent by sleeping forever; to re-enable, remove {0} and restart".format(agent_disabled_file_path))
+            logger.warn("To enable VM extensions, also ensure that the VM's osProfile.allowExtensionOperations property is set to true.")
             self.running = False
             disable_event = threading.Event()
             disable_event.wait()

@@ -57,6 +57,8 @@ class TestSuiteInfo(object):
     location: str
     # Whether this suite must run on its own test VM
     owns_vm: bool
+    # Whether to install the test Agent on the test VM
+    install_test_agent: bool
     # Customization for the ARM template used when creating the test VM
     template: str
 
@@ -170,7 +172,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo:
         """
         Loads the description of a TestSuite from its YAML file.
 
-        A test suite has 5 properties: name, tests, images, location, and owns_vm. For example:
+        A test suite is described by the properties listed below. Sample test suite:
 
             name: "AgentBvt"
             tests:
@@ -180,6 +182,8 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo:
             images: "endorsed"
             location: "eastuseaup"
             owns_vm: true
+            install_test_agent: true
+            template: "bvts/template.py"
 
         * name     - A string used to identify the test suite
         * tests    - A list of the tests in the suite. Each test can be specified by a string (the path for its source code relative to
@@ -199,6 +203,9 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo:
                     This is useful for suites that modify the test VMs in such a way that the setup may cause problems
                     in other test suites (for example, some tests targeted to the HGAP block internet access in order to
                     force the agent to use the HGAP).
+        * install_test_agent - [Optional; boolean] By default the setup process installs the test Agent on the test VMs; set this property
+                    to False to skip the installation.
+        * template - [Optional; string] If given, the ARM template for the test VM is customized using the given Python module.
 
         """
         test_suite: Dict[str, Any] = AgentTestLoader._load_file(description_file)
@@ -232,7 +239,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo:
             test_suite_info.location = ""
 
         test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"]
-
+        test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"]
         test_suite_info.template = test_suite.get("template", "")
 
         return test_suite_info

@@ -275,7 +275,7 @@ def _clean_up(self) -> None:
         Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use.
         """
 
-    def _setup_node(self) -> None:
+    def _setup_node(self, install_test_agent: bool) -> None:
         """
         Prepares the remote node for executing the test suite (installs tools and the test agent, etc)
         """
@@ -317,11 +317,14 @@ def _setup_node(self) -> None:
         tarball_path: Path = Path("/tmp/waagent.tar")
         log.info("Creating %s with the files need on the test node", tarball_path)
         log.info("Adding orchestrator/scripts")
-        run_command(['tar', 'cvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"orchestrator"/"scripts"), '.'])
-        # log.info("Adding tests/scripts")
-        # run_command(['tar', 'rvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"tests"/"scripts"), '.'])
+        command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path))
+        log.info("%s\n%s", command, run_command(command, shell=True))
+        log.info("Adding tests/scripts")
+        # command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path))
+        # log.info("%s\n%s", command, run_command(command, shell=True))
         log.info("Adding tests/lib")
-        run_command(['tar', 'rvf', str(tarball_path), '--transform=s,^,lib/,', '-C', str(self.context.test_source_directory.parent), '--exclude=__pycache__', 'tests_e2e/tests/lib'])
+        command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path))
+        log.info("%s\n%s", command, run_command(command, shell=True))
         log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)]))
 
         #
@@ -353,6 +356,8 @@ def _setup_node(self) -> None:
 
         if self.context.is_vhd:
             log.info("Using a VHD; will not install the Test Agent.")
+        elif not install_test_agent:
+            log.info("Will not install the Test Agent per the test suite configuration.")
         else:
             log.info("Installing the Test Agent on the test node")
             command = f"install-agent --package ~/tmp/{agent_package_path.name} --version {AGENT_VERSION}"
@@ -424,10 +429,12 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]):
                             self._setup()
 
                         if not self.context.skip_setup:
-                            self._setup_node()
+                            # pylint seems to think self.context.test_suites is not iterable. Suppressing this warning here and a few lines below, since
+                            # its type is List[AgentTestSuite].
+                            # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable)
+                            install_test_agent = all([suite.install_test_agent for suite in self.context.test_suites])   # pylint: disable=E1133
+                            self._setup_node(install_test_agent)
 
-                        # pylint seems to think self.context.test_suites is not iterable. Suppressing warning, since its type is List[AgentTestSuite]
-                        #  E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable)
                         for suite in self.context.test_suites:  # pylint: disable=E1133
                             log.info("Executing test suite %s", suite.name)
                             self.context.lisa_log.info("Executing Test Suite %s", suite.name)

@@ -49,7 +49,7 @@ variable:
   #
   # The test suites to execute
   - name: test_suites
-    value: "agent_bvt, no_outbound_connections, extensions_disabled"
+    value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned"
   - name: cloud
     value: "AzureCloud"
     is_case_visible: true

@@ -129,7 +129,7 @@ check-version() {
 }
 
 if check-version "$version"; then
-  printf "\nThe agent was installed successfully\n"
+  printf "The agent was installed successfully\n"
   exit_code=0
 else
   printf "************************************\n"

@@ -9,7 +9,7 @@ parameters:
   - name: test_suites
     displayName: Test Suites
     type: string
-    default: agent_bvt, no_outbound_connections, extensions_disabled
+    default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned
 
     # NOTES:
     #         * 'image', 'location' and 'vm_size' override any values in the test suites/images definition

@@ -0,0 +1,12 @@
+#
+# Disables Agent provisioning using osProfile.linuxConfiguration.provisionVMAgent and verifies that the agent is disabled
+# and extension operations are not allowed.
+#
+name: "AgentNotProvisioned"
+tests:
+  - "agent_not_provisioned/agent_not_provisioned.py"
+images: "random(endorsed)"
+template: "agent_not_provisioned/template.py"
+owns_vm: true
+install_test_agent: false
+
@@ -16,5 +16,5 @@ tests:
   - "bvts/vm_access.py"
   - "no_outbound_connections/check_fallback_to_hgap.py"
 images: "random(endorsed)"
-template: "no_outbound_connections/nsg_template.py"
+template: "no_outbound_connections/template.py"
 owns_vm: true
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+# Microsoft Azure Linux Agent
+#
+# Copyright 2018 Microsoft Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from assertpy import fail, assert_that
+from typing import Any, Dict, List
+
+from azure.mgmt.compute.models import VirtualMachineInstanceView
+
+from tests_e2e.tests.lib.agent_test import AgentTest
+from tests_e2e.tests.lib.identifiers import VmExtensionIds
+from tests_e2e.tests.lib.logging import log
+from tests_e2e.tests.lib.shell import CommandError
+from tests_e2e.tests.lib.ssh_client import SshClient
+from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
+from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient
+
+
+class AgentNotProvisioned(AgentTest):
+    """
+    When osProfile.linuxConfiguration.provisionVMAgent is set to 'false', this test verifies that
+    the agent is disabled and that extension operations are not allowed.
+    """
+    def run(self):
+        #
+        # Check the agent's log for either of the two Daemon WARNING messages that indicate it is disabled.
+        #
+        ssh_client: SshClient = self._context.create_ssh_client()
+
+        log.info("Checking the Agent's log to verify that it is disabled.")
+        try:
+            output = ssh_client.run_command("""
+                grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || \
+                grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log
+            """)
+            log.info("The Agent is disabled, log message: [%s]", output.rstrip())
+        except CommandError as e:  # presumably raised when neither grep finds a match (non-zero exit) -- confirm against SshClient.run_command
+            fail(f"The agent's log does not contain the expected messages: {e}")
+
+        #
+        # Validate, using the VM's instance view, that the agent is not reporting status: it must show exactly one status, 'ProvisioningState/Unavailable' / 'Not Ready'.
+        #
+        log.info("Verifying that the Agent status is 'Not Ready' (i.e. it is not reporting status).")
+        vm: VirtualMachineClient = VirtualMachineClient(self._context.vm)
+        instance_view: VirtualMachineInstanceView = vm.get_instance_view()
+        log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize())
+        assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1)
+        assert_that(instance_view.vm_agent.statuses[0].code).described_as("The VM Agent should not be available").is_equal_to('ProvisioningState/Unavailable')
+        assert_that(instance_view.vm_agent.statuses[0].display_status).described_as("The VM Agent should not ready").is_equal_to('Not Ready')
+        log.info("The Agent status is 'Not Ready'")
+
+        #
+        # Validate that extensions cannot be executed: enabling CustomScript must fail with an error that contains "OperationNotAllowed".
+        #
+        log.info("Verifying that extension processing is disabled.")
+        log.info("Executing CustomScript; it should fail.")
+        custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript")
+        try:
+            custom_script.enable(settings={'commandToExecute': "date"}, force_update=True, timeout=20 * 60)
+            fail("CustomScript should have failed")
+        except Exception as error:  # NOTE(review): also catches the AssertionError raised by fail() above; the check below then fails because that message lacks "OperationNotAllowed"
+            assert_that("OperationNotAllowed" in str(error)) \
+                .described_as(f"Expected an OperationNotAllowed: {error}") \
+                .is_true()
+            log.info("CustomScript failed, as expected: %s", error)
+
+    def get_ignore_error_rules(self) -> List[Dict[str, Any]]:  # the rules list the same two "agent disabled" messages that run() looks for in the agent's log
+        return [
+            {'message': 'Disabling guest agent in accordance with ovf-env.xml'},
+            {'message': 'Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart'}
+        ]
+
+
+if __name__ == "__main__":
+    AgentNotProvisioned.run_from_command_line()
+
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+# Microsoft Azure Linux Agent
+#
+# Copyright 2018 Microsoft Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Any
+
+
+def update_arm_template(template: Any) -> None:  # modifies 'template' in place; raises Exception if any expected element of the template is missing
+    """
+    Customizes the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false.
+    """
+    #
+    # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is
+    #       under the 'lisa' namespace.
+    #
+    #     "getLinuxConfiguration": {
+    #         "parameters": [
+    #             {
+    #                 "name": "keyPath",
+    #                 "type": "string"
+    #             },
+    #             {
+    #                 "name": "publicKeyData",
+    #                 "type": "string"
+    #             }
+    #         ],
+    #         "output": {
+    #             "type": "object",
+    #             "value": {
+    #                 "disablePasswordAuthentication": true,
+    #                 "ssh": {
+    #                     "publicKeys": [
+    #                         {
+    #                             "path": "[parameters('keyPath')]",
+    #                             "keyData": "[parameters('publicKeyData')]"
+    #                         }
+    #                     ]
+    #                 },
+    #                 "provisionVMAgent": true
+    #             }
+    #         }
+    #     }
+    #
+    # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to False,
+    # where template['functions'][i] is the 'lisa' namespace.
+    #
+    functions = template.get("functions")
+    if functions is None:
+        raise Exception('Cannot find "functions" in the LISA template.')
+    for namespace in functions:
+        name = namespace.get("namespace")
+        if name is None:
+            raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}')
+        if name == "lisa":
+            members = namespace.get('members')
+            if members is None:
+                raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}')
+            get_linux_configuration = members.get('getLinuxConfiguration')
+            if get_linux_configuration is None:
+                raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}')
+            output = get_linux_configuration.get('output')
+            if output is None:
+                raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}')
+            value = output.get('value')
+            if value is None:
+                raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}")
+            value['provisionVMAgent'] = False
+            break  # only the first 'lisa' namespace is updated
+    else:  # for/else: reached only when the loop ends without 'break', i.e. no namespace named "lisa" was found
+        raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}')
+
@@ -26,6 +26,7 @@
 import pytz
 
 from assertpy import assert_that, fail
+from typing import Any, Dict, List
 
 from azure.mgmt.compute.models import VirtualMachineInstanceView
 
@@ -81,6 +82,11 @@ def run(self):
             .is_greater_than(pytz.utc.localize(disabled_timestamp))
         log.info("The VM Agent reported status after extensions were disabled, as expected.")
 
+    def get_ignore_error_rules(self) -> List[Dict[str, Any]]:  # NOTE(review): presumably the test framework skips agent-log errors matching these rules -- confirm against AgentTest
+        return [
+            {'message': 'No handler status found for Microsoft.Azure.Extensions.CustomScript'},
+        ]
+
 
 if __name__ == "__main__":
     ExtensionsDisabled.run_from_command_line()
@@ -209,6 +209,15 @@ def get_errors(self) -> List[AgentLogRecord]:
                 'if': lambda r: DISTRO_NAME == 'ubuntu' and DISTRO_VERSION >= '22.00'
             },
             #
+            # Old daemons can produce this message
+            #
+            #    2023-05-24T18:04:27.467009Z WARNING Daemon Daemon Could not mount cgroups: [Errno 1] Operation not permitted: '/sys/fs/cgroup/cpu,cpuacct' -> '/sys/fs/cgroup/cpu'
+            #
+            {
+                'message': r"Could not mount cgroups: \[Errno 1\] Operation not permitted",
+                'if': lambda r: r.prefix == 'Daemon'
+            },
+            #
             # 2022-02-09T04:50:37.384810Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: 2bed9b62-188e-4668-b1a8-87c35cfa4927 eTag: 7031887032544600793]: [Internal error in HostGAPlugin] [HTTP Failed] [502: Bad Gateway] b'{  "errorCode": "VMArtifactsProfileBlobContentNotFound",  "message": "VM artifacts profile blob has no content in it.",  "details": ""}'
             #
             # Fetching the goal state may catch the HostGAPlugin in the process of computing the vmSettings. This can be ignored, if the issue persist the log would include other errors as well.