Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Flatcar to end-to-end tests, install Pypy on test VMs, etc #2779

Merged
merged 9 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions tests_e2e/orchestrator/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ RUN \
# \
# Install basic dependencies \
# \
apt-get install -y git python3.10 python3.10-dev && \
apt-get install -y git python3.10 python3.10-dev wget bzip2 && \
ln /usr/bin/python3.10 /usr/bin/python3 && \
\
# \
Expand Down Expand Up @@ -69,7 +69,14 @@ RUN \
python3 -m pip install azure-mgmt-compute --upgrade && \
\
# \
# The setup for the tests depends on a couple of paths; add those to the profile \
# Download Pypy to a known location, from which it will be installed to the test VMs. \
# \
mkdir $HOME/bin && \
wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2 -O /tmp/pypy3.7-x64.tar.bz2 && \
wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2 -O /tmp/pypy3.7-arm64.tar.bz2 && \
\
# \
# The setup for the tests depends on a few paths; add those to the profile \
# \
echo 'export PYTHONPATH="$HOME/WALinuxAgent"' >> $HOME/.bash_profile && \
echo 'export PATH="$HOME/.local/bin:$PATH"' >> $HOME/.bash_profile && \
Expand Down
140 changes: 80 additions & 60 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
# E0401: Unable to import 'lisa' (import-error)
# etc
from lisa import ( # pylint: disable=E0401
CustomScriptBuilder,
Logger,
Node,
notifier,
Expand All @@ -44,10 +43,13 @@
import makepkg
from azurelinuxagent.common.version import AGENT_VERSION
from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo
from tests_e2e.tests.lib.agent_test import TestSkipped
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests can raise TestSkipped to skip themselves, for example when running on an unsupported distro.

from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.identifiers import VmIdentifier
from tests_e2e.tests.lib.logging import log as agent_test_logger # Logger used by the tests
from tests_e2e.tests.lib.logging import set_current_thread_log
from tests_e2e.tests.lib.shell import run_command
from tests_e2e.tests.lib.ssh_client import SshClient


def _initialize_lisa_logger():
Expand Down Expand Up @@ -116,6 +118,7 @@ def __init__(self, vm: VmIdentifier, paths: AgentTestContext.Paths, connection:
self.test_suites: List[AgentTestSuite] = None
self.collect_logs: str = None
self.skip_setup: bool = None
self.ssh_client: SshClient = None

def __init__(self, metadata: TestSuiteMetadata) -> None:
super().__init__(metadata)
Expand Down Expand Up @@ -148,17 +151,12 @@ def _set_context(self, node: Node, variables: Dict[str, Any], lisa_log_path: str
self.__context.log_path = self._get_log_path(variables, lisa_log_path)
self.__context.log = log
self.__context.node = node
self.__context.is_vhd = self._get_required_parameter(variables, "c_vhd") != ""
self.__context.image_name = f"{node.os.name}-vhd" if self.__context.is_vhd else self._get_required_parameter(variables, "c_name")
self.__context.is_vhd = self._get_optional_parameter(variables, "c_vhd") != ""
self.__context.image_name = f"{node.os.name}-vhd" if self.__context.is_vhd else self._get_required_parameter(variables, "c_env_name")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a "vm_name" parameter for existing VMs, so I renamed "c_name" to "c_env_name"

self.__context.test_suites = self._get_required_parameter(variables, "c_test_suites")
self.__context.collect_logs = self._get_required_parameter(variables, "collect_logs")
self.__context.skip_setup = self._get_required_parameter(variables, "skip_setup")

self._log.info(
"Test suite parameters: [skip_setup: %s] [collect_logs: %s] [test_suites: %s]",
self.context.skip_setup,
self.context.collect_logs,
[t.name for t in self.context.test_suites])
self.__context.ssh_client = SshClient(ip_address=self.__context.vm_ip_address, username=self.__context.username, private_key_file=self.__context.private_key_file)

@staticmethod
def _get_required_parameter(variables: Dict[str, Any], name: str) -> Any:
Expand All @@ -167,6 +165,13 @@ def _get_required_parameter(variables: Dict[str, Any], name: str) -> Any:
raise Exception(f"The runbook is missing required parameter '{name}'")
return value

@staticmethod
def _get_optional_parameter(variables: Dict[str, Any], name: str, default_value: Any = "") -> Any:
value = variables.get(name)
if value is None:
return default_value
return value

@staticmethod
def _get_log_path(variables: Dict[str, Any], lisa_log_path: str):
# NOTE: If "log_path" is not given as argument to the runbook, use a path derived from LISA's log for the test suite.
Expand Down Expand Up @@ -211,7 +216,7 @@ def _setup(self) -> None:
completed: Path = self.context.working_directory/"completed"

if completed.exists():
self._log.info("Found %s. Build has already been done, skipping", completed)
self._log.info("Found %s. Build has already been done, skipping.", completed)
return

self._log.info("Creating working directory: %s", self.context.working_directory)
Expand Down Expand Up @@ -260,29 +265,51 @@ def _setup_node(self) -> None:
self._log.info("Resource Group: %s", self.context.vm.resource_group)
self._log.info("")

self._install_tools_on_node()

if self.context.is_vhd:
self._log.info("Using a VHD; will not install the test Agent.")
else:
self._install_agent_on_node()

def _install_tools_on_node(self) -> None:
"""
Installs the test tools on the test node
"""
self.context.ssh_client.run_command("mkdir -p ~/bin")

tools_path = self.context.test_source_directory/"orchestrator"/"scripts"
nagworld9 marked this conversation as resolved.
Show resolved Hide resolved
self._log.info(f"Copying {tools_path} to the test node")
self.context.ssh_client.copy(tools_path, Path("~/bin"), remote_target=True, recursive=True)

if self.context.ssh_client.get_architecture() == "aarch64":
pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2")
pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2"
else:
pypy_path = Path("/tmp/pypy3.7-x64.tar.bz2")
pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2"

if not pypy_path.exists():
self._log.info(f"Downloading {pypy_download} to {pypy_path}")
run_command(["wget", pypy_download, "-O", pypy_path])
self._log.info(f"Copying {pypy_path} to the test node")
self.context.ssh_client.copy(pypy_path, Path("~/bin/pypy3.7.tar.bz2"), remote_target=True)

self._log.info(f'Installing tools on the test node\n{self.context.ssh_client.run_command("~/bin/scripts/install-tools")}')
self._log.info(f'Remote commands will use {self.context.ssh_client.run_command("which python3")}')

def _install_agent_on_node(self) -> None:
"""
Installs the given agent package on the test node.
"""
agent_package_path: Path = self._get_agent_package_path()

# The install script needs to unzip the agent package; ensure unzip is installed on the test node
self._log.info("Installing unzip tool on %s", self.context.node.name)
self.context.node.os.install_packages("unzip")

self._log.info("Installing %s on %s", agent_package_path, self.context.node.name)
agent_package_remote_path = self.context.remote_working_directory/agent_package_path.name
self._log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_remote_path)
self.context.node.shell.copy(agent_package_path, agent_package_remote_path)
self.execute_script_on_node(
self.context.test_source_directory/"orchestrator"/"scripts"/"install-agent",
parameters=f"--package {agent_package_remote_path} --version {AGENT_VERSION}",
sudo=True)
self.context.ssh_client.copy(agent_package_path, agent_package_remote_path, remote_target=True)
nagworld9 marked this conversation as resolved.
Show resolved Hide resolved
stdout = self.context.ssh_client.run_command(f"install-agent --package {agent_package_remote_path} --version {AGENT_VERSION}", use_sudo=True)
self._log.info(stdout)

self._log.info("The agent was installed successfully.")

Expand All @@ -293,13 +320,14 @@ def _collect_node_logs(self) -> None:
try:
# Collect the logs on the test machine into a compressed tarball
self._log.info("Collecting logs on test machine [%s]...", self.context.node.name)
self.execute_script_on_node(self.context.test_source_directory/"orchestrator"/"scripts"/"collect-logs", sudo=True)
stdout = self.context.ssh_client.run_command("collect-logs", use_sudo=True)
nagworld9 marked this conversation as resolved.
Show resolved Hide resolved
self._log.info(stdout)

# Copy the tarball to the local logs directory
remote_path = "/tmp/waagent-logs.tgz"
local_path = self.context.log_path/'{0}.tgz'.format(self.context.image_name)
self._log.info("Copying %s:%s to %s", self.context.node.name, remote_path, local_path)
self.context.node.shell.copy_back(remote_path, local_path)
self.context.ssh_client.copy(remote_path, local_path, remote_source=True)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the references to LISA's SSH client; the code now uses the Agent's SSH client instead.

except: # pylint: disable=bare-except
self._log.exception("Failed to collect logs from the test machine")

Expand All @@ -310,9 +338,14 @@ def agent_test_suite(self, node: Node, variables: Dict[str, Any], log_path: str,
"""
self._set_context(node, variables, log_path, log)

test_suite_success = True

with _set_thread_name(self.context.image_name): # The thread name is added to self._log
self._log.info(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this message here from the initialization function. The thread name for the log is set after initialization, so the thread name was missing from this message in the log.

"Test suite parameters: [test_suites: %s] [skip_setup: %s] [collect_logs: %s]",
[t.name for t in self.context.test_suites], self.context.skip_setup, self.context.collect_logs)

start_time: datetime.datetime = datetime.datetime.now()
test_suite_success = True

try:
if not self.context.skip_setup:
self._setup()
Expand All @@ -332,12 +365,17 @@ def agent_test_suite(self, node: Node, variables: Dict[str, Any], log_path: str,
self._collect_node_logs()

except: # pylint: disable=bare-except
# Note that we report the error to the LISA log and then re-raise it. We log it here
# so that the message is decorated with the thread name in the LISA log; we re-raise
# to let LISA know the test errored out (LISA will report that error one more time
# in its log)
self._log.exception("UNHANDLED EXCEPTION")
raise
# Report the error and raise an exception to let LISA know that the test errored out.
self._log.exception("TEST FAILURE DUE TO AN UNEXPECTED ERROR.")
self._report_test_result(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, I wasn't reporting these failures to the JUnit file.

self.context.image_name,
"Setup",
TestStatus.FAILED,
start_time,
message="TEST FAILURE DUE TO AN UNEXPECTED ERROR.",
add_exception_stack_trace=True)

raise Exception("STOPPING TEST EXECUTION DUE TO AN UNEXPECTED ERROR.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced the re-raise with this exception to eliminate the duplicated error message in the LISA log, and to make it clearer that we are stopping the LISA run.


finally:
self._clean_up()
Expand Down Expand Up @@ -373,17 +411,27 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:

test(self.context).run()

summary.append(f"[Passed] {test_name}")
summary.append(f"[Passed] {test_name}")
agent_test_logger.info("******** [Passed] %s", test_name)
self._log.info("******** [Passed] %s", test_full_name)
self._report_test_result(
suite_full_name,
test_name,
TestStatus.PASSED,
test_start_time)
except TestSkipped as e:
summary.append(f"[Skipped] {test_name}")
agent_test_logger.info("******** [Skipped] %s: %s", test_name, e)
self._log.info("******** [Skipped] %s", test_full_name)
self._report_test_result(
suite_full_name,
test_name,
TestStatus.SKIPPED,
test_start_time,
message=str(e))
except AssertionError as e:
success = False
summary.append(f"[Failed] {test_name}")
summary.append(f"[Failed] {test_name}")
agent_test_logger.error("******** [Failed] %s: %s", test_name, e)
self._log.error("******** [Failed] %s", test_full_name)
self._report_test_result(
Expand All @@ -394,7 +442,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:
message=str(e))
except: # pylint: disable=bare-except
success = False
summary.append(f"[Error] {test_name}")
summary.append(f"[Error] {test_name}")
agent_test_logger.exception("UNHANDLED EXCEPTION IN %s", test_name)
self._log.exception("UNHANDLED EXCEPTION IN %s", test_full_name)
self._report_test_result(
Expand Down Expand Up @@ -460,32 +508,4 @@ def _report_test_result(

notifier.notify(msg)

def execute_script_on_node(self, script_path: Path, parameters: str = "", sudo: bool = False) -> int:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we use the Agent's SSH client to do this

"""
Executes the given script on the test node; if 'sudo' is True, the script is executed using the sudo command.
"""
custom_script_builder = CustomScriptBuilder(script_path.parent, [script_path.name])
custom_script = self.context.node.tools[custom_script_builder]

if parameters == '':
command_line = f"{script_path}"
else:
command_line = f"{script_path} {parameters}"

self._log.info("Executing [%s]", command_line)

result = custom_script.run(parameters=parameters, sudo=sudo)

if result.stdout != "":
separator = "\n" if "\n" in result.stdout else " "
self._log.info("stdout:%s%s", separator, result.stdout)
if result.stderr != "":
separator = "\n" if "\n" in result.stderr else " "
self._log.error("stderr:%s%s", separator, result.stderr)

if result.exit_code != 0:
raise Exception(f"[{command_line}] failed. Exit code: {result.exit_code}")

return result.exit_code


Loading