Skip to content

Commit

Permalink
Check agent log for errors; install test libraries (#2787)
Browse files Browse the repository at this point in the history
* Check agent log for errors; install test libraries

---------

Co-authored-by: narrieta <narrieta>
  • Loading branch information
narrieta authored Mar 22, 2023
1 parent e9b51d7 commit 1640510
Show file tree
Hide file tree
Showing 8 changed files with 709 additions and 77 deletions.
132 changes: 93 additions & 39 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#
import contextlib
import datetime
import json
import logging
import re
import traceback
Expand All @@ -29,6 +30,7 @@
# E0401: Unable to import 'lisa' (import-error)
# etc
from lisa import ( # pylint: disable=E0401
Environment,
Logger,
Node,
notifier,
Expand All @@ -43,11 +45,13 @@
import makepkg
from azurelinuxagent.common.version import AGENT_VERSION
from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.agent_test import TestSkipped
from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.identifiers import VmIdentifier
from tests_e2e.tests.lib.logging import log as agent_test_logger # Logger used by the tests
from tests_e2e.tests.lib.logging import set_current_thread_log
from tests_e2e.tests.lib.agent_log import AgentLogRecord
from tests_e2e.tests.lib.shell import run_command
from tests_e2e.tests.lib.ssh_client import SshClient

Expand Down Expand Up @@ -256,7 +260,7 @@ def _clean_up(self) -> None:

def _setup_node(self) -> None:
"""
Prepares the remote node for executing the test suite.
Prepares the remote node for executing the test suite (installs tools and the test agent, etc)
"""
self._log.info("")
self._log.info("************************************** [Node Setup] **************************************")
Expand All @@ -265,23 +269,27 @@ def _setup_node(self) -> None:
self._log.info("Resource Group: %s", self.context.vm.resource_group)
self._log.info("")

self._install_tools_on_node()
self.context.ssh_client.run_command("mkdir -p ~/bin/tests_e2e/tests; touch ~/bin/agent-env")

if self.context.is_vhd:
self._log.info("Using a VHD; will not install the test Agent.")
else:
self._install_agent_on_node()
# Copy the test tools
tools_path = self.context.test_source_directory/"orchestrator"/"scripts"
tools_target_path = Path("~/bin")
self._log.info("Copying %s to %s:%s", tools_path, self.context.node.name, tools_target_path)
self.context.ssh_client.copy_to_node(tools_path, tools_target_path, recursive=True)

def _install_tools_on_node(self) -> None:
"""
Installs the test tools on the test node
"""
self.context.ssh_client.run_command("mkdir -p ~/bin")
# Copy the test libraries
lib_path = self.context.test_source_directory/"tests"/"lib"
lib_target_path = Path("~/bin/tests_e2e/tests")
self._log.info("Copying %s to %s:%s", lib_path, self.context.node.name, lib_target_path)
self.context.ssh_client.copy_to_node(lib_path, lib_target_path, recursive=True)

tools_path = self.context.test_source_directory/"orchestrator"/"scripts"
self._log.info(f"Copying {tools_path} to the test node")
self.context.ssh_client.copy(tools_path, Path("~/bin"), remote_target=True, recursive=True)
# Copy the test agent
agent_package_path: Path = self._get_agent_package_path()
agent_package_target_path = Path("~/bin")/agent_package_path.name
self._log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_target_path)
self.context.ssh_client.copy_to_node(agent_package_path, agent_package_target_path)

# Copy Pypy
if self.context.ssh_client.get_architecture() == "aarch64":
pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2")
pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2"
Expand All @@ -292,26 +300,21 @@ def _install_tools_on_node(self) -> None:
if not pypy_path.exists():
self._log.info(f"Downloading {pypy_download} to {pypy_path}")
run_command(["wget", pypy_download, "-O", pypy_path])
self._log.info(f"Copying {pypy_path} to the test node")
self.context.ssh_client.copy(pypy_path, Path("~/bin/pypy3.7.tar.bz2"), remote_target=True)
pypy_target_path = Path("~/bin/pypy3.7.tar.bz2")
self._log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, pypy_target_path)
self.context.ssh_client.copy_to_node(pypy_path, pypy_target_path)

self._log.info(f'Installing tools on the test node\n{self.context.ssh_client.run_command("~/bin/scripts/install-tools")}')
self._log.info(f'Remote commands will use {self.context.ssh_client.run_command("which python3")}')
# Install the tools and libraries
install_command = lambda: self.context.ssh_client.run_command(f"~/bin/scripts/install-tools --agent-package {agent_package_target_path}")
self._log.info('Installing tools on the test node\n%s', install_command())
self._log.info('Remote commands will use %s', self.context.ssh_client.run_command("which python3"))

def _install_agent_on_node(self) -> None:
"""
Installs the given agent package on the test node.
"""
agent_package_path: Path = self._get_agent_package_path()

self._log.info("Installing %s on %s", agent_package_path, self.context.node.name)
agent_package_remote_path = self.context.remote_working_directory/agent_package_path.name
self._log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_remote_path)
self.context.ssh_client.copy(agent_package_path, agent_package_remote_path, remote_target=True)
stdout = self.context.ssh_client.run_command(f"install-agent --package {agent_package_remote_path} --version {AGENT_VERSION}", use_sudo=True)
self._log.info(stdout)

self._log.info("The agent was installed successfully.")
# Install the agent
if self.context.is_vhd:
self._log.info("Using a VHD; will not install the Test Agent.")
else:
install_command = lambda: self.context.ssh_client.run_command(f"install-agent --package {agent_package_target_path} --version {AGENT_VERSION}", use_sudo=True)
self._log.info("Installing the Test Agent on %s\n%s", self.context.node.name, install_command())

def _collect_node_logs(self) -> None:
"""
Expand All @@ -327,23 +330,25 @@ def _collect_node_logs(self) -> None:
remote_path = "/tmp/waagent-logs.tgz"
local_path = self.context.log_path/'{0}.tgz'.format(self.context.image_name)
self._log.info("Copying %s:%s to %s", self.context.node.name, remote_path, local_path)
self.context.ssh_client.copy(remote_path, local_path, remote_source=True)
self.context.ssh_client.copy_from_node(remote_path, local_path)

except: # pylint: disable=bare-except
self._log.exception("Failed to collect logs from the test machine")

@TestCaseMetadata(description="", priority=0)
def agent_test_suite(self, node: Node, variables: Dict[str, Any], log_path: str, log: Logger) -> None:
def agent_test_suite(self, node: Node, environment: Environment, variables: Dict[str, Any], log_path: str, log: Logger) -> None:
"""
Executes each of the AgentTests included in the "c_test_suites" variable (which is generated by the AgentTestSuitesCombinator).
"""
self._set_context(node, variables, log_path, log)

with _set_thread_name(self.context.image_name): # The thread name is added to self._log
# E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable)
# (OK to iterate, test_suite is a List)
# Set the thread name to the image; this name is added to self._log
with _set_thread_name(self.context.image_name):
# Log the environment's name and the variables received from the runbook (note that we need to expand the names of the test suites)
self._log.info("LISA Environment: %s", environment.name)
self._log.info(
"Test suite parameters: [test_suites: %s] [skip_setup: %s] [collect_logs: %s]",
[t.name for t in self.context.test_suites], self.context.skip_setup, self.context.collect_logs) # pylint: disable=E1133
"Runbook variables:\n%s",
'\n'.join([f"\t{name}: {value if name != 'c_test_suites' else [t.name for t in value] }" for name, value in variables.items()]))

start_time: datetime.datetime = datetime.datetime.now()
test_suite_success = True
Expand All @@ -361,6 +366,8 @@ def agent_test_suite(self, node: Node, variables: Dict[str, Any], log_path: str,
for suite in self.context.test_suites: # pylint: disable=E1133
test_suite_success = self._execute_test_suite(suite) and test_suite_success

test_suite_success = self._check_agent_log() and test_suite_success

finally:
collect = self.context.collect_logs
if collect == CollectLogs.Always or collect == CollectLogs.Failed and not test_suite_success:
Expand Down Expand Up @@ -475,6 +482,53 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:

return success

def _check_agent_log(self) -> bool:
"""
Checks the agent log for errors; returns true on success (no errors int the log)
"""
start_time: datetime.datetime = datetime.datetime.now()

self._log.info("Checking agent log on the test node")
output = self.context.ssh_client.run_command("check-agent-log.py -j")
errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary)

# Individual tests may have rules to ignore known errors; filter those out
ignore_error_rules = []
# pylint seems to think self.context.test_suites is not iterable. Suppressing warning, since its type is List[AgentTestSuite]
# E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable)
for suite in self.context.test_suites: # pylint: disable=E1133
for test in suite.tests:
ignore_error_rules.extend(test(self.context).get_ignore_error_rules())

if len(ignore_error_rules) > 0:
new = []
for e in errors:
if not AgentLog.matches_ignore_rule(e, ignore_error_rules):
new.append(e)
errors = new

if len(errors) == 0:
# If no errors, we are done; don't create a log or test result.
self._log.info("There are no errors in the agent log")
return True

log_path: Path = self.context.log_path/f"CheckAgentLog-{self.context.image_name}.log"
message = f"Detected {len(errors)} error(s) in the agent log. See {log_path} for a full report."
self._log.info(message)

with set_current_thread_log(log_path):
agent_test_logger.info("Detected %s error(s) in the agent log:\n\n%s", len(errors), '\n'.join(['\t' + e.text for e in errors]))

self._report_test_result(
self.context.image_name,
"CheckAgentLog",
TestStatus.FAILED,
start_time,
message=message + '\n' + '\n'.join([e.text for e in errors[0:3]]),
add_exception_stack_trace=True)

return False

@staticmethod
def _report_test_result(
suite_name: str,
Expand Down
49 changes: 49 additions & 0 deletions tests_e2e/orchestrator/scripts/check-agent-log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import json
import sys

from pathlib import Path
from tests_e2e.tests.lib.agent_log import AgentLog

try:
parser = argparse.ArgumentParser()
parser.add_argument('path', nargs='?', help='Path of the log file', default='/var/log/waagent.log')
parser.add_argument('-j', '--json', action='store_true', help='Produce a JSON report')
parser.set_defaults(json=False)
args = parser.parse_args()

error_list = AgentLog(Path(args.path)).get_errors()

if args.json:
print(json.dumps(error_list, default=lambda o: o.__dict__))
else:
if len(error_list) == 0:
print("No errors were found.")
else:
for e in error_list:
print(e.text)

except Exception as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)

sys.exit(0)
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#
set -euo pipefail

#

# On most distros, 'waagent' is in PATH
if which waagent 2> /dev/null; then
exit 0
Expand All @@ -42,6 +40,7 @@ fi

# try some well-known locations
declare -a known_locations=(
"/usr/sbin/waagent"
"/usr/share/oem/bin/waagent"
)

Expand Down
18 changes: 9 additions & 9 deletions tests_e2e/orchestrator/scripts/install-agent
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ else
fi
echo "Service name: $service_name"

#
# Find the path to the Agent's executable file
#
waagent=$(get-agent-path)
echo "Agent's path: $waagent"
$waagent --version
echo ""

#
# Install the package
#
Expand All @@ -98,20 +106,13 @@ service-start $service_name
#
# Verify that the new agent is running and output its status.
# Note that the extension handler may take some time to start so give 1 minute.
# Also, note that the default Python is set to Pypy, so before executing 'waagent' we need to set the
# Python path to the location of the azurelinuxagent module.
#
echo "Verifying agent installation..."

PYTHONPATH=$(get-agent-pythonpath)
export PYTHONPATH

check-version() {
waagent=$(get-waagent-path)

for i in {0..5}
do
if $waagent --version | grep -E "Goal state agent:\s+$1" > /dev/null; then
if $waagent --version | grep -E "Goal state agent:\s+$version" > /dev/null; then
return 0
fi
sleep 10
Expand All @@ -128,7 +129,6 @@ else
exit_code=1
fi

waagent=$(get-waagent-path)
$waagent --version
printf "\n"
service-status $service_name
Expand Down
Loading

0 comments on commit 1640510

Please sign in to comment.