Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16217 test: Update run_local(). #14748

Merged
18 commits merged on Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/tests/ftest/dfuse/pil4dfs_fio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _get_bandwidth(self, fio_result, rw):
"""Returns FIO bandwidth of a given I/O pattern

Args:
fio_result (RemoteCommandResult): results of a FIO command.
fio_result (CommandResult): results of a FIO command.
rw (str): Type of I/O pattern.

Returns:
Expand Down
11 changes: 5 additions & 6 deletions src/tests/ftest/harness/core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2021-2023 Intel Corporation.
(C) Copyright 2021-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -9,7 +9,7 @@

from apricot import TestWithServers
from ClusterShell.NodeSet import NodeSet
from run_utils import RunException, run_local, run_remote
from run_utils import run_local, run_remote


class HarnessCoreFilesTest(TestWithServers):
Expand Down Expand Up @@ -40,11 +40,10 @@ def test_core_files(self):
"""
# create a core.gdb file
self.log.debug("Create a core.gdb.harness.advanced file in core_pattern dir.")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.fail("Unable to find local core file pattern")
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]
core_file = "{}/core.gdb.harness.advanced".format(core_path)

self.log.debug("Creating %s", core_file)
Expand Down
261 changes: 219 additions & 42 deletions src/tests/ftest/harness/unit.py

Large diffs are not rendered by default.

62 changes: 26 additions & 36 deletions src/tests/ftest/process_core_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2022-2023 Intel Corporation.
(C) Copyright 2022-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -141,7 +141,8 @@ def process_core_files(self, directory, delete, test=None):
if os.path.splitext(core_name)[-1] == ".bz2":
# Decompress the file
command = f"lbzip2 -d -v '{os.path.join(core_dir, core_name)}'"
run_local(self.log, command)
if not run_local(self.log, command).passed:
raise CoreFileException(f"Error decompressing {core_name}")
core_name = os.path.splitext(core_name)[0]
exe_name = self._get_exe_name(os.path.join(core_dir, core_name))
self._create_stacktrace(core_dir, core_name, exe_name)
Expand Down Expand Up @@ -187,22 +188,23 @@ def _create_stacktrace(self, core_dir, core_name, exe_name):
stack_trace_file = os.path.join(core_dir, f"'{core_name}.stacktrace'")

self.log.debug("Generating a stacktrace from the %s core file from %s", core_full, host)
run_local(self.log, f"ls -l '{core_full}'")
if not run_local(self.log, f"ls -l '{core_full}'").passed:
raise RunException(f"Error listing {core_full}")

command = (
f"gdb -cd='{core_dir}' -ex 'set pagination off' -ex 'thread apply all bt full' -ex "
f"detach -ex quit '{exe_name}' '{core_name}'")
result = run_local(self.log, command, verbose=False)
if not result.passed:
raise RunException(f"Error creating {stack_trace_file}")

try:
output = run_local(self.log, command, check=False, verbose=False)
with open(stack_trace_file, "w", encoding="utf-8") as stack_trace:
stack_trace.writelines(output.stdout)
stack_trace.write(result.joined_stdout)

except IOError as error:
raise RunException(f"Error writing {stack_trace_file}") from error

except RunException as error:
raise RunException(f"Error creating {stack_trace_file}") from error

def _get_exe_name(self, core_file):
"""Get the executable name from the core file.

Expand All @@ -219,7 +221,7 @@ def _get_exe_name(self, core_file):
self.log.debug("Extracting the executable name from '%s'", core_file)
command = f"gdb -c '{core_file}' -ex 'info proc exe' -ex quit"
result = run_local(self.log, command, verbose=False)
last_line = result.stdout.splitlines()[-1]
last_line = result.joined_stdout.splitlines()[-1]
self.log.debug(" last line: %s", last_line)
cmd = last_line[7:]
self.log.debug(" last_line[7:-1]: %s", cmd)
Expand Down Expand Up @@ -277,7 +279,7 @@ def install_debuginfo_packages(self):
cmds.append(["sudo", "rm", "-f", path])

if self.USE_DEBUGINFO_INSTALL:
dnf_args = ["--exclude", "ompi-debuginfo"]
dnf_args = ["--nobest", "--exclude", "ompi-debuginfo"]
if os.getenv("TEST_RPMS", 'false') == 'true':
if "suse" in self.distro_info.name.lower():
dnf_args.extend(["libpmemobj1", "python3", "openmpi3"])
Expand All @@ -291,9 +293,8 @@ def install_debuginfo_packages(self):
else:
raise RunException(f"Unsupported distro: {self.distro_info}")
cmds.append(["sudo", "dnf", "-y", "install"] + dnf_args)
output = run_local(
self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]), check=False)
rpm_version = output.stdout
result = run_local(self.log, " ".join(["rpm", "-q", "--qf", "'%{evr}'", "daos"]))
rpm_version = result.joined_stdout
cmds.append(
["sudo", "dnf", "debuginfo-install", "-y"] + dnf_args
+ ["daos-" + rpm_version, "daos-*-" + rpm_version])
Expand Down Expand Up @@ -324,9 +325,7 @@ def install_debuginfo_packages(self):

retry = False
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd), check=True)
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
# got an error, so abort this list of commands and re-run
# it with a dnf clean, makecache first
retry = True
Expand All @@ -339,9 +338,7 @@ def install_debuginfo_packages(self):
cmds.insert(0, cmd_prefix + ["clean", "all"])
cmds.insert(1, cmd_prefix + ["makecache"])
for cmd in cmds:
try:
run_local(self.log, " ".join(cmd))
except RunException:
if not run_local(self.log, " ".join(cmd)).passed:
break

def is_el(self):
Expand Down Expand Up @@ -380,14 +377,11 @@ def resolve_debuginfo(self, pkg):

"""
package_info = None
try:
# Eventually use python libraries for this rather than exec()ing out to rpm
output = run_local(
self.log,
" ".join(
["rpm", "-q", "--qf", "'%{name} %{version} %{release} %{epoch}'", pkg]),
check=False)
name, version, release, epoch = output.stdout.split()
# Eventually use python libraries for this rather than exec()ing out to rpm
command = f"rpm -q --qf '%{{name}} %{{version}} %{{release}} %{{epoch}}' {pkg}"
result = run_local(self.log, command)
if result.passed:
name, version, release, epoch = result.joined_stdout.split()

debuginfo_map = {"glibc": "glibc-debuginfo-common"}
try:
Expand All @@ -400,7 +394,7 @@ def resolve_debuginfo(self, pkg):
"release": release,
"epoch": epoch
}
except ValueError:
else:
self.log.debug("Package %s not installed, skipping debuginfo", pkg)

return package_info
Expand All @@ -413,20 +407,16 @@ def delete_gdb_core_files(self):

"""
self.log.debug("Checking core files generated by core file processing")
try:
results = run_local(self.log, "cat /proc/sys/kernel/core_pattern", check=True)
except RunException:
result = run_local(self.log, "cat /proc/sys/kernel/core_pattern")
if not result.passed:
self.log.error("Unable to find local core file pattern")
self.log.debug("Stacktrace", exc_info=True)
return 1
core_path = os.path.split(results.stdout.splitlines()[-1])[0]
core_path = os.path.split(result.joined_stdout.splitlines()[-1])[0]

self.log.debug("Deleting core.gdb.*.* core files located in %s", core_path)
other = ["-printf '%M %n %-12u %-12g %12k %t %p\n' -delete"]
try:
run_local(
self.log, find_command(core_path, "core.gdb.*.*", 1, other), check=True)
except RunException:
if not run_local(self.log, find_command(core_path, "core.gdb.*.*", 1, other)).passed:
self.log.debug("core.gdb.*.* files could not be removed")
return 1
return 0
Expand Down
5 changes: 3 additions & 2 deletions src/tests/ftest/server/multiengine_persocket.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2020-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -181,7 +181,8 @@ def check_pmem(self, hosts, count):

def storage_format(self):
"""Perform storage format."""
run_local(self.log, "dmg storage format")
if not run_local(self.log, "dmg storage format").passed:
self.fail("dmg storage format failed")

def cleanup(self):
"""Servers clean up after test complete."""
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ftest/slurm_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _create_epilog_script(self, script):
"""
self.log.debug('Creating the slurm epilog script to run after each job.')
try:
with open(script, 'w') as script_file:
with open(script, 'w', encoding='utf-8') as script_file:
script_file.write('#!/bin/bash\n#\n')
script_file.write('/usr/bin/bash -c \'pkill --signal 9 dfuse\'\n')
script_file.write(
Expand Down Expand Up @@ -364,7 +364,7 @@ def _append_config_file(self, echo_command):
echo_command (str): command adding contents to the config file

Returns:
RemoteCommandResult: the result from the echo | tee command
CommandResult: the result from the echo | tee command
"""
tee_command = command_as_user(f'tee -a {self.SLURM_CONF}', self.root)
return run_remote(self.log, self.all_nodes, f'{echo_command} | {tee_command}')
Expand Down
6 changes: 2 additions & 4 deletions src/tests/ftest/util/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2019-2023 Intel Corporation.
(C) Copyright 2019-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -294,9 +294,7 @@ def support_collect_log(self, **kwargs):
CommandFailure: if the daos_agent command fails.

Returns:
RemoteCommandResult: a grouping of the command results from
the same hosts with the same return status

CommandResult: groups of command results from the same hosts with the same return status
"""
cmd = DaosAgentCommand(self.manager.job.command_path)
cmd.sudo = True
Expand Down
32 changes: 13 additions & 19 deletions src/tests/ftest/util/collection_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# pylint: disable=import-error,no-name-in-module
from util.environment_utils import TestEnvironment
from util.host_utils import get_local_host
from util.run_utils import RunException, find_command, run_local, run_remote, stop_processes
from util.run_utils import find_command, run_local, run_remote, stop_processes
from util.user_utils import get_chown_command
from util.yaml_utils import get_test_category

Expand Down Expand Up @@ -562,20 +562,17 @@ def move_files(logger, hosts, source, pattern, destination, depth, timeout, test
# Clush -rcopy the temporary remote directory to this host
command = ["clush", "-w", str(hosts), "-pv", "--rcopy", f"'{tmp_copy_dir}'", "--dest",
f"'{rcopy_dest}'"]
try:
run_local(logger, " ".join(command), check=True, timeout=timeout)
except RunException:
if not run_local(logger, " ".join(command), timeout=timeout).passed:
message = f"Error copying remote files to {destination}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return_code = 16

finally:
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16
# Remove the temporary remote directory on each host
command = f"{sudo_command}rm -fr '{tmp_copy_dir}'"
if not run_remote(logger, hosts, command).passed:
message = f"Error removing temporary remote copy directory '{tmp_copy_dir}'"
test_result.fail_test(logger, "Process", message)
return_code = 16

return return_code

Expand Down Expand Up @@ -648,14 +645,13 @@ def create_steps_log(logger, job_results_dir, test_result):
job_log = os.path.join(test_logs_dir, 'job.log')
step_log = os.path.join(test_logs_dir, 'steps.log')
command = rf"grep -E '(INFO |ERROR)\| (==> Step|START|PASS|FAIL|ERROR)' {job_log}"
try:
result = run_local(logger, command)
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.stdout)
except Exception: # pylint: disable=broad-except
result = run_local(logger, command)
if not result.passed:
message = f"Error creating {step_log}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 8192
with open(step_log, 'w', encoding="utf-8") as file:
file.write(result.joined_stdout)
return 0


Expand Down Expand Up @@ -713,9 +709,7 @@ def rename_avocado_test_dir(logger, test, job_results_dir, test_result, jenkins_
return 1024

# Remove latest symlink directory to avoid inclusion in the Jenkins build artifacts
try:
run_local(logger, f"rm -fr '{test_logs_lnk}'")
except RunException:
if not run_local(logger, f"rm -fr '{test_logs_lnk}'").passed:
message = f"Error removing {test_logs_lnk}"
test_result.fail_test(logger, "Process", message, sys.exc_info())
return 1024
Expand Down
12 changes: 4 additions & 8 deletions src/tests/ftest/util/dfuse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def _run_as_owner(self, hosts, command, timeout=120):
Defaults to 120 seconds.

Returns:
RemoteCommandResult: result of the command

CommandResult: result of the command
"""
return run_remote(
self.log, hosts, command_as_user(command, self.run_user), timeout=timeout)
Expand Down Expand Up @@ -233,7 +232,7 @@ def run(self, check=True, mount_callback=None):

Args:
check (bool): Check if dfuse mounted properly after mount is executed.
mount_callback (method, optional): method to pass RemoteCommandResult to
mount_callback (method, optional): method to pass CommandResult to
after mount. Default simply raises an exception on failure.

Raises:
Expand Down Expand Up @@ -517,8 +516,7 @@ def run(self):
CommandFailure: If the command fails

Returns:
RemoteCommandResult: result from run_remote

CommandResult: result from run_remote
"""
self.log.info('Running verify_perms.py on %s', str(self.hosts))
result = run_remote(self.log, self.hosts, self.with_exports, timeout=self.timeout)
Expand Down Expand Up @@ -568,9 +566,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command

Returns:
RemoteCommandResult: a grouping of the command results from the same host with the
same return status

CommandResult: groups of command results from the same hosts with the same return status
"""
if raise_exception is None:
raise_exception = self.exit_status_exception
Expand Down
4 changes: 1 addition & 3 deletions src/tests/ftest/util/fio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,7 @@ def _run_process(self, raise_exception=None):
CommandFailure: if there is an error running the command

Returns:
RemoteCommandResult: a grouping of the command results from the same hosts with the
same return status

CommandResult: groups of command results from the same hosts with the same return status
"""
if not self._hosts:
raise CommandFailure('No hosts specified for fio command')
Expand Down
11 changes: 4 additions & 7 deletions src/tests/ftest/util/general_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2018-2023 Intel Corporation.
(C) Copyright 2018-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -23,7 +23,7 @@
from avocado.utils import process
from ClusterShell.NodeSet import NodeSet
from ClusterShell.Task import task_self
from run_utils import RunException, get_clush_command, run_local, run_remote
from run_utils import get_clush_command, run_local, run_remote
from user_utils import get_chown_command, get_primary_group


Expand Down Expand Up @@ -1356,11 +1356,8 @@ def check_ping(log, host, expected_ping=True, cmd_timeout=60, verbose=True):
Returns:
bool: True if the expected number of pings were returned; False otherwise.
"""
log.debug("Checking for %s to be %sresponsive", host, "" if expected_ping else "un")
try:
run_local(
log, "ping -c 1 {}".format(host), check=True, timeout=cmd_timeout, verbose=verbose)
except RunException:
log.debug("Checking for %s to be %s", host, "responsive" if expected_ping else "unresponsive")
if not run_local(log, f"ping -c 1 {host}", timeout=cmd_timeout, verbose=verbose).passed:
return not expected_ping
return expected_ping

Expand Down
Loading
Loading