Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change run_shell_cmd to store command output in temporary file(s) by default + pass RunShellCmdResult instance to RunShellCmdError #4356

Merged
merged 10 commits into from
Nov 8, 2023
Merged
4 changes: 2 additions & 2 deletions easybuild/tools/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def check_module_function(self, allow_mismatch=False, regex=None):
output, exit_code = None, 1
else:
cmd = "type module"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=False, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=False, hidden=True, output_file=False)
output, exit_code = res.output, res.exit_code

if regex is None:
Expand Down Expand Up @@ -821,7 +821,7 @@ def run_module(self, *args, **kwargs):
cmd = ' '.join(cmd_list)
# note: module commands are always run in dry mode, and are kept hidden in trace and dry run output
res = run_shell_cmd(cmd_list, env=environ, fail_on_error=False, shell=False, split_stderr=True,
hidden=True, in_dry_run=True)
hidden=True, in_dry_run=True, output_file=False)

# stdout will contain python code (to change environment etc)
# stderr will contain text (just like the normal module command)
Expand Down
103 changes: 61 additions & 42 deletions easybuild/tools/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,23 @@
]


RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir',
'out_file', 'err_file'))


class RunShellCmdError(BaseException):

def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
def __init__(self, cmd_result, caller_info, *args, **kwargs):
"""Constructor for RunShellCmdError."""
self.cmd = cmd
self.cmd_name = cmd.split(' ')[0]
self.exit_code = exit_code
self.work_dir = work_dir
self.output = output
self.stderr = stderr
self.cmd = cmd_result.cmd
self.cmd_name = os.path.basename(self.cmd.split(' ')[0])
self.exit_code = cmd_result.exit_code
self.work_dir = cmd_result.work_dir
self.output = cmd_result.output
self.out_file = cmd_result.out_file
self.stderr = cmd_result.stderr
self.err_file = cmd_result.err_file

self.caller_info = caller_info

msg = f"Shell command '{self.cmd_name}' failed!"
Expand All @@ -110,21 +114,13 @@ def pad_4_spaces(msg):
pad_4_spaces(f"working directory -> {self.work_dir}"),
]

tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
output_fp = os.path.join(tmpdir, f"{self.cmd_name}.out")
with open(output_fp, 'w') as fp:
fp.write(self.output or '')
if self.out_file is not None:
# if there's no separate file for error/warnings, then out_file includes both stdout + stderr
out_info_msg = "output (stdout + stderr)" if self.err_file is None else "output (stdout) "
error_info.append(pad_4_spaces(f"{out_info_msg} -> {self.out_file}"))

if self.stderr is None:
error_info.append(pad_4_spaces(f"output (stdout + stderr) -> {output_fp}"))
else:
stderr_fp = os.path.join(tmpdir, f"{self.cmd_name}.err")
with open(stderr_fp, 'w') as fp:
fp.write(self.stderr)
error_info.extend([
pad_4_spaces(f"output (stdout) -> {output_fp}"),
pad_4_spaces(f"error/warnings (stderr) -> {stderr_fp}"),
])
if self.err_file is not None:
error_info.append(pad_4_spaces(f"error/warnings (stderr) -> {self.err_file}"))

caller_file_name, caller_line_nr, caller_function_name = self.caller_info
called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
Expand All @@ -136,9 +132,9 @@ def pad_4_spaces(msg):
sys.stderr.write('\n'.join(error_info) + '\n')


def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
def raise_run_shell_cmd_error(cmd_res):
"""
Raise RunShellCmdError for failing shell command, after collecting additional caller info
Raise RunShellCmdError for failed shell command, after collecting additional caller info
"""

# figure out where failing command was run
Expand All @@ -150,7 +146,7 @@ def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
frameinfo = inspect.getouterframes(inspect.currentframe())[3]
caller_info = (frameinfo.filename, frameinfo.lineno, frameinfo.function)

raise RunShellCmdError(cmd, exit_code, work_dir, output, stderr, caller_info)
raise RunShellCmdError(cmd_res, caller_info)


def run_cmd_cache(func):
Expand Down Expand Up @@ -185,7 +181,7 @@ def cache_aware_func(cmd, *args, **kwargs):
@run_shell_cmd_cache
def run_shell_cmd(cmd, fail_on_error=True, split_stderr=False, stdin=None, env=None,
hidden=False, in_dry_run=False, verbose_dry_run=False, work_dir=None, shell=True,
output_file=False, stream_output=False, asynchronous=False, with_hooks=True,
output_file=True, stream_output=False, asynchronous=False, with_hooks=True,
qa_patterns=None, qa_wait_patterns=None):
"""
Run specified (interactive) shell command, and capture output + exit code.
Expand Down Expand Up @@ -234,16 +230,23 @@ def to_cmd_str(cmd):
if work_dir is None:
work_dir = os.getcwd()

# temporary output file for command output, if requested
if output_file or not hidden:
# collect output of running command in temporary log file, if desired
fd, cmd_out_fp = tempfile.mkstemp(suffix='.log', prefix='easybuild-run-')
os.close(fd)
_log.info(f'run_cmd: Output of "{cmd}" will be logged to {cmd_out_fp}')
else:
cmd_out_fp = None

cmd_str = to_cmd_str(cmd)
cmd_name = os.path.basename(cmd_str.split(' ')[0])

# temporary output file(s) for command output
if output_file:
toptmpdir = os.path.join(tempfile.gettempdir(), 'run-shell-cmd-output')
os.makedirs(toptmpdir, exist_ok=True)
tmpdir = tempfile.mkdtemp(dir=toptmpdir, prefix=f'{cmd_name}-')
cmd_out_fp = os.path.join(tmpdir, 'out.txt')
_log.info(f'run_cmd: Output of "{cmd_str}" will be logged to {cmd_out_fp}')
if split_stderr:
cmd_err_fp = os.path.join(tmpdir, 'err.txt')
_log.info(f'run_cmd: Errors and warnings of "{cmd_str}" will be logged to {cmd_err_fp}')
else:
cmd_err_fp = None
else:
cmd_out_fp, cmd_err_fp = None, None

# early exit in 'dry run' mode, after printing the command that would be run (unless 'hidden' is enabled)
if not in_dry_run and build_option('extended_dry_run'):
Expand All @@ -253,11 +256,12 @@ def to_cmd_str(cmd):
msg += f" (in {work_dir})"
dry_run_msg(msg, silent=silent)

return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir,
out_file=cmd_out_fp, err_file=cmd_err_fp)

start_time = datetime.now()
if not hidden:
cmd_trace_msg(cmd_str, start_time, work_dir, stdin, cmd_out_fp)
cmd_trace_msg(cmd_str, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp)

if stdin:
# 'input' value fed to subprocess.run must be a byte sequence
Expand Down Expand Up @@ -286,7 +290,19 @@ def to_cmd_str(cmd):
output = proc.stdout.decode('utf-8', 'ignore')
stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)
# store command output to temporary file(s)
if output_file:
try:
with open(cmd_out_fp, 'w') as fp:
fp.write(output)
if split_stderr:
with open(cmd_err_fp, 'w') as fp:
fp.write(stderr)
except IOError as err:
raise EasyBuildError(f"Failed to dump command output to temporary file: {err}")

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir,
out_file=cmd_out_fp, err_file=cmd_err_fp)

# always log command output
cmd_name = cmd_str.split(' ')[0]
Expand All @@ -301,7 +317,7 @@ def to_cmd_str(cmd):
else:
_log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
if fail_on_error:
raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)
raise_run_shell_cmd_error(res)

if with_hooks:
run_hook_kwargs = {
Expand All @@ -319,15 +335,16 @@ def to_cmd_str(cmd):
return res


def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp):
def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp):
"""
Helper function to construct and print trace message for command being run

:param cmd: command being run
:param start_time: datetime object indicating when command was started
:param work_dir: path of working directory in which command is run
:param stdin: stdin input value for command
:param cmd_out_fp: path to output log file for command
:param cmd_out_fp: path to output file for command
:param cmd_err_fp: path to errors/warnings output file for command
"""
start_time = start_time.strftime('%Y-%m-%d %H:%M:%S')

Expand All @@ -339,7 +356,9 @@ def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp):
if stdin:
lines.append(f"\t[input: {stdin}]")
if cmd_out_fp:
lines.append(f"\t[output logged in {cmd_out_fp}]")
lines.append(f"\t[output saved to {cmd_out_fp}]")
if cmd_err_fp:
lines.append(f"\t[errors/warnings saved to {cmd_err_fp}]")

lines.append('\t' + cmd)

Expand Down
39 changes: 22 additions & 17 deletions easybuild/tools/systemtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_avail_core_count():
core_cnt = int(sum(sched_getaffinity()))
else:
# BSD-type systems
res = run_shell_cmd('sysctl -n hw.ncpu', in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd('sysctl -n hw.ncpu', in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
try:
if int(res.output) > 0:
core_cnt = int(res.output)
Expand Down Expand Up @@ -310,7 +310,7 @@ def get_total_memory():
elif os_type == DARWIN:
cmd = "sysctl -n hw.memsize"
_log.debug("Trying to determine total memory size on Darwin via cmd '%s'", cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
if res.exit_code == 0:
memtotal = int(res.output.strip()) // (1024**2)

Expand Down Expand Up @@ -392,14 +392,15 @@ def get_cpu_vendor():

elif os_type == DARWIN:
cmd = "sysctl -n machdep.cpu.vendor"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
out = res.output.strip()
if res.exit_code == 0 and out in VENDOR_IDS:
vendor = VENDOR_IDS[out]
_log.debug("Determined CPU vendor on DARWIN as being '%s' via cmd '%s" % (vendor, cmd))
else:
cmd = "sysctl -n machdep.cpu.brand_string"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
out = res.output.strip().split(' ')[0]
if res.exit_code == 0 and out in CPU_VENDORS:
vendor = out
Expand Down Expand Up @@ -502,7 +503,7 @@ def get_cpu_model():

elif os_type == DARWIN:
cmd = "sysctl -n machdep.cpu.brand_string"
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
if res.exit_code == 0:
model = res.output.strip()
_log.debug("Determined CPU model on Darwin using cmd '%s': %s" % (cmd, model))
Expand Down Expand Up @@ -547,7 +548,7 @@ def get_cpu_speed():
elif os_type == DARWIN:
cmd = "sysctl -n hw.cpufrequency_max"
_log.debug("Trying to determine CPU frequency on Darwin via cmd '%s'" % cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
out = res.output.strip()
cpu_freq = None
if res.exit_code == 0 and out:
Expand Down Expand Up @@ -595,7 +596,8 @@ def get_cpu_features():
for feature_set in ['extfeatures', 'features', 'leaf7_features']:
cmd = "sysctl -n machdep.cpu.%s" % feature_set
_log.debug("Trying to determine CPU features on Darwin via cmd '%s'", cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, fail_on_error=False, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, fail_on_error=False, with_hooks=False,
output_file=False)
if res.exit_code == 0:
cpu_feat.extend(res.output.strip().lower().split())

Expand All @@ -622,7 +624,8 @@ def get_gpu_info():
try:
cmd = "nvidia-smi --query-gpu=gpu_name,driver_version --format=csv,noheader"
_log.debug("Trying to determine NVIDIA GPU info on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
for line in res.output.strip().split('\n'):
nvidia_gpu_info = gpu_info.setdefault('NVIDIA', {})
Expand All @@ -640,13 +643,15 @@ def get_gpu_info():
try:
cmd = "rocm-smi --showdriverversion --csv"
_log.debug("Trying to determine AMD GPU driver on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
amd_driver = res.output.strip().split('\n')[1].split(',')[1]

cmd = "rocm-smi --showproductname --csv"
_log.debug("Trying to determine AMD GPU info on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
for line in res.output.strip().split('\n')[1:]:
amd_card_series = line.split(',')[1]
Expand Down Expand Up @@ -865,7 +870,7 @@ def check_os_dependency(dep):
pkg_cmd_flag.get(pkg_cmd),
dep,
])
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
found = res.exit_code == 0
if found:
break
Expand All @@ -877,7 +882,7 @@ def check_os_dependency(dep):
# try locate if it's available
if not found and which('locate'):
cmd = 'locate -c --regexp "/%s$"' % dep
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
try:
found = (res.exit_code == 0 and int(res.output.strip()) > 0)
except ValueError:
Expand All @@ -893,7 +898,7 @@ def get_tool_version(tool, version_option='--version', ignore_ec=False):
Output is returned as a single-line string (newlines are replaced by '; ').
"""
res = run_shell_cmd(' '.join([tool, version_option]), fail_on_error=False, in_dry_run=True,
hidden=True, with_hooks=False)
hidden=True, with_hooks=False, output_file=False)
if not ignore_ec and res.exit_code:
_log.warning("Failed to determine version of %s using '%s %s': %s" % (tool, tool, version_option, res.output))
return UNKNOWN
Expand All @@ -905,7 +910,7 @@ def get_gcc_version():
"""
Process `gcc --version` and return the GCC version.
"""
res = run_shell_cmd('gcc --version', fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd('gcc --version', fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
gcc_ver = None
if res.exit_code:
_log.warning("Failed to determine the version of GCC: %s", res.output)
Expand Down Expand Up @@ -961,7 +966,7 @@ def get_linked_libs_raw(path):
or None for other types of files.
"""

res = run_shell_cmd("file %s" % path, fail_on_error=False, hidden=True)
res = run_shell_cmd("file %s" % path, fail_on_error=False, hidden=True, output_file=False)
if res.exit_code:
fail_msg = "Failed to run 'file %s': %s" % (path, res.output)
_log.warning(fail_msg)
Expand Down Expand Up @@ -996,7 +1001,7 @@ def get_linked_libs_raw(path):
# take into account that 'ldd' may fail for strange reasons,
# like printing 'not a dynamic executable' when not enough memory is available
# (see also https://bugzilla.redhat.com/show_bug.cgi?id=1817111)
res = run_shell_cmd(linked_libs_cmd, fail_on_error=False, hidden=True)
res = run_shell_cmd(linked_libs_cmd, fail_on_error=False, hidden=True, output_file=False)
if res.exit_code == 0:
linked_libs_out = res.output
else:
Expand Down Expand Up @@ -1178,7 +1183,7 @@ def get_default_parallelism():
# No cache -> Calculate value from current system values
par = get_avail_core_count()
# determine max user processes via ulimit -u
res = run_shell_cmd("ulimit -u", in_dry_run=True, hidden=True)
res = run_shell_cmd("ulimit -u", in_dry_run=True, hidden=True, output_file=False)
try:
if res.output.startswith("unlimited"):
maxuserproc = 2 ** 32 - 1
Expand Down
Loading