Skip to content

Commit

Permalink
Merge pull request #4356 from boegel/run_shell_cmd_out_files
Browse files Browse the repository at this point in the history
change `run_shell_cmd` to store command output in temporary file(s) by default + pass `RunShellCmdResult` instance to `RunShellCmdError`
  • Loading branch information
lexming authored Nov 8, 2023
2 parents c073716 + 8620e64 commit 6b09872
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 70 deletions.
4 changes: 2 additions & 2 deletions easybuild/tools/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def check_module_function(self, allow_mismatch=False, regex=None):
output, exit_code = None, 1
else:
cmd = "type module"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=False, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=False, hidden=True, output_file=False)
output, exit_code = res.output, res.exit_code

if regex is None:
Expand Down Expand Up @@ -821,7 +821,7 @@ def run_module(self, *args, **kwargs):
cmd = ' '.join(cmd_list)
# note: module commands are always run in dry mode, and are kept hidden in trace and dry run output
res = run_shell_cmd(cmd_list, env=environ, fail_on_error=False, shell=False, split_stderr=True,
hidden=True, in_dry_run=True)
hidden=True, in_dry_run=True, output_file=False)

# stdout will contain python code (to change environment etc)
# stderr will contain text (just like the normal module command)
Expand Down
103 changes: 61 additions & 42 deletions easybuild/tools/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,23 @@
]


RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir',
'out_file', 'err_file'))


class RunShellCmdError(BaseException):

def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
def __init__(self, cmd_result, caller_info, *args, **kwargs):
"""Constructor for RunShellCmdError."""
self.cmd = cmd
self.cmd_name = cmd.split(' ')[0]
self.exit_code = exit_code
self.work_dir = work_dir
self.output = output
self.stderr = stderr
self.cmd = cmd_result.cmd
self.cmd_name = os.path.basename(self.cmd.split(' ')[0])
self.exit_code = cmd_result.exit_code
self.work_dir = cmd_result.work_dir
self.output = cmd_result.output
self.out_file = cmd_result.out_file
self.stderr = cmd_result.stderr
self.err_file = cmd_result.err_file

self.caller_info = caller_info

msg = f"Shell command '{self.cmd_name}' failed!"
Expand All @@ -110,21 +114,13 @@ def pad_4_spaces(msg):
pad_4_spaces(f"working directory -> {self.work_dir}"),
]

tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
output_fp = os.path.join(tmpdir, f"{self.cmd_name}.out")
with open(output_fp, 'w') as fp:
fp.write(self.output or '')
if self.out_file is not None:
# if there's no separate file for error/warnings, then out_file includes both stdout + stderr
out_info_msg = "output (stdout + stderr)" if self.err_file is None else "output (stdout) "
error_info.append(pad_4_spaces(f"{out_info_msg} -> {self.out_file}"))

if self.stderr is None:
error_info.append(pad_4_spaces(f"output (stdout + stderr) -> {output_fp}"))
else:
stderr_fp = os.path.join(tmpdir, f"{self.cmd_name}.err")
with open(stderr_fp, 'w') as fp:
fp.write(self.stderr)
error_info.extend([
pad_4_spaces(f"output (stdout) -> {output_fp}"),
pad_4_spaces(f"error/warnings (stderr) -> {stderr_fp}"),
])
if self.err_file is not None:
error_info.append(pad_4_spaces(f"error/warnings (stderr) -> {self.err_file}"))

caller_file_name, caller_line_nr, caller_function_name = self.caller_info
called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
Expand All @@ -136,9 +132,9 @@ def pad_4_spaces(msg):
sys.stderr.write('\n'.join(error_info) + '\n')


def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
def raise_run_shell_cmd_error(cmd_res):
"""
Raise RunShellCmdError for failing shell command, after collecting additional caller info
Raise RunShellCmdError for failed shell command, after collecting additional caller info
"""

# figure out where failing command was run
Expand All @@ -150,7 +146,7 @@ def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
frameinfo = inspect.getouterframes(inspect.currentframe())[3]
caller_info = (frameinfo.filename, frameinfo.lineno, frameinfo.function)

raise RunShellCmdError(cmd, exit_code, work_dir, output, stderr, caller_info)
raise RunShellCmdError(cmd_res, caller_info)


def run_cmd_cache(func):
Expand Down Expand Up @@ -185,7 +181,7 @@ def cache_aware_func(cmd, *args, **kwargs):
@run_shell_cmd_cache
def run_shell_cmd(cmd, fail_on_error=True, split_stderr=False, stdin=None, env=None,
hidden=False, in_dry_run=False, verbose_dry_run=False, work_dir=None, shell=True,
output_file=False, stream_output=False, asynchronous=False, with_hooks=True,
output_file=True, stream_output=False, asynchronous=False, with_hooks=True,
qa_patterns=None, qa_wait_patterns=None):
"""
Run specified (interactive) shell command, and capture output + exit code.
Expand Down Expand Up @@ -234,16 +230,23 @@ def to_cmd_str(cmd):
if work_dir is None:
work_dir = os.getcwd()

# temporary output file for command output, if requested
if output_file or not hidden:
# collect output of running command in temporary log file, if desired
fd, cmd_out_fp = tempfile.mkstemp(suffix='.log', prefix='easybuild-run-')
os.close(fd)
_log.info(f'run_cmd: Output of "{cmd}" will be logged to {cmd_out_fp}')
else:
cmd_out_fp = None

cmd_str = to_cmd_str(cmd)
cmd_name = os.path.basename(cmd_str.split(' ')[0])

# temporary output file(s) for command output
if output_file:
toptmpdir = os.path.join(tempfile.gettempdir(), 'run-shell-cmd-output')
os.makedirs(toptmpdir, exist_ok=True)
tmpdir = tempfile.mkdtemp(dir=toptmpdir, prefix=f'{cmd_name}-')
cmd_out_fp = os.path.join(tmpdir, 'out.txt')
_log.info(f'run_cmd: Output of "{cmd_str}" will be logged to {cmd_out_fp}')
if split_stderr:
cmd_err_fp = os.path.join(tmpdir, 'err.txt')
_log.info(f'run_cmd: Errors and warnings of "{cmd_str}" will be logged to {cmd_err_fp}')
else:
cmd_err_fp = None
else:
cmd_out_fp, cmd_err_fp = None, None

# early exit in 'dry run' mode, after printing the command that would be run (unless 'hidden' is enabled)
if not in_dry_run and build_option('extended_dry_run'):
Expand All @@ -253,11 +256,12 @@ def to_cmd_str(cmd):
msg += f" (in {work_dir})"
dry_run_msg(msg, silent=silent)

return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir,
out_file=cmd_out_fp, err_file=cmd_err_fp)

start_time = datetime.now()
if not hidden:
cmd_trace_msg(cmd_str, start_time, work_dir, stdin, cmd_out_fp)
cmd_trace_msg(cmd_str, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp)

if stdin:
# 'input' value fed to subprocess.run must be a byte sequence
Expand Down Expand Up @@ -286,7 +290,19 @@ def to_cmd_str(cmd):
output = proc.stdout.decode('utf-8', 'ignore')
stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)
# store command output to temporary file(s)
if output_file:
try:
with open(cmd_out_fp, 'w') as fp:
fp.write(output)
if split_stderr:
with open(cmd_err_fp, 'w') as fp:
fp.write(stderr)
except IOError as err:
raise EasyBuildError(f"Failed to dump command output to temporary file: {err}")

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir,
out_file=cmd_out_fp, err_file=cmd_err_fp)

# always log command output
cmd_name = cmd_str.split(' ')[0]
Expand All @@ -301,7 +317,7 @@ def to_cmd_str(cmd):
else:
_log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
if fail_on_error:
raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)
raise_run_shell_cmd_error(res)

if with_hooks:
run_hook_kwargs = {
Expand All @@ -319,15 +335,16 @@ def to_cmd_str(cmd):
return res


def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp):
def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp, cmd_err_fp):
"""
Helper function to construct and print trace message for command being run
:param cmd: command being run
:param start_time: datetime object indicating when command was started
:param work_dir: path of working directory in which command is run
:param stdin: stdin input value for command
:param cmd_out_fp: path to output log file for command
:param cmd_out_fp: path to output file for command
:param cmd_err_fp: path to errors/warnings output file for command
"""
start_time = start_time.strftime('%Y-%m-%d %H:%M:%S')

Expand All @@ -339,7 +356,9 @@ def cmd_trace_msg(cmd, start_time, work_dir, stdin, cmd_out_fp):
if stdin:
lines.append(f"\t[input: {stdin}]")
if cmd_out_fp:
lines.append(f"\t[output logged in {cmd_out_fp}]")
lines.append(f"\t[output saved to {cmd_out_fp}]")
if cmd_err_fp:
lines.append(f"\t[errors/warnings saved to {cmd_err_fp}]")

lines.append('\t' + cmd)

Expand Down
39 changes: 22 additions & 17 deletions easybuild/tools/systemtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_avail_core_count():
core_cnt = int(sum(sched_getaffinity()))
else:
# BSD-type systems
res = run_shell_cmd('sysctl -n hw.ncpu', in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd('sysctl -n hw.ncpu', in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
try:
if int(res.output) > 0:
core_cnt = int(res.output)
Expand Down Expand Up @@ -310,7 +310,7 @@ def get_total_memory():
elif os_type == DARWIN:
cmd = "sysctl -n hw.memsize"
_log.debug("Trying to determine total memory size on Darwin via cmd '%s'", cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
if res.exit_code == 0:
memtotal = int(res.output.strip()) // (1024**2)

Expand Down Expand Up @@ -392,14 +392,15 @@ def get_cpu_vendor():

elif os_type == DARWIN:
cmd = "sysctl -n machdep.cpu.vendor"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
out = res.output.strip()
if res.exit_code == 0 and out in VENDOR_IDS:
vendor = VENDOR_IDS[out]
_log.debug("Determined CPU vendor on DARWIN as being '%s' via cmd '%s" % (vendor, cmd))
else:
cmd = "sysctl -n machdep.cpu.brand_string"
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
out = res.output.strip().split(' ')[0]
if res.exit_code == 0 and out in CPU_VENDORS:
vendor = out
Expand Down Expand Up @@ -502,7 +503,7 @@ def get_cpu_model():

elif os_type == DARWIN:
cmd = "sysctl -n machdep.cpu.brand_string"
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
if res.exit_code == 0:
model = res.output.strip()
_log.debug("Determined CPU model on Darwin using cmd '%s': %s" % (cmd, model))
Expand Down Expand Up @@ -547,7 +548,7 @@ def get_cpu_speed():
elif os_type == DARWIN:
cmd = "sysctl -n hw.cpufrequency_max"
_log.debug("Trying to determine CPU frequency on Darwin via cmd '%s'" % cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, with_hooks=False, output_file=False)
out = res.output.strip()
cpu_freq = None
if res.exit_code == 0 and out:
Expand Down Expand Up @@ -595,7 +596,8 @@ def get_cpu_features():
for feature_set in ['extfeatures', 'features', 'leaf7_features']:
cmd = "sysctl -n machdep.cpu.%s" % feature_set
_log.debug("Trying to determine CPU features on Darwin via cmd '%s'", cmd)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, fail_on_error=False, with_hooks=False)
res = run_shell_cmd(cmd, in_dry_run=True, hidden=True, fail_on_error=False, with_hooks=False,
output_file=False)
if res.exit_code == 0:
cpu_feat.extend(res.output.strip().lower().split())

Expand All @@ -622,7 +624,8 @@ def get_gpu_info():
try:
cmd = "nvidia-smi --query-gpu=gpu_name,driver_version --format=csv,noheader"
_log.debug("Trying to determine NVIDIA GPU info on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
for line in res.output.strip().split('\n'):
nvidia_gpu_info = gpu_info.setdefault('NVIDIA', {})
Expand All @@ -640,13 +643,15 @@ def get_gpu_info():
try:
cmd = "rocm-smi --showdriverversion --csv"
_log.debug("Trying to determine AMD GPU driver on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
amd_driver = res.output.strip().split('\n')[1].split(',')[1]

cmd = "rocm-smi --showproductname --csv"
_log.debug("Trying to determine AMD GPU info on Linux via cmd '%s'", cmd)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, with_hooks=False,
output_file=False)
if res.exit_code == 0:
for line in res.output.strip().split('\n')[1:]:
amd_card_series = line.split(',')[1]
Expand Down Expand Up @@ -865,7 +870,7 @@ def check_os_dependency(dep):
pkg_cmd_flag.get(pkg_cmd),
dep,
])
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
found = res.exit_code == 0
if found:
break
Expand All @@ -877,7 +882,7 @@ def check_os_dependency(dep):
# try locate if it's available
if not found and which('locate'):
cmd = 'locate -c --regexp "/%s$"' % dep
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd(cmd, fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
try:
found = (res.exit_code == 0 and int(res.output.strip()) > 0)
except ValueError:
Expand All @@ -893,7 +898,7 @@ def get_tool_version(tool, version_option='--version', ignore_ec=False):
Output is returned as a single-line string (newlines are replaced by '; ').
"""
res = run_shell_cmd(' '.join([tool, version_option]), fail_on_error=False, in_dry_run=True,
hidden=True, with_hooks=False)
hidden=True, with_hooks=False, output_file=False)
if not ignore_ec and res.exit_code:
_log.warning("Failed to determine version of %s using '%s %s': %s" % (tool, tool, version_option, res.output))
return UNKNOWN
Expand All @@ -905,7 +910,7 @@ def get_gcc_version():
"""
Process `gcc --version` and return the GCC version.
"""
res = run_shell_cmd('gcc --version', fail_on_error=False, in_dry_run=True, hidden=True)
res = run_shell_cmd('gcc --version', fail_on_error=False, in_dry_run=True, hidden=True, output_file=False)
gcc_ver = None
if res.exit_code:
_log.warning("Failed to determine the version of GCC: %s", res.output)
Expand Down Expand Up @@ -961,7 +966,7 @@ def get_linked_libs_raw(path):
or None for other types of files.
"""

res = run_shell_cmd("file %s" % path, fail_on_error=False, hidden=True)
res = run_shell_cmd("file %s" % path, fail_on_error=False, hidden=True, output_file=False)
if res.exit_code:
fail_msg = "Failed to run 'file %s': %s" % (path, res.output)
_log.warning(fail_msg)
Expand Down Expand Up @@ -996,7 +1001,7 @@ def get_linked_libs_raw(path):
# take into account that 'ldd' may fail for strange reasons,
# like printing 'not a dynamic executable' when not enough memory is available
# (see also https://bugzilla.redhat.com/show_bug.cgi?id=1817111)
res = run_shell_cmd(linked_libs_cmd, fail_on_error=False, hidden=True)
res = run_shell_cmd(linked_libs_cmd, fail_on_error=False, hidden=True, output_file=False)
if res.exit_code == 0:
linked_libs_out = res.output
else:
Expand Down Expand Up @@ -1178,7 +1183,7 @@ def get_default_parallelism():
# No cache -> Calculate value from current system values
par = get_avail_core_count()
# determine max user processes via ulimit -u
res = run_shell_cmd("ulimit -u", in_dry_run=True, hidden=True)
res = run_shell_cmd("ulimit -u", in_dry_run=True, hidden=True, output_file=False)
try:
if res.output.startswith("unlimited"):
maxuserproc = 2 ** 32 - 1
Expand Down
Loading

0 comments on commit 6b09872

Please sign in to comment.