From 7df4f608168510945d0298f4aaefe1bae563f95f Mon Sep 17 00:00:00 2001 From: Bonan Zhu <33688599+zhubonan@users.noreply.github.com> Date: Wed, 14 Dec 2022 15:32:50 +0000 Subject: [PATCH] CLI: Add the `verdi calcjob remotecat` command (#4861) The command can be used to inspect the files in the working directory of a `CalcJob` while it is still running. The `outputcat` only works on the files in the `retrieved` folder so only works for retrieved jobs. There is the `verdi data core.remote cat` command, but there the user needs to specify the identifier of the `remote_folder` of the calcjob and the output file. The `verdi calcjob remotecat` command simplifies these steps. It was discussed to add the `--monitor` option to simulate the functionality of a `tail -f` call on an output file, but this required the adaptation of the `Transport` interface which may get refactored soon, so it was decided to omit that feature for the time being. Co-authored-by: Sebastiaan Huber --- aiida/cmdline/commands/cmd_calcjob.py | 78 ++++++++++++++++++++++++++ docs/source/reference/command_line.rst | 1 + tests/cmdline/commands/test_calcjob.py | 46 ++++++++++++--- 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/aiida/cmdline/commands/cmd_calcjob.py b/aiida/cmdline/commands/cmd_calcjob.py index fdce3b758c..72cec957f2 100644 --- a/aiida/cmdline/commands/cmd_calcjob.py +++ b/aiida/cmdline/commands/cmd_calcjob.py @@ -121,6 +121,31 @@ def calcjob_inputcat(calcjob, path): echo.echo_critical(f'Could not open output path "{path}". Exception: {exception}') +@verdi_calcjob.command('remotecat') +@arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',))) +@click.argument('path', type=str, required=False) +@decorators.with_dbenv() +def calcjob_remotecat(calcjob, path): + """Show the contents of a file in the remote working directory. + + The file to show can be specified using the PATH argument. 
If PATH is not specified, the default output file path + as defined by the `CalcJob` plugin class will be used instead. + """ + import shutil + import sys + import tempfile + + remote_folder, path = get_remote_and_path(calcjob, path) + + with tempfile.NamedTemporaryFile() as tmp_path: + try: + remote_folder.getfile(path, tmp_path.name) + with open(tmp_path.name, 'rb') as handle: + shutil.copyfileobj(handle, sys.stdout.buffer) + except IOError as exception: + echo.echo_critical(str(exception)) + + @verdi_calcjob.command('outputcat') @arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',))) @click.argument('path', type=click.STRING, required=False) @@ -279,3 +304,56 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit counter += 1 echo.echo_success(f'{counter} remote folders cleaned on {computer.label}') + + +def get_remote_and_path(calcjob, path=None): + """Return the remote folder output node and process the path argument. + + :param calcjob: The ``CalcJobNode`` whose ``remote_folder`` output is to be returned. + :param path: The relative path of the file. If not defined, the default output file is determined from + the node options or otherwise from the associated process class. If neither is defined, a ``ValueError`` is + raised. + :returns: A tuple of the ``RemoteData`` and the path of the output file to be used. + :raises ValueError: If path is not defined and no default output file is defined on the node or its associated + process class. + """ + remote_folder_linkname = 'remote_folder' # The `remote_folder` is the standard output of a calculation. + + try: + remote_folder = getattr(calcjob.outputs, remote_folder_linkname) + except AttributeError: + echo.echo_critical( + f'`CalcJobNode<{calcjob.pk}>` has no `{remote_folder_linkname}` output. ' + 'It probably has not started running yet.' 
+ ) + + if path is not None: + return remote_folder, path + + # Try to get the default output filename from the node + path = calcjob.get_option('output_filename') + + if path is not None: + return remote_folder, path + + try: + process_class = calcjob.process_class + except ValueError as exception: + raise ValueError( + f'The process class of `CalcJobNode<{calcjob.pk}>` cannot be loaded and so the default output filename ' + 'cannot be determined.\nPlease specify a path explicitly.' + ) from exception + + # Try to get the default output filename from the node's associated process class spec + port = process_class.spec_options.get('output_filename') + if port and port.has_default(): + path = port.default + + if path is not None: + return remote_folder, path + + raise ValueError( + f'`CalcJobNode<{calcjob.pk}>` does not define a default output file (option "output_filename" not found) ' + f'nor does its associated process class `{process_class.__name__}`\n' + 'Please specify a path explicitly.' + ) diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst index 985d643816..9a3994a4c4 100644 --- a/docs/source/reference/command_line.rst +++ b/docs/source/reference/command_line.rst @@ -53,6 +53,7 @@ Below is a list with all available subcommands. inputls Show the list of the generated calcjob input files. outputcat Show the contents of one of the calcjob retrieved outputs. outputls Show the list of the retrieved calcjob output files. + remotecat Show the contents of a file in the remote working directory. res Print data from the result output Dict node of a calcjob. 
diff --git a/tests/cmdline/commands/test_calcjob.py b/tests/cmdline/commands/test_calcjob.py index 6a2161860f..a763b12751 100644 --- a/tests/cmdline/commands/test_calcjob.py +++ b/tests/cmdline/commands/test_calcjob.py @@ -33,9 +33,9 @@ class TestVerdiCalculation: """Tests for `verdi calcjob`.""" @pytest.fixture(autouse=True) - def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable=unused-argument + def init_profile(self, aiida_profile_clean, aiida_localhost, tmp_path): # pylint: disable=unused-argument """Initialize the profile.""" - # pylint: disable=attribute-defined-outside-init + # pylint: disable=attribute-defined-outside-init,too-many-statements self.computer = aiida_localhost self.code = orm.InstalledCode(computer=self.computer, filepath_executable='/bin/true').store() @@ -47,14 +47,20 @@ def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable process_type = get_entry_point_string_from_class(process_class.__module__, process_class.__name__) # Create 5 CalcJobNodes (one for each CalculationState) - for calculation_state in CalcJobState: + for index, calculation_state in enumerate(CalcJobState): + + dirpath = (tmp_path / str(index)) + dirpath.mkdir() calc = orm.CalcJobNode(computer=self.computer, process_type=process_type) calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1}) - calc.set_remote_workdir('/tmp/aiida/work') - remote = RemoteData(remote_path='/tmp/aiida/work') + calc.set_option('output_filename', 'fileA.txt') + calc.set_remote_workdir(str(dirpath)) + remote = RemoteData(remote_path=str(dirpath)) remote.computer = calc.computer remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder') + (dirpath / 'fileA.txt').write_text('test stringA') + (dirpath / 'fileB.txt').write_text('test stringB') calc.store() remote.store() @@ -81,14 +87,16 @@ def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable self.group.add_nodes([calc]) # 
Create a single failed CalcJobNode + dirpath = (tmp_path / 'failed') + dirpath.mkdir() self.EXIT_STATUS = 100 calc = orm.CalcJobNode(computer=self.computer) calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1}) calc.store() calc.set_exit_status(self.EXIT_STATUS) calc.set_process_state(ProcessState.FINISHED) - calc.set_remote_workdir('/tmp/aiida/work') - remote = RemoteData(remote_path='/tmp/aiida/work') + calc.set_remote_workdir(str(dirpath)) + remote = RemoteData(remote_path=str(dirpath)) remote.computer = calc.computer remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder') remote.store() @@ -325,3 +333,27 @@ def test_calcjob_inoutputcat_old(self): assert result.exception is None, result.output assert len(get_result_lines(result)) == 1 assert get_result_lines(result)[0] == '5' + + def test_calcjob_remotecat(self): + """Test the remotecat command that prints the remote file for a given calcjob""" + # Specifying no filtering options and no explicit calcjobs should exit with non-zero status + options = [] + result = self.cli_runner.invoke(command.calcjob_remotecat, options) + assert result.exception is not None, result.output + + # This should be the failed calc without remote data - exception raised + options = [str(self.calcs[-1].uuid), 'fileB.txt'] + result = self.cli_runner.invoke(command.calcjob_remotecat, options) + assert result.exception is not None, result.output + + options = [str(self.result_job.uuid), 'fileB.txt'] + result = self.cli_runner.invoke(command.calcjob_remotecat, options) + assert result.stdout == 'test stringB' + + options = [str(self.result_job.uuid)] + result = self.cli_runner.invoke(command.calcjob_remotecat, options) + assert result.stdout == 'test stringA' + + options = [str(self.result_job.uuid), 'fileA.txt'] + result = self.cli_runner.invoke(command.calcjob_remotecat, options) + assert result.stdout == 'test stringA'