From 7df4f608168510945d0298f4aaefe1bae563f95f Mon Sep 17 00:00:00 2001
From: Bonan Zhu <33688599+zhubonan@users.noreply.github.com>
Date: Wed, 14 Dec 2022 15:32:50 +0000
Subject: [PATCH] CLI: Add the `verdi calcjob remotecat` command (#4861)

The command can be used to inspect the files in the working directory
of a `CalcJob` while it is still running. The `outputcat` command only
works on files in the `retrieved` folder, so it can only be used once
the outputs of a job have been retrieved. There is also the
`verdi data core.remote cat` command, but it requires the user to
specify the identifier of the `remote_folder` node of the calcjob as
well as the output file. The `verdi calcjob remotecat` command
simplifies these steps.

Adding a `--monitor` option to simulate the functionality of a `tail -f`
call on an output file was discussed, but this would have required
adapting the `Transport` interface, which may get refactored soon, so it
was decided to omit that feature for the time being.

Co-authored-by: Sebastiaan Huber <mail@sphuber.net>
---
 aiida/cmdline/commands/cmd_calcjob.py  | 78 ++++++++++++++++++++++++++
 docs/source/reference/command_line.rst |  1 +
 tests/cmdline/commands/test_calcjob.py | 46 ++++++++++++---
 3 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/aiida/cmdline/commands/cmd_calcjob.py b/aiida/cmdline/commands/cmd_calcjob.py
index fdce3b758c..72cec957f2 100644
--- a/aiida/cmdline/commands/cmd_calcjob.py
+++ b/aiida/cmdline/commands/cmd_calcjob.py
@@ -121,6 +121,31 @@ def calcjob_inputcat(calcjob, path):
             echo.echo_critical(f'Could not open output path "{path}". Exception: {exception}')
 
 
+@verdi_calcjob.command('remotecat')
+@arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
+@click.argument('path', type=str, required=False)
+@decorators.with_dbenv()
+def calcjob_remotecat(calcjob, path):
+    """Show the contents of a file in the remote working directory.
+
+    The file to show can be specified using the PATH argument. If PATH is not specified, the default output file path
+    as defined by the `CalcJob` plugin class will be used instead.
+    """
+    import shutil
+    import sys
+    import tempfile
+
+    # Report an undeterminable default path (``ValueError``) or a failed fetch (``IOError``) as a critical error.
+    try:
+        remote_folder, path = get_remote_and_path(calcjob, path)
+        with tempfile.NamedTemporaryFile() as tmp_path:
+            remote_folder.getfile(path, tmp_path.name)
+            with open(tmp_path.name, 'rb') as handle:
+                shutil.copyfileobj(handle, sys.stdout.buffer)
+    except (IOError, ValueError) as exception:
+        echo.echo_critical(str(exception))
+
+
 @verdi_calcjob.command('outputcat')
 @arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
 @click.argument('path', type=click.STRING, required=False)
@@ -279,3 +304,56 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit
                 counter += 1
 
         echo.echo_success(f'{counter} remote folders cleaned on {computer.label}')
+
+
+def get_remote_and_path(calcjob, path=None):
+    """Return the remote folder output node and process the path argument.
+
+    :param calcjob: The ``CalcJobNode`` whose ``remote_folder`` output is returned. If the node does not have this
+        output, a critical error is echoed through ``echo.echo_critical``.
+    :param path: The relative path of the file. If not defined, the default output file is determined from the node
+        options or otherwise from the associated process class.
+    :returns: A tuple of the ``RemoteData`` and the path of the output file to be used.
+    :raises ValueError: If path is not defined and no default output file is defined on the node nor its associated
+        process class, or if that process class cannot be loaded.
+    """
+    remote_folder_linkname = 'remote_folder'  # The `remote_folder` is the standard output of a calculation.
+
+    try:
+        remote_folder = getattr(calcjob.outputs, remote_folder_linkname)
+    except AttributeError:
+        echo.echo_critical(
+            f'`CalcJobNode<{calcjob.pk}>` has no `{remote_folder_linkname}` output. '
+            'It probably has not started running yet.'
+        )
+
+    if path is not None:
+        return remote_folder, path
+
+    # Try to get the default output filename from the node
+    path = calcjob.get_option('output_filename')
+
+    if path is not None:
+        return remote_folder, path
+
+    try:
+        process_class = calcjob.process_class
+    except ValueError as exception:
+        raise ValueError(
+            f'The process class of `CalcJobNode<{calcjob.pk}>` cannot be loaded and so the default output filename '
+            'cannot be determined.\nPlease specify a path explicitly.'
+        ) from exception
+
+    # Try to get the default output filename from the node's associated process class spec
+    port = process_class.spec_options.get('output_filename')
+    if port and port.has_default():
+        path = port.default
+
+    if path is not None:
+        return remote_folder, path
+
+    raise ValueError(
+        f'`CalcJobNode<{calcjob.pk}>` does not define a default output file (option "output_filename" not found) '
+        f'nor does its associated process class `{process_class.__name__}`\n'
+        'Please specify a path explicitly.'
+    )
diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst
index 985d643816..9a3994a4c4 100644
--- a/docs/source/reference/command_line.rst
+++ b/docs/source/reference/command_line.rst
@@ -53,6 +53,7 @@ Below is a list with all available subcommands.
       inputls       Show the list of the generated calcjob input files.
       outputcat     Show the contents of one of the calcjob retrieved outputs.
       outputls      Show the list of the retrieved calcjob output files.
+      remotecat     Show the contents of a file in the remote working directory.
       res           Print data from the result output Dict node of a calcjob.
 
 
diff --git a/tests/cmdline/commands/test_calcjob.py b/tests/cmdline/commands/test_calcjob.py
index 6a2161860f..a763b12751 100644
--- a/tests/cmdline/commands/test_calcjob.py
+++ b/tests/cmdline/commands/test_calcjob.py
@@ -33,9 +33,9 @@ class TestVerdiCalculation:
     """Tests for `verdi calcjob`."""
 
     @pytest.fixture(autouse=True)
-    def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable=unused-argument
+    def init_profile(self, aiida_profile_clean, aiida_localhost, tmp_path):  # pylint: disable=unused-argument
         """Initialize the profile."""
-        # pylint: disable=attribute-defined-outside-init
+        # pylint: disable=attribute-defined-outside-init,too-many-statements
 
         self.computer = aiida_localhost
         self.code = orm.InstalledCode(computer=self.computer, filepath_executable='/bin/true').store()
@@ -47,14 +47,20 @@ def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable
         process_type = get_entry_point_string_from_class(process_class.__module__, process_class.__name__)
 
         # Create 5 CalcJobNodes (one for each CalculationState)
-        for calculation_state in CalcJobState:
+        for index, calculation_state in enumerate(CalcJobState):
+
+            dirpath = (tmp_path / str(index))
+            dirpath.mkdir()
 
             calc = orm.CalcJobNode(computer=self.computer, process_type=process_type)
             calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
-            calc.set_remote_workdir('/tmp/aiida/work')
-            remote = RemoteData(remote_path='/tmp/aiida/work')
+            calc.set_option('output_filename', 'fileA.txt')
+            calc.set_remote_workdir(str(dirpath))
+            remote = RemoteData(remote_path=str(dirpath))
             remote.computer = calc.computer
             remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
+            (dirpath / 'fileA.txt').write_text('test stringA')
+            (dirpath / 'fileB.txt').write_text('test stringB')
             calc.store()
             remote.store()
 
@@ -81,14 +87,16 @@ def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable
                 self.group.add_nodes([calc])
 
         # Create a single failed CalcJobNode
+        dirpath = (tmp_path / 'failed')
+        dirpath.mkdir()
         self.EXIT_STATUS = 100
         calc = orm.CalcJobNode(computer=self.computer)
         calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
         calc.store()
         calc.set_exit_status(self.EXIT_STATUS)
         calc.set_process_state(ProcessState.FINISHED)
-        calc.set_remote_workdir('/tmp/aiida/work')
-        remote = RemoteData(remote_path='/tmp/aiida/work')
+        calc.set_remote_workdir(str(tmp_path))
+        remote = RemoteData(remote_path=str(tmp_path))
         remote.computer = calc.computer
         remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
         remote.store()
@@ -325,3 +333,27 @@ def test_calcjob_inoutputcat_old(self):
         assert result.exception is None, result.output
         assert len(get_result_lines(result)) == 1
         assert get_result_lines(result)[0] == '5'
+
+    def test_calcjob_remotecat(self):
+        """Test the ``remotecat`` command that prints a file from the remote working directory of a calcjob."""
+        # Invoking the command without the CALCJOB argument should exit with non-zero status
+        result = self.cli_runner.invoke(command.calcjob_remotecat, [])
+        assert result.exception is not None, result.output
+
+        # NOTE(review): presumably the failed calc, whose remote path contains no ``fileB.txt`` - confirm
+        options = [str(self.calcs[-1].uuid), 'fileB.txt']
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.exception is not None, result.output
+
+        # An explicit PATH is shown as is; without PATH the expected content is that of ``fileA.txt``
+        uuid = str(self.result_job.uuid)
+        expectations = (
+            ([uuid, 'fileB.txt'], 'test stringB'),
+            ([uuid], 'test stringA'),
+            ([uuid, 'fileA.txt'], 'test stringA'),
+        )
+
+        for options, expected in expectations:
+            result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+            assert result.exception is None, result.output
+            assert result.stdout == expected