AbstractCode: Add the with_mpi attribute (#5922)

The `with_mpi` attribute serves to indicate whether a code instance needs to be run with the MPI run command in front of the executable. The attribute is exposed through the `with_mpi` property getter and setter. The attribute can be set through the `verdi code create` command using the `--with-mpi/--no-with-mpi` flags. The attribute is set to `None` by default. This means that the code doesn't enforce MPI being used or not whatsoever and it is left up to the `CalcJob` plugin and the `metadata.options.withmpi` input. The `CalcJob.presubmit` logic is updated to take the `with_mpi` setting of the input codes into account. The default value whether to run with or without MPI is defined by the `metadata.options.withmpi` input. This can then be overridden by either the plugin, code input or through the `metadata.options.withmpi` input, but if multiple are explicitly specified, they have to agree or a `RuntimeError` is raised.
aiidateam · Mar 12, 2023 · cdb3eed · cdb3eed
1 parent f7d7a4f
commit cdb3eed
Show file tree

Hide file tree

Showing 10 changed files with 302 additions and 96 deletions.
diff --git a/aiida/cmdline/commands/cmd_code.py b/aiida/cmdline/commands/cmd_code.py
@@ -250,7 +250,10 @@ def export(code, output_file):
         else:
             value = getattr(code, key)
 
-        code_data[key] = str(value)
+        # If the attribute is not set, for example ``with_mpi``, do not export it, because the YAML won't be valid for
+        # use in ``verdi code create`` since ``None`` is not a valid value on the CLI.
+        if value is not None:
+            code_data[key] = str(value)
 
     with open(output_file, 'w', encoding='utf-8') as yfhandle:
         yaml.dump(code_data, yfhandle)

diff --git a/aiida/engine/processes/calcjobs/calcjob.py b/aiida/engine/processes/calcjobs/calcjob.py
@@ -913,32 +913,52 @@ def presubmit(self, folder: Folder) -> CalcInfo:
 
             if code_info.code_uuid is None:
                 raise PluginInternalError('CalcInfo should have the information of the code to be launched')
-            this_code = load_code(code_info.code_uuid)
 
-            # To determine whether this code should be run with MPI enabled, we get the value that was set in the inputs
-            # of the entire process, which can then be overwritten by the value from the `CodeInfo`. This allows plugins
-            # to force certain codes to run without MPI, even if the user wants to run all codes with MPI whenever
-            # possible. This use case is typically useful for `CalcJob`s that consist of multiple codes where one or
-            # multiple codes always have to be executed without MPI.
-
-            this_withmpi = self.node.get_option('withmpi')
-
-            # Override the value of `withmpi` with that of the `CodeInfo` if and only if it is set
-            if code_info.withmpi is not None:
-                this_withmpi = code_info.withmpi
+            code = load_code(code_info.code_uuid)
+
+            # Here are the three values that will determine whether the code is to be run with MPI _if_ they are not
+            # ``None``. If any of them are explicitly defined but are not equivalent, an exception is raised. We use the
+            # ``self._raw_inputs`` to determine the actual value passed for ``metadata.options.withmpi`` and
+            # distinguish it from the default.
+            raw_inputs = self._raw_inputs or {}  # type: ignore[var-annotated]
+            with_mpi_option = raw_inputs.get('metadata', {}).get('options', {}).get('withmpi', None)
+            with_mpi_plugin = code_info.withmpi
+            with_mpi_code = code.with_mpi
+
+            with_mpi_values = [with_mpi_option, with_mpi_plugin, with_mpi_code]
+            with_mpi_values_defined = [value for value in with_mpi_values if value is not None]
+            with_mpi_values_set = set(with_mpi_values_defined)
+
+            # If more than one value is defined, they have to be identical, or we raise that a conflict is encountered
+            if len(with_mpi_values_set) > 1:
+                error = f'Inconsistent requirements as to whether code `{code}` should be run with or without MPI.'
+                if with_mpi_option is not None:
+                    error += f'\nThe `metadata.options.withmpi` input was set to `{with_mpi_option}`.'
+                if with_mpi_plugin is not None:
+                    error += f'\nThe plugin require `{with_mpi_plugin}`.'
+                if with_mpi_code is not None:
+                    error += f'\nThe code `{code}` required `{with_mpi_code}`.'
+                raise RuntimeError(error)
+
+            # At this point we know that the three explicit values agree if they are defined, so we simply set the value
+            if with_mpi_values_set:
+                with_mpi = with_mpi_values_set.pop()
+            else:
+                # Fall back to the default of the ``metadata.options.withmpi`` of the ``CalcJob`` class
+                with_mpi = self.node.get_option('withmpi')
 
-            if this_withmpi:
-                prepend_cmdline_params = this_code.get_prepend_cmdline_params(mpi_args, extra_mpirun_params)
+            if with_mpi:
+                prepend_cmdline_params = code.get_prepend_cmdline_params(mpi_args, extra_mpirun_params)
             else:
-                prepend_cmdline_params = this_code.get_prepend_cmdline_params()
+                prepend_cmdline_params = code.get_prepend_cmdline_params()
 
-            cmdline_params = this_code.get_executable_cmdline_params(code_info.cmdline_params)
+            cmdline_params = code.get_executable_cmdline_params(code_info.cmdline_params)
 
             tmpl_code_info = JobTemplateCodeInfo()
             tmpl_code_info.prepend_cmdline_params = prepend_cmdline_params
             tmpl_code_info.cmdline_params = cmdline_params
-            tmpl_code_info.use_double_quotes = [computer.get_use_double_quotes(), this_code.use_double_quotes]
-            tmpl_code_info.wrap_cmdline_params = this_code.wrap_cmdline_params
+            tmpl_code_info.use_double_quotes = [computer.get_use_double_quotes(), code.use_double_quotes]
+            tmpl_code_info.wrap_cmdline_params = code.wrap_cmdline_params
             tmpl_code_info.stdin_name = code_info.stdin_name
             tmpl_code_info.stdout_name = code_info.stdout_name
             tmpl_code_info.stderr_name = code_info.stderr_name

diff --git a/aiida/manage/tests/pytest_fixtures.py b/aiida/manage/tests/pytest_fixtures.py
@@ -428,16 +428,17 @@ def test_1(aiida_local_code_factory):
     :rtype: object
     """
 
-    def get_code(entry_point, executable, computer=aiida_localhost, label=None, prepend_text=None, append_text=None):
+    def get_code(entry_point, executable, computer=aiida_localhost, label=None, **kwargs):
         """Get local code.
 
         Sets up code for given entry point on given computer.
 
         :param entry_point: Entry point of calculation plugin
         :param executable: name of executable; will be searched for in local system PATH.
         :param computer: (local) AiiDA computer
-        :param prepend_text: a string of code that will be put in the scheduler script before the execution of the code.
-        :param append_text: a string of code that will be put in the scheduler script after the execution of the code.
+        :param label: Define the label of the code. By default the ``executable`` is taken. This can be useful if
+            multiple codes need to be created in a test which require unique labels.
+        :param kwargs: Additional keyword arguments that are passed to the code's constructor.
         :return: the `Code` either retrieved from the database or created if it did not yet exist.
         :rtype: :py:class:`~aiida.orm.Code`
         """
@@ -471,15 +472,10 @@ def get_code(entry_point, executable, computer=aiida_localhost, label=None, prep
             description=label,
             default_calc_job_plugin=entry_point,
             computer=computer,
-            filepath_executable=executable_path
+            filepath_executable=executable_path,
+            **kwargs
         )
 
-        if prepend_text is not None:
-            code.prepend_text = prepend_text
-
-        if append_text is not None:
-            code.append_text = append_text
-
         return code.store()
 
     return get_code

diff --git a/aiida/orm/nodes/data/code/abstract.py b/aiida/orm/nodes/data/code/abstract.py
@@ -40,6 +40,7 @@ class AbstractCode(Data, metaclass=abc.ABCMeta):
     _KEY_ATTRIBUTE_APPEND_TEXT: str = 'append_text'
     _KEY_ATTRIBUTE_PREPEND_TEXT: str = 'prepend_text'
     _KEY_ATTRIBUTE_USE_DOUBLE_QUOTES: str = 'use_double_quotes'
+    _KEY_ATTRIBUTE_WITH_MPI: str = 'with_mpi'
     _KEY_ATTRIBUTE_WRAP_CMDLINE_PARAMS: str = 'wrap_cmdline_params'
     _KEY_EXTRA_IS_HIDDEN: str = 'hidden'  # Should become ``is_hidden`` once ``Code`` is dropped
 
@@ -49,6 +50,7 @@ def __init__(
         append_text: str = '',
         prepend_text: str = '',
         use_double_quotes: bool = False,
+        with_mpi: bool | None = None,
         is_hidden: bool = False,
         wrap_cmdline_params: bool = False,
         **kwargs
@@ -59,6 +61,7 @@ def __init__(
         :param append_text: The text that should be appended to the run line in the job script.
         :param prepend_text: The text that should be prepended to the run line in the job script.
         :param use_double_quotes: Whether the command line invocation of this code should be escaped with double quotes.
+        :param with_mpi: Whether the command should be run as an MPI program.
         :param wrap_cmdline_params: Whether to wrap the executable and all its command line parameters into quotes to
             form a single string. This is required to enable support for Docker with the ``ContainerizedCode``.
         :param is_hidden: Whether the code is hidden.
@@ -68,6 +71,7 @@ def __init__(
         self.append_text = append_text
         self.prepend_text = prepend_text
         self.use_double_quotes = use_double_quotes
+        self.with_mpi = with_mpi
         self.wrap_cmdline_params = wrap_cmdline_params
         self.is_hidden = is_hidden
 
@@ -225,6 +229,24 @@ def use_double_quotes(self, value: bool) -> None:
         type_check(value, bool)
         self.base.attributes.set(self._KEY_ATTRIBUTE_USE_DOUBLE_QUOTES, value)
 
+    @property
+    def with_mpi(self) -> bool | None:
+        """Return whether the command should be run as an MPI program.
+
+        :return: ``True`` if the code should be run as an MPI program, ``False`` if it shouldn't, ``None`` if unknown.
+        """
+        return self.base.attributes.get(self._KEY_ATTRIBUTE_WITH_MPI, None)
+
+    @with_mpi.setter
+    def with_mpi(self, value: bool | None) -> None:
+        """Set whether the command should be run as an MPI program.
+
+        :param value: ``True`` if the code should be run as an MPI program, ``False`` if it shouldn't, ``None`` if
+            unknown.
+        """
+        type_check(value, bool, allow_none=True)
+        self.base.attributes.set(self._KEY_ATTRIBUTE_WITH_MPI, value)
+
     @property
     def wrap_cmdline_params(self) -> bool:
         """Return whether all command line parameters should be wrapped with double quotes to form a single argument.
@@ -361,4 +383,10 @@ def _get_cli_options(cls) -> dict:
                 'with single or double quotes.',
                 'prompt': 'Escape using double quotes',
             },
+            'with_mpi': {
+                'is_flag': True,
+                'default': None,
+                'help': 'Whether the executable should be run as an MPI program.',
+                'prompt': 'Run with MPI',
+            },
         }
diff --git a/docs/source/topics/calculations/usage.rst b/docs/source/topics/calculations/usage.rst
@@ -632,6 +632,112 @@ The ``rerunnable`` option enables the scheduler to re-launch the calculation if
 
 Because this depends on the scheduler, its configuration, and the code used, we cannot say conclusively when it will work -- do your own testing! It has been tested on a cluster using SLURM, but that does not guarantee other SLURM clusters behave in the same way.
 
+
+.. _topics:calculations:usage:calcjobs:mpi:
+
+Controlling MPI
+---------------
+
+The `Message Passing Interface <https://en.wikipedia.org/wiki/Message_Passing_Interface>`_ (MPI) is a standardized and portable message-passing standard designed to function on parallel computing architectures.
+AiiDA implements support for running calculation jobs with or without MPI enabled.
+There are a number of settings that can be used to control when and how MPI is used.
+
+.. _topics:calculations:usage:calcjobs:mpi:computer:
+
+The ``Computer``
+~~~~~~~~~~~~~~~~
+
+Each calculation job is executed on a compute resource, which is modeled by an instance of the :class:`~aiida.orm.computers.Computer` class.
+If the computer supports running with MPI, the command to use is stored in the ``mpirun_command`` attribute, which is retrieved and set using the :meth:`~aiida.orm.computers.Computer.get_mpirun_command` and :meth:`~aiida.orm.computers.Computer.set_mpirun_command`, respectively.
+For example, if the computer has `OpenMPI <https://docs.open-mpi.org/en/v5.0.x/index.html>`_ installed, it can be set to ``mpirun``.
+If the ``Computer`` does not specify an MPI command, then enabling MPI for a calculation job is ineffective.
+
+.. _topics:calculations:usage:calcjobs:mpi:code:
+
+The ``Code``
+~~~~~~~~~~~~
+
+.. versionadded:: 2.3
+
+When creating a code, you can tell AiiDA whether it should be run as an MPI program by setting the ``with_mpi`` attribute to ``True`` or ``False``.
+From AiiDA 2.3 onward, this is the **recommended** way of controlling MPI behavior.
+If the code can be run with or without MPI, setting the ``with_mpi`` attribute can be skipped.
+It will default to ``None``, leaving the question of whether to run with or without MPI up to the ``CalcJob`` plugin or user input.
+
+.. _topics:calculations:usage:calcjobs:mpi:calcjob-implementation:
+
+The ``CalcJob`` implementation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``CalcJob`` implementation instructs AiiDA how the codes should be run through the :class:`~aiida.common.datastructures.CalcInfo` object, which it returns from the :meth:`~aiida.engine.processes.calcjobs.calcjob.CalcJob.prepare_for_submission` method.
+For each code that the job should run (usually only a single one), a :class:`~aiida.common.datastructures.CodeInfo` object should be added to the list of the ``CalcInfo.codes_info`` attribute.
+If the plugin developer knows that the executable being wrapped is *always* an MPI program (no serial version available) or *never* an MPI program, they can set the ``withmpi`` attribute of the ``CodeInfo`` to ``True`` or ``False``, respectively.
+Note that this setting is fully optional; if the code could be run either way, it is best not to set it and leave it up to the ``Code`` or the ``metadata.options.withmpi`` input.
+
+.. note::
+
+    When implementing a ``CalcJob`` that runs a single code, consider specifying whether MPI should be enabled or disabled through the :ref:`metadata option<topics:calculations:usage:calcjobs:mpi:calcjob-inputs>`.
+    This can be accomplished by changing the default in the process specification:
+
+    .. code:: python
+
+        class SomeCalcJob(CalcJob):
+
+            @classmethod
+            def define(cls, spec):
+                super().define(spec)
+                spec.inputs['metadata']['options']['withmpi'].default = True
+
+    The advantage over using the ``CodeInfo.withmpi`` attribute is that the default of the metadata option can be introspected programmatically from the process spec, and so is more visible to the user.
+
+    Naturally, this approach is not viable for calculation jobs that run multiple codes that are different in whether they require MPI or not.
+    In this case, one should resort to using the ``CodeInfo.withmpi`` attribute.
+
+.. _topics:calculations:usage:calcjobs:mpi:calcjob-inputs:
+
+The ``CalcJob`` inputs
+~~~~~~~~~~~~~~~~~~~~~~
+
+Finally, the MPI setting can be controlled on a per-instance basis, using the ``withmpi`` :ref:`metadata option<topics:calculations:usage:calcjobs:options>`.
+If MPI should be enabled or disabled, explicitly set this option to ``True`` or ``False``, respectively.
+For example, the following instructs to run all codes in the calculation job with MPI enabled:
+
+.. code:: python
+
+    inputs = {
+        ...,
+        'metadata': {
+            'options': {
+                'withmpi': True
+            }
+        }
+    }
+    submit(CalcJob, **inputs)
+
+The default for this option is set to ``False`` on the base ``CalcJob`` implementation, but it will be overruled if explicitly defined.
+
+.. note::
+
+    The value set for the ``withmpi`` option will be applied to all codes.
+    If a calculation job runs more than one code, and each requires a different MPI setting, this option should not be used, and instead MPI should be controlled :ref:`through the code input <topics:calculations:usage:calcjobs:mpi:code>`.
+
+.. _topics:calculations:usage:calcjobs:mpi:conflict-resolution:
+
+Conflict resolution
+~~~~~~~~~~~~~~~~~~~
+
+As described above, MPI can be enabled or disabled for a calculation job on a number of levels:
+
+* The ``Code`` input
+* The ``CalcJob`` implementation
+* The ``metadata.options.withmpi`` input
+
+MPI is enabled or disabled if any of these values is explicitly set to ``True`` or ``False``, respectively.
+If multiple values are specified and they are not equivalent, a ``RuntimeError`` is raised.
+Depending on the conflict, one has to change the ``Code`` or ``metadata.options.withmpi`` input.
+If none of the values are explicitly defined, the value specified by the default of ``metadata.options.withmpi`` is taken.
+
+
 .. _topics:calculations:usage:calcjobs:launch:
 
 Launch

diff --git a/docs/source/topics/data_types.rst b/docs/source/topics/data_types.rst
@@ -590,8 +590,8 @@ If a default calculation job plugin is defined, a process builder can be obtaine
 
 .. important::
 
-    If a containerized code is used for a calculation that sets the :ref:`metadata option <topics:calculations:usage:calcjobs:options>` ``withmpi`` to ``True``, the MPI command line arguments are placed in front of the container runtime.
-    For example, when running Singularity with ``metadata.options.withmpi = True``, the runline in the submission script will be written as:
+    If a containerized code is used for a calculation that enables MPI (see :ref:`Controlling MPI <topics:calculations:usage:calcjobs:mpi>`), the MPI command line arguments are placed in front of the container runtime.
+    For example, when running Singularity with MPI enabled, the runline in the submission script will be written as:
 
     .. code-block:: bash