Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docs: Instructions on creating scheduler plugin #4476

Merged
merged 6 commits into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions docs/source/topics/include/scheduler_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-
"""Template for a scheduler plugin."""
import datetime
import logging

from aiida.common import exceptions
from aiida.common.escaping import escape_for_bash
from aiida.schedulers import Scheduler, SchedulerError, SchedulerParsingError
from aiida.schedulers.datastructures import JobInfo, JobState, JobResource

_LOGGER = logging.getLogger(__name__)


# Mapping from scheduler status strings to AiiDA ``JobState`` values.
# The ``SET_ME_STATUS_*`` keys are placeholders: replace each one with the status
# string that the target scheduler actually reports for the corresponding state.
_MAP_SCHEDULER_AIIDA_STATUS = {
'SET_ME_STATUS_1': JobState.RUNNING,
'SET_ME_STATUS_5': JobState.QUEUED,
'SET_ME_STATUS_2': JobState.DONE,
'SET_ME_STATUS_3': JobState.QUEUED_HELD,
'SET_ME_STATUS_4': JobState.UNDETERMINED,
'SET_ME_STATUS_6': JobState.SUSPENDED,
}


class TemplateJobResource(JobResource):
    """Template class for job resources.

    Every method below is a stub whose body consists only of a docstring, so it
    returns ``None`` until a real implementation is filled in.
    """

    @classmethod
    def validate_resources(cls, **kwargs):
        """Validate the resources against the job resource class of this scheduler.

        :param kwargs: dictionary of values to define the job resources
        :raises ValueError: if the resources are invalid or incomplete
        :return: optional tuple of parsed resource settings
        """

    @classmethod
    def accepts_default_mpiprocs_per_machine(cls):
        """Return True if this subclass accepts a `default_mpiprocs_per_machine` key, False otherwise."""

    def get_tot_num_mpiprocs(self):
        """Return the total number of cpus of this job resource."""


class TemplateScheduler(Scheduler):
    """Base class template for a scheduler.

    The methods return empty placeholders (``''``, ``[]``, ``True``) or raise
    ``FeatureNotAvailable``; replace them with the commands and parsers of the
    scheduler that is being supported.
    """

    # Query only by list of jobs and not by user
    _features = {
        'can_query_by_user': False,
    }

    # The class to be used for the job resource.
    # This needs to be set to a subclass of :class:`~aiida.schedulers.datastructures.JobResource`.
    _job_resource_class = TemplateJobResource

    # Translation table from scheduler status strings to AiiDA job states.
    _map_status = _MAP_SCHEDULER_AIIDA_STATUS

    def _get_joblist_command(self, jobs=None, user=None):
        """Return the command to report full information on existing jobs.

        :param jobs: optional selection of jobs to query (unused by this placeholder).
        :param user: optional user to query for (unused by this placeholder; note that
            ``_features`` declares ``can_query_by_user`` as ``False``).
        :return: a string of the command to be executed to determine the active jobs.
        """
        return ''

    def _get_detailed_job_info_command(self, job_id):
        """Return the command to run to get the detailed information on a job,
        even after the job has finished.

        The output text is just retrieved, and returned for logging purposes.

        :param job_id: the identifier of the job to get detailed information for.
        :raises aiida.common.exceptions.FeatureNotAvailable: always, until this method is implemented.
        """
        # An implementation would return, for instance, f'tracejob -v {escape_for_bash(job_id)}'
        raise exceptions.FeatureNotAvailable('Retrieving detailed job info is not implemented')

    def _get_submit_script_header(self, job_tmpl):
        """Return the submit script header, using the parameters from the job template.

        :param job_tmpl: a ``JobTemplate`` instance with relevant parameters set.
        :return: a string with the header of the submit script.
        """
        return ''

    def _get_submit_command(self, submit_script):
        """Return the string to execute to submit a given script.

        .. warning:: the `submit_script` should already have been bash-escaped

        :param submit_script: the path of the submit script relative to the working directory.
        :return: the string to execute to submit a given script.
        """
        submit_command = ''  # for instance f'qsub {submit_script}'

        _LOGGER.info(f'submitting with: {submit_command}')

        return submit_command

    def _parse_joblist_output(self, retval, stdout, stderr):
        """Parse the joblist output as returned by executing the command returned by `_get_joblist_command` method.

        :param retval: the return value of the joblist command.
        :param stdout: the text written to stdout by the joblist command.
        :param stderr: the text written to stderr by the joblist command.
        :return: list of `JobInfo` objects, one of each job each with at least its default params implemented.
        """
        return []

    def _parse_submit_output(self, retval, stdout, stderr):
        """Parse the output of the submit command returned by calling the `_get_submit_command` command.

        :param retval: the return value of the submit command.
        :param stdout: the text written to stdout by the submit command.
        :param stderr: the text written to stderr by the submit command.
        :raises SchedulerError: if ``retval`` is non-zero.
        :return: a string with the job ID, taken to be the stripped content of ``stdout``.
        """
        if retval != 0:
            _LOGGER.error(f'Error in _parse_submit_output: retval={retval}; stdout={stdout}; stderr={stderr}')
            raise SchedulerError(f'Error during submission, retval={retval}; stdout={stdout}; stderr={stderr}')

        # Text on stderr with a zero return value is not fatal: only warn about it.
        if stderr.strip():
            _LOGGER.warning(f'in _parse_submit_output there was some text in stderr: {stderr}')

        return stdout.strip()

    def _get_kill_command(self, jobid):
        """Return the command to kill the job with specified jobid.

        :param jobid: the identifier of the job to kill.
        :return: a string with the kill command.
        """
        _LOGGER.info(f'killing job {jobid}')

        return ''  # for instance f'qdel {jobid}'

    def _parse_kill_output(self, retval, stdout, stderr):
        """Parse the output of the kill command.

        :param retval: the return value of the kill command.
        :param stdout: the text written to stdout by the kill command.
        :param stderr: the text written to stderr by the kill command.
        :return: True if everything seems ok, False otherwise.
        """
        return True

    def parse_output(self, detailed_job_info, stdout, stderr):
        """Parse the output of the scheduler.

        :param detailed_job_info: dictionary with the output returned by the `Scheduler.get_detailed_job_info` command.
            This should contain the keys `retval`, `stdout` and `stderr` corresponding to the return value, stdout and
            stderr returned by the accounting command executed for a specific job id.
        :param stdout: string with the output written by the scheduler to stdout
        :param stderr: string with the output written by the scheduler to stderr
        :return: None or an instance of `aiida.engine.processes.exit_code.ExitCode`
        :raises TypeError or ValueError: if the passed arguments have incorrect type or value
        :raises aiida.common.exceptions.FeatureNotAvailable: always, until this method is implemented.
        """
        raise exceptions.FeatureNotAvailable(f'output parsing is not available for `{self.__class__.__name__}`')
33 changes: 32 additions & 1 deletion docs/source/topics/schedulers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Batch Job Schedulers
Batch job schedulers manage the job queues and execution on a compute resource.
AiiDA ships with plugins for a range of schedulers, and this section describes the interface of these plugins.

See :ref:`this how-to <how-to:plugin-codes:scheduler>` for adding support for custom schedulers.
Follow :ref:`these instructions <topics:schedulers:develop_plugin>` to add support for a custom scheduler.

PBSPro
------
Expand Down Expand Up @@ -214,6 +214,37 @@ And setting the fields using the ``metadata.options`` input dictionary of the |C
}
}

.. _topics:schedulers:develop_plugin:

Developing a plugin
-------------------

A scheduler plugin allows AiiDA to communicate with a specific type of scheduler.
The plugin should subclass the :class:`~aiida.schedulers.scheduler.Scheduler` class and implement a number of methods that tell AiiDA how certain key commands are to be executed, such as submitting a new job or requesting the currently active jobs.
To get you started, you can download :download:`this template <include/scheduler_template.py>` and implement the following methods:

1) ``_get_joblist_command``: return the command to report full information on existing jobs.
2) ``_get_detailed_job_info_command``: return the command to get the detailed information on a job, even after the job has finished.
3) ``_get_submit_script_header``: return the submit script header.
4) ``_get_submit_command``: return the string to submit a given script.
5) ``_parse_joblist_output``: parse the queue output string, as returned by executing the command returned by ``_get_joblist_command``.
6) ``_parse_submit_output``: parse the output of the submit command, as returned by executing the command returned by ``_get_submit_command``.
7) ``_get_kill_command``: return the command to kill the job with specified jobid.
8) ``_parse_kill_output``: parse the output of the kill command.
9) ``parse_output``: parse the output of the scheduler.

All these methods *have* to be implemented, except for ``_get_detailed_job_info_command`` and ``parse_output``, which are optional.
In addition to these methods, the ``_job_resource_class`` class attribute needs to be set to a subclass of :class:`~aiida.schedulers.datastructures.JobResource`.
For schedulers that work like SLURM, Torque and PBS, one can most likely simply reuse the :class:`~aiida.schedulers.datastructures.NodeNumberJobResource` class, which ships with ``aiida-core``.
Schedulers that work like LSF and SGE may be able to reuse :class:`~aiida.schedulers.datastructures.ParEnvJobResource` instead.
If neither of these works, one can implement a custom subclass; the template file already includes a starting point for this, the ``TemplateJobResource`` class.


.. note::

To inform AiiDA about your new scheduler plugin you must register an entry point in the ``aiida.schedulers`` entry point group.
Refer to :ref:`the section on how to register plugins <how-to:plugins-develop:entrypoints>` for instructions.


.. |NodeNumberJobResource| replace:: :py:class:`~aiida.schedulers.datastructures.NodeNumberJobResource`
.. |JobResource| replace:: :py:class:`~aiida.schedulers.datastructures.JobResource`
Expand Down