Skip to content

Commit

Permalink
Computer: add the default_memory_per_machine property (#5260)
Browse files Browse the repository at this point in the history
This property allows a user to specify for a `Computer` what default
amount of memory in kB should be requested for a `CalcJob` by the
scheduler. This was already possible through the `max_memory_kb` metadata
option of the `CalcJob`, but in this way, it is possible to specify the
default value that should be used for a computer. If both the computer
specifies a default, but the `max_memory_kb` option is specified, the
latter takes precedence.

Since the `max_memory_kb` already existed and specifies the memory in
kB, the same unit was chosen for the computer property for consistency.

The `verdi computer setup` command adds a new option to define this new
property when creating a new computer.

Co-authored-by: Sebastiaan Huber <mail@sphuber.net>
  • Loading branch information
yakutovicha and sphuber authored Dec 15, 2021
1 parent 2ee6dd9 commit ff1318b
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 38 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ sed -i "s|PLACEHOLDER_SSH_KEY|${HOME}/.ssh/slurm_rsa|" "${CONFIG}/slurm-ssh-conf
verdi setup --config "${CONFIG}/profile.yaml"

# set up localhost computer
verdi computer setup --config "${CONFIG}/localhost.yaml"
verdi computer setup --config "${CONFIG}/localhost.yaml" --non-interactive
verdi computer configure core.local localhost --config "${CONFIG}/localhost-config.yaml"
verdi computer test localhost
verdi code setup --config "${CONFIG}/doubler.yaml"
verdi code setup --config "${CONFIG}/add.yaml"

# set up slurm-ssh computer
verdi computer setup --config "${CONFIG}/slurm-ssh.yaml"
verdi computer setup --config "${CONFIG}/slurm-ssh.yaml" --non-interactive
verdi computer configure core.ssh slurm-ssh --config "${CONFIG}/slurm-ssh-config.yaml" -n # needs slurm container
verdi computer test slurm-ssh --print-traceback

Expand Down
5 changes: 5 additions & 0 deletions aiida/cmdline/commands/cmd_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def set_computer_builder(ctx, param, value):
@options_computer.WORKDIR()
@options_computer.MPI_RUN_COMMAND()
@options_computer.MPI_PROCS_PER_MACHINE()
@options_computer.DEFAULT_MEMORY_PER_MACHINE()
@options_computer.PREPEND_TEXT()
@options_computer.APPEND_TEXT()
@options.NON_INTERACTIVE()
Expand Down Expand Up @@ -253,6 +254,9 @@ def computer_setup(ctx, non_interactive, **kwargs):
@options_computer.WORKDIR(contextual_default=partial(get_parameter_default, 'work_dir'))
@options_computer.MPI_RUN_COMMAND(contextual_default=partial(get_parameter_default, 'mpirun_command'))
@options_computer.MPI_PROCS_PER_MACHINE(contextual_default=partial(get_parameter_default, 'mpiprocs_per_machine'))
@options_computer.DEFAULT_MEMORY_PER_MACHINE(
contextual_default=partial(get_parameter_default, 'default_memory_per_machine')
)
@options_computer.PREPEND_TEXT(contextual_default=partial(get_parameter_default, 'prepend_text'))
@options_computer.APPEND_TEXT(contextual_default=partial(get_parameter_default, 'append_text'))
@options.NON_INTERACTIVE()
Expand Down Expand Up @@ -384,6 +388,7 @@ def computer_show(computer):
['Shebang', computer.get_shebang()],
['Mpirun command', ' '.join(computer.get_mpirun_command())],
['Default #procs/machine', computer.get_default_mpiprocs_per_machine()],
['Default memory (kB)/machine', computer.get_default_memory_per_machine()],
['Prepend text', computer.get_prepend_text()],
['Append text', computer.get_append_text()],
]
Expand Down
8 changes: 8 additions & 0 deletions aiida/cmdline/params/options/commands/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ def should_call_default_mpiprocs_per_machine(ctx): # pylint: disable=invalid-na
'Use 0 to specify no default value.',
)

DEFAULT_MEMORY_PER_MACHINE = OverridableOption(
'--default-memory-per-machine',
prompt='Default amount of memory per machine (kB).',
cls=InteractiveOption,
type=click.INT,
help='The default amount of RAM (kB) that should be allocated per machine (node), if not otherwise specified.'
)

PREPEND_TEXT = OverridableOption(
'--prepend-text',
cls=TemplateInteractiveOption,
Expand Down
9 changes: 3 additions & 6 deletions aiida/engine/processes/calcjobs/calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,15 +744,12 @@ def presubmit(self, folder: Folder) -> CalcInfo:
priority = self.node.get_option('priority')
if priority is not None:
job_tmpl.priority = priority
max_memory_kb = self.node.get_option('max_memory_kb')
if max_memory_kb is not None:
job_tmpl.max_memory_kb = max_memory_kb

job_tmpl.max_memory_kb = self.node.get_option('max_memory_kb') or computer.get_default_memory_per_machine()

max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
if max_wallclock_seconds is not None:
job_tmpl.max_wallclock_seconds = max_wallclock_seconds
max_memory_kb = self.node.get_option('max_memory_kb')
if max_memory_kb is not None:
job_tmpl.max_memory_kb = max_memory_kb

submit_script_filename = self.node.get_option('submit_script_filename')
script_content = scheduler.get_submit_script(job_tmpl)
Expand Down
1 change: 1 addition & 0 deletions aiida/manage/tests/pytest_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def test_1(aiida_localhost):
computer.store()
computer.set_minimum_job_poll_interval(0.)
computer.set_default_mpiprocs_per_machine(1)
computer.set_default_memory_per_machine(100000)
computer.configure()

return computer
Expand Down
32 changes: 29 additions & 3 deletions aiida/orm/computers.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def validate(self) -> None:
self._transport_type_validator(self.transport_type)
self._scheduler_type_validator(self.scheduler_type)
self._workdir_validator(self.get_workdir())
self.default_memory_per_machine_validator(self.get_default_memory_per_machine())

try:
mpirun_cmd = self.get_mpirun_command()
Expand All @@ -264,6 +265,17 @@ def _default_mpiprocs_per_machine_validator(cls, def_cpus_per_machine: Optional[
'do not want to provide a default value.'
)

@classmethod
def default_memory_per_machine_validator(cls, def_memory_per_machine: Optional[int]) -> None:
"""Validates the default amount of memory (kB) per machine (node)"""
if def_memory_per_machine is None:
return

if not isinstance(def_memory_per_machine, int) or def_memory_per_machine <= 0:
raise exceptions.ValidationError(
f'Invalid value for def_memory_per_machine, must be a positive int, got: {def_memory_per_machine}'
)

def copy(self) -> 'Computer':
"""
Return a copy of the current object to work with, not stored yet.
Expand Down Expand Up @@ -463,11 +475,25 @@ def set_default_mpiprocs_per_machine(self, def_cpus_per_machine: Optional[int])
"""
if def_cpus_per_machine is None:
self.delete_property('default_mpiprocs_per_machine', raise_exception=False)
else:
if not isinstance(def_cpus_per_machine, int):
raise TypeError('def_cpus_per_machine must be an integer (or None)')
elif not isinstance(def_cpus_per_machine, int):
raise TypeError('def_cpus_per_machine must be an integer (or None)')
self.set_property('default_mpiprocs_per_machine', def_cpus_per_machine)

def get_default_memory_per_machine(self) -> Optional[int]:
"""
Return the default amount of memory (kB) per machine (node) for this computer,
or None if it was not set.
"""
return self.get_property('default_memory_per_machine', None)

def set_default_memory_per_machine(self, def_memory_per_machine: Optional[int]) -> None:
"""
Set the default amount of memory (kB) per machine (node) for this computer.
Accepts None if you do not want to set this value.
"""
self.default_memory_per_machine_validator(def_memory_per_machine)
self.set_property('default_memory_per_machine', def_memory_per_machine)

def get_minimum_job_poll_interval(self) -> float:
"""
Get the minimum interval between subsequent requests to update the list
Expand Down
16 changes: 15 additions & 1 deletion aiida/orm/utils/builders/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Manage computer objects with lazy loading of the db env"""

from aiida.cmdline.utils.decorators import with_dbenv
from aiida.common.exceptions import ValidationError
from aiida.common.utils import ErrorAccumulator


Expand Down Expand Up @@ -45,6 +45,7 @@ def get_computer_spec(computer):
spec['shebang'] = computer.get_shebang()
spec['mpirun_command'] = ' '.join(computer.get_mpirun_command())
spec['mpiprocs_per_machine'] = computer.get_default_mpiprocs_per_machine()
spec['default_memory_per_machine'] = computer.get_default_memory_per_machine()

return spec

Expand Down Expand Up @@ -99,6 +100,19 @@ def new(self):
)
computer.set_default_mpiprocs_per_machine(mpiprocs_per_machine)

def_memory_per_machine = self._get_and_count('default_memory_per_machine', used)
if def_memory_per_machine is not None:
try:
def_memory_per_machine = int(def_memory_per_machine)
except ValueError:
raise self.ComputerValidationError(
'Invalid value provided for memory_per_machine, must be a valid integer'
)
try:
computer.set_default_memory_per_machine(def_memory_per_machine)
except ValidationError as exception:
raise self.ComputerValidationError(f'Invalid value for `default_memory_per_machine`: {exception}')

mpirun_command_internal = self._get_and_count('mpirun_command', used).strip().split(' ')
if mpirun_command_internal == ['']:
mpirun_command_internal = []
Expand Down
70 changes: 44 additions & 26 deletions tests/cmdline/commands/test_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=unused-argument
# pylint: disable=unused-argument,invalid-name
"""Tests for the 'verdi computer' command."""
from collections import OrderedDict
import os
Expand Down Expand Up @@ -56,6 +56,7 @@ def generate_setup_options_dict(replace_args=None, non_interactive=True):
valid_noninteractive_options['work-dir'] = '/scratch/{username}/aiida_run'
valid_noninteractive_options['mpirun-command'] = 'mpirun -np {tot_num_mpiprocs}'
valid_noninteractive_options['mpiprocs-per-machine'] = '2'
valid_noninteractive_options['default-memory-per-machine'] = '1000000'
# Make them multiline to test also multiline options
valid_noninteractive_options['prepend-text'] = "date\necho 'second line'"
valid_noninteractive_options['append-text'] = "env\necho '444'\necho 'third line'"
Expand Down Expand Up @@ -88,7 +89,7 @@ def generate_setup_options(ordereddict):
return options


def generate_setup_options_interactive(ordereddict): # pylint: disable=invalid-name
def generate_setup_options_interactive(ordereddict):
"""
Given an (ordered) dict, returns a list of options
Expand Down Expand Up @@ -162,6 +163,8 @@ def test_mixed(run_cli_command):
assert new_computer.get_shebang() == options_dict_full['shebang']
assert new_computer.get_workdir() == options_dict_full['work-dir']
assert new_computer.get_default_mpiprocs_per_machine() == int(options_dict_full['mpiprocs-per-machine'])
assert new_computer.get_default_memory_per_machine() == int(options_dict_full['default-memory-per-machine'])

# For now I'm not writing anything in them
assert new_computer.get_prepend_text() == options_dict_full['prepend-text']
assert new_computer.get_append_text() == options_dict_full['append-text']
Expand All @@ -178,7 +181,6 @@ def test_noninteractive(run_cli_command, aiida_localhost, non_interactive_editor

result = run_cli_command(computer_setup, options)

assert result.exception is None, result.output[-1000:]
new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)

Expand All @@ -190,63 +192,82 @@ def test_noninteractive(run_cli_command, aiida_localhost, non_interactive_editor
assert new_computer.get_shebang() == options_dict['shebang']
assert new_computer.get_workdir() == options_dict['work-dir']
assert new_computer.get_default_mpiprocs_per_machine() == int(options_dict['mpiprocs-per-machine'])
assert new_computer.get_default_memory_per_machine() == int(options_dict['default-memory-per-machine'])
assert new_computer.get_prepend_text() == options_dict['prepend-text']
assert new_computer.get_append_text() == options_dict['append-text']

# Test that I cannot generate twice a computer with the same label
result = run_cli_command(computer_setup, options, catch_exceptions=False)
assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'already exists' in result.output


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs(run_cli_command):
"""
Check that if is ok not to specify mpiprocs-per-machine
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs'})
options_dict.pop('mpiprocs-per-machine')
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert result.exception is None, result.output[-1000:]
run_cli_command(computer_setup, options, catch_exceptions=False)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_mpiprocs_per_machine() is None


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs_2(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs_2(run_cli_command):
"""
Check that if is the specified value is zero, it means unspecified
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs_2'})
options_dict['mpiprocs-per-machine'] = 0
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert result.exception is None, result.output[-1000:]
run_cli_command(computer_setup, options, catch_exceptions=False)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_mpiprocs_per_machine() is None


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs_3(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs_3(run_cli_command):
"""
Check that it fails for a negative number of mpiprocs
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs_3'})
options_dict['mpiprocs-per-machine'] = -1
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'mpiprocs_per_machine, must be positive' in result.output


def test_noninteractive_optional_default_memory(run_cli_command):
"""
Check that if is ok not to specify default-memory-per-machine
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mem'})
options_dict.pop('default-memory-per-machine')
options = generate_setup_options(options_dict)
run_cli_command(computer_setup, options)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_memory_per_machine() is None


def test_noninteractive_optional_default_memory_invalid(run_cli_command):
"""
Check that it fails for a negative number of default_memory.
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_memory_3'})
options_dict['default-memory-per-machine'] = -1
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, raises=True)
assert 'Invalid value for def_memory_per_machine, must be a positive int, got: -1' in result.output


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_wrong_transport_fail(run_cli_command):
"""
Expand All @@ -255,8 +276,7 @@ def test_noninteractive_wrong_transport_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['transport'] = 'unknown_transport'
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)
assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert "entry point 'unknown_transport' is not valid" in result.output


Expand All @@ -268,9 +288,7 @@ def test_noninteractive_wrong_scheduler_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['scheduler'] = 'unknown_scheduler'
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert "entry point 'unknown_scheduler' is not valid" in result.output


Expand All @@ -282,9 +300,7 @@ def test_noninteractive_invalid_shebang_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['shebang'] = '/bin/bash' # Missing #! in front
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'The shebang line should start with' in result.output


Expand Down Expand Up @@ -338,6 +354,7 @@ def setUp(self):
self.comp_builder.prepend_text = ''
self.comp_builder.append_text = ''
self.comp_builder.mpiprocs_per_machine = 8
self.comp_builder.default_memory_per_machine = 100000
self.comp_builder.mpirun_command = 'mpirun'
self.comp_builder.shebang = '#!xonsh'

Expand Down Expand Up @@ -556,6 +573,7 @@ def setUpClass(cls, *args, **kwargs):
workdir='/tmp/aiida'
)
cls.comp.set_default_mpiprocs_per_machine(1)
cls.comp.set_default_memory_per_machine(1000000)
cls.comp.set_prepend_text('text to prepend')
cls.comp.set_append_text('text to append')
cls.comp.store()
Expand Down Expand Up @@ -707,7 +725,7 @@ def test_computer_duplicate_interactive(run_cli_command, aiida_localhost, non_in
"""Test 'verdi computer duplicate' in interactive mode."""
label = 'computer_duplicate_interactive'
computer = aiida_localhost
user_input = f'{label}\n\n\n\n\n\n\n\n\n'
user_input = f'{label}\n\n\n\n\n\n\n\n\n\n'
result = run_cli_command(computer_duplicate, [str(computer.pk)], user_input=user_input, catch_exceptions=False)
assert result.exception is None, result.output

Expand Down
1 change: 1 addition & 0 deletions tests/orm/test_computers.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def setUp(self):
self.comp_builder.prepend_text = ''
self.comp_builder.append_text = ''
self.comp_builder.mpiprocs_per_machine = 8
self.comp_builder.default_memory_per_machine = 1000000
self.comp_builder.mpirun_command = 'mpirun'
self.comp_builder.shebang = '#!xonsh'
self.user = orm.User.objects.get_default()
Expand Down

0 comments on commit ff1318b

Please sign in to comment.