Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add default_memory_per_machine attribute to Computer. #5260

Merged
4 changes: 2 additions & 2 deletions .github/workflows/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ sed -i "s|PLACEHOLDER_SSH_KEY|${HOME}/.ssh/slurm_rsa|" "${CONFIG}/slurm-ssh-conf
verdi setup --config "${CONFIG}/profile.yaml"

# set up localhost computer
verdi computer setup --config "${CONFIG}/localhost.yaml"
verdi computer setup --config "${CONFIG}/localhost.yaml" --non-interactive
verdi computer configure core.local localhost --config "${CONFIG}/localhost-config.yaml"
verdi computer test localhost
verdi code setup --config "${CONFIG}/doubler.yaml"
verdi code setup --config "${CONFIG}/add.yaml"

# set up slurm-ssh computer
verdi computer setup --config "${CONFIG}/slurm-ssh.yaml"
verdi computer setup --config "${CONFIG}/slurm-ssh.yaml" --non-interactive
verdi computer configure core.ssh slurm-ssh --config "${CONFIG}/slurm-ssh-config.yaml" -n # needs slurm container
verdi computer test slurm-ssh --print-traceback

Expand Down
5 changes: 5 additions & 0 deletions aiida/cmdline/commands/cmd_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def set_computer_builder(ctx, param, value):
@options_computer.WORKDIR()
@options_computer.MPI_RUN_COMMAND()
@options_computer.MPI_PROCS_PER_MACHINE()
@options_computer.DEFAULT_MEMORY_PER_MACHINE()
@options_computer.PREPEND_TEXT()
@options_computer.APPEND_TEXT()
@options.NON_INTERACTIVE()
Expand Down Expand Up @@ -253,6 +254,9 @@ def computer_setup(ctx, non_interactive, **kwargs):
@options_computer.WORKDIR(contextual_default=partial(get_parameter_default, 'work_dir'))
@options_computer.MPI_RUN_COMMAND(contextual_default=partial(get_parameter_default, 'mpirun_command'))
@options_computer.MPI_PROCS_PER_MACHINE(contextual_default=partial(get_parameter_default, 'mpiprocs_per_machine'))
@options_computer.DEFAULT_MEMORY_PER_MACHINE(
contextual_default=partial(get_parameter_default, 'default_memory_per_machine')
)
@options_computer.PREPEND_TEXT(contextual_default=partial(get_parameter_default, 'prepend_text'))
@options_computer.APPEND_TEXT(contextual_default=partial(get_parameter_default, 'append_text'))
@options.NON_INTERACTIVE()
Expand Down Expand Up @@ -384,6 +388,7 @@ def computer_show(computer):
['Shebang', computer.get_shebang()],
['Mpirun command', ' '.join(computer.get_mpirun_command())],
['Default #procs/machine', computer.get_default_mpiprocs_per_machine()],
['Default memory (kB)/machine', computer.get_default_memory_per_machine()],
['Prepend text', computer.get_prepend_text()],
['Append text', computer.get_append_text()],
]
Expand Down
8 changes: 8 additions & 0 deletions aiida/cmdline/params/options/commands/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,14 @@ def should_call_default_mpiprocs_per_machine(ctx): # pylint: disable=invalid-na
'Use 0 to specify no default value.',
)

DEFAULT_MEMORY_PER_MACHINE = OverridableOption(
'--default-memory-per-machine',
prompt='Default amount of memory per machine (kB).',
cls=InteractiveOption,
sphuber marked this conversation as resolved.
Show resolved Hide resolved
type=click.INT,
sphuber marked this conversation as resolved.
Show resolved Hide resolved
help='The default amount of RAM (kB) that should be allocated per machine (node), if not otherwise specified.'
)

PREPEND_TEXT = OverridableOption(
'--prepend-text',
cls=TemplateInteractiveOption,
Expand Down
9 changes: 3 additions & 6 deletions aiida/engine/processes/calcjobs/calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,15 +744,12 @@ def presubmit(self, folder: Folder) -> CalcInfo:
priority = self.node.get_option('priority')
if priority is not None:
job_tmpl.priority = priority
max_memory_kb = self.node.get_option('max_memory_kb')
if max_memory_kb is not None:
job_tmpl.max_memory_kb = max_memory_kb

job_tmpl.max_memory_kb = self.node.get_option('max_memory_kb') or computer.get_default_memory_per_machine()

max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
if max_wallclock_seconds is not None:
job_tmpl.max_wallclock_seconds = max_wallclock_seconds
max_memory_kb = self.node.get_option('max_memory_kb')
if max_memory_kb is not None:
job_tmpl.max_memory_kb = max_memory_kb

submit_script_filename = self.node.get_option('submit_script_filename')
script_content = scheduler.get_submit_script(job_tmpl)
Expand Down
1 change: 1 addition & 0 deletions aiida/manage/tests/pytest_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def test_1(aiida_localhost):
computer.store()
computer.set_minimum_job_poll_interval(0.)
computer.set_default_mpiprocs_per_machine(1)
computer.set_default_memory_per_machine(100000)
computer.configure()

return computer
Expand Down
32 changes: 29 additions & 3 deletions aiida/orm/computers.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def validate(self) -> None:
self._transport_type_validator(self.transport_type)
self._scheduler_type_validator(self.scheduler_type)
self._workdir_validator(self.get_workdir())
self.default_memory_per_machine_validator(self.get_default_memory_per_machine())

try:
mpirun_cmd = self.get_mpirun_command()
Expand All @@ -264,6 +265,17 @@ def _default_mpiprocs_per_machine_validator(cls, def_cpus_per_machine: Optional[
'do not want to provide a default value.'
)

@classmethod
def default_memory_per_machine_validator(cls, def_memory_per_machine: Optional[int]) -> None:
"""Validates the default amount of memory (kB) per machine (node)"""
if def_memory_per_machine is None:
return

if not isinstance(def_memory_per_machine, int) or def_memory_per_machine <= 0:
raise exceptions.ValidationError(
f'Invalid value for def_memory_per_machine, must be a positive int, got: {def_memory_per_machine}'
)

def copy(self) -> 'Computer':
"""
Return a copy of the current object to work with, not stored yet.
Expand Down Expand Up @@ -463,11 +475,25 @@ def set_default_mpiprocs_per_machine(self, def_cpus_per_machine: Optional[int])
"""
if def_cpus_per_machine is None:
self.delete_property('default_mpiprocs_per_machine', raise_exception=False)
else:
if not isinstance(def_cpus_per_machine, int):
raise TypeError('def_cpus_per_machine must be an integer (or None)')
elif not isinstance(def_cpus_per_machine, int):
raise TypeError('def_cpus_per_machine must be an integer (or None)')
self.set_property('default_mpiprocs_per_machine', def_cpus_per_machine)

def get_default_memory_per_machine(self) -> Optional[int]:
"""
Return the default amount of memory (kB) per machine (node) for this computer,
or None if it was not set.
"""
return self.get_property('default_memory_per_machine', None)

def set_default_memory_per_machine(self, def_memory_per_machine: Optional[int]) -> None:
"""
Set the default amount of memory (kB) per machine (node) for this computer.
Accepts None if you do not want to set this value.
"""
self.default_memory_per_machine_validator(def_memory_per_machine)
self.set_property('default_memory_per_machine', def_memory_per_machine)

def get_minimum_job_poll_interval(self) -> float:
"""
Get the minimum interval between subsequent requests to update the list
Expand Down
16 changes: 15 additions & 1 deletion aiida/orm/utils/builders/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Manage computer objects with lazy loading of the db env"""

from aiida.cmdline.utils.decorators import with_dbenv
from aiida.common.exceptions import ValidationError
from aiida.common.utils import ErrorAccumulator


Expand Down Expand Up @@ -45,6 +45,7 @@ def get_computer_spec(computer):
spec['shebang'] = computer.get_shebang()
spec['mpirun_command'] = ' '.join(computer.get_mpirun_command())
spec['mpiprocs_per_machine'] = computer.get_default_mpiprocs_per_machine()
spec['default_memory_per_machine'] = computer.get_default_memory_per_machine()

return spec

Expand Down Expand Up @@ -99,6 +100,19 @@ def new(self):
)
computer.set_default_mpiprocs_per_machine(mpiprocs_per_machine)

def_memory_per_machine = self._get_and_count('default_memory_per_machine', used)
if def_memory_per_machine is not None:
try:
def_memory_per_machine = int(def_memory_per_machine)
except ValueError:
raise self.ComputerValidationError(
'Invalid value provided for memory_per_machine, must be a valid integer'
)
try:
computer.set_default_memory_per_machine(def_memory_per_machine)
except ValidationError as exception:
raise self.ComputerValidationError(f'Invalid value for `default_memory_per_machine`: {exception}')

mpirun_command_internal = self._get_and_count('mpirun_command', used).strip().split(' ')
if mpirun_command_internal == ['']:
mpirun_command_internal = []
Expand Down
70 changes: 44 additions & 26 deletions tests/cmdline/commands/test_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=unused-argument
# pylint: disable=unused-argument,invalid-name
"""Tests for the 'verdi computer' command."""
from collections import OrderedDict
import os
Expand Down Expand Up @@ -56,6 +56,7 @@ def generate_setup_options_dict(replace_args=None, non_interactive=True):
valid_noninteractive_options['work-dir'] = '/scratch/{username}/aiida_run'
valid_noninteractive_options['mpirun-command'] = 'mpirun -np {tot_num_mpiprocs}'
valid_noninteractive_options['mpiprocs-per-machine'] = '2'
valid_noninteractive_options['default-memory-per-machine'] = '1000000'
# Make them multiline to test also multiline options
valid_noninteractive_options['prepend-text'] = "date\necho 'second line'"
valid_noninteractive_options['append-text'] = "env\necho '444'\necho 'third line'"
Expand Down Expand Up @@ -88,7 +89,7 @@ def generate_setup_options(ordereddict):
return options


def generate_setup_options_interactive(ordereddict): # pylint: disable=invalid-name
def generate_setup_options_interactive(ordereddict):
"""
Given an (ordered) dict, returns a list of options

Expand Down Expand Up @@ -162,6 +163,8 @@ def test_mixed(run_cli_command):
assert new_computer.get_shebang() == options_dict_full['shebang']
assert new_computer.get_workdir() == options_dict_full['work-dir']
assert new_computer.get_default_mpiprocs_per_machine() == int(options_dict_full['mpiprocs-per-machine'])
assert new_computer.get_default_memory_per_machine() == int(options_dict_full['default-memory-per-machine'])

# For now I'm not writing anything in them
assert new_computer.get_prepend_text() == options_dict_full['prepend-text']
assert new_computer.get_append_text() == options_dict_full['append-text']
Expand All @@ -178,7 +181,6 @@ def test_noninteractive(run_cli_command, aiida_localhost, non_interactive_editor

result = run_cli_command(computer_setup, options)

assert result.exception is None, result.output[-1000:]
new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)

Expand All @@ -190,63 +192,82 @@ def test_noninteractive(run_cli_command, aiida_localhost, non_interactive_editor
assert new_computer.get_shebang() == options_dict['shebang']
assert new_computer.get_workdir() == options_dict['work-dir']
assert new_computer.get_default_mpiprocs_per_machine() == int(options_dict['mpiprocs-per-machine'])
assert new_computer.get_default_memory_per_machine() == int(options_dict['default-memory-per-machine'])
assert new_computer.get_prepend_text() == options_dict['prepend-text']
assert new_computer.get_append_text() == options_dict['append-text']

# Test that I cannot generate twice a computer with the same label
result = run_cli_command(computer_setup, options, catch_exceptions=False)
assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'already exists' in result.output


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs(run_cli_command):
"""
Check that if is ok not to specify mpiprocs-per-machine
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs'})
options_dict.pop('mpiprocs-per-machine')
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert result.exception is None, result.output[-1000:]
run_cli_command(computer_setup, options, catch_exceptions=False)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_mpiprocs_per_machine() is None


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs_2(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs_2(run_cli_command):
"""
Check that if is the specified value is zero, it means unspecified
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs_2'})
options_dict['mpiprocs-per-machine'] = 0
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert result.exception is None, result.output[-1000:]
run_cli_command(computer_setup, options, catch_exceptions=False)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_mpiprocs_per_machine() is None


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_optional_default_mpiprocs_3(run_cli_command): # pylint: disable=invalid-name
def test_noninteractive_optional_default_mpiprocs_3(run_cli_command):
"""
Check that it fails for a negative number of mpiprocs
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mpiprocs_3'})
options_dict['mpiprocs-per-machine'] = -1
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'mpiprocs_per_machine, must be positive' in result.output


def test_noninteractive_optional_default_memory(run_cli_command):
"""
Check that if is ok not to specify default-memory-per-machine
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_mem'})
options_dict.pop('default-memory-per-machine')
options = generate_setup_options(options_dict)
run_cli_command(computer_setup, options)

new_computer = orm.Computer.objects.get(label=options_dict['label'])
assert isinstance(new_computer, orm.Computer)
assert new_computer.get_default_memory_per_machine() is None


def test_noninteractive_optional_default_memory_invalid(run_cli_command):
"""
Check that it fails for a negative number of default_memory.
"""
options_dict = generate_setup_options_dict({'label': 'computer_default_memory_3'})
options_dict['default-memory-per-machine'] = -1
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, raises=True)
assert 'Invalid value for def_memory_per_machine, must be a positive int, got: -1' in result.output


@pytest.mark.usefixtures('clear_database_before_test')
def test_noninteractive_wrong_transport_fail(run_cli_command):
"""
Expand All @@ -255,8 +276,7 @@ def test_noninteractive_wrong_transport_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['transport'] = 'unknown_transport'
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)
assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert "entry point 'unknown_transport' is not valid" in result.output


Expand All @@ -268,9 +288,7 @@ def test_noninteractive_wrong_scheduler_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['scheduler'] = 'unknown_scheduler'
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert "entry point 'unknown_scheduler' is not valid" in result.output


Expand All @@ -282,9 +300,7 @@ def test_noninteractive_invalid_shebang_fail(run_cli_command):
options_dict = generate_setup_options_dict(replace_args={'label': 'fail_computer'})
options_dict['shebang'] = '/bin/bash' # Missing #! in front
options = generate_setup_options(options_dict)
result = run_cli_command(computer_setup, options, catch_exceptions=False)

assert isinstance(result.exception, SystemExit)
result = run_cli_command(computer_setup, options, raises=True)
assert 'The shebang line should start with' in result.output


Expand Down Expand Up @@ -338,6 +354,7 @@ def setUp(self):
self.comp_builder.prepend_text = ''
self.comp_builder.append_text = ''
self.comp_builder.mpiprocs_per_machine = 8
self.comp_builder.default_memory_per_machine = 100000
self.comp_builder.mpirun_command = 'mpirun'
self.comp_builder.shebang = '#!xonsh'

Expand Down Expand Up @@ -556,6 +573,7 @@ def setUpClass(cls, *args, **kwargs):
workdir='/tmp/aiida'
)
cls.comp.set_default_mpiprocs_per_machine(1)
cls.comp.set_default_memory_per_machine(1000000)
cls.comp.set_prepend_text('text to prepend')
cls.comp.set_append_text('text to append')
cls.comp.store()
Expand Down Expand Up @@ -707,7 +725,7 @@ def test_computer_duplicate_interactive(run_cli_command, aiida_localhost, non_in
"""Test 'verdi computer duplicate' in interactive mode."""
label = 'computer_duplicate_interactive'
computer = aiida_localhost
user_input = f'{label}\n\n\n\n\n\n\n\n\n'
user_input = f'{label}\n\n\n\n\n\n\n\n\n\n'
result = run_cli_command(computer_duplicate, [str(computer.pk)], user_input=user_input, catch_exceptions=False)
assert result.exception is None, result.output

Expand Down
1 change: 1 addition & 0 deletions tests/orm/test_computers.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def setUp(self):
self.comp_builder.prepend_text = ''
self.comp_builder.append_text = ''
self.comp_builder.mpiprocs_per_machine = 8
self.comp_builder.default_memory_per_machine = 1000000
self.comp_builder.mpirun_command = 'mpirun'
self.comp_builder.shebang = '#!xonsh'
self.user = orm.User.objects.get_default()
Expand Down