diff --git a/aiida/backends/djsite/db/migrations/__init__.py b/aiida/backends/djsite/db/migrations/__init__.py index 7c77f529a0..9b91f83294 100644 --- a/aiida/backends/djsite/db/migrations/__init__.py +++ b/aiida/backends/djsite/db/migrations/__init__.py @@ -14,6 +14,7 @@ from aiida.backends.manager import SCHEMA_VERSION_KEY, SCHEMA_VERSION_DESCRIPTION from aiida.backends.manager import SCHEMA_GENERATION_KEY, SCHEMA_GENERATION_DESCRIPTION from aiida.common.exceptions import AiidaException, DbContentError +from aiida.manage.configuration import get_config_option class DeserializationException(AiidaException): @@ -649,7 +650,7 @@ def set_value( # so in general it is good to recursively clean # all sub-items. self.del_value(key, subspecifier_value=subspecifier_value) - cls.objects.bulk_create(to_store) + cls.objects.bulk_create(to_store, batch_size=get_config_option('db.batch_size')) if with_transaction: transaction.savepoint_commit(sid) diff --git a/aiida/manage/configuration/options.py b/aiida/manage/configuration/options.py index 9033cb580a..f34ad758b7 100644 --- a/aiida/manage/configuration/options.py +++ b/aiida/manage/configuration/options.py @@ -21,70 +21,6 @@ 'Option', ['name', 'key', 'valid_type', 'valid_values', 'default', 'description', 'global_only'] ) - -def get_option(option_name): - """Return a configuration option.configuration - - :param option_name: the name of the configuration option - :return: the configuration option - :raises ValueError: if the configuration option does not exist - """ - try: - option = Option(option_name, **CONFIG_OPTIONS[option_name]) - except KeyError: - raise ValueError('the option {} does not exist'.format(option_name)) - else: - return option - - -def get_option_names(): - """Return a list of available option names. - - :return: list of available option names - """ - return CONFIG_OPTIONS.keys() - - -def parse_option(option_name, option_value): - """Parse and validate a value for a configuration option. 
- - :param option_name: the name of the configuration option - :param option_value: the option value - :return: a tuple of the option and the parsed value - """ - option = get_option(option_name) - - value = False - - if option.valid_type == 'bool': - if isinstance(option_value, str): - if option_value.strip().lower() in ['0', 'false', 'f']: - value = False - elif option_value.strip().lower() in ['1', 'true', 't']: - value = True - else: - raise ValueError('option {} expects a boolean value'.format(option.name)) - else: - value = bool(option_value) - elif option.valid_type == 'string': - value = str(option_value) - elif option.valid_type == 'int': - value = int(option_value) - elif option.valid_type == 'list_of_str': - value = option_value.split() - else: - raise NotImplementedError('Type string {} not implemented yet'.format(option.valid_type)) - - if option.valid_values is not None: - if value not in option.valid_values: - raise ValueError( - '{} is not among the list of accepted values for option {}.\nThe valid values are: ' - '{}'.format(value, option.name, ', '.join(option.valid_values)) - ) - - return option, value - - CONFIG_OPTIONS = { 'runner.poll.interval': { 'key': 'runner_poll_interval', @@ -102,6 +38,16 @@ def parse_option(option_name, option_value): 'description': 'The timeout in seconds for calls to the circus client', 'global_only': False, }, + 'db.batch_size': { + 'key': 'db_batch_size', + 'valid_type': 'int', + 'valid_values': None, + 'default': 100000, + 'description': + 'Batch size for bulk CREATE operations in the database. 
Avoids hitting MaxAllocSize of PostgreSQL ' + '(1GB) when creating large numbers of database records in one go.', + 'global_only': False, + }, 'verdi.shell.auto_import': { 'key': 'verdi_shell_auto_import', 'valid_type': 'string', @@ -223,3 +169,66 @@ def parse_option(option_name, option_value): 'global_only': False, }, } + + +def get_option(option_name): + """Return a configuration option. + + :param option_name: the name of the configuration option + :return: the configuration option + :raises ValueError: if the configuration option does not exist + """ + try: + option = Option(option_name, **CONFIG_OPTIONS[option_name]) + except KeyError: + raise ValueError('the option {} does not exist'.format(option_name)) + else: + return option + + +def get_option_names(): + """Return a list of available option names. + + :return: list of available option names + """ + return CONFIG_OPTIONS.keys() + + +def parse_option(option_name, option_value): + """Parse and validate a value for a configuration option.
+ + :param option_name: the name of the configuration option + :param option_value: the option value + :return: a tuple of the option and the parsed value + """ + option = get_option(option_name) + + value = False + + if option.valid_type == 'bool': + if isinstance(option_value, str): + if option_value.strip().lower() in ['0', 'false', 'f']: + value = False + elif option_value.strip().lower() in ['1', 'true', 't']: + value = True + else: + raise ValueError('option {} expects a boolean value'.format(option.name)) + else: + value = bool(option_value) + elif option.valid_type == 'string': + value = str(option_value) + elif option.valid_type == 'int': + value = int(option_value) + elif option.valid_type == 'list_of_str': + value = option_value.split() + else: + raise NotImplementedError('Type string {} not implemented yet'.format(option.valid_type)) + + if option.valid_values is not None: + if value not in option.valid_values: + raise ValueError( + '{} is not among the list of accepted values for option {}.\nThe valid values are: ' + '{}'.format(value, option.name, ', '.join(option.valid_values)) + ) + + return option, value diff --git a/aiida/tools/importexport/dbimport/backends/django/__init__.py b/aiida/tools/importexport/dbimport/backends/django/__init__.py index cad3b6eaab..d97ad70d1d 100644 --- a/aiida/tools/importexport/dbimport/backends/django/__init__.py +++ b/aiida/tools/importexport/dbimport/backends/django/__init__.py @@ -31,6 +31,7 @@ from aiida.tools.importexport.common.config import entity_names_to_signatures from aiida.tools.importexport.common.utils import export_shard_uuid from aiida.tools.importexport.dbimport.backends.utils import deserialize_field, merge_comment, merge_extras +from aiida.manage.configuration import get_config_option def import_data_dj( @@ -229,6 +230,10 @@ def import_data_dj( # IMPORT DATA # ############### # DO ALL WITH A TRANSACTION + + # batch size for bulk create operations + batch_size = get_config_option('db.batch_size') + 
with transaction.atomic(): foreign_ids_reverse_mappings = {} new_entries = {} @@ -471,9 +476,9 @@ def import_data_dj( if 'mtime' in [field.name for field in model._meta.local_fields]: with models.suppress_auto_now([(model, ['mtime'])]): # Store them all in once; however, the PK are not set in this way... - model.objects.bulk_create(objects_to_create) + model.objects.bulk_create(objects_to_create, batch_size=batch_size) else: - model.objects.bulk_create(objects_to_create) + model.objects.bulk_create(objects_to_create, batch_size=batch_size) # Get back the just-saved entries just_saved_queryset = model.objects.filter( @@ -625,7 +630,7 @@ def import_data_dj( if not silent: print(' ({} new links...)'.format(len(links_to_store))) - models.DbLink.objects.bulk_create(links_to_store) + models.DbLink.objects.bulk_create(links_to_store, batch_size=batch_size) else: if not silent: print(' (0 new links...)')