Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the new file repository #4345

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
include aiida/cmdline/templates/*.tpl
include aiida/manage/backup/backup_info.json.tmpl
include aiida/manage/configuration/schema/*.json
include setup.json
include AUTHORS.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def verify_node_uuid_uniqueness(_, __):

:raises: IntegrityError if database contains nodes with duplicate UUIDS.
"""
from aiida.manage.database.integrity.duplicate_uuid import verify_uuid_uniqueness
from aiida.backends.general.migrations.utils import verify_uuid_uniqueness
verify_uuid_uniqueness(table='db_dbnode')


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=invalid-name,too-few-public-methods
"""Migration to add the `repository_metadata` JSONB column."""

# pylint: disable=no-name-in-module,import-error
import django.contrib.postgres.fields.jsonb
from django.db import migrations
from aiida.backends.djsite.db.migrations import upgrade_schema_version

# Revision identifiers passed to `upgrade_schema_version` in the operations of the migration below.
# NOTE(review): presumably `REVISION` is the schema version after this migration has been applied and
# `DOWN_REVISION` the version it upgrades from - confirm against `upgrade_schema_version`.
REVISION = '1.0.46'
DOWN_REVISION = '1.0.45'


class Migration(migrations.Migration):
    """Schema migration that adds the nullable `repository_metadata` JSON column to the `dbnode` model."""

    # Must be applied on top of the migration that introduced the group extras.
    dependencies = [('db', '0045_dbgroup_extras')]

    operations = [
        # Add the new column; `null=True` means existing rows need no default value.
        migrations.AddField(
            'dbnode',
            'repository_metadata',
            django.contrib.postgres.fields.jsonb.JSONField(null=True),
        ),
        # Record the schema version change that accompanies this migration.
        upgrade_schema_version(REVISION, DOWN_REVISION),
    ]
132 changes: 132 additions & 0 deletions aiida/backends/djsite/db/migrations/0047_migrate_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=invalid-name,too-few-public-methods
"""Migrate the file repository to the new disk object store based implementation."""
# pylint: disable=no-name-in-module,import-error
from django.core.exceptions import ObjectDoesNotExist
from django.db import migrations

from aiida.backends.djsite.db.migrations import upgrade_schema_version
from aiida.backends.general.migrations import utils
from aiida.cmdline.utils import echo

# Revision identifiers passed to `upgrade_schema_version` in the operations of the migration below.
# NOTE(review): presumably `REVISION` is the schema version after this migration has been applied and
# `DOWN_REVISION` the version it upgrades from - confirm against `upgrade_schema_version`.
REVISION = '1.0.47'
DOWN_REVISION = '1.0.46'

# Key of the `db_dbsetting` row that holds the UUID of the repository container; this is the same key that
# `migrate_repository` inserts at the end of the migration.
REPOSITORY_UUID_KEY = 'repository|uuid'


def migrate_repository(apps, schema_editor):
    """Migrate the legacy file repository to the disk object store and record the result in the database.

    The legacy repository is processed in 256 shards, named ``00`` through ``ff``. For every shard
    ``utils.migrate_legacy_repository`` is called, which returns a mapping of node UUIDs to repository metadata
    (or ``None``) and a collection of repository folders that were missing a required subfolder. Each non-empty
    metadata entry is stored in the ``repository_metadata`` column of the node with the matching UUID. UUIDs for
    which no node exists are collected instead of raising.

    Afterwards the UUID of the repository container of the current profile is inserted in the ``db_dbsetting``
    table under ``REPOSITORY_UUID_KEY``, unless a row with that key already exists.

    For profiles that are not test profiles, the collected missing node UUIDs and missing repository folders are
    each dumped to a JSON log file in the current working directory and a warning is echoed. If the database
    contains at least one node, a final warning points to the location of the old repository, which is not deleted.

    :param apps: the app registry from which the ``DbNode`` model is retrieved.
    :param schema_editor: the schema editor whose database connection is used to execute the raw SQL statement.
    """
    # pylint: disable=too-many-locals
    import json
    from tempfile import NamedTemporaryFile
    from aiida.common.progress_reporter import set_progress_bar_tqdm, get_progress_reporter
    from aiida.manage.configuration import get_profile

    DbNode = apps.get_model('db', 'DbNode')

    profile = get_profile()
    node_count = DbNode.objects.count()
    missing_node_uuids = []
    missing_repo_folder = []
    shard_count = 256

    set_progress_bar_tqdm()

    with get_progress_reporter()(total=shard_count, desc='Migrating file repository') as progress:
        for i in range(shard_count):

            shard = '%.2x' % i  # noqa flynt
            progress.set_description_str(f'Migrating file repository: shard {shard}')

            mapping_node_repository_metadata, missing_sub_repo_folder = utils.migrate_legacy_repository(
                node_count, shard
            )

            if missing_sub_repo_folder:
                missing_repo_folder.extend(missing_sub_repo_folder)
                del missing_sub_repo_folder

            # A shard without a mapping has nothing to store, but it still has to advance the progress bar below,
            # otherwise the bar never reaches `shard_count`.
            if mapping_node_repository_metadata is not None:

                for node_uuid, repository_metadata in mapping_node_repository_metadata.items():

                    # If `repository_metadata` is `{}` or `None`, we skip it, as we can leave the column default
                    # `null`.
                    if not repository_metadata:
                        continue

                    try:
                        node = DbNode.objects.get(uuid=node_uuid)
                    except ObjectDoesNotExist:
                        # This can happen if the node was deleted but the repo folder wasn't, or the repo folder
                        # just never corresponded to an actual node. In any case, we don't want to fail but just
                        # log the warning.
                        missing_node_uuids.append((node_uuid, repository_metadata))
                    else:
                        node.repository_metadata = repository_metadata
                        node.save()

            # Drop the reference to the (potentially large) mapping before the next shard is processed.
            del mapping_node_repository_metadata
            progress.update()

    # Store the UUID of the repository container in the `DbSetting` table. Note that for new databases, the profile
    # setup will already have stored the UUID and so it should be skipped, or an exception for a duplicate key will be
    # raised. This migration step is only necessary for existing databases that are migrated.
    container_id = profile.get_repository_container().container_id
    with schema_editor.connection.cursor() as cursor:
        # The values are passed as query parameters, instead of being interpolated in the SQL string, such that the
        # database driver takes care of the quoting.
        cursor.execute(
            """
            INSERT INTO db_dbsetting (key, val, description, time)
            VALUES (%s, to_json(%s::text), 'Repository UUID', current_timestamp)
            ON CONFLICT (key) DO NOTHING;
            """,
            [REPOSITORY_UUID_KEY, str(container_id)],
        )

    if not profile.is_test_profile:

        if missing_node_uuids:
            prefix = 'migration-repository-missing-nodes-'
            # `delete=False` keeps the log file on disk after the handle is closed.
            with NamedTemporaryFile(prefix=prefix, suffix='.json', dir='.', mode='w+', delete=False) as handle:
                json.dump(missing_node_uuids, handle)
                echo.echo_warning(
                    '\nDetected node repository folders for nodes that do not exist in the database. The UUIDs of '
                    f'those nodes have been written to a log file: {handle.name}'
                )

        if missing_repo_folder:
            prefix = 'migration-repository-missing-subfolder-'
            # `delete=False` keeps the log file on disk after the handle is closed.
            with NamedTemporaryFile(prefix=prefix, suffix='.json', dir='.', mode='w+', delete=False) as handle:
                json.dump(missing_repo_folder, handle)
                echo.echo_warning(
                    '\nDetected repository folders that were missing the required subfolder `path` or `raw_input`.'
                    f' The paths of those nodes repository folders have been written to a log file: {handle.name}'
                )

        # If there were no nodes, most likely a new profile, there is no need to print the warning
        if node_count:
            import pathlib
            echo.echo_warning(
                '\nMigrated file repository to the new disk object store. The old repository has not been deleted '
                f'out of safety and can be found at {pathlib.Path(profile.repository_path, "repository")}.'
            )


class Migration(migrations.Migration):
    """Data migration that moves the file repository to the disk object store based implementation."""

    # The `repository_metadata` column written by `migrate_repository` is added by the preceding migration.
    dependencies = [('db', '0046_add_node_repository_metadata')]

    operations = [
        # Forward: run the repository migration. Reverse: a no-op, so the data migration is not undone.
        migrations.RunPython(code=migrate_repository, reverse_code=migrations.RunPython.noop),
        # Record the schema version change that accompanies this migration.
        upgrade_schema_version(REVISION, DOWN_REVISION),
    ]
2 changes: 1 addition & 1 deletion aiida/backends/djsite/db/migrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DeserializationException(AiidaException):
pass


LATEST_MIGRATION = '0045_dbgroup_extras'
LATEST_MIGRATION = '0047_migrate_repository'


def _update_schema_version(version, apps, _):
Expand Down
1 change: 1 addition & 0 deletions aiida/backends/djsite/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class DbNode(m.Model):
attributes = JSONField(default=dict, null=True)
# JSON Extras
extras = JSONField(default=dict, null=True)
repository_metadata = JSONField(null=True)

objects = m.Manager()
# Return aiida Node instances or their subclasses instead of DbNode instances
Expand Down
6 changes: 3 additions & 3 deletions aiida/backends/djsite/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_schema_generation_database(self):
from django.db.utils import ProgrammingError
from aiida.manage.manager import get_manager

backend = get_manager()._load_backend(schema_check=False) # pylint: disable=protected-access
backend = get_manager()._load_backend(schema_check=False, repository_check=False) # pylint: disable=protected-access

try:
result = backend.execute_raw(r"""SELECT tval FROM db_dbsetting WHERE key = 'schema_generation';""")
Expand All @@ -104,7 +104,7 @@ def get_schema_version_database(self):
from django.db.utils import ProgrammingError
from aiida.manage.manager import get_manager

backend = get_manager()._load_backend(schema_check=False) # pylint: disable=protected-access
backend = get_manager()._load_backend(schema_check=False, repository_check=False) # pylint: disable=protected-access

try:
result = backend.execute_raw(r"""SELECT tval FROM db_dbsetting WHERE key = 'db|schemaversion';""")
Expand All @@ -129,7 +129,7 @@ def _migrate_database_generation(self):
from aiida.manage.manager import get_manager
super()._migrate_database_generation()

backend = get_manager()._load_backend(schema_check=False) # pylint: disable=protected-access
backend = get_manager()._load_backend(schema_check=False, repository_check=False) # pylint: disable=protected-access
backend.execute_raw(r"""DELETE FROM django_migrations WHERE app = 'db';""")
backend.execute_raw(
r"""INSERT INTO django_migrations (app, name, applied) VALUES ('db', '0001_initial', NOW());"""
Expand Down
Loading