Skip to content

Commit

Permalink
Add checks for unexpected links in provenance redesign migration
Browse files Browse the repository at this point in the history
The migration for the provenance redesign deletes illegal links, such as
`return` and `call` links outgoing from calculation nodes. Because the
migration simply deletes them, we first detect these illegal links and, if
any are found, print a warning to the console and write the offending links
to a migration log file.

This way, the user could potentially restore the deleted links.
  • Loading branch information
sphuber authored and giovannipizzi committed Dec 11, 2018
1 parent 6a1d7f3 commit 42bc788
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 139 deletions.
2 changes: 0 additions & 2 deletions aiida/backends/djsite/db/.gitignore

This file was deleted.

24 changes: 0 additions & 24 deletions aiida/backends/djsite/db/admin.py

This file was deleted.

48 changes: 28 additions & 20 deletions aiida/backends/djsite/db/migrations/0020_provenance_redesign.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from __future__ import absolute_import

# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed
# pylint: disable=no-name-in-module,import-error,line-too-long
# pylint: disable=no-name-in-module,import-error
from django.db import migrations
from aiida.backends.djsite.db.migrations import upgrade_schema_version

Expand All @@ -23,10 +23,7 @@ def migrate_infer_calculation_entry_point(apps, schema_editor):
:raises: IntegrityError if database contains nodes with duplicate UUIDS.
"""
from tabulate import tabulate
from tempfile import NamedTemporaryFile

from aiida.cmdline.utils import echo
from aiida.manage.database.integrity import write_database_integrity_violation
from aiida.manage.database.integrity.plugins import infer_calculation_entry_point
from aiida.plugins.entry_point import ENTRY_POINT_STRING_SEPARATOR

Expand All @@ -50,20 +47,30 @@ def migrate_infer_calculation_entry_point(apps, schema_editor):
DbNode.objects.filter(type=type_string).update(process_type=entry_point_string)

if fallback_cases:
with NamedTemporaryFile(prefix='migration-', suffix='.log', dir='.', delete=False) as handle:
name = handle.name
echo.echo('')
echo.echo_warning(
'\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n'
'Found one or multiple calculations nodes with a type string that could not be mapped \n'
'onto a known calculation entry point, either in your working environment or from the \n'
'plugin registry. We have used a fallback value for the `process_type` that would have \n'
'been the entry point name. The exact list of affected nodes and the used fallback \n'
'process type based on the found type has been written to a log file: \n'
'{}\n'
'!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n'.format(name))

handle.write(tabulate(fallback_cases, headers=['UUID', 'type (old)', 'process_type (fallback)']))
headers = ['UUID', 'type (old)', 'process_type (fallback)']
warning_message = 'found calculation nodes with a type string that could not be mapped onto a known entry point'
action_message = 'inferred `process_type` for all calculation nodes, using fallback for unknown entry points'
write_database_integrity_violation(fallback_cases, headers, warning_message, action_message)


def detect_unexpected_links(apps, schema_editor):
    """Scan the database for any links that are unexpected and report them.

    The checks will verify that there are no outgoing `call` or `return` links from calculation nodes and that if a
    workflow node has a `create` link, it has at least an accompanying return link to the same data node, or it has a
    `call` link to a calculation node that takes the created data node as input.

    Every statement of `INVALID_LINK_SELECT_STATEMENTS` is executed on the connection of the schema editor. When a
    statement yields rows, they are handed to `write_database_integrity_violation` along with the warning message
    that is paired with that statement.

    :param apps: the app registry passed in by the `RunPython` operation (not used here)
    :param schema_editor: the schema editor whose connection is used to execute the select statements
    """
    from aiida.backends.general.migrations.provenance_redesign import INVALID_LINK_SELECT_STATEMENTS
    from aiida.manage.database.integrity import write_database_integrity_violation

    # The column layout is identical for all select statements, so the headers are shared
    headers = ['UUID source', 'UUID target', 'link type', 'link label']

    with schema_editor.connection.cursor() as cursor:
        for statement, message in INVALID_LINK_SELECT_STATEMENTS:
            cursor.execute(statement)
            violations = cursor.fetchall()

            if not violations:
                continue

            write_database_integrity_violation(violations, headers, message)


def reverse_code(apps, schema_editor):
Expand Down Expand Up @@ -91,6 +98,7 @@ class Migration(migrations.Migration):

operations = [
migrations.RunPython(migrate_infer_calculation_entry_point, reverse_code=reverse_code, atomic=True),
migrations.RunPython(detect_unexpected_links, reverse_code=reverse_code, atomic=True),
migrations.RunSQL(
"""
DELETE FROM db_dblink WHERE db_dblink.id IN (
Expand Down Expand Up @@ -141,7 +149,7 @@ class Migration(migrations.Migration):
WHERE type = 'calculation.function.FunctionCalculation.'; -- Update type for FunctionCalculation nodes
UPDATE db_dblink SET type = 'create' WHERE type = 'createlink'; -- Rename `createlink` to `create`
UPDATE db_dblink SET type = 'return' WHERE type = 'returnlink'; -- Rename `returnlink` to `create`
UPDATE db_dblink SET type = 'return' WHERE type = 'returnlink'; -- Rename `returnlink` to `return`
UPDATE db_dblink SET type = 'input_calc' FROM db_dbnode
WHERE db_dblink.output_id = db_dbnode.id AND db_dbnode.type LIKE 'node.process.calculation%'
Expand Down
12 changes: 0 additions & 12 deletions aiida/backends/djsite/db/views.py

This file was deleted.

4 changes: 0 additions & 4 deletions aiida/backends/djsite/settings/.gitignore

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,3 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################


93 changes: 93 additions & 0 deletions aiida/backends/general/migrations/provenance_redesign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""SQL statements to detect invalid/ununderstood links for the provenance redesign migration."""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

# Select (source UUID, target UUID, link type, link label) for every link of type `calllink` whose source node has
# a type starting with `calculation.job.` or `calculation.inline.`.
SELECT_CALCULATIONS_WITH_OUTGOING_CALL = """
SELECT node_in.uuid, node_out.uuid, link.type, link.label
FROM db_dbnode AS node_in
JOIN db_dblink AS link ON node_in.id = link.input_id
JOIN db_dbnode AS node_out ON node_out.id = link.output_id
WHERE (
node_in.type LIKE 'calculation.job.%' OR
node_in.type LIKE 'calculation.inline.%'
)
AND link.type = 'calllink';
"""

# Select (source UUID, target UUID, link type, link label) for every link of type `returnlink` whose source node has
# a type starting with `calculation.job.` or `calculation.inline.`.
SELECT_CALCULATIONS_WITH_OUTGOING_RETURN = """
SELECT node_in.uuid, node_out.uuid, link.type, link.label
FROM db_dbnode AS node_in
JOIN db_dblink AS link ON node_in.id = link.input_id
JOIN db_dbnode AS node_out ON node_out.id = link.output_id
WHERE (
node_in.type LIKE 'calculation.job.%' OR
node_in.type LIKE 'calculation.inline.%'
)
AND link.type = 'returnlink';
"""

# Select (source UUID, target UUID, link type, link label) for the `createlink` links of a filtered set of source
# nodes. The filtered set is built in the subquery `node_in_subquery` as follows:
#
#   1. Start from the ids of all nodes with a type starting with `calculation.work.` or `calculation.function.`
#      that have at least one outgoing `createlink`.
#   2. Remove (first EXCEPT) the ids of those nodes that have a `returnlink` with the same source and target as one
#      of their `createlink` links.
#   3. Remove (second EXCEPT) the ids of those nodes that have both a `calllink` to some node and a `createlink` to
#      some data node, where a link (of any type) exists from that data node to the called node.
#
# Note that the exclusion operates on node ids: once a source node is removed by step 2 or 3, none of its
# `createlink` links are returned by the outer query.
SELECT_WORKFLOWS_WITH_ISOLATED_CREATE_LINK = """
SELECT node_in.uuid, node_out.uuid, link3.type, link3.label
FROM db_dbnode AS node_in
JOIN db_dblink AS link3 ON node_in.id = link3.input_id
JOIN db_dbnode AS node_out ON node_out.id = link3.output_id
JOIN
(
SELECT node2.id
FROM db_dbnode AS node2
JOIN db_dblink AS link2 ON node2.id = link2.input_id
WHERE (node2.type LIKE 'calculation.work.%' OR node2.type LIKE 'calculation.function.%')
AND link2.type = 'createlink'
EXCEPT (
SELECT returnlinks.input_id
FROM db_dblink AS returnlinks
JOIN (
SELECT node.id, node.type, link.label, link.input_id, link.output_id
FROM db_dbnode AS node
JOIN db_dblink AS link ON node.id = link.input_id
WHERE (node.type LIKE 'calculation.work.%' OR node.type LIKE 'calculation.function.%')
AND link.type = 'createlink'
) AS createlinks
ON (returnlinks.input_id = createlinks.input_id AND returnlinks.output_id = createlinks.output_id)
WHERE returnlinks.type = 'returnlink'
)
EXCEPT (
SELECT calllinks.wfid
FROM db_dblink AS inputlinks
JOIN
(
(
SELECT node2.id AS wfid, node2.type, link2.label, link2.input_id, link2.output_id AS subwfid
FROM db_dbnode AS node2 JOIN db_dblink AS link2 ON node2.id = link2.input_id
WHERE (node2.type LIKE 'calculation.work.%' OR node2.type LIKE 'calculation.function.%')
AND link2.type = 'calllink'
) AS calllinks
JOIN (
SELECT node.id AS wfid, node.type, link.label, link.input_id, link.output_id AS dataid
FROM db_dbnode AS node JOIN db_dblink AS link
ON node.id = link.input_id
WHERE (node.type LIKE 'calculation.work.%' OR node.type LIKE 'calculation.function.%')
AND link.type = 'createlink') AS createlinks
ON calllinks.wfid = createlinks.wfid
)
ON (inputlinks.input_id = createlinks.dataid AND inputlinks.output_id = calllinks.subwfid)
)
) AS node_in_subquery ON node_in.id = node_in_subquery.id
WHERE link3.type = 'createlink';
"""

# Pairs of (SQL select statement, warning message). Each statement selects the columns source UUID, target UUID,
# link type and link label; the message describes what a non-empty result of that statement means.
INVALID_LINK_SELECT_STATEMENTS = (
(SELECT_CALCULATIONS_WITH_OUTGOING_CALL, 'detected calculation nodes with outgoing `call` links.'),
(SELECT_CALCULATIONS_WITH_OUTGOING_RETURN, 'detected calculation nodes with outgoing `return` links.'),
(SELECT_WORKFLOWS_WITH_ISOLATED_CREATE_LINK, 'detected workflow nodes with isolated `create` links.'),
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from __future__ import print_function

# Remove when https://github.com/PyCQA/pylint/issues/1931 is fixed
# pylint: disable=no-name-in-module,import-error,line-too-long
# pylint: disable=no-name-in-module,import-error
from alembic import op
from sqlalchemy.sql import text

Expand All @@ -39,13 +39,10 @@ def migrate_infer_calculation_entry_point():
:raises: IntegrityError if database contains nodes with duplicate UUIDS.
"""
from tabulate import tabulate
from tempfile import NamedTemporaryFile

from sqlalchemy.orm.session import Session

from aiida.backends.sqlalchemy.models.node import DbNode
from aiida.cmdline.utils import echo
from aiida.manage.database.integrity import write_database_integrity_violation
from aiida.manage.database.integrity.plugins import infer_calculation_entry_point
from aiida.plugins.entry_point import ENTRY_POINT_STRING_SEPARATOR

Expand All @@ -72,20 +69,29 @@ def migrate_infer_calculation_entry_point():
session.commit()

if fallback_cases:
with NamedTemporaryFile(prefix='migration-', suffix='.log', dir='.', delete=False) as handle:
name = handle.name
echo.echo('')
echo.echo_warning(
'\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n'
'Found one or multiple calculations nodes with a type string that could not be mapped \n'
'onto a known calculation entry point, either in your working environment or from the \n'
'plugin registry. We have used a fallback value for the `process_type` that would have \n'
'been the entry point name. The exact list of affected nodes and the used fallback \n'
'process type based on the found type has been written to a log file: \n'
'{}\n'
'!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n'.format(name))

handle.write(tabulate(fallback_cases, headers=['UUID', 'type (old)', 'process_type (fallback)']))
headers = ['UUID', 'type (old)', 'process_type (fallback)']
warning_message = 'found calculation nodes with a type string that could not be mapped onto a known entry point'
action_message = 'inferred `process_type` for all calculation nodes, using fallback for unknown entry points'
write_database_integrity_violation(fallback_cases, headers, warning_message, action_message)


def detect_unexpected_links():
    """Scan the database for any links that are unexpected and report them.

    The checks will verify that there are no outgoing `call` or `return` links from calculation nodes and that if a
    workflow node has a `create` link, it has at least an accompanying return link to the same data node, or it has a
    `call` link to a calculation node that takes the created data node as input.

    Every statement of `INVALID_LINK_SELECT_STATEMENTS` is executed on the connection bound to the migration. When a
    statement yields rows, they are handed to `write_database_integrity_violation` along with the warning message
    that is paired with that statement.
    """
    from aiida.backends.general.migrations.provenance_redesign import INVALID_LINK_SELECT_STATEMENTS
    from aiida.manage.database.integrity import write_database_integrity_violation

    # The column layout is identical for all select statements, so the headers are shared
    headers = ['UUID source', 'UUID target', 'link type', 'link label']
    bind = op.get_bind()

    for statement, message in INVALID_LINK_SELECT_STATEMENTS:
        violations = list(bind.execute(text(statement)))

        if not violations:
            continue

        write_database_integrity_violation(violations, headers, message)


def upgrade():
Expand All @@ -95,6 +101,9 @@ def upgrade():
# Migrate calculation nodes by inferring the process type from the type string
migrate_infer_calculation_entry_point()

# Detect if the database contain any unexpected links
detect_unexpected_links()

statement = text("""
DELETE FROM db_dblink WHERE db_dblink.id IN (
SELECT db_dblink.id FROM db_dblink
Expand Down Expand Up @@ -143,7 +152,7 @@ def upgrade():
WHERE type = 'calculation.function.FunctionCalculation.'; -- Update type for FunctionCalculation nodes
UPDATE db_dblink SET type = 'create' WHERE type = 'createlink'; -- Rename `createlink` to `create`
UPDATE db_dblink SET type = 'return' WHERE type = 'returnlink'; -- Rename `returnlink` to `create`
UPDATE db_dblink SET type = 'return' WHERE type = 'returnlink'; -- Rename `returnlink` to `return`
UPDATE db_dblink SET type = 'input_calc' FROM db_dbnode
WHERE db_dblink.output_id = db_dbnode.id AND db_dbnode.type LIKE 'node.process.calculation%'
Expand Down
38 changes: 38 additions & 0 deletions aiida/manage/database/integrity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,41 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=invalid-name
"""Methods to validate the database integrity and fix violations."""

from __future__ import absolute_import

WARNING_BORDER = '*' * 120


def write_database_integrity_violation(results, headers, reason_message, action_message=None):
    """Emit an integrity violation warning and write the violating records to a log file in the current directory.

    The log file is created with a unique name of the form `migration-*.log` and is not deleted afterwards. Its
    location is included in the warning that is echoed to the console.

    :param results: a list of tuples representing the violating records
    :param headers: a tuple of strings that will be used as a header for the log file. Should have the same length
        as each tuple in the results list.
    :param reason_message: a human readable message detailing the reason of the integrity violation
    :param action_message: an optional human readable message detailing a performed action, if any. When omitted,
        the performed action is reported as 'nothing'.
    """
    from datetime import datetime
    from tabulate import tabulate
    from tempfile import NamedTemporaryFile

    from aiida.cmdline.utils import echo

    if action_message is None:
        action_message = 'nothing'

    # Open the file explicitly in text mode: the default mode of `NamedTemporaryFile` is `w+b`, which only accepts
    # bytes on Python 3 and would make every `handle.write` call below raise a `TypeError`.
    with NamedTemporaryFile(prefix='migration-', suffix='.log', dir='.', delete=False, mode='w+') as handle:
        echo.echo('')
        echo.echo_warning(
            '\n{}\nFound one or multiple records that violate the integrity of the database\nViolation reason: {}\n'
            'Performed action: {}\nViolators written to: {}\n{}\n'.format(WARNING_BORDER, reason_message,
                                                                          action_message, handle.name, WARNING_BORDER))

        # Write a commented preamble with the context, followed by the table of violating records
        handle.write('# {}\n'.format(datetime.utcnow().isoformat()))
        handle.write('# Violation reason: {}\n'.format(reason_message))
        handle.write('# Performed action: {}\n'.format(action_message))
        handle.write('\n')
        handle.write(tabulate(results, headers))
Loading

0 comments on commit 42bc788

Please sign in to comment.