Skip to content

Commit

Permalink
Implement verdi database integrity endpoints for links and nodes
Browse files Browse the repository at this point in the history
These two new endpoints will scan the database for invalid links
and nodes, by running prepared SQL statements. If any violations
are found, they will be printed to the terminal. For now there
are no options implemented to apply a patch to remove the violations.
  • Loading branch information
sphuber committed Dec 12, 2018
1 parent 1673ec2 commit 7aa104a
Show file tree
Hide file tree
Showing 13 changed files with 426 additions and 10 deletions.
2 changes: 1 addition & 1 deletion aiida/backends/djsite/db/subtests/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def setUpBeforeMigration(self, apps):
verify_node_uuid_uniqueness()

# Now run the function responsible for solving duplicate UUIDs which would also be called by the user
# through the `verdi database integrity duplicate-node-uuid` command
# through the `verdi database integrity detect-duplicate-node-uuid` command
deduplicate_node_uuids(dry_run=False)

def test_deduplicated_uuids(self):
Expand Down
13 changes: 12 additions & 1 deletion aiida/backends/djsite/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
import six

from contextlib import contextmanager

from six.moves import zip
from aiida.backends.general.abstractqueries import AbstractQueryManager


class DjangoQueryManager(AbstractQueryManager):

def __init__(self, backend):
super(DjangoQueryManager, self).__init__(backend)

Expand All @@ -33,6 +36,14 @@ def raw(self, query):

return results

@contextmanager
def cursor(self):
    """Yield a cursor on Django's default database connection.

    Intended to be used as ``with query_manager.cursor() as cursor:``. The cursor is closed when the
    ``with`` block is left, whether it finishes normally or raises.

    :return: a context manager yielding the cursor returned by ``django.db.connection.cursor()``
    """
    # Imported lazily, as in the original, so that this module can be imported without Django being set up
    from django.db import connection

    # Django cursors are themselves context managers that close on exit; the previous implementation
    # yielded the cursor inside a no-op `try/finally: pass` and therefore never closed it.
    with connection.cursor() as django_cursor:
        yield django_cursor

def get_creation_statistics(
self,
user_pk=None
Expand Down
23 changes: 22 additions & 1 deletion aiida/backends/general/abstractqueries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

from abc import ABCMeta, abstractmethod
from six.moves import zip
import six


@six.add_metaclass(ABCMeta)
class AbstractQueryManager(object):

def __init__(self, backend):
"""
:param backend: The AiiDA backend
Expand All @@ -33,6 +34,26 @@ def raw(self, query):
"""
pass

@abstractmethod
def cursor(self):
    """Return a context manager that yields a database cursor.

    Concrete query managers must implement this so that it can be used as
    ``with self.cursor() as cursor:``. The yielded object has to support ``execute(sql, parameters)``
    and iteration over the resulting rows, because that is how `prepared_statement` consumes it.
    """

def prepared_statement(self, sql, parameters):
    """Execute a parameterized SQL statement and return all rows it produces.

    :param sql: the SQL statement string, containing placeholders for the parameters
    :param parameters: dictionary with the values used to populate the placeholders in the statement
    :return: list with the rows obtained by iterating over the cursor after execution
    """
    with self.cursor() as db_cursor:
        db_cursor.execute(sql, parameters)
        # Materialize the rows while the cursor context is still open
        return list(db_cursor)

def get_duplicate_node_uuids(self):
"""
Return a list of nodes that have an identical UUID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def verify_node_uuid_uniqueness():
duplicates = conn.execute(query).fetchall()

if duplicates:
raise IntegrityError('your database contains nodes with duplicate UUIDS: '
'run `verdi database integrity duplicate-node-uuid` to return to a consistent state')
raise IntegrityError('your database contains nodes with duplicate UUIDS: run '
'`verdi database integrity detect-duplicate-node-uuid` to return to a consistent state')


def upgrade():
Expand Down
12 changes: 12 additions & 0 deletions aiida/backends/sqlalchemy/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

from contextlib import contextmanager

from aiida.backends.general.abstractqueries import AbstractQueryManager


Expand All @@ -34,6 +37,15 @@ def raw(self, query):

return result.fetchall()

@contextmanager
def cursor(self):
    """Yield a DBAPI cursor on a raw connection obtained from the SQLAlchemy engine.

    Intended to be used as ``with query_manager.cursor() as cursor:``. The raw connection is closed when
    the ``with`` block is left, whether it finishes normally or raises.

    :return: a context manager yielding the cursor of a raw connection from ``sa.engine``
    """
    from aiida.backends import sqlalchemy as sa

    # Acquire the connection *before* entering the `try`: if `raw_connection()` fails there is nothing to
    # close, and referencing the unbound name in `finally` would raise an `UnboundLocalError` that hides
    # the original exception.
    connection = sa.engine.raw_connection()
    try:
        yield connection.cursor()
    finally:
        connection.close()

def get_creation_statistics(
self,
user_pk=None
Expand Down
1 change: 1 addition & 0 deletions aiida/backends/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
'cmdline.commands.comment': ['aiida.backends.tests.cmdline.commands.test_comment'],
'cmdline.commands.computer': ['aiida.backends.tests.cmdline.commands.test_computer'],
'cmdline.commands.data': ['aiida.backends.tests.cmdline.commands.test_data'],
'cmdline.commands.database': ['aiida.backends.tests.cmdline.commands.test_database'],
'cmdline.commands.devel': ['aiida.backends.tests.cmdline.commands.test_devel'],
'cmdline.commands.export': ['aiida.backends.tests.cmdline.commands.test_export'],
'cmdline.commands.graph': ['aiida.backends.tests.cmdline.commands.test_graph'],
Expand Down
153 changes: 153 additions & 0 deletions aiida/backends/tests/cmdline/commands/test_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida_core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=invalid-name,protected-access
"""Tests for `verdi database`."""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import enum

from click.testing import CliRunner

from aiida.backends.testbase import AiidaTestCase
from aiida.cmdline.commands import cmd_database
from aiida.common.links import LinkType
from aiida.orm.data import Data
from aiida.orm.node import Node
from aiida.orm.node.process import CalculationNode, WorkflowNode


class TestVerdiDatabasaIntegrity(AiidaTestCase):
    """Tests for `verdi database integrity`.

    Every test first checks that the relevant command succeeds on the current database, then stores a
    deliberately inconsistent link or node and checks that the same command now reports a failure.
    """

    # NOTE(review): the class name misspells "Database"; it is left unchanged so that any existing
    # reference to this test case by name keeps working.

    def setUp(self):
        """Create a fresh click test runner for each test."""
        self.cli_runner = CliRunner()

    def tearDown(self):
        """Call `reset_database` after each test (presumably so invalid entries do not leak between tests)."""
        self.reset_database()

    def _assert_no_violations(self, command):
        """Invoke `command` without arguments and assert that it exits with code 0 and no exception."""
        result = self.cli_runner.invoke(command, [])
        self.assertEqual(result.exit_code, 0)
        self.assertClickResultNoException(result)

    def _assert_violations(self, command):
        """Invoke `command` without arguments and assert that it exits non-zero with an exception set."""
        result = self.cli_runner.invoke(command, [])
        self.assertNotEqual(result.exit_code, 0)
        self.assertIsNotNone(result.exception)

    def test_detect_invalid_links_workflow_create(self):
        """Test `verdi database integrity detect-invalid-links` outgoing `create` from `workflow`."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        # Create an invalid link: outgoing `create` from a workflow
        data = Data().store()
        workflow = WorkflowNode().store()

        data._add_dblink_from(workflow, link_type=LinkType.CREATE, label='create')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_links_calculation_return(self):
        """Test `verdi database integrity detect-invalid-links` outgoing `return` from `calculation`."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        # Create an invalid link: outgoing `return` from a calculation
        data = Data().store()
        calculation = CalculationNode().store()

        data._add_dblink_from(calculation, link_type=LinkType.RETURN, label='return')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_links_calculation_call(self):
        """Test `verdi database integrity detect-invalid-links` outgoing `call` from `calculation`."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        # Create an invalid link: outgoing `call` from a calculation
        workflow = WorkflowNode().store()
        calculation = CalculationNode().store()

        workflow._add_dblink_from(calculation, link_type=LinkType.CALL_WORK, label='call')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_links_create_links(self):
        """Test `verdi database integrity detect-invalid-links` when there are multiple incoming `create` links."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        # Create an invalid link: two `create` links
        data = Data().store()
        calculation = CalculationNode().store()

        data._add_dblink_from(calculation, link_type=LinkType.CREATE, label='create')
        data._add_dblink_from(calculation, link_type=LinkType.CREATE, label='create')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_links_call_links(self):
        """Test `verdi database integrity detect-invalid-links` when there are multiple incoming `call` links."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        # Create an invalid link: two `call` links
        workflow = WorkflowNode().store()
        calculation = CalculationNode().store()

        calculation._add_dblink_from(workflow, link_type=LinkType.CALL_CALC, label='call')
        calculation._add_dblink_from(workflow, link_type=LinkType.CALL_CALC, label='call')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_links_unknown_link_type(self):
        """Test `verdi database integrity detect-invalid-links` when link type is invalid."""
        self._assert_no_violations(cmd_database.detect_invalid_links)

        class WrongLinkType(enum.Enum):
            """Enum whose single member has a value that is not used by `LinkType` in this test."""

            WRONG_CREATE = 'wrong_create'

        # Create an invalid link: invalid link type
        data = Data().store()
        calculation = CalculationNode().store()

        data._add_dblink_from(calculation, link_type=WrongLinkType.WRONG_CREATE, label='create')

        self._assert_violations(cmd_database.detect_invalid_links)

    def test_detect_invalid_nodes_unknown_node_type(self):
        """Test `verdi database integrity detect-invalid-nodes` when node type is invalid."""
        self._assert_no_violations(cmd_database.detect_invalid_nodes)

        # Create a node with invalid type: a base Node type string is considered invalid
        # Note that there is a guard against storing base Nodes for this reason, which we temporarily disable.
        # The guard is re-enabled in a `finally` so that a failing `store` cannot leave it switched off for
        # the tests that run afterwards.
        Node._storable = True
        try:
            Node().store()
        finally:
            Node._storable = False

        self._assert_violations(cmd_database.detect_invalid_nodes)
62 changes: 59 additions & 3 deletions aiida/cmdline/commands/cmd_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from aiida.cmdline.commands.cmd_verdi import verdi
from aiida.cmdline.params import options
from aiida.cmdline.utils import echo
from aiida.cmdline.utils import decorators, echo


@verdi.group('database')
Expand Down Expand Up @@ -51,13 +51,13 @@ def verdi_database_integrity():
pass


@verdi_database_integrity.command('duplicate-node-uuid')
@verdi_database_integrity.command('detect-duplicate-node-uuid')
@click.option(
'-a',
'--apply-patch',
is_flag=True,
help='Apply the proposed changes. If this flag is not passed, a dry run is performed instead.')
def duplicate_node_uuid(apply_patch):
def detect_duplicate_node_uuid(apply_patch):
"""Detect and solve nodes with duplicate UUIDs.
Before aiida-core v1.0.0, there was no uniqueness constraint on the UUID column of the Node table in the database.
Expand Down Expand Up @@ -85,3 +85,59 @@ def duplicate_node_uuid(apply_patch):
echo.echo_success('integrity patch completed')
else:
echo.echo_success('dry-run of integrity patch completed')


@verdi_database_integrity.command('detect-invalid-links')
@decorators.with_dbenv()
def detect_invalid_links():
    """Scan the database for invalid links."""
    # The docstring above is kept short on purpose: click shows it as the command's help text.
    from tabulate import tabulate

    from aiida.manage.database.integrity.sql.links import INVALID_LINK_SELECT_STATEMENTS
    from aiida.manage.manager import get_manager

    manager = get_manager().get_backend().query_manager
    violations_found = False

    # Run every prepared check; any check that returns rows has found offending links
    for statement in INVALID_LINK_SELECT_STATEMENTS:
        rows = manager.prepared_statement(statement.sql, statement.parameters)

        if not rows:
            continue

        violations_found = True
        echo.echo_warning('{}:\n'.format(statement.message))
        echo.echo(tabulate(rows, headers=statement.headers))

    # Report the overall outcome only after all checks ran, so that every violation gets printed
    if violations_found:
        echo.echo_critical('one or more integrity violations detected')
    else:
        echo.echo_success('no integrity violations detected')


@verdi_database_integrity.command('detect-invalid-nodes')
@decorators.with_dbenv()
def detect_invalid_nodes():
    """Scan the database for invalid nodes."""
    # The docstring above is kept short on purpose: click shows it as the command's help text.
    from tabulate import tabulate

    from aiida.manage.database.integrity.sql.nodes import INVALID_NODE_SELECT_STATEMENTS
    from aiida.manage.manager import get_manager

    manager = get_manager().get_backend().query_manager
    violations_found = False

    # Run every prepared check; any check that returns rows has found offending nodes
    for statement in INVALID_NODE_SELECT_STATEMENTS:
        rows = manager.prepared_statement(statement.sql, statement.parameters)

        if not rows:
            continue

        violations_found = True
        echo.echo_warning('{}:\n'.format(statement.message))
        echo.echo(tabulate(rows, headers=statement.headers))

    # Report the overall outcome only after all checks ran, so that every violation gets printed
    if violations_found:
        echo.echo_critical('one or more integrity violations detected')
    else:
        echo.echo_success('no integrity violations detected')
4 changes: 2 additions & 2 deletions aiida/manage/database/integrity/duplicate_uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def verify_node_uuid_uniqueness():
duplicates = get_duplicate_node_uuids()

if duplicates:
raise IntegrityError('your database contains nodes with duplicate UUIDS: '
'run `verdi database integrity duplicate-node-uuid` to return to a consistent state')
raise IntegrityError('your database contains nodes with duplicate UUIDS: run '
'`verdi database integrity detect-duplicate-node-uuid` to return to a consistent state')


def get_duplicate_node_uuids():
Expand Down
Empty file.
Loading

0 comments on commit 7aa104a

Please sign in to comment.