From 99a10f5fbe2a7ced292f36b3e1b3eb6ca71810eb Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Mon, 4 Mar 2019 16:51:17 +0100 Subject: [PATCH] Add an index to columns of `DbLink` for SqlAlchemy Indices are added for the `input_id`, `output_id` and `type` columns of the `DbLink` model for SqlAlchemy. This makes it congruent with the Django model and ensures that the engine will not grind to a halt on bigger production databases, where all incoming or outgoing links of a node often need to be retrieved. --- aiida/backends/djsite/db/models.py | 33 +++---------------- .../versions/5a49629f0d45_dblink_indices.py | 30 +++++++++++++++++ aiida/backends/sqlalchemy/models/node.py | 8 +++-- 3 files changed, 40 insertions(+), 31 deletions(-) create mode 100644 aiida/backends/sqlalchemy/migrations/versions/5a49629f0d45_dblink_indices.py diff --git a/aiida/backends/djsite/db/models.py b/aiida/backends/djsite/db/models.py index 433f7ed6ce..297795935b 100644 --- a/aiida/backends/djsite/db/models.py +++ b/aiida/backends/djsite/db/models.py @@ -224,41 +224,18 @@ def __str__(self): @python_2_unicode_compatible class DbLink(m.Model): - """ - Direct connection between two dbnodes. The label is identifying the - link type. - """ + """Direct connection between two dbnodes. 
The label is identifying the link type.""" + # If I delete an output, delete also the link; if I delete an input, stop # NOTE: this will in most cases render a DbNode.objects.filter(...).delete() # call unusable because some nodes will be inputs; Nodes will have to # be deleted in the proper order (or links will need to be deleted first) - input = m.ForeignKey('DbNode', related_name='output_links', - on_delete=m.PROTECT) - output = m.ForeignKey('DbNode', related_name='input_links', - on_delete=m.CASCADE) - # label for data input for calculation + # The `input` and `output` columns do not need an explicit `db_index` as it is `True` by default for foreign keys + input = m.ForeignKey('DbNode', related_name='output_links', on_delete=m.PROTECT) + output = m.ForeignKey('DbNode', related_name='input_links', on_delete=m.CASCADE) label = m.CharField(max_length=255, db_index=True, blank=False) type = m.CharField(max_length=255, db_index=True, blank=True) - class Meta: - # I cannot add twice the same link - # I want unique labels among all inputs of a node - # NOTE! - # I cannot add ('input', 'label') because in general - # if the input is a 'data' and I want to add it more than - # once to different calculations, the different links must be - # allowed to have the same name. For calculations, it is the - # responsibility of the output plugin to avoid to have many - # times the same name. - # - # A calculation can have both a 'return' and a 'create' link to - # a single data output node, which would violate the unique constraint - # defined below, since the difference in link type is not considered. - # The distinction between the type of a 'create' and a 'return' link is not - # implemented at the moment, so the unique constraint is disabled. 
- # unique_together = ("output", "label") - pass - def __str__(self): return "{} ({}) --> {} ({})".format( self.input.get_simple_name(invalid_result="Unknown node"), diff --git a/aiida/backends/sqlalchemy/migrations/versions/5a49629f0d45_dblink_indices.py b/aiida/backends/sqlalchemy/migrations/versions/5a49629f0d45_dblink_indices.py new file mode 100644 index 0000000000..cd735723cb --- /dev/null +++ b/aiida/backends/sqlalchemy/migrations/versions/5a49629f0d45_dblink_indices.py @@ -0,0 +1,30 @@ +"""Adding indices on the `input_id`, `output_id` and `type` columns of the `DbLink` table + +Revision ID: 5a49629f0d45 +Revises: 5ddd24e52864 +Create Date: 2019-03-04 16:38:42.249231 + +""" +# pylint: disable=invalid-name,no-member,import-error,no-name-in-module +from __future__ import absolute_import +from alembic import op + +# revision identifiers, used by Alembic. +revision = '5a49629f0d45' +down_revision = '5ddd24e52864' +branch_labels = None +depends_on = None + + +def upgrade(): + """Migrations for the upgrade.""" + op.create_index(op.f('ix_db_dblink_input_id'), 'db_dblink', ['input_id'], unique=False) + op.create_index(op.f('ix_db_dblink_output_id'), 'db_dblink', ['output_id'], unique=False) + op.create_index(op.f('ix_db_dblink_type'), 'db_dblink', ['type'], unique=False) + + +def downgrade(): + """Migrations for the downgrade.""" + op.drop_index(op.f('ix_db_dblink_type'), table_name='db_dblink') + op.drop_index(op.f('ix_db_dblink_output_id'), table_name='db_dblink') + op.drop_index(op.f('ix_db_dblink_input_id'), table_name='db_dblink') diff --git a/aiida/backends/sqlalchemy/models/node.py b/aiida/backends/sqlalchemy/models/node.py index c159b972e3..dfa59ea3a6 100644 --- a/aiida/backends/sqlalchemy/models/node.py +++ b/aiida/backends/sqlalchemy/models/node.py @@ -244,7 +244,8 @@ class DbLink(Base): id = Column(Integer, primary_key=True) input_id = Column( Integer, - ForeignKey('db_dbnode.id', deferrable=True, initially="DEFERRED") + ForeignKey('db_dbnode.id', 
deferrable=True, initially="DEFERRED"), + index=True ) output_id = Column( Integer, @@ -253,14 +254,15 @@ class DbLink(Base): ondelete="CASCADE", deferrable=True, initially="DEFERRED" - ) + ), + index=True ) input = relationship("DbNode", primaryjoin="DbLink.input_id == DbNode.id") output = relationship("DbNode", primaryjoin="DbLink.output_id == DbNode.id") label = Column(String(255), index=True, nullable=False) - type = Column(String(255)) + type = Column(String(255), index=True) # A calculation can have both a 'return' and a 'create' link to # a single data output node, which would violate the unique constraint