From 9833d3ea7bd223f25691a174c2948a37a440f390 Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 28 Jun 2023 14:50:16 +0100 Subject: [PATCH 1/3] Fix incorrect value for sample_id metadata The value should be taken from the id_sample_lims column of the MLWH sample table, not the sanger_sample_id column. This change adds a failing test, expands the tests to include all the sample metadat that is set and then fixes the failing test by correcting the error above. --- src/npg_irods/metadata/lims.py | 2 +- tests/conftest.py | 13 +++++++++++-- tests/test_ont.py | 8 ++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/npg_irods/metadata/lims.py b/src/npg_irods/metadata/lims.py index 2d6f75ee..52c6b5e8 100644 --- a/src/npg_irods/metadata/lims.py +++ b/src/npg_irods/metadata/lims.py @@ -90,7 +90,7 @@ def make_sample_metadata(sample: Sample) -> list[AVU]: Returns: List[AVU] """ av = [ - [TrackedSample.ID, sample.sanger_sample_id], + [TrackedSample.ID, sample.id_sample_lims], [TrackedSample.NAME, sample.name], [TrackedSample.ACCESSION_NUMBER, sample.accession_number], [TrackedSample.DONOR_ID, sample.donor_id], diff --git a/tests/conftest.py b/tests/conftest.py index c25a5ca8..f3bdaef2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -196,11 +196,20 @@ def initialize_mlwh_ont(session: Session): num_samples = 200 for s in range(1, num_samples + 1): - sid = f"sample{s}" + lims_id = f"sample{s}" name = f"sample {s}" + donor_id = (f"donor {s}",) + accession = f"ACC{s}" + supplier_name = f"supplier_sample {s}" samples.append( Sample( - id_lims="LIMS_01", id_sample_lims=sid, name=name, **default_timestamps + accession_number=accession, + donor_id=donor_id, + id_lims="LIMS_01", + id_sample_lims=lims_id, + name=name, + supplier_name=supplier_name, + **default_timestamps, ) ) session.add_all(samples) diff --git a/tests/test_ont.py b/tests/test_ont.py index 06ec52a0..1f8af660 100644 --- a/tests/test_ont.py +++ b/tests/test_ont.py @@ -42,7 +42,11 @@ def test_add_new_sample_metadata(self, ont_synthetic, mlwh_session): coll = Collection(path) for avu in [ + AVU(TrackedSample.ACCESSION_NUMBER, "ACC1"), + AVU(TrackedSample.DONOR_ID, "donor 1"), + AVU(TrackedSample.ID, "sample1"), AVU(TrackedSample.NAME, "sample 1"), + AVU(TrackedSample.SUPPLIER_NAME, "supplier_sample 1"), AVU(TrackedStudy.ID, "2000"), AVU(TrackedStudy.NAME, "Study Y"), ]: @@ -93,7 +97,11 @@ def test_add_new_plex_sample_metadata(self, ont_synthetic, mlwh_session): bc_coll = Collection(path / subcoll / ont.barcode_name_from_id(tag_id)) for avu in [ + AVU(TrackedSample.ACCESSION_NUMBER, f"ACC{tag_index}"), + AVU(TrackedSample.DONOR_ID, f"donor {tag_index}"), + AVU(TrackedSample.ID, f"sample{tag_index}"), AVU(TrackedSample.NAME, f"sample {tag_index}"), + AVU(TrackedSample.SUPPLIER_NAME, f"supplier_sample {tag_index}"), AVU(TrackedStudy.ID, "3000"), AVU(TrackedStudy.NAME, "Study Z"), ]: From 73d0c16d7c6dc42868b19e9866326ca5ffcf0642 Mon Sep 17 00:00:00 2001 From: mksanger Date: Thu, 11 May 2023 11:00:32 +0100 Subject: [PATCH 2/3] Use supersede_metadata to perfom metadata updates on ont collections, and add tests --- src/npg_irods/ont.py | 23 +++++++----- tests/conftest.py | 43 +++++++++++++++++++---- tests/test_ont.py | 84 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 132 insertions(+), 18 deletions(-) diff --git a/src/npg_irods/ont.py b/src/npg_irods/ont.py index 825403d9..0eb1a69a 100644 --- a/src/npg_irods/ont.py +++ b/src/npg_irods/ont.py @@ -264,7 +264,7 @@ def annotate_results_collection( AVU(Instrument.INSTRUMENT_SLOT, instrument_slot), ] ] - coll.add_metadata(*avus) # These AVUs should be present already + coll.supersede_metadata(*avus) # These AVUs should be present already # A single fc record (for non-multiplexed data) if len(fc_info) == 1: @@ -273,9 +273,11 @@ def annotate_results_collection( ) fc = fc_info[0] try: - coll.add_metadata(*make_study_metadata(fc.study)) - coll.add_metadata(*make_sample_metadata(fc.sample)) - coll.add_permissions(*make_sample_acl(fc.sample, fc.study), recurse=True) + coll.supersede_metadata(*make_study_metadata(fc.study), history=True) + coll.supersede_metadata(*make_sample_metadata(fc.sample), history=True) + coll.supersede_permissions( + *make_sample_acl(fc.sample, fc.study), recurse=True + ) except RodsError as e: log.error(e.message, code=e.code) return False @@ -322,14 +324,17 @@ def annotate_results_collection( study=fc.study, ) - bc_coll.add_metadata( - AVU(SeqConcept.TAG_INDEX, tag_index_from_id(fc.tag_identifier)) + bc_coll.supersede_metadata( + AVU(SeqConcept.TAG_INDEX, tag_index_from_id(fc.tag_identifier)), + history=True, + ) + bc_coll.supersede_metadata(*make_study_metadata(fc.study), history=True) + bc_coll.supersede_metadata( + *make_sample_metadata(fc.sample), history=True ) - bc_coll.add_metadata(*make_study_metadata(fc.study)) - bc_coll.add_metadata(*make_sample_metadata(fc.sample)) # The ACL could be different for each plex - bc_coll.add_permissions( + bc_coll.supersede_permissions( *make_sample_acl(fc.sample, fc.study), recurse=True ) except RodsError as e: diff --git a/tests/conftest.py b/tests/conftest.py index f3bdaef2..97b393dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,7 @@ import logging import os +import re from datetime import datetime from pathlib import PurePath @@ -42,13 +43,7 @@ remove_specific_sql, rmgroup, ) -from partisan.irods import ( - AC, - AVU, - Collection, - DataObject, - Permission, -) +from partisan.irods import AC, AVU, Collection, DataObject, Permission, format_timestamp from partisan.metadata import DublinCore from sqlalchemy import create_engine, text from sqlalchemy.orm import Session, sessionmaker @@ -168,6 +163,40 @@ def ont_tag_identifier(tag_index: int) -> str: return f"NB{tag_index :02d}" +def ont_history_in_meta(history: AVU, meta: list[AVU]): + """Return true if the histories have no differences other than datetime. + False otherwise. + + Args: + history: An AVU created by the AVU.history method. + meta: The metadata list of an entitiy. + + Returns: bool + + """ + entity_histories = [] + for avu in meta: + # breakpoint() + if avu.attribute.endswith("_history"): + entity_histories.append(avu) + if entity_histories: + now = format_timestamp(datetime.utcnow()) + regex = r"\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})\]" + for h in entity_histories: + # breakpoint() + if all( + [ + history.attribute == h.attribute, + re.sub(regex, str(now), history.value) + == re.sub(regex, str(now), h.value), + history.units == h.units, + ] + ): + return True + + return False + + def initialize_mlwh_ont(session: Session): """Create test data for all synthetic simple and multiplexed experiments. diff --git a/tests/test_ont.py b/tests/test_ont.py index 1f8af660..2367d714 100644 --- a/tests/test_ont.py +++ b/tests/test_ont.py @@ -17,11 +17,12 @@ # # @author Keith James -from partisan.irods import AC, AVU, Collection, Permission +from partisan.irods import AC, AVU, Collection, Permission, format_timestamp from pytest import mark as m +from datetime import datetime from npg_irods import ont -from conftest import LATEST, ont_tag_identifier, tests_have_admin +from conftest import LATEST, ont_tag_identifier, tests_have_admin, ont_history_in_meta from npg_irods.metadata.lims import SeqConcept, TrackedSample, TrackedStudy from npg_irods.ont import MetadataUpdate, annotate_results_collection @@ -217,3 +218,82 @@ def test_find_updates_for_experiment_slot(self, ont_synthetic, mlwh_session): ), f"Found {num_expected} collections (slot 1 from simple experiment 1)" assert num_updated == num_expected assert num_errors == 0 + + @m.context("When metadata is updated") + @m.context("When the metadata is absent") + @m.it("Adds the metadata") + def test_updates_absent_metadata(self, ont_synthetic, mlwh_session): + coll = Collection( + ont_synthetic + / "simple_experiment_001/20190904_1514_G100000_flowcell011_69126024" + ) + assert AVU(TrackedSample.NAME, "sample 1") not in coll.metadata() + update = MetadataUpdate( + experiment_name="simple_experiment_001", instrument_slot=1 + ) + num_found, num_updated, num_errors = update.update_secondary_metadata( + mlwh_session=mlwh_session + ) + assert AVU(TrackedSample.NAME, "sample 1") in coll.metadata() + + @m.context("When correct metadata is already present") + @m.it("Leaves the metadata unchanged") + def test_updates_present_metadata(self, ont_synthetic, mlwh_session): + coll = Collection( + ont_synthetic + / "simple_experiment_001/20190904_1514_G100000_flowcell011_69126024" + ) + coll.add_metadata(AVU(TrackedSample.NAME, "sample 1")) + update = MetadataUpdate( + experiment_name="simple_experiment_001", instrument_slot=1 + ) + num_found, num_updated, num_errors = update.update_secondary_metadata( + mlwh_session=mlwh_session + ) + assert AVU(TrackedSample.NAME, "sample 1") in coll.metadata() + + @m.context("When incorrect metadata is present") + @m.it("Changes the metadata and adds history metadata") + def test_updates_changed_metadata(self, ont_synthetic, mlwh_session): + coll = Collection( + ont_synthetic + / "simple_experiment_001/20190904_1514_G100000_flowcell011_69126024" + ) + coll.add_metadata(AVU(TrackedSample.NAME, "sample 0")) + update = MetadataUpdate( + experiment_name="simple_experiment_001", instrument_slot=1 + ) + num_found, num_updated, num_errors = update.update_secondary_metadata( + mlwh_session=mlwh_session + ) + assert AVU(TrackedSample.NAME, "sample 1") in coll.metadata() + assert AVU(TrackedSample.NAME, "sample 0") not in coll.metadata() + assert ont_history_in_meta( + AVU.history(AVU(TrackedSample.NAME, "sample 0")), coll.metadata() + ) + + @m.context("When an attribute has multiple incorrect values") + @m.it("Groups those values in the history metadata") + def test_updates_multiple_metadata(self, ont_synthetic, mlwh_session): + coll = Collection( + ont_synthetic + / "simple_experiment_001/20190904_1514_G100000_flowcell011_69126024" + ) + coll.add_metadata(AVU(TrackedStudy.NAME, "Study A")) + coll.add_metadata(AVU(TrackedStudy.NAME, "Study B")) + update = MetadataUpdate( + experiment_name="simple_experiment_001", instrument_slot=1 + ) + num_found, num_updated, num_errors = update.update_secondary_metadata( + mlwh_session=mlwh_session + ) + assert AVU(TrackedStudy.NAME, "Study Y") in coll.metadata() + assert AVU(TrackedStudy.NAME, "Study A") not in coll.metadata() + assert AVU(TrackedStudy.NAME, "Study B") not in coll.metadata() + assert ont_history_in_meta( + AVU( + f"{TrackedStudy.NAME}_history", + f"[{format_timestamp(datetime.utcnow())}] Study A,Study B", + ), + coll.metadata(), + ) From 1f9995f16f7f2a20a802bf73b60e0e2b09af3b98 Mon Sep 17 00:00:00 2001 From: Keith James Date: Thu, 8 Jun 2023 13:41:56 +0100 Subject: [PATCH 3/3] Fix ONT permissions updates to retain unmanaged access controls iRODS permissions updates were removing permissions that were not intended for removal e.g. for the current user. The unit tests were not good enough to pick detect this problem. This change ensures that only permissions managed by this API are removed during updates. --- src/npg_irods/metadata/lims.py | 17 ++++++++++++++--- src/npg_irods/ont.py | 12 +++++++++--- tests/test_ont.py | 19 +++++++++++++------ 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/npg_irods/metadata/lims.py b/src/npg_irods/metadata/lims.py index 52c6b5e8..6c1c91da 100644 --- a/src/npg_irods/metadata/lims.py +++ b/src/npg_irods/metadata/lims.py @@ -162,6 +162,19 @@ def ensure_consent_withdrawn_metadata(obj: DataObject) -> bool: return _ensure_avus_present(obj, AVU(TrackedSample.CONSENT_WITHDRAWN, 1)) +def is_managed_access(ac: AC): + """Return True if the access control is managed this API and can safely be added or + removed. + + Args: + ac: The access control to test + + Returns: + True if managed by this API. + """ + return STUDY_IDENTIFIER_REGEX.match(ac.user) + + def has_consent_withdrawn_permissions(obj: DataObject) -> bool: """Return True if the object has permissions expected for data with consent withdrawn. @@ -175,9 +188,7 @@ def has_consent_withdrawn_permissions(obj: DataObject) -> bool: # Alternatively, we could keep a list of rodsadmin users who should have continued # access e.g. in order to redact the data, and check that no other users are in the # ACL. Using a list of rodsadmins would mean we don't need to use regex. - study_acl = [ - ac for ac in obj.permissions() if STUDY_IDENTIFIER_REGEX.match(ac.user) - ] + study_acl = [ac for ac in obj.permissions() if is_managed_access(ac)] return not study_acl diff --git a/src/npg_irods/ont.py b/src/npg_irods/ont.py index 0eb1a69a..72b0b6ed 100644 --- a/src/npg_irods/ont.py +++ b/src/npg_irods/ont.py @@ -23,7 +23,7 @@ from typing import Type, Union from partisan.exception import RodsError -from partisan.irods import AVU, Collection, query_metadata +from partisan.irods import AVU, Collection, query_metadata, current_user from sqlalchemy import asc, distinct from sqlalchemy.orm import Session from structlog import get_logger @@ -31,6 +31,7 @@ from npg_irods.db.mlwh import OseqFlowcell from npg_irods.metadata.lims import ( SeqConcept, + is_managed_access, make_sample_acl, make_sample_metadata, make_study_metadata, @@ -275,8 +276,11 @@ def annotate_results_collection( try: coll.supersede_metadata(*make_study_metadata(fc.study), history=True) coll.supersede_metadata(*make_sample_metadata(fc.sample), history=True) + + # Keep the access controls that we don't manage + keep = [ac for ac in coll.permissions() if not is_managed_access(ac)] coll.supersede_permissions( - *make_sample_acl(fc.sample, fc.study), recurse=True + *keep, *make_sample_acl(fc.sample, fc.study), recurse=True ) except RodsError as e: log.error(e.message, code=e.code) @@ -334,8 +338,10 @@ def annotate_results_collection( ) # The ACL could be different for each plex + # Keep the access controls that we don't manage + keep = [ac for ac in bc_coll.permissions() if not is_managed_access(ac)] bc_coll.supersede_permissions( - *make_sample_acl(fc.sample, fc.study), recurse=True + *keep, *make_sample_acl(fc.sample, fc.study), recurse=True ) except RodsError as e: log.error(e.message, code=e.code) diff --git a/tests/test_ont.py b/tests/test_ont.py index 2367d714..b6c44808 100644 --- a/tests/test_ont.py +++ b/tests/test_ont.py @@ -53,10 +53,13 @@ def test_add_new_sample_metadata(self, ont_synthetic, mlwh_session): ]: assert avu in coll.metadata(), f"{avu} is in {coll} metadata" - ac = AC("ss_2000", Permission.READ, zone="testZone") - assert ac in coll.acl() + expected_acl = [ + AC("irods", Permission.OWN, zone="testZone"), + AC("ss_2000", Permission.READ, zone="testZone"), + ] + assert coll.acl() == expected_acl for item in coll.contents(): - assert ac in item.acl(), f"{ac} is in {item} ACL" + assert item.acl() == expected_acl @tests_have_admin @m.context("When the experiment is multiplexed") @@ -108,10 +111,14 @@ def test_add_new_plex_sample_metadata(self, ont_synthetic, mlwh_session): ]: assert avu in bc_coll.metadata(), f"{avu} is in {bc_coll} metadata" - ac = AC("ss_3000", Permission.READ, zone="testZone") - assert ac in bc_coll.acl(), f"{ac} is in {bc_coll} ACL" + expected_acl = [ + AC("irods", Permission.OWN, zone="testZone"), + AC("ss_3000", Permission.READ, zone="testZone"), + ] + + assert bc_coll.acl() == expected_acl for item in bc_coll.contents(): - assert ac in item.acl(), f"{ac} is in {item} ACL" + assert item.acl() == expected_acl class TestMetadataUpdate(object):