From ae2c34f2bf2ac1ff54414c7e147b9a5d024bf656 Mon Sep 17 00:00:00 2001 From: Keith James Date: Wed, 12 Jul 2023 17:06:56 +0100 Subject: [PATCH 1/3] Add support for recursive consent withdrawn updates for ONT Add recurse keyword and recursive behaviour for collections to: has_consent_withdrawn has_consent_withdrawn_metadata has_consent_withdrawn_permissions ensure_consent_withdrawn Add an is_minknow_report function to help in managing permissions. Rename ont.update_metadata to ont.apply_metadata to distinguish it from the secondary metadata update process. Change do_*_update function names to update_* because they read better. --- scripts/update-ont-metadata | 4 +- src/npg_irods/common.py | 4 +- src/npg_irods/illumina.py | 19 +- src/npg_irods/metadata/lims.py | 179 +++++++++++----- src/npg_irods/ont.py | 60 +++--- tests/test_illumina.py | 46 ++++- tests/test_ont.py | 368 +++++++++++++++++++++------------ 7 files changed, 459 insertions(+), 221 deletions(-) diff --git a/scripts/update-ont-metadata b/scripts/update-ont-metadata index 9bb80308..e2a6a769 100755 --- a/scripts/update-ont-metadata +++ b/scripts/update-ont-metadata @@ -27,7 +27,7 @@ from sqlalchemy.orm import Session from npg_irods.cli import add_logging_arguments, configure_logging, parse_iso_date from npg_irods.db import DBConfig -from npg_irods.ont import update_metadata +from npg_irods.ont import apply_metadata from npg_irods.version import version description = """ @@ -94,7 +94,7 @@ def main(): engine = sqlalchemy.create_engine(dbconfig.url) with Session(engine) as session: - num_processed, num_updated, num_errors = update_metadata( + num_processed, num_updated, num_errors = apply_metadata( session, since=args.begin_date, zone=args.zone ) diff --git a/src/npg_irods/common.py b/src/npg_irods/common.py index 64ef8026..f7b9e2fd 100644 --- a/src/npg_irods/common.py +++ b/src/npg_irods/common.py @@ -201,7 +201,7 @@ def infer_data_source(path: PathLike | str) -> Tuple[Platform, AnalysisType]: raise ValueError(f"Failed to infer a data source for iRODS path '{path}'") -def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool: +def update_metadata(item: Collection | DataObject, avus: list[AVU]) -> bool: """Update metadata on an iRODS path, removing existing metadata and replacing with the given AVUs and adding history of changes. @@ -225,7 +225,7 @@ def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool: return num_removed > 0 or num_added > 0 -def do_permissions_update( +def update_permissions( item: Collection | DataObject, acl: list[AC], recurse=False ) -> bool: """Update permissions on an iRODS path, removing existing permissions and replacing diff --git a/src/npg_irods/illumina.py b/src/npg_irods/illumina.py index 6031bfe3..e0ff9c44 100644 --- a/src/npg_irods/illumina.py +++ b/src/npg_irods/illumina.py @@ -28,12 +28,13 @@ from sqlalchemy.orm import Session from structlog import get_logger -from npg_irods.common import do_metadata_update, do_permissions_update, infer_zone +from npg_irods.common import infer_zone, update_metadata, update_permissions from npg_irods.db.mlwh import IseqFlowcell, IseqProductMetrics, Sample, Study from npg_irods.metadata.common import SeqConcept, SeqSubset from npg_irods.metadata.illumina import Instrument from npg_irods.metadata.lims import ( ensure_consent_withdrawn, + has_consent_withdrawn_metadata, make_sample_acl, make_sample_metadata, make_study_metadata, @@ -190,7 +191,6 @@ def ensure_secondary_metadata_updated( True if updated. """ zone = infer_zone(item) - updated = False secondary_metadata, acl = [], [] components = [ @@ -205,14 +205,19 @@ def ensure_secondary_metadata_updated( secondary_metadata.extend(make_study_metadata(fc.study)) acl.extend(make_sample_acl(fc.sample, fc.study, zone=zone)) - updated = True if do_metadata_update(item, secondary_metadata) else updated + meta_update = update_metadata(item, secondary_metadata) - if any(c.contains_nonconsented_human() for c in components): # Illumina specific - updated = True if ensure_consent_withdrawn(item) else updated + cons_update = xahu_update = perm_update = False + if has_consent_withdrawn_metadata(item): + log.info("Consent withdrawn", path=item) + cons_update = ensure_consent_withdrawn(item) + elif any(c.contains_nonconsented_human() for c in components): # Illumina specific + log.info("Non-consented human data", path=item) + xahu_update = ensure_consent_withdrawn(item) else: - updated = True if do_permissions_update(item, acl) else updated + perm_update = update_permissions(item, acl) - return updated + return any([meta_update, cons_update, xahu_update, perm_update]) def find_flowcells_by_component( diff --git a/src/npg_irods/metadata/lims.py b/src/npg_irods/metadata/lims.py index 57034158..1326f0f3 100644 --- a/src/npg_irods/metadata/lims.py +++ b/src/npg_irods/metadata/lims.py @@ -173,7 +173,9 @@ def make_public_read_acl() -> list[AC]: return [AC("public", Permission.READ)] -def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool: +def has_consent_withdrawn_metadata( + item: Collection | DataObject, recurse=False +) -> bool: """Return True if the collection or data object is annotated in iRODS as having donor consent withdrawn. @@ -189,30 +191,63 @@ def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool: the collection or data object "fails closed" (unreadable), rather than "fails open" (readable). + A collection having has consent withdrawn metadata may imply that its contents, + should be likewise, recursively. This can be checked with the recurse keyword. + Args: item: The collection or data object to check. + recurse: Include a check of collection contents recursively. Defaults to False. Returns: True if consent was withdrawn. """ - meta = item.metadata() + if item.rods_type == DataObject and recurse: + raise ValueError(f"Cannot recursively check metadata on a data object: {item}") + + gapi_withdrawn = AVU(TrackedSample.CONSENT, 0) + npg_withdrawn = AVU(TrackedSample.CONSENT_WITHDRAWN, 1) + + def is_withdrawn(x): + meta = x.metadata() + return gapi_withdrawn in meta or npg_withdrawn in meta - return ( - AVU(TrackedSample.CONSENT, 0) in meta - or AVU(TrackedSample.CONSENT_WITHDRAWN, 1) in meta - ) + withdrawn_root = is_withdrawn(item) + withdrawn_child = True + if item.rods_type == Collection and recurse: + for it in item.iter_contents(recurse=True): + if not is_withdrawn(it): + withdrawn_child = False + break -def ensure_consent_withdrawn_metadata(item: Collection | DataObject) -> bool: + return withdrawn_root and withdrawn_child + + +def ensure_consent_withdrawn_metadata( + item: Collection | DataObject, recurse=False +) -> bool: """Ensure that consent withdrawn metadata are on the collection or data object. Args: - item: The collection or data object to check. + item: The collection or data object to update. + recurse: Apply to collection contents recursively. Defaults to False. Returns: True if metadata were added. """ - return ensure_avus_present(item, AVU(TrackedSample.CONSENT_WITHDRAWN, 1)) + if item.rods_type == DataObject and recurse: + raise ValueError(f"Cannot recursively update metadata on a data object: {item}") + + withdrawn_avu = AVU(TrackedSample.CONSENT_WITHDRAWN, 1) + updated_root = ensure_avus_present(item, withdrawn_avu) + updated_child = False + + if item.rods_type == Collection and recurse: + for it in item.iter_contents(recurse=True): + if ensure_avus_present(it, withdrawn_avu): + updated_child = True + + return updated_root or updated_child def is_managed_access(ac: AC): @@ -244,93 +279,135 @@ def has_mixed_ownership(acl: list[AC]): return len({ac.user for ac in acl if is_managed_access(ac)}) > 1 -def has_consent_withdrawn_permissions(item: Collection | DataObject) -> bool: +def has_consent_withdrawn_permissions( + item: Collection | DataObject, recurse=False +) -> bool: """Return True if the collection or data object has permissions expected for data with consent withdrawn. Args: item: The collection or data object to check. + recurse: Include a check of collection contents recursively. Defaults to False. Returns: True if the permissions were as expected. """ + if item.rods_type == DataObject and recurse: + raise ValueError( + f"Cannot recursively check permissions on a data object: {item}" + ) + # Alternatively, we could keep a list of rodsadmin users who should have continued # access e.g. in order to redact the data, and check that no other users are in the # ACL. Using a list of rodsadmins would mean we don't need to use regex. - study_acl = [ac for ac in item.permissions() if is_managed_access(ac)] + def has_study_perms(x): + return any([ac for ac in x.permissions() if is_managed_access(ac)]) - return not study_acl + withdrawn_root = not has_study_perms(item) + withdrawn_child = True + if item.rods_type == Collection and recurse: + for it in item.iter_contents(recurse=True): + if has_study_perms(it): + withdrawn_child = False + break -def has_consent_withdrawn(item: Collection | DataObject) -> bool: + return withdrawn_root and withdrawn_child + + +def has_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool: """Return True if the data object has metadata and permissions for data with consent withdrawn. Args: item: The collection or data object to check. + recurse: Include a check of the collection contents recursively. Defaults to + False. Returns: True if the metadata and permissions were as expected. """ - return has_consent_withdrawn_metadata(item) and has_consent_withdrawn_permissions( - item - ) + return has_consent_withdrawn_metadata( + item, recurse=recurse + ) and has_consent_withdrawn_permissions(item, recurse=recurse) -def ensure_consent_withdrawn(item: Collection | DataObject) -> bool: - """Ensure that a data object has its metadata and permissions in the correct state - for having consent withdrawn. All read permissions are withdrawn except for: +def ensure_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool: + """Ensure that a data object or collection has its metadata and permissions in the + correct state for having consent withdrawn. All read permissions are withdrawn + except for: - The current user making these changes. - Any rodsadmin. Args: item: The collection or data object to check. + recurse: Apply to collection contents recursively. Defaults to False. Returns: True if the metadata and/or permissions were updated. """ - if has_consent_withdrawn(item): + if item.rods_type == DataObject and recurse: + raise ValueError( + f"Cannot recursively withdraw permissions on a data object: {item}" + ) + + if has_consent_withdrawn(item, recurse=recurse): return False - if ensure_consent_withdrawn_metadata(item): + updated_meta = ensure_consent_withdrawn_metadata(item, recurse=recurse) + if updated_meta: log.info( - "Updated metadata", + "Updated consent withdrawn metadata", path=item, - has_withdrawn_meta=has_consent_withdrawn_metadata(item), + has_withdrawn_meta=has_consent_withdrawn_metadata(item, recurse=recurse), ) - to_remove = [] curr_user = current_user() - for ac in item.permissions(): - u = rods_user(ac.user) - if u is None: - log.info("Removing permissions (non-local user)", path=item, ac=ac) + def withdraw(x): # Withdraw perms from a single path, return True if changes made + to_remove = [] + + for ac in x.permissions(): + u = rods_user(ac.user) + + if u is None: + log.info("Removing permissions (non-local user)", path=x, ac=ac) + to_remove.append(ac) + continue + if u == curr_user: + log.info( + "Not removing permissions for self", path=x, user=str(u), ac=ac + ) + continue + if u.is_rodsadmin(): + log.info( + "Not removing permissions for rodsadmin", path=x, user=str(u), ac=ac + ) + continue + + log.info("Removing permissions", path=x, user=str(u), ac=ac) to_remove.append(ac) - continue - - if u == curr_user: - log.info("Not removing permissions for self", path=item, user=str(u), ac=ac) - continue - - if u.is_rodsadmin(): - log.info( - "Not removing permissions for rodsadmin", path=item, user=str(u), ac=ac - ) - continue - - log.info("Removing permissions", path=item, user=str(u), ac=ac) - to_remove.append(ac) - - num_removed = item.remove_permissions(*to_remove) - log.info( - "Removed permissions", - path=item, - num_removed=num_removed, - has_withdrawn_perm=has_consent_withdrawn_permissions(item), - ) - return True + + num_removed = item.remove_permissions(*to_remove) + log.info( + "Removed permissions", + path=x, + num_removed=num_removed, + has_withdrawn_perm=has_consent_withdrawn_permissions(x), + ) + + return num_removed > 0 + + updated_perms_root = withdraw(item) + updated_perms_child = False + + if item.rods_type == Collection and recurse: + for it in item.iter_contents(recurse=True): + if withdraw(it): + updated_perms_child = True + + return updated_meta or updated_perms_root or updated_perms_child def has_id_product_metadata(obj: DataObject): diff --git a/src/npg_irods/ont.py b/src/npg_irods/ont.py index 0603c175..3138ca5b 100644 --- a/src/npg_irods/ont.py +++ b/src/npg_irods/ont.py @@ -31,10 +31,12 @@ from sqlalchemy.orm import Session from structlog import get_logger -from npg_irods.common import do_metadata_update, do_permissions_update, infer_zone +from npg_irods.common import update_metadata, update_permissions, infer_zone from npg_irods.db.mlwh import OseqFlowcell, Sample, Study from npg_irods.metadata.common import SeqConcept from npg_irods.metadata.lims import ( + ensure_consent_withdrawn, + has_consent_withdrawn_metadata, is_managed_access, make_public_read_acl, make_sample_acl, @@ -75,17 +77,17 @@ def __init__( self.tag_identifier = tag_identifier -def update_metadata( +def apply_metadata( mlwh_session: Session, experiment_name=None, instrument_slot=None, since: datetime = None, zone=None, ) -> (int, int, int): - """Update iRODS metadata on ONT run collections whose corresponding ML warehouse + """Apply iRODS metadata on ONT run collections whose corresponding ML warehouse records have been updated at, or more recently than, the specified time. - Collections to update are identified by having ont:experiment_name and + Collections to annotate are identified by having ont:experiment_name and ont:instrument_slot metadata already attached to them. This is done for example, by the process which moves sequence data from the instrument into iRODS. @@ -99,7 +101,7 @@ def update_metadata( Returns: A tuple of the number of paths found, the number of paths whose metadata - was updated and the number of errors encountered. + were changed and the number of errors encountered. """ if since is None: since = datetime.fromtimestamp(0) # Everything since the Epoch @@ -190,12 +192,6 @@ def annotate_results_collection( log.warn("Collection does not exist", path=coll, comp=c) return False - avus = [ - AVU(Instrument.EXPERIMENT_NAME, c.experiment_name), - AVU(Instrument.INSTRUMENT_SLOT, c.instrument_slot), - ] - coll.supersede_metadata(*avus) # These AVUs should be present already - # A single fc record (for non-multiplexed data) if len(flowcells) == 1: log.info("Found non-multiplexed", comp=c) @@ -203,7 +199,7 @@ def annotate_results_collection( # Secondary metadata. Updating this here is an optimisation to reduce # turn-around-time. If we don't update, we just have to wait for a # cron job to call `ensure_secondary_metadata_updated`. - _do_metadata_and_permissions_update(coll, flowcells) + _do_secondary_metadata_and_perms_update(coll, flowcells) except RodsError as e: log.error(e.message, code=e.code) return False @@ -263,7 +259,7 @@ def annotate_results_collection( # Secondary metadata. Updating this here is an optimisation to reduce # turn-around-time. If we don't update, we just have to wait for a # cron job to call `ensure_secondary_metadata_updated`. - _do_metadata_and_permissions_update(bcoll, [fc]) + _do_secondary_metadata_and_perms_update(bcoll, [fc]) except RodsError as e: log.error(e.message, code=e.code) @@ -292,7 +288,7 @@ def ensure_secondary_metadata_updated( component = Component(expt.value, slot.value, tag_id.value) flowcells = find_flowcells_by_component(mlwh_session, component) - return _do_metadata_and_permissions_update(item, flowcells) + return _do_secondary_metadata_and_perms_update(item, flowcells) def find_recent_expt(sess: Session, since: datetime) -> list[str]: @@ -420,7 +416,19 @@ def barcode_name_from_id(tag_identifier: str) -> str: ) -def _do_metadata_and_permissions_update( +def is_minknow_report(obj: DataObject) -> bool: + """Return True if the data object is a MinKNOW run report. + + Args: + obj: iRODS path to check. + + Returns: + True if the object is a MinKNOW report. + """ + return obj.rods_type == DataObject and "report" in obj.name + + +def _do_secondary_metadata_and_perms_update( item: Collection | DataObject, flowcells ) -> bool: """Update metadata and permissions using sample/study information obtained from @@ -439,16 +447,22 @@ def _do_metadata_and_permissions_update( for fc in flowcells: metadata.extend(make_sample_metadata(fc.sample)) metadata.extend(make_study_metadata(fc.study)) - meta_updated = do_metadata_update(item, metadata) + meta_update = update_metadata(item, metadata) acl = [] for fc in flowcells: acl.extend(make_sample_acl(fc.sample, fc.study, zone=zone)) - perm_updated = do_permissions_update( - item, acl, recurse=(item.rods_type == Collection) - ) - return meta_updated or perm_updated + recurse = item.rods_type == Collection + cons_update = perm_update = False + + if has_consent_withdrawn_metadata(item): + log.info("Consent withdrawn", path=item) + cons_update = ensure_consent_withdrawn(item, recurse=recurse) + else: + perm_update = update_permissions(item, acl, recurse=recurse) + + return any([meta_update, cons_update, perm_update]) def _set_minknow_reports_public(coll: Collection) -> int: @@ -463,11 +477,7 @@ def _set_minknow_reports_public(coll: Collection) -> int: """ num_errors = 0 - reports = [ - obj - for obj in coll.contents() - if obj.rods_type == DataObject and "report" in obj.name - ] + reports = [obj for obj in coll.contents() if is_minknow_report(obj)] for report in reports: try: diff --git a/tests/test_illumina.py b/tests/test_illumina.py index 57345bdb..1de2024a 100644 --- a/tests/test_illumina.py +++ b/tests/test_illumina.py @@ -23,7 +23,11 @@ from conftest import history_in_meta from npg_irods.illumina import ensure_secondary_metadata_updated -from npg_irods.metadata.lims import TrackedSample, TrackedStudy +from npg_irods.metadata.lims import ( + TrackedSample, + TrackedStudy, + ensure_consent_withdrawn, +) class TestIlluminaMetadataUpdate(object): @@ -358,7 +362,7 @@ def test_updates_changed_study_permissions( @m.context("When data are multiplexed") @m.context("When data contain a human subset") @m.it("Removes managed access permissions") - def test_updates_human_permissions( + def test_updates_human_permissions_mx( self, illumina_synthetic_irods, illumina_synthetic_mlwh ): path = illumina_synthetic_irods / "12345/12345#1_human.cram" @@ -379,7 +383,7 @@ def test_updates_human_permissions( @m.context("When data are multiplexed") @m.context("When data contain a human X chromosome/autosome subset") @m.it("Removes managed access permissions") - def test_updates_xahuman_permissions( + def test_updates_xahuman_permissions_mx( self, illumina_synthetic_irods, illumina_synthetic_mlwh ): path = illumina_synthetic_irods / "12345/12345#1_xahuman.cram" @@ -400,7 +404,7 @@ def test_updates_xahuman_permissions( @m.context("When data are multiplexed") @m.context("When data are from multiple studies") @m.it("Removes managed access permissions") - def test_multiple_study_permissions( + def test_multiple_study_permissions_mx( self, illumina_synthetic_irods, illumina_synthetic_mlwh ): path = illumina_synthetic_irods / "12345/12345#2.cram" @@ -415,3 +419,37 @@ def test_multiple_study_permissions( obj, mlwh_session=illumina_synthetic_mlwh ) assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] + + @m.context("When data are not multiplexed") + @m.context("When data have had consent withdrawn") + @m.it("Does not restore access permissions") + def test_retains_consent_withdrawn( + self, illumina_synthetic_irods, illumina_synthetic_mlwh + ): + path = illumina_synthetic_irods / "12345/12345.cram" + zone = "testZone" + obj = DataObject(path) + + assert ensure_consent_withdrawn(obj) + assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] + assert ensure_secondary_metadata_updated( + obj, mlwh_session=illumina_synthetic_mlwh + ) + assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] + + @m.context("When data are multiplexed") + @m.context("When data have had consent withdrawn") + @m.it("Does not restore access permissions") + def test_updates_human_permissions_mx( + self, illumina_synthetic_irods, illumina_synthetic_mlwh + ): + path = illumina_synthetic_irods / "12345/12345#1_human.cram" + zone = "testZone" + obj = DataObject(path) + + assert ensure_consent_withdrawn(obj) + assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] + assert ensure_secondary_metadata_updated( + obj, mlwh_session=illumina_synthetic_mlwh + ) + assert obj.permissions() == [AC("irods", perm=Permission.OWN, zone=zone)] diff --git a/tests/test_ont.py b/tests/test_ont.py index 0e19f09d..2ddab292 100644 --- a/tests/test_ont.py +++ b/tests/test_ont.py @@ -25,132 +25,22 @@ from datetime import datetime from npg_irods import ont from conftest import LATEST, ont_tag_identifier, tests_have_admin, history_in_meta -from npg_irods.metadata.lims import TrackedSample, TrackedStudy +from npg_irods.metadata.lims import ( + TrackedSample, + TrackedStudy, + ensure_consent_withdrawn, +) from npg_irods.metadata.common import SeqConcept from npg_irods.ont import ( Component, annotate_results_collection, - update_metadata, + apply_metadata, + is_minknow_report, ) -class TestONTMetadataCreation(object): - @tests_have_admin - @m.context("When an ONT experiment collection is annotated") - @m.context("When the experiment is single-sample") - @m.it("Adds sample and study metadata to the run-folder collection") - def test_add_new_sample_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): - expt = "simple_experiment_001" - slot = 1 - - path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell011_69126024" - c = Component(experiment_name=expt, instrument_slot=slot) - annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) - - coll = Collection(path) - for avu in [ - AVU(TrackedSample.ACCESSION_NUMBER, "ACC1"), - AVU(TrackedSample.COMMON_NAME, "common_name1"), - AVU(TrackedSample.DONOR_ID, "donor_id1"), - AVU(TrackedSample.ID, "id_sample_lims1"), - AVU(TrackedSample.NAME, "name1"), - AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"), - AVU(TrackedSample.PUBLIC_NAME, "public_name1"), - AVU(TrackedStudy.ID, "2000"), - AVU(TrackedStudy.NAME, "Study Y"), - ]: - assert avu in coll.metadata(), f"{avu} is in {coll} metadata" - - expected_acl = [ - AC("irods", Permission.OWN, zone="testZone"), - AC("ss_2000", Permission.READ, zone="testZone"), - ] - assert coll.acl() == expected_acl, f"ACL of {coll} is { expected_acl}" - for item in coll.contents(): - assert item.acl() == expected_acl, f"ACL of {item} is {expected_acl}" - - @tests_have_admin - @m.context("When the experiment is multiplexed") - @m.it("Adds {tag_index_from_id => } metadata to barcode<0n> sub-collections") - def test_add_new_plex_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): - expt = "multiplexed_experiment_001" - slot = 1 - - path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" - c = Component(experiment_name=expt, instrument_slot=slot) - annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) - - for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]: - for tag_index in range(1, 12): - tag_identifier = ont_tag_identifier(tag_index) - bc_coll = Collection( - path / subcoll / ont.barcode_name_from_id(tag_identifier) - ) - avu = AVU(SeqConcept.TAG_INDEX, ont.tag_index_from_id(tag_identifier)) - assert avu in bc_coll.metadata(), f"{avu} is in {bc_coll} metadata" - - @tests_have_admin - @m.it("Adds sample and study metadata to barcode<0n> sub-collections") - def test_add_new_plex_sample_metadata( - self, ont_synthetic_irods, ont_synthetic_mlwh - ): - expt = "multiplexed_experiment_001" - slot = 1 - - path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" - c = Component(experiment_name=expt, instrument_slot=slot) - annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) - - for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]: - for tag_index in range(1, 12): - tag_id = ont_tag_identifier(tag_index) - bc_coll = Collection(path / subcoll / ont.barcode_name_from_id(tag_id)) - - for avu in [ - AVU(TrackedSample.ACCESSION_NUMBER, f"ACC{tag_index}"), - AVU(TrackedSample.COMMON_NAME, f"common_name{tag_index}"), - AVU(TrackedSample.DONOR_ID, f"donor_id{tag_index}"), - AVU(TrackedSample.ID, f"id_sample_lims{tag_index}"), - AVU(TrackedSample.NAME, f"name{tag_index}"), - AVU(TrackedSample.PUBLIC_NAME, f"public_name{tag_index}"), - AVU(TrackedSample.SUPPLIER_NAME, f"supplier_name{tag_index}"), - AVU(TrackedStudy.ID, "3000"), - AVU(TrackedStudy.NAME, "Study Z"), - ]: - assert avu in bc_coll.metadata(), f"{avu} is in {bc_coll} metadata" - - expected_acl = [ - AC("irods", Permission.OWN, zone="testZone"), - AC("ss_3000", Permission.READ, zone="testZone"), - ] - - assert bc_coll.acl() == expected_acl - for item in bc_coll.contents(): - assert item.acl() == expected_acl - - @tests_have_admin - @m.it("Makes report files publicly readable") - def test_public_read_reports(self, ont_synthetic_irods, ont_synthetic_mlwh): - expt = "multiplexed_experiment_001" - slot = 1 - - path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" - c = Component(experiment_name=expt, instrument_slot=slot) - annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) - expected_acl = [ - AC("irods", Permission.OWN, zone="testZone"), - AC("public", Permission.READ, zone="testZone"), - ] - - for ext in ["html", "md", "json.gz"]: - assert ( - DataObject(path / f"report_multiplexed_synthetic.{ext}").acl() - == expected_acl - ) - - -class TestONTMetadataUpdate(object): +class TestONTFindUpdates: @tests_have_admin @m.context("When an ONT metadata update is requested") @m.context("When no experiment name is specified") @@ -161,7 +51,7 @@ def test_find_all(self, ont_synthetic_irods, ont_synthetic_mlwh): num_multiplexed_expts = 3 num_slots = 5 - num_found, num_updated, num_errors = update_metadata( + num_found, num_updated, num_errors = apply_metadata( mlwh_session=ont_synthetic_mlwh ) num_expected = (num_simple_expts * num_slots) + ( @@ -172,11 +62,12 @@ def test_find_all(self, ont_synthetic_irods, ont_synthetic_mlwh): assert num_updated == num_expected assert num_errors == 0 + @m.context("When an ONT metadata update is requested") @m.context("When no experiment name is specified") @m.context("When a time window is specified") @m.it("Finds only collections updated in that time window") def test_find_recent_updates(self, ont_synthetic_irods, ont_synthetic_mlwh): - num_found, num_updated, num_errors = update_metadata( + num_found, num_updated, num_errors = apply_metadata( mlwh_session=ont_synthetic_mlwh, since=LATEST ) @@ -202,10 +93,11 @@ def test_find_recent_updates(self, ont_synthetic_irods, ont_synthetic_mlwh): assert num_updated == num_expected assert num_errors == 0 + @m.context("When an ONT metadata update is requested") @m.context("When an experiment name is specified") @m.it("Finds only collections with that experiment name") def test_find_updates_for_experiment(self, ont_synthetic_irods, ont_synthetic_mlwh): - num_found, num_updated, num_errors = update_metadata( + num_found, num_updated, num_errors = apply_metadata( experiment_name="simple_experiment_001", mlwh_session=ont_synthetic_mlwh ) @@ -227,13 +119,14 @@ def test_find_updates_for_experiment(self, ont_synthetic_irods, ont_synthetic_ml assert num_updated == num_expected assert num_errors == 0 + @m.context("When an ONT metadata update is requested") @m.context("When an experiment name is specified") @m.context("When a slot position is specified") @m.it("Finds only collections with that experiment name and slot position") def test_find_updates_for_experiment_slot( self, ont_synthetic_irods, ont_synthetic_mlwh ): - num_found, num_updated, num_errors = update_metadata( + num_found, num_updated, num_errors = apply_metadata( experiment_name="simple_experiment_001", instrument_slot=1, mlwh_session=ont_synthetic_mlwh, @@ -253,8 +146,108 @@ def test_find_updates_for_experiment_slot( assert num_updated == num_expected assert num_errors == 0 - @m.context("When metadata is updated") - @m.context("When the metadata is absent") + +class TestONTMetadataCreation(object): + @tests_have_admin + @m.context("When an ONT experiment collection is annotated") + @m.context("When the experiment is single-sample") + @m.it("Adds sample and study metadata to the run-folder collection") + def test_add_new_sample_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): + expt = "simple_experiment_001" + slot = 1 + + path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell011_69126024" + c = Component(experiment_name=expt, instrument_slot=slot) + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + coll = Collection(path) + for avu in [ + AVU(TrackedSample.ACCESSION_NUMBER, "ACC1"), + AVU(TrackedSample.COMMON_NAME, "common_name1"), + AVU(TrackedSample.DONOR_ID, "donor_id1"), + AVU(TrackedSample.ID, "id_sample_lims1"), + AVU(TrackedSample.NAME, "name1"), + AVU(TrackedSample.SUPPLIER_NAME, "supplier_name1"), + AVU(TrackedSample.PUBLIC_NAME, "public_name1"), + AVU(TrackedStudy.ID, "2000"), + AVU(TrackedStudy.NAME, "Study Y"), + ]: + assert avu in coll.metadata(), f"{avu} is in {coll} metadata" + + expected_acl = [ + AC("irods", Permission.OWN, zone="testZone"), + AC("ss_2000", Permission.READ, zone="testZone"), + ] + assert coll.acl() == expected_acl, f"ACL of {coll} is { expected_acl}" + for item in coll.contents(): + assert item.acl() == expected_acl, f"ACL of {item} is {expected_acl}" + + @tests_have_admin + @m.context("When an ONT experiment collection is annotated") + @m.context("When the experiment is multiplexed") + @m.it("Adds {tag_index_from_id => } metadata to barcode<0n> sub-collections") + def test_add_new_plex_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): + expt = "multiplexed_experiment_001" + slot = 1 + + path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" + c = Component(experiment_name=expt, instrument_slot=slot) + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]: + for tag_index in range(1, 12): + tag_identifier = ont_tag_identifier(tag_index) + bc_coll = Collection( + path / subcoll / ont.barcode_name_from_id(tag_identifier) + ) + avu = AVU(SeqConcept.TAG_INDEX, ont.tag_index_from_id(tag_identifier)) + assert avu in bc_coll.metadata(), f"{avu} is in {bc_coll} metadata" + + @tests_have_admin + @m.context("When an ONT experiment collection is annotated") + @m.context("When the experiment is multiplexed") + @m.it("Adds sample and study metadata to barcode<0n> sub-collections") + def test_add_new_plex_sample_metadata( + self, ont_synthetic_irods, ont_synthetic_mlwh + ): + expt = "multiplexed_experiment_001" + slot = 1 + + path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" + c = Component(experiment_name=expt, instrument_slot=slot) + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + for subcoll in ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"]: + for tag_index in range(1, 12): + tag_id = ont_tag_identifier(tag_index) + bc_coll = Collection(path / subcoll / ont.barcode_name_from_id(tag_id)) + + for avu in [ + AVU(TrackedSample.ACCESSION_NUMBER, f"ACC{tag_index}"), + AVU(TrackedSample.COMMON_NAME, f"common_name{tag_index}"), + AVU(TrackedSample.DONOR_ID, f"donor_id{tag_index}"), + AVU(TrackedSample.ID, f"id_sample_lims{tag_index}"), + AVU(TrackedSample.NAME, f"name{tag_index}"), + AVU(TrackedSample.PUBLIC_NAME, f"public_name{tag_index}"), + AVU(TrackedSample.SUPPLIER_NAME, f"supplier_name{tag_index}"), + AVU(TrackedStudy.ID, "3000"), + AVU(TrackedStudy.NAME, "Study Z"), + ]: + assert avu in bc_coll.metadata(), f"{avu} is in {bc_coll} metadata" + + expected_acl = [ + AC("irods", Permission.OWN, zone="testZone"), + AC("ss_3000", Permission.READ, zone="testZone"), + ] + + assert bc_coll.acl() == expected_acl + for item in bc_coll.contents(): + assert item.acl() == expected_acl + + +class TestONTMetadataUpdate(object): + @m.context("When ONT metadata are updated") + @m.context("When the metadata are absent") @m.it("Adds the metadata") def test_updates_absent_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): coll = Collection( @@ -263,7 +256,7 @@ def test_updates_absent_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): ) assert AVU(TrackedSample.NAME, "name1") not in coll.metadata() - update_metadata( + apply_metadata( experiment_name="simple_experiment_001", instrument_slot=1, mlwh_session=ont_synthetic_mlwh, @@ -271,7 +264,8 @@ def test_updates_absent_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): assert AVU(TrackedSample.NAME, "name1") in coll.metadata() - @m.context("When correct metadata is already present") + @m.context("When ONT metadata are updated") + @m.context("When correct metadata are already present") @m.it("Leaves the metadata unchanged") def test_updates_present_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): coll = Collection( @@ -280,7 +274,7 @@ def test_updates_present_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh) ) coll.add_metadata(AVU(TrackedSample.NAME, "name1")) - update_metadata( + apply_metadata( experiment_name="simple_experiment_001", instrument_slot=1, mlwh_session=ont_synthetic_mlwh, @@ -288,7 +282,8 @@ def test_updates_present_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh) assert AVU(TrackedSample.NAME, "name1") in coll.metadata() - @m.context("When incorrect metadata is present") + @m.context("When ONT metadata are updated") + @m.context("When incorrect metadata are present") @m.it("Changes the metadata and adds history metadata") def test_updates_changed_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): coll = Collection( @@ -297,7 +292,7 @@ def test_updates_changed_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh) ) coll.add_metadata(AVU(TrackedSample.NAME, "name0")) - update_metadata( + apply_metadata( experiment_name="simple_experiment_001", instrument_slot=1, mlwh_session=ont_synthetic_mlwh, @@ -309,6 +304,7 @@ def test_updates_changed_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh) AVU.history(AVU(TrackedSample.NAME, "name0")), coll.metadata() ) + @m.context("When ONT metadata are updated") @m.context("When an attribute has multiple incorrect values") @m.it("Groups those values in the history metadata") def test_updates_multiple_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh): @@ -319,7 +315,7 @@ def test_updates_multiple_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh coll.add_metadata(AVU(TrackedStudy.NAME, "Study A")) coll.add_metadata(AVU(TrackedStudy.NAME, "Study B")) - update_metadata( + apply_metadata( experiment_name="simple_experiment_001", instrument_slot=1, mlwh_session=ont_synthetic_mlwh, @@ -334,3 +330,115 @@ def test_updates_multiple_metadata(self, ont_synthetic_irods, ont_synthetic_mlwh ), coll.metadata(), ) + + +class TestONTPermissionsUpdate: + @tests_have_admin + @m.context("When ONT permissions are updated") + @m.it("Makes report files publicly readable") + def test_public_read_reports(self, ont_synthetic_irods, ont_synthetic_mlwh): + expt = "multiplexed_experiment_001" + slot = 1 + + path = ont_synthetic_irods / expt / "20190904_1514_GA10000_flowcell101_cf751ba1" + c = Component(experiment_name=expt, instrument_slot=slot) + expected_acl = [ + AC("irods", Permission.OWN, zone="testZone"), + AC("public", Permission.READ, zone="testZone"), + ] + + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + for ext in ["html", "md", "json.gz"]: + assert ( + DataObject(path / f"report_multiplexed_synthetic.{ext}").acl() + == expected_acl + ) + + @m.context("When ONT permissions are updated") + @m.context("When the experiment is single-sample") + @m.context("When data have had consent withdrawn") + @m.it("Does not restore access permissions") + def test_retains_consent_withdrawn(self, ont_synthetic_irods, ont_synthetic_mlwh): + zone = "testZone" + expt = "simple_experiment_001" + slot = 1 + + path = ont_synthetic_irods / expt / "20190904_1514_G100000_flowcell011_69126024" + c = Component(experiment_name=expt, instrument_slot=slot) + expected_acl = [AC("irods", perm=Permission.OWN, zone=zone)] + + coll = Collection(path) + assert ensure_consent_withdrawn(coll) + assert coll.acl() == expected_acl + + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + assert coll.acl() == expected_acl, f"ACL of {coll} is {expected_acl}" + for item in coll.contents(acl=True, recurse=True): + assert item.acl() == expected_acl, f"ACL of {item} is {expected_acl}" + + @m.context("When ONT permissions are updated") + @m.context("When the experiment is multiplexed") + @m.context("When data have had consent withdrawn") + @m.it("Does not restore access permissions") + def test_retains_consent_withdrawn_mx( + self, ont_synthetic_irods, ont_synthetic_mlwh + ): + zone = "testZone" + expt = "multiplexed_experiment_001" + slot = 1 + path = ( + ont_synthetic_irods + / "multiplexed_experiment_001/20190904_1514_GA10000_flowcell101_cf751ba1" + ) + c = Component(experiment_name=expt, instrument_slot=slot) + expected_acl = [AC("irods", perm=Permission.OWN, zone=zone)] + expected_report_acl = [ + AC("irods", perm=Permission.OWN, zone=zone), + AC("public", perm=Permission.READ, zone=zone), + ] + + coll = Collection(path) + sub_colls = ["fast5_fail", "fast5_pass", "fastq_fail", "fastq_pass"] + bc_colls = [ + it + for it in coll.contents(recurse=True) + if it.rods_type == Collection and "barcode" in it.path.name + ] + assert len(bc_colls) == 12 * len(sub_colls) + + for bc_coll in bc_colls: + assert ensure_consent_withdrawn( + bc_coll + ) # Mark barcode collections as consent withdrawn + + assert ( + bc_coll.acl() == expected_acl + ), f"ACL of barcode collection {bc_coll} is {expected_acl}" + for item in bc_coll.contents(acl=True, recurse=True): + assert ( + item.acl() == expected_acl + ), f"ACL of barcode collection member {item} is {expected_acl}" + + annotate_results_collection(path, c, mlwh_session=ont_synthetic_mlwh) + + for bc_coll in bc_colls: + assert bc_coll.acl() == expected_acl, f"ACL of {c} is {expected_acl}" + for item in bc_coll.contents(acl=True, recurse=True): + assert ( + item.acl() == expected_acl + ), f"ACL of barcode collection member {item} is {expected_acl}" + + assert ( + coll.acl() == expected_acl + ), f"ACL of root collection {coll} is {expected_acl}" + for item in coll.contents(acl=True, recurse=True): + if is_minknow_report(item): + assert ( + item.acl() == expected_report_acl + ), f"ACL of MinKNOW report {item} is {expected_report_acl}" + else: + assert ( + item.acl() == expected_acl + ), f"ACL of root collection member {item} is {expected_acl}" From ff553977d9df0390e7fce70faa3a42e74f0fff66 Mon Sep 17 00:00:00 2001 From: Keith James <47220353+kjsanger@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:47:44 +0100 Subject: [PATCH 2/3] Update src/npg_irods/metadata/lims.py Co-authored-by: mksanger <73285932+mksanger@users.noreply.github.com> --- src/npg_irods/metadata/lims.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/npg_irods/metadata/lims.py b/src/npg_irods/metadata/lims.py index 1326f0f3..73eab640 100644 --- a/src/npg_irods/metadata/lims.py +++ b/src/npg_irods/metadata/lims.py @@ -191,8 +191,8 @@ def has_consent_withdrawn_metadata( the collection or data object "fails closed" (unreadable), rather than "fails open" (readable). - A collection having has consent withdrawn metadata may imply that its contents, - should be likewise, recursively. This can be checked with the recurse keyword. + A collection having consent withdrawn metadata may imply that its contents + should have permissions removed recursively. This can be checked with the recurse keyword. Args: item: The collection or data object to check. From 952f590218dd323e4a4680c02eaa275b7d9afbbd Mon Sep 17 00:00:00 2001 From: Keith James Date: Mon, 17 Jul 2023 16:31:32 +0100 Subject: [PATCH 3/3] Document possible exception for the recurse keyword --- src/npg_irods/metadata/lims.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/npg_irods/metadata/lims.py b/src/npg_irods/metadata/lims.py index 73eab640..6794c6d8 100644 --- a/src/npg_irods/metadata/lims.py +++ b/src/npg_irods/metadata/lims.py @@ -197,7 +197,7 @@ def has_consent_withdrawn_metadata( Args: item: The collection or data object to check. recurse: Include a check of collection contents recursively. Defaults to False. - + If True and item is a DataObject, raises a ValueError. Returns: True if consent was withdrawn. """ @@ -230,7 +230,8 @@ def ensure_consent_withdrawn_metadata( Args: item: The collection or data object to update. - recurse: Apply to collection contents recursively. Defaults to False. + recurse: Apply to collection contents recursively. Defaults to False. If True + and item is a DataObject, raises a ValueError. Returns: True if metadata were added. @@ -288,7 +289,7 @@ def has_consent_withdrawn_permissions( Args: item: The collection or data object to check. recurse: Include a check of collection contents recursively. Defaults to False. - + If True and item is a DataObject, raises a ValueError. Returns: True if the permissions were as expected. """ @@ -322,7 +323,7 @@ def has_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool: Args: item: The collection or data object to check. recurse: Include a check of the collection contents recursively. Defaults to - False. + False. If True and item is a DataObject, raises a ValueError. Returns: True if the metadata and permissions were as expected. @@ -342,7 +343,8 @@ def ensure_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bo Args: item: The collection or data object to check. - recurse: Apply to collection contents recursively. Defaults to False. + recurse: Apply to collection contents recursively. Defaults to False. If True + and item is a DataObject, raises a ValueError. Returns: True if the metadata and/or permissions were updated.