Skip to content

Commit

Permalink
Add support for recursive consent withdrawn updates for ONT
Browse files Browse the repository at this point in the history
Add recurse keyword and recursive behaviour for collections to:

has_consent_withdrawn
has_consent_withdrawn_metadata
has_consent_withdrawn_permissions
ensure_consent_withdrawn

Add an is_minknow_report function to help in managing permissions.

Rename ont.update_metadata to ont.apply_metadata to distinguish it
from the secondary metadata update process.

Change do_*_update function names to update_* because they read better.
  • Loading branch information
kjsanger committed Jul 13, 2023
1 parent 4d22f3c commit ae2c34f
Show file tree
Hide file tree
Showing 7 changed files with 459 additions and 221 deletions.
4 changes: 2 additions & 2 deletions scripts/update-ont-metadata
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ from sqlalchemy.orm import Session

from npg_irods.cli import add_logging_arguments, configure_logging, parse_iso_date
from npg_irods.db import DBConfig
from npg_irods.ont import update_metadata
from npg_irods.ont import apply_metadata
from npg_irods.version import version

description = """
Expand Down Expand Up @@ -94,7 +94,7 @@ def main():

engine = sqlalchemy.create_engine(dbconfig.url)
with Session(engine) as session:
num_processed, num_updated, num_errors = update_metadata(
num_processed, num_updated, num_errors = apply_metadata(
session, since=args.begin_date, zone=args.zone
)

Expand Down
4 changes: 2 additions & 2 deletions src/npg_irods/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def infer_data_source(path: PathLike | str) -> Tuple[Platform, AnalysisType]:
raise ValueError(f"Failed to infer a data source for iRODS path '{path}'")


def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool:
def update_metadata(item: Collection | DataObject, avus: list[AVU]) -> bool:
"""Update metadata on an iRODS path, removing existing metadata and replacing with
the given AVUs and adding history of changes.
Expand All @@ -225,7 +225,7 @@ def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool:
return num_removed > 0 or num_added > 0


def do_permissions_update(
def update_permissions(
item: Collection | DataObject, acl: list[AC], recurse=False
) -> bool:
"""Update permissions on an iRODS path, removing existing permissions and replacing
Expand Down
19 changes: 12 additions & 7 deletions src/npg_irods/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@
from sqlalchemy.orm import Session
from structlog import get_logger

from npg_irods.common import do_metadata_update, do_permissions_update, infer_zone
from npg_irods.common import infer_zone, update_metadata, update_permissions
from npg_irods.db.mlwh import IseqFlowcell, IseqProductMetrics, Sample, Study
from npg_irods.metadata.common import SeqConcept, SeqSubset
from npg_irods.metadata.illumina import Instrument
from npg_irods.metadata.lims import (
ensure_consent_withdrawn,
has_consent_withdrawn_metadata,
make_sample_acl,
make_sample_metadata,
make_study_metadata,
Expand Down Expand Up @@ -190,7 +191,6 @@ def ensure_secondary_metadata_updated(
True if updated.
"""
zone = infer_zone(item)
updated = False
secondary_metadata, acl = [], []

components = [
Expand All @@ -205,14 +205,19 @@ def ensure_secondary_metadata_updated(
secondary_metadata.extend(make_study_metadata(fc.study))
acl.extend(make_sample_acl(fc.sample, fc.study, zone=zone))

updated = True if do_metadata_update(item, secondary_metadata) else updated
meta_update = update_metadata(item, secondary_metadata)

if any(c.contains_nonconsented_human() for c in components): # Illumina specific
updated = True if ensure_consent_withdrawn(item) else updated
cons_update = xahu_update = perm_update = False
if has_consent_withdrawn_metadata(item):
log.info("Consent withdrawn", path=item)
cons_update = ensure_consent_withdrawn(item)
elif any(c.contains_nonconsented_human() for c in components): # Illumina specific
log.info("Non-consented human data", path=item)
xahu_update = ensure_consent_withdrawn(item)
else:
updated = True if do_permissions_update(item, acl) else updated
perm_update = update_permissions(item, acl)

return updated
return any([meta_update, cons_update, xahu_update, perm_update])


def find_flowcells_by_component(
Expand Down
179 changes: 128 additions & 51 deletions src/npg_irods/metadata/lims.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ def make_public_read_acl() -> list[AC]:
return [AC("public", Permission.READ)]


def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
def has_consent_withdrawn_metadata(
item: Collection | DataObject, recurse=False
) -> bool:
"""Return True if the collection or data object is annotated in iRODS as having
donor consent withdrawn.
Expand All @@ -189,30 +191,63 @@ def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
the collection or data object "fails closed" (unreadable), rather than "fails open"
(readable).
A collection having has consent withdrawn metadata may imply that its contents,
should be likewise, recursively. This can be checked with the recurse keyword.
Args:
item: The collection or data object to check.
recurse: Include a check of collection contents recursively. Defaults to False.
Returns:
True if consent was withdrawn.
"""
meta = item.metadata()
if item.rods_type == DataObject and recurse:
raise ValueError(f"Cannot recursively check metadata on a data object: {item}")

gapi_withdrawn = AVU(TrackedSample.CONSENT, 0)
npg_withdrawn = AVU(TrackedSample.CONSENT_WITHDRAWN, 1)

def is_withdrawn(x):
meta = x.metadata()
return gapi_withdrawn in meta or npg_withdrawn in meta

return (
AVU(TrackedSample.CONSENT, 0) in meta
or AVU(TrackedSample.CONSENT_WITHDRAWN, 1) in meta
)
withdrawn_root = is_withdrawn(item)
withdrawn_child = True

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if not is_withdrawn(it):
withdrawn_child = False
break

def ensure_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
return withdrawn_root and withdrawn_child


def ensure_consent_withdrawn_metadata(
item: Collection | DataObject, recurse=False
) -> bool:
"""Ensure that consent withdrawn metadata are on the collection or data object.
Args:
item: The collection or data object to check.
item: The collection or data object to update.
recurse: Apply to collection contents recursively. Defaults to False.
Returns:
True if metadata were added.
"""
return ensure_avus_present(item, AVU(TrackedSample.CONSENT_WITHDRAWN, 1))
if item.rods_type == DataObject and recurse:
raise ValueError(f"Cannot recursively update metadata on a data object: {item}")

withdrawn_avu = AVU(TrackedSample.CONSENT_WITHDRAWN, 1)
updated_root = ensure_avus_present(item, withdrawn_avu)
updated_child = False

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if ensure_avus_present(it, withdrawn_avu):
updated_child = True

return updated_root or updated_child


def is_managed_access(ac: AC):
Expand Down Expand Up @@ -244,93 +279,135 @@ def has_mixed_ownership(acl: list[AC]):
return len({ac.user for ac in acl if is_managed_access(ac)}) > 1


def has_consent_withdrawn_permissions(item: Collection | DataObject) -> bool:
def has_consent_withdrawn_permissions(
item: Collection | DataObject, recurse=False
) -> bool:
"""Return True if the collection or data object has permissions expected for data
with consent withdrawn.
Args:
item: The collection or data object to check.
recurse: Include a check of collection contents recursively. Defaults to False.
Returns:
True if the permissions were as expected.
"""
if item.rods_type == DataObject and recurse:
raise ValueError(
f"Cannot recursively check permissions on a data object: {item}"
)

# Alternatively, we could keep a list of rodsadmin users who should have continued
# access e.g. in order to redact the data, and check that no other users are in the
# ACL. Using a list of rodsadmins would mean we don't need to use regex.
study_acl = [ac for ac in item.permissions() if is_managed_access(ac)]
def has_study_perms(x):
return any([ac for ac in x.permissions() if is_managed_access(ac)])

return not study_acl
withdrawn_root = not has_study_perms(item)
withdrawn_child = True

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if has_study_perms(it):
withdrawn_child = False
break

def has_consent_withdrawn(item: Collection | DataObject) -> bool:
return withdrawn_root and withdrawn_child


def has_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool:
"""Return True if the data object has metadata and permissions for data with consent
withdrawn.
Args:
item: The collection or data object to check.
recurse: Include a check of the collection contents recursively. Defaults to
False.
Returns:
True if the metadata and permissions were as expected.
"""
return has_consent_withdrawn_metadata(item) and has_consent_withdrawn_permissions(
item
)
return has_consent_withdrawn_metadata(
item, recurse=recurse
) and has_consent_withdrawn_permissions(item, recurse=recurse)


def ensure_consent_withdrawn(item: Collection | DataObject) -> bool:
"""Ensure that a data object has its metadata and permissions in the correct state
for having consent withdrawn. All read permissions are withdrawn except for:
def ensure_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool:
"""Ensure that a data object or collection has its metadata and permissions in the
correct state for having consent withdrawn. All read permissions are withdrawn
except for:
- The current user making these changes.
- Any rodsadmin.
Args:
item: The collection or data object to check.
recurse: Apply to collection contents recursively. Defaults to False.
Returns:
True if the metadata and/or permissions were updated.
"""
if has_consent_withdrawn(item):
if item.rods_type == DataObject and recurse:
raise ValueError(
f"Cannot recursively withdraw permissions on a data object: {item}"
)

if has_consent_withdrawn(item, recurse=recurse):
return False

if ensure_consent_withdrawn_metadata(item):
updated_meta = ensure_consent_withdrawn_metadata(item, recurse=recurse)
if updated_meta:
log.info(
"Updated metadata",
"Updated consent withdrawn metadata",
path=item,
has_withdrawn_meta=has_consent_withdrawn_metadata(item),
has_withdrawn_meta=has_consent_withdrawn_metadata(item, recurse=recurse),
)

to_remove = []
curr_user = current_user()
for ac in item.permissions():
u = rods_user(ac.user)

if u is None:
log.info("Removing permissions (non-local user)", path=item, ac=ac)
def withdraw(x): # Withdraw perms from a single path, return True if changes made
to_remove = []

for ac in x.permissions():
u = rods_user(ac.user)

if u is None:
log.info("Removing permissions (non-local user)", path=x, ac=ac)
to_remove.append(ac)
continue
if u == curr_user:
log.info(
"Not removing permissions for self", path=x, user=str(u), ac=ac
)
continue
if u.is_rodsadmin():
log.info(
"Not removing permissions for rodsadmin", path=x, user=str(u), ac=ac
)
continue

log.info("Removing permissions", path=x, user=str(u), ac=ac)
to_remove.append(ac)
continue

if u == curr_user:
log.info("Not removing permissions for self", path=item, user=str(u), ac=ac)
continue

if u.is_rodsadmin():
log.info(
"Not removing permissions for rodsadmin", path=item, user=str(u), ac=ac
)
continue

log.info("Removing permissions", path=item, user=str(u), ac=ac)
to_remove.append(ac)

num_removed = item.remove_permissions(*to_remove)
log.info(
"Removed permissions",
path=item,
num_removed=num_removed,
has_withdrawn_perm=has_consent_withdrawn_permissions(item),
)
return True

num_removed = item.remove_permissions(*to_remove)
log.info(
"Removed permissions",
path=x,
num_removed=num_removed,
has_withdrawn_perm=has_consent_withdrawn_permissions(x),
)

return num_removed > 0

updated_perms_root = withdraw(item)
updated_perms_child = False

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if withdraw(it):
updated_perms_child = True

return updated_meta or updated_perms_root or updated_perms_child


def has_id_product_metadata(obj: DataObject):
Expand Down
Loading

0 comments on commit ae2c34f

Please sign in to comment.