Skip to content

Commit

Permalink
Merge pull request wtsi-npg#172 from kjsanger/feature/recursive-conse…
Browse files Browse the repository at this point in the history
…nt-withdrawn

Add support for recursive consent withdrawn updates for ONT
  • Loading branch information
mksanger authored Jul 17, 2023
2 parents 8b6427e + 952f590 commit 4b7dc7d
Show file tree
Hide file tree
Showing 7 changed files with 463 additions and 223 deletions.
4 changes: 2 additions & 2 deletions scripts/update-ont-metadata
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ from sqlalchemy.orm import Session

from npg_irods.cli import add_logging_arguments, configure_logging, parse_iso_date
from npg_irods.db import DBConfig
from npg_irods.ont import update_metadata
from npg_irods.ont import apply_metadata
from npg_irods.version import version

description = """
Expand Down Expand Up @@ -94,7 +94,7 @@ def main():

engine = sqlalchemy.create_engine(dbconfig.url)
with Session(engine) as session:
num_processed, num_updated, num_errors = update_metadata(
num_processed, num_updated, num_errors = apply_metadata(
session, since=args.begin_date, zone=args.zone
)

Expand Down
4 changes: 2 additions & 2 deletions src/npg_irods/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def infer_data_source(path: PathLike | str) -> Tuple[Platform, AnalysisType]:
raise ValueError(f"Failed to infer a data source for iRODS path '{path}'")


def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool:
def update_metadata(item: Collection | DataObject, avus: list[AVU]) -> bool:
"""Update metadata on an iRODS path, removing existing metadata and replacing with
the given AVUs and adding history of changes.
Expand All @@ -225,7 +225,7 @@ def do_metadata_update(item: Collection | DataObject, avus: list[AVU]) -> bool:
return num_removed > 0 or num_added > 0


def do_permissions_update(
def update_permissions(
item: Collection | DataObject, acl: list[AC], recurse=False
) -> bool:
"""Update permissions on an iRODS path, removing existing permissions and replacing
Expand Down
19 changes: 12 additions & 7 deletions src/npg_irods/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@
from sqlalchemy.orm import Session
from structlog import get_logger

from npg_irods.common import do_metadata_update, do_permissions_update, infer_zone
from npg_irods.common import infer_zone, update_metadata, update_permissions
from npg_irods.db.mlwh import IseqFlowcell, IseqProductMetrics, Sample, Study
from npg_irods.metadata.common import SeqConcept, SeqSubset
from npg_irods.metadata.illumina import Instrument
from npg_irods.metadata.lims import (
ensure_consent_withdrawn,
has_consent_withdrawn_metadata,
make_sample_acl,
make_sample_metadata,
make_study_metadata,
Expand Down Expand Up @@ -190,7 +191,6 @@ def ensure_secondary_metadata_updated(
True if updated.
"""
zone = infer_zone(item)
updated = False
secondary_metadata, acl = [], []

components = [
Expand All @@ -205,14 +205,19 @@ def ensure_secondary_metadata_updated(
secondary_metadata.extend(make_study_metadata(fc.study))
acl.extend(make_sample_acl(fc.sample, fc.study, zone=zone))

updated = True if do_metadata_update(item, secondary_metadata) else updated
meta_update = update_metadata(item, secondary_metadata)

if any(c.contains_nonconsented_human() for c in components): # Illumina specific
updated = True if ensure_consent_withdrawn(item) else updated
cons_update = xahu_update = perm_update = False
if has_consent_withdrawn_metadata(item):
log.info("Consent withdrawn", path=item)
cons_update = ensure_consent_withdrawn(item)
elif any(c.contains_nonconsented_human() for c in components): # Illumina specific
log.info("Non-consented human data", path=item)
xahu_update = ensure_consent_withdrawn(item)
else:
updated = True if do_permissions_update(item, acl) else updated
perm_update = update_permissions(item, acl)

return updated
return any([meta_update, cons_update, xahu_update, perm_update])


def find_flowcells_by_component(
Expand Down
185 changes: 132 additions & 53 deletions src/npg_irods/metadata/lims.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ def make_public_read_acl() -> list[AC]:
return [AC("public", Permission.READ)]


def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
def has_consent_withdrawn_metadata(
item: Collection | DataObject, recurse=False
) -> bool:
"""Return True if the collection or data object is annotated in iRODS as having
donor consent withdrawn.
Expand All @@ -189,30 +191,64 @@ def has_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
the collection or data object "fails closed" (unreadable), rather than "fails open"
(readable).
A collection having consent withdrawn metadata may imply that its contents
should have permissions removed recursively. This can be checked with the recurse keyword.
Args:
item: The collection or data object to check.
recurse: Include a check of collection contents recursively. Defaults to False.
If True and item is a DataObject, raises a ValueError.
Returns:
True if consent was withdrawn.
"""
meta = item.metadata()
if item.rods_type == DataObject and recurse:
raise ValueError(f"Cannot recursively check metadata on a data object: {item}")

gapi_withdrawn = AVU(TrackedSample.CONSENT, 0)
npg_withdrawn = AVU(TrackedSample.CONSENT_WITHDRAWN, 1)

def is_withdrawn(x):
meta = x.metadata()
return gapi_withdrawn in meta or npg_withdrawn in meta

withdrawn_root = is_withdrawn(item)
withdrawn_child = True

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if not is_withdrawn(it):
withdrawn_child = False
break

return (
AVU(TrackedSample.CONSENT, 0) in meta
or AVU(TrackedSample.CONSENT_WITHDRAWN, 1) in meta
)
return withdrawn_root and withdrawn_child


def ensure_consent_withdrawn_metadata(item: Collection | DataObject) -> bool:
def ensure_consent_withdrawn_metadata(
item: Collection | DataObject, recurse=False
) -> bool:
"""Ensure that consent withdrawn metadata are on the collection or data object.
Args:
item: The collection or data object to check.
item: The collection or data object to update.
recurse: Apply to collection contents recursively. Defaults to False. If True
and item is a DataObject, raises a ValueError.
Returns:
True if metadata were added.
"""
return ensure_avus_present(item, AVU(TrackedSample.CONSENT_WITHDRAWN, 1))
if item.rods_type == DataObject and recurse:
raise ValueError(f"Cannot recursively update metadata on a data object: {item}")

withdrawn_avu = AVU(TrackedSample.CONSENT_WITHDRAWN, 1)
updated_root = ensure_avus_present(item, withdrawn_avu)
updated_child = False

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if ensure_avus_present(it, withdrawn_avu):
updated_child = True

return updated_root or updated_child


def is_managed_access(ac: AC):
Expand Down Expand Up @@ -244,93 +280,136 @@ def has_mixed_ownership(acl: list[AC]):
return len({ac.user for ac in acl if is_managed_access(ac)}) > 1


def has_consent_withdrawn_permissions(item: Collection | DataObject) -> bool:
def has_consent_withdrawn_permissions(
item: Collection | DataObject, recurse=False
) -> bool:
"""Return True if the collection or data object has permissions expected for data
with consent withdrawn.
Args:
item: The collection or data object to check.
recurse: Include a check of collection contents recursively. Defaults to False.
If True and item is a DataObject, raises a ValueError.
Returns:
True if the permissions were as expected.
"""
if item.rods_type == DataObject and recurse:
raise ValueError(
f"Cannot recursively check permissions on a data object: {item}"
)

# Alternatively, we could keep a list of rodsadmin users who should have continued
# access e.g. in order to redact the data, and check that no other users are in the
# ACL. Using a list of rodsadmins would mean we don't need to use regex.
study_acl = [ac for ac in item.permissions() if is_managed_access(ac)]
def has_study_perms(x):
return any([ac for ac in x.permissions() if is_managed_access(ac)])

withdrawn_root = not has_study_perms(item)
withdrawn_child = True

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if has_study_perms(it):
withdrawn_child = False
break

return not study_acl
return withdrawn_root and withdrawn_child


def has_consent_withdrawn(item: Collection | DataObject) -> bool:
def has_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool:
"""Return True if the data object has metadata and permissions for data with consent
withdrawn.
Args:
item: The collection or data object to check.
recurse: Include a check of the collection contents recursively. Defaults to
False. If True and item is a DataObject, raises a ValueError.
Returns:
True if the metadata and permissions were as expected.
"""
return has_consent_withdrawn_metadata(item) and has_consent_withdrawn_permissions(
item
)
return has_consent_withdrawn_metadata(
item, recurse=recurse
) and has_consent_withdrawn_permissions(item, recurse=recurse)


def ensure_consent_withdrawn(item: Collection | DataObject) -> bool:
"""Ensure that a data object has its metadata and permissions in the correct state
for having consent withdrawn. All read permissions are withdrawn except for:
def ensure_consent_withdrawn(item: Collection | DataObject, recurse=False) -> bool:
"""Ensure that a data object or collection has its metadata and permissions in the
correct state for having consent withdrawn. All read permissions are withdrawn
except for:
- The current user making these changes.
- Any rodsadmin.
Args:
item: The collection or data object to check.
recurse: Apply to collection contents recursively. Defaults to False. If True
and item is a DataObject, raises a ValueError.
Returns:
True if the metadata and/or permissions were updated.
"""
if has_consent_withdrawn(item):
if item.rods_type == DataObject and recurse:
raise ValueError(
f"Cannot recursively withdraw permissions on a data object: {item}"
)

if has_consent_withdrawn(item, recurse=recurse):
return False

if ensure_consent_withdrawn_metadata(item):
updated_meta = ensure_consent_withdrawn_metadata(item, recurse=recurse)
if updated_meta:
log.info(
"Updated metadata",
"Updated consent withdrawn metadata",
path=item,
has_withdrawn_meta=has_consent_withdrawn_metadata(item),
has_withdrawn_meta=has_consent_withdrawn_metadata(item, recurse=recurse),
)

to_remove = []
curr_user = current_user()
for ac in item.permissions():
u = rods_user(ac.user)

if u is None:
log.info("Removing permissions (non-local user)", path=item, ac=ac)
def withdraw(x): # Withdraw perms from a single path, return True if changes made
to_remove = []

for ac in x.permissions():
u = rods_user(ac.user)

if u is None:
log.info("Removing permissions (non-local user)", path=x, ac=ac)
to_remove.append(ac)
continue
if u == curr_user:
log.info(
"Not removing permissions for self", path=x, user=str(u), ac=ac
)
continue
if u.is_rodsadmin():
log.info(
"Not removing permissions for rodsadmin", path=x, user=str(u), ac=ac
)
continue

log.info("Removing permissions", path=x, user=str(u), ac=ac)
to_remove.append(ac)
continue

if u == curr_user:
log.info("Not removing permissions for self", path=item, user=str(u), ac=ac)
continue

if u.is_rodsadmin():
log.info(
"Not removing permissions for rodsadmin", path=item, user=str(u), ac=ac
)
continue

log.info("Removing permissions", path=item, user=str(u), ac=ac)
to_remove.append(ac)

num_removed = item.remove_permissions(*to_remove)
log.info(
"Removed permissions",
path=item,
num_removed=num_removed,
has_withdrawn_perm=has_consent_withdrawn_permissions(item),
)
return True

num_removed = item.remove_permissions(*to_remove)
log.info(
"Removed permissions",
path=x,
num_removed=num_removed,
has_withdrawn_perm=has_consent_withdrawn_permissions(x),
)

return num_removed > 0

updated_perms_root = withdraw(item)
updated_perms_child = False

if item.rods_type == Collection and recurse:
for it in item.iter_contents(recurse=True):
if withdraw(it):
updated_perms_child = True

return updated_meta or updated_perms_root or updated_perms_child


def has_id_product_metadata(obj: DataObject):
Expand Down
Loading

0 comments on commit 4b7dc7d

Please sign in to comment.