diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 089e784f..91b2ca49 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -12,7 +12,7 @@ jobs: shell: bash -l -e -o pipefail {0} env: - PYTHON_VERSION: "3.10" + PYTHON_VERSION: "3.11" SINGULARITY_VERSION: "3.11.1" strategy: diff --git a/pyproject.toml b/pyproject.toml index 962263c9..de1b3bfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ classifiers = [ "Programming Language :: Python" ] keywords = ["irods", "npg"] -requires-python = ">=3.10" +requires-python = ">=3.11" dynamic = ["version"] @@ -35,20 +35,21 @@ homepage = "https://github.com/wtsi-npg/npg-irods-python" repository = "https://github.com/wtsi-npg/npg-irods-python.git" [project.scripts] -"enhance-secondary-metadata" = "npg_irods.cli.enhance_secondary_metadata:main" "apply-ont-metadata" = "npg_irods.cli.apply_ont_metadata:main" "check-checksums" = "npg_irods.cli.check_checksums:main" "check-common-metadata" = "npg_irods.cli.check_common_metadata:main" "check-consent-withdrawn" = "npg_irods.cli.check_consent_withdrawn:main" "check-replicas" = "npg_irods.cli.check_replicas:main" "copy-confirm" = "npg_irods.cli.copy_confirm:main" +"enhance-secondary-metadata" = "npg_irods.cli.enhance_secondary_metadata:main" "locate-data-objects" = "npg_irods.cli.locate_data_objects:main" -"repair-common-metadata" = "npg_irods.cli.repair_common_metadata:main" "repair-checksums" = "npg_irods.cli.repair_checksums:main" +"repair-common-metadata" = "npg_irods.cli.repair_common_metadata:main" "repair-replicas" = "npg_irods.cli.repair_replicas:main" "safe-remove-script" = "npg_irods.cli.safe_remove_script:main" "update-secondary-metadata" = "npg_irods.cli.update_secondary_metadata:main" "withdraw-consent" = "npg_irods.cli.withdraw_consent:main" +"write-html-report" = "npg_irods.cli.write_html_report:main" [build-system] requires = ["setuptools>=41", "wheel", "setuptools-git-versioning<2"] diff 
--git a/requirements.txt b/requirements.txt index dbfee795..eb20ec0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ setuptools-git-versioning==2.0.0 setuptools==69.5.1 sqlalchemy==2.0.29 structlog==24.1.0 +yattag==1.14.0 diff --git a/src/npg_irods/cli/write_html_report.py b/src/npg_irods/cli/write_html_report.py new file mode 100644 index 00000000..4b0e64cc --- /dev/null +++ b/src/npg_irods/cli/write_html_report.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2024 Genome Research Ltd. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# @author Keith James + +import argparse +import sys + +import structlog +from partisan.exception import RodsError +from yattag import indent + +from npg_irods.cli.util import add_logging_arguments, configure_logging +from npg_irods.html_reports import ont_runs_html_report_this_year +from npg_irods.version import version + +description = """Writes an HTML report summarising data in iRODS. + +The reports include HTTP links to data objects and collections in iRODS. The links +are only accessible if the report is rendered by a web server that can access the +relevant iRODS zone. + +Available reports are: + + - ont: Oxford Nanopore sequencing data objects and collections. + + A summary of ONT runs for the calendar year to date. 
+ +""" + +parser = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, +) +add_logging_arguments(parser) +parser.add_argument( + "-o", + "--output", + help="Output filename.", + type=argparse.FileType("w", encoding="UTF-8"), + default=sys.stdout, +) +parser.add_argument( + "report", + help="Report type.", + type=str, + choices=["ont"], + nargs=1, +) +parser.add_argument( + "--zone", + help="Specify a federated iRODS zone in which to find data objects and/or " + "collections. This is not required if the target paths are on the local zone.", + type=str, +) +parser.add_argument( + "--version", help="Print the version and exit.", action="store_true" +) + +args = parser.parse_args() +configure_logging( + config_file=args.log_config, + debug=args.debug, + verbose=args.verbose, + colour=args.colour, + json=args.json, +) +log = structlog.get_logger("main") + + +def main(): + if args.version: + print(version()) + sys.exit(0) + + report = args.report[0] + + try: + match report: + case "ont": + doc = ont_runs_html_report_this_year(zone=args.zone) + case _: + raise ValueError(f"Invalid HTML report type '{report}'") + + print(indent(doc.getvalue()), file=args.output) + except RodsError as re: + log.error(re.message, code=re.code) + sys.exit(1) + except Exception as e: + log.error(e) + sys.exit(1) diff --git a/src/npg_irods/html_reports.py b/src/npg_irods/html_reports.py new file mode 100644 index 00000000..bbf88420 --- /dev/null +++ b/src/npg_irods/html_reports.py @@ -0,0 +1,333 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2024 Genome Research Ltd. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# @author Keith James

import calendar
import re
from collections import defaultdict
from datetime import datetime, timezone
from enum import StrEnum

from partisan.icommands import iquest
from partisan.irods import AC, AVU, Collection, DataObject, RodsItem
from partisan.metadata import DublinCore
from structlog import get_logger
from yattag import Doc, SimpleDoc

from npg_irods.metadata import ont
from npg_irods.ont import is_minknow_report

log = get_logger(__package__)

# Users whose access control entries are hidden from the report unless the
# caller asks for all of them.
_UNREPORTED_AC_USERS = frozenset(
    {
        "irods",
        "irods-g1",
        "ont1",
        "rodsBoot",
        "srpipe",
    }
)


class Tags(StrEnum):
    """HTML tags. Use to avoid typos, add as necessary."""

    html = "html"
    head = "head"
    link = "link"
    meta = "meta"
    body = "body"
    style = "style"
    title = "title"

    h1 = "h1"
    h2 = "h2"
    h3 = "h3"
    h4 = "h4"
    h5 = "h5"
    h6 = "h6"

    div = "div"
    span = "span"

    a = "a"
    p = "p"

    code = "code"
    pre = "pre"

    ol = "ol"
    ul = "ul"
    li = "li"

    img = "img"


class Styles(StrEnum):
    """CSS classes. Use to avoid typos, add as necessary."""

    container = "container"

    main_cell = "main-cell"
    top_cell = "top-cell"
    top_left_cell = "top-left-cell"
    top_right_cell = "top-right-cell"

    url_cell = "url-cell"
    url_grid = "url-grid"

    acl_header = "acl-header"
    info_header = "info-header"
    metadata_header = "metadata-header"
    path_header = "path-header"

    acl_bag = "acl-bag"
    acl_cell = "acl-cell"
    acl_item = "acl-item"

    info_bag = "info-bag"
    info_cell = "info-cell"
    info_item = "info-item"

    metadata_bag = "metadata-bag"
    metadata_cell = "metadata-cell"
    metadata_item = "metadata-item"


def ont_runs_this_year(zone: str | None = None) -> list[tuple[Collection, datetime]]:
    """Query iRODS to find all ONT runs for the current year.

    Args:
        zone: The zone to query. Optional; when None no zone option is passed
            to iquest.

    Returns:
        For each run, a tuple of the annotated run-folder collection and the
        creation timestamp (a naive datetime expressed in UTC).
    """
    # Work in UTC throughout: the creation time is parsed below as seconds
    # since the epoch, so the year boundary must be in the same timezone.
    start_of_year = datetime(
        datetime.now(timezone.utc).year, 1, 1, tzinfo=timezone.utc
    )

    # One would hope that the following would work, but it doesn't; iRODS seems to
    # ignore the "and" clause on COLL_CREATE_TIME and returns all collections
    # with the specified metadata.
    #
    # For whatever reason, iRODS stores timestamps as varchar left-padded with '0' to
    # a width of 11 characters, so we need to compare lexically, with the argument
    # similarly padded.
    #
    # Testing with hand-crafted iquest commands shows the COLL_CREATE_TIME is ignored.
    #
    # sec_since_epoch = (start_of_year.utcnow() - datetime(1970, 1, 1)).total_seconds()
    #
    # args = [
    #     "%s %s",
    #     "-z",
    #     "seq",
    #     "select COLL_NAME, COLL_CREATE_TIME "
    #     f"where META_COLL_ATTR_NAME = '{ont.Instrument.EXPERIMENT_NAME}' "
    #     f"and COLL_CREATE_TIME >= '{sec_since_epoch:011.0f}'",
    # ]
    #
    # Instead, we need to get all collections with the specified metadata and filter.
    # The physical capacity of the lab limits this number to low hundreds per year, but
    # we will need to revisit this if the number of collections becomes too large.
    args = ["%s\t%s"]

    if zone is not None:
        args.extend(["-z", zone])

    query = (
        "select COLL_NAME, COLL_CREATE_TIME "
        f"where META_COLL_ATTR_NAME = '{ont.Instrument.EXPERIMENT_NAME}'"
    )

    log.info("Querying iRODS for ONT runs this year", year=start_of_year.year)

    colls = []
    for n, line in enumerate(iquest(*args, query).splitlines()):
        # The first line may be a "Zone is ..." banner rather than a result.
        if n == 0 and re.match(r"^Zone is", line):
            continue
        if not line.strip():
            continue

        try:
            path, timestamp = line.split("\t")
            created = datetime.fromtimestamp(int(timestamp), tz=timezone.utc)
            if created >= start_of_year:
                # Hand back a naive (UTC) datetime, as callers have always received.
                colls.append((Collection(path), created.replace(tzinfo=None)))
        except Exception as e:
            # Best effort: one malformed line must not abort the whole report.
            log.error("Error processing iquest result line", n=n, line=line, error=e)

    return colls


def ont_runs_html_report_this_year(
    zone: str | None = None, all_avu: bool = False, all_ac: bool = False
) -> SimpleDoc:
    """Generate an HTML report of all ONT runs for the current year

    Runs are grouped under one heading per calendar month of creation. Each run
    collection is listed with its metadata, followed by any data objects within
    it for which is_minknow_report() is true, each with its creation time, size,
    access control list and metadata.

    Args:
        zone: The zone to query. Optional, defaults to the current zone.
        all_avu: Report all AVUs, even those that are system-related and not normally
            relevant to data customers.
        all_ac: Report all access control entries, even those that are system-related
            and not normally relevant to data customers.
    Returns:
        A yattag SimpleDoc object containing the HTML report.
    """
    now = datetime.now()

    colls_by_month: defaultdict[int, list[Collection]] = defaultdict(list)
    for run_coll, created in ont_runs_this_year(zone=zone):
        colls_by_month[created.month].append(run_coll)

    # ONT instrument attributes that are hidden unless all_avu is set. Built
    # once here rather than on every report_avu() call.
    unreported_ont_attrs = frozenset(
        term.value
        for term in (
            ont.Instrument.DISTRIBUTION_VERSION,
            ont.Instrument.GUPPY_VERSION,
            ont.Instrument.HOSTNAME,
            ont.Instrument.PROTOCOL_GROUP_ID,
            ont.Instrument.RUN_ID,
        )
    )

    doc, tag, text, line = Doc().ttl()

    def report_ac(ac: AC) -> bool:
        """Return True if the AC should be reported."""
        return all_ac or ac.user not in _UNREPORTED_AC_USERS

    def report_avu(avu: AVU) -> bool:
        """Return True if the AVU should be reported."""
        if all_avu:
            return True
        if avu.namespace in (AVU.IRODS_NAMESPACE, DublinCore.namespace):
            return False
        return not (
            avu.namespace == ont.Instrument.namespace
            and avu.without_namespace in unreported_ont_attrs
        )

    def do_info_cell(x: DataObject):
        """Add an info cell (data object size, creation timestamp) to the report."""
        with tag(Tags.div, klass=Styles.info_cell):
            with tag(Tags.div, klass=Styles.info_bag):
                # Use doc.asis to insert non-breaking spaces
                with tag(Tags.div, klass=Styles.info_item):
                    doc.asis(f"{x.created().strftime('%Y-%m-%d&nbsp;%H:%M:%S')}")
                with tag(Tags.div, klass=Styles.info_item):
                    doc.asis(f"{x.size()}&nbsp;B")

    def do_acl_cell(x: RodsItem):
        """Add an ACL cell to the report, if the ACL is not empty."""
        to_report = [ac for ac in x.acl() if report_ac(ac)]
        if not to_report:
            return

        with tag(Tags.div, klass=Styles.acl_cell):
            with tag(Tags.div, klass=Styles.acl_bag):
                for ac in to_report:
                    line(Tags.div, str(ac), klass=Styles.acl_item)

    def do_metadata_cell(x: RodsItem):
        """Add a metadata cell to the report, if AVUs are present."""
        to_report = [avu for avu in x.metadata() if report_avu(avu)]
        if not to_report:
            return

        with tag(Tags.div, klass=Styles.metadata_cell):
            with tag(Tags.div, klass=Styles.metadata_bag):
                for avu in to_report:
                    with tag(Tags.div, klass=Styles.metadata_item):
                        text(f"{avu.attribute}={avu.value}")

    def do_contents(c: Collection):
        """Add a row for each MinKNOW report data object found in the collection."""
        for item in c.contents(acl=True, avu=True):
            if item.rods_type == DataObject and is_minknow_report(item):
                with tag(Tags.div, klass=Styles.url_cell):
                    with tag(Tags.a, href=str(item)):
                        # Label with the collection passed in, not a variable
                        # captured from the enclosing scope.
                        text(f"{c.path.name}/{item.name}")
                do_info_cell(item)
                do_acl_cell(item)
                do_metadata_cell(item)

    doc.asis("<!DOCTYPE html>")

    with tag(Tags.html):
        with tag(Tags.head):
            with tag(Tags.title):
                text(f"ONT runs for {now.year}")

            doc.stag(Tags.link, href="style.css", rel="stylesheet")

        with tag(Tags.body):
            with tag(Tags.div, klass=Styles.container):
                # Top row cells containing title and report metadata
                with tag(Tags.div, klass=Styles.top_left_cell):
                    text("")
                with tag(Tags.div, klass=Styles.top_right_cell):
                    text(f"Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}")
                with tag(Tags.div, klass=Styles.top_cell):
                    line(Tags.h1, "ONT Meta-report")

                # Main cell containing the report content
                with tag(Tags.div, klass=Styles.main_cell):
                    for month in sorted(colls_by_month):
                        colls = colls_by_month[month]
                        log.debug("Found ONT runs for month", month=month, n=len(colls))

                        with tag(Tags.h2):
                            text(f"{calendar.month_name[month]} {now.year}")

                        with tag(Tags.div, klass=Styles.url_grid):
                            # Header row of the grid
                            with tag(Tags.div, klass=Styles.url_cell):
                                line(
                                    Tags.h3,
                                    "iRODS Path",
                                    klass=Styles.path_header,
                                )
                            with tag(Tags.div, klass=Styles.info_cell):
                                line(
                                    Tags.h3,
                                    "Created/Size",
                                    klass=Styles.info_header,
                                )
                            with tag(Tags.div, klass=Styles.acl_cell):
                                line(
                                    Tags.h3,
                                    "Access Control List",
                                    klass=Styles.acl_header,
                                )
                            with tag(Tags.div, klass=Styles.metadata_cell):
                                line(
                                    Tags.h3,
                                    "Metadata",
                                    klass=Styles.metadata_header,
                                )

                            for coll in colls:
                                with tag(Tags.div, klass=Styles.url_cell):
                                    with tag(Tags.a, href=str(coll)):
                                        text(coll.path.name)

                                # Don't report on the collection's ACL because it
                                # can be huge
                                do_metadata_cell(coll)

                                do_contents(coll)

    return doc
text-align: left; + grid-column: col 1 / span 10; +} + +.info-cell { + font-size: smaller; + grid-column: col 11 / span 4; +} + +.acl-cell { + font-size: smaller; + grid-column: col 15 / span 4; +} + +.metadata-cell { + font-size: smaller; + grid-column: col 19 / span 6; +} + +.info-bag { + padding: 0.2rem; + border-radius: calc(0.5rem); + gap: 0.2rem; + background-color: var(--border); + display: flex; + flex-wrap: nowrap; +} + +.info-item { + padding: 0.4rem; + border-radius: calc(0.5rem); + text-align: right; + font-size: x-small; + background-color: var(--default-inf-item-background-color); +} + +.acl-bag { + padding: 0.2rem; + border-radius: calc(0.5rem); + gap: 0.2rem; + background-color: var(--border); + display: flex; + flex-wrap: wrap; +} + +.acl-item { + padding: 0.2rem; + border-radius: calc(0.5rem); + text-align: center; + font-size: x-small; + background-color: var(--default-acl-item-background-color); +} + +.metadata-bag { + padding: 0.2rem; + border-radius: calc(0.5rem); + gap: 0.2rem; + background-color: var(--border); + display: flex; + flex-wrap: wrap; +} + +.metadata-header { + text-align: right; +} + +.metadata-item { + padding: 0.2rem; + border-radius: calc(0.5rem); + text-align: center; + font-size: x-small; + background-color: var(--default-met-item-background-color); +} diff --git a/tests/ont/test_html_reports.py b/tests/ont/test_html_reports.py new file mode 100644 index 00000000..53109a1f --- /dev/null +++ b/tests/ont/test_html_reports.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2024 Genome Research Ltd. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# @author Keith James + +from pytest import mark as m +from yattag import indent + +from npg_irods.html_reports import ont_runs_html_report_this_year + + +@m.describe("HTML Meta-Reports") +class TestHTMLReports: + @m.context("When an ONT metadata report is generated") + @m.it("Contains the expected number of links to iRODS objects and collections") + def test_ont_runs_html_report(self, ont_synthetic_irods): + doc = ont_runs_html_report_this_year(zone="testZone") + + # Uncomment to write the HTML to a file for manual inspection + # + # with open("ont_meta_report.html", "w") as f: + # f.write(indent((doc.getvalue()))) + + links = [x for x in doc.result if x.startswith('