diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 089e784f..91b2ca49 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -12,7 +12,7 @@ jobs:
shell: bash -l -e -o pipefail {0}
env:
- PYTHON_VERSION: "3.10"
+ PYTHON_VERSION: "3.11"
SINGULARITY_VERSION: "3.11.1"
strategy:
diff --git a/pyproject.toml b/pyproject.toml
index 962263c9..de1b3bfd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ classifiers = [
"Programming Language :: Python"
]
keywords = ["irods", "npg"]
-requires-python = ">=3.10"
+requires-python = ">=3.11"
dynamic = ["version"]
@@ -35,20 +35,21 @@ homepage = "https://github.com/wtsi-npg/npg-irods-python"
repository = "https://github.com/wtsi-npg/npg-irods-python.git"
[project.scripts]
-"enhance-secondary-metadata" = "npg_irods.cli.enhance_secondary_metadata:main"
"apply-ont-metadata" = "npg_irods.cli.apply_ont_metadata:main"
"check-checksums" = "npg_irods.cli.check_checksums:main"
"check-common-metadata" = "npg_irods.cli.check_common_metadata:main"
"check-consent-withdrawn" = "npg_irods.cli.check_consent_withdrawn:main"
"check-replicas" = "npg_irods.cli.check_replicas:main"
"copy-confirm" = "npg_irods.cli.copy_confirm:main"
+"enhance-secondary-metadata" = "npg_irods.cli.enhance_secondary_metadata:main"
"locate-data-objects" = "npg_irods.cli.locate_data_objects:main"
-"repair-common-metadata" = "npg_irods.cli.repair_common_metadata:main"
"repair-checksums" = "npg_irods.cli.repair_checksums:main"
+"repair-common-metadata" = "npg_irods.cli.repair_common_metadata:main"
"repair-replicas" = "npg_irods.cli.repair_replicas:main"
"safe-remove-script" = "npg_irods.cli.safe_remove_script:main"
"update-secondary-metadata" = "npg_irods.cli.update_secondary_metadata:main"
"withdraw-consent" = "npg_irods.cli.withdraw_consent:main"
+"write-html-report" = "npg_irods.cli.write_html_report:main"
[build-system]
requires = ["setuptools>=41", "wheel", "setuptools-git-versioning<2"]
diff --git a/requirements.txt b/requirements.txt
index dbfee795..eb20ec0e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ setuptools-git-versioning==2.0.0
setuptools==69.5.1
sqlalchemy==2.0.29
structlog==24.1.0
+yattag==1.14.0
diff --git a/src/npg_irods/cli/write_html_report.py b/src/npg_irods/cli/write_html_report.py
new file mode 100644
index 00000000..4b0e64cc
--- /dev/null
+++ b/src/npg_irods/cli/write_html_report.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2024 Genome Research Ltd. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# @author Keith James
+
+import argparse
+import sys
+
+import structlog
+from partisan.exception import RodsError
+from yattag import indent
+
+from npg_irods.cli.util import add_logging_arguments, configure_logging
+from npg_irods.html_reports import ont_runs_html_report_this_year
+from npg_irods.version import version
+
+# Help text passed to argparse as the parser description. The parser below uses
+# RawDescriptionHelpFormatter, so the line breaks and list layout written here
+# are shown to the user as-is rather than being re-wrapped.
+description = """Writes an HTML report summarising data in iRODS.
+
+The reports include HTTP links to data objects and collections in iRODS. The links
+are only accessible if the report is rendered by a web server that can access the
+relevant iRODS zone.
+
+Available reports are:
+
+ - ont: Oxford Nanopore sequencing data objects and collections.
+
+ A summary of ONT runs for the calendar year to date.
+
+"""
+
+parser = argparse.ArgumentParser(
+    description=description,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+add_logging_arguments(parser)
+parser.add_argument(
+    "-o",
+    "--output",
+    help="Output filename.",
+    type=argparse.FileType("w", encoding="UTF-8"),
+    default=sys.stdout,
+)
+# nargs=1 makes argparse store the chosen report type as a one-element list.
+parser.add_argument(
+    "report",
+    help="Report type.",
+    type=str,
+    choices=["ont"],
+    nargs=1,
+)
+parser.add_argument(
+    "--zone",
+    help="Specify a federated iRODS zone in which to find data objects and/or "
+    "collections. This is not required if the target paths are on the local zone.",
+    type=str,
+)
+# Use argparse's built-in "version" action. It prints and exits while the
+# arguments are being parsed, so `--version` works on its own. A store_true flag
+# checked after parse_args() could never be reached without also supplying the
+# required positional `report` argument.
+parser.add_argument(
+    "--version",
+    help="Print the version and exit.",
+    action="version",
+    version=str(version()),
+)
+
+args = parser.parse_args()
+configure_logging(
+    config_file=args.log_config,
+    debug=args.debug,
+    verbose=args.verbose,
+    colour=args.colour,
+    json=args.json,
+)
+log = structlog.get_logger("main")
+
+
+def main():
+    """Write the requested HTML report to the output file (default STDOUT).
+
+    Reads the module-level parsed arguments. Any error raised while building or
+    writing the report is logged and the process exits with status 1.
+    """
+    report = args.report[0]
+
+    try:
+        match report:
+            case "ont":
+                doc = ont_runs_html_report_this_year(zone=args.zone)
+            case _:
+                # argparse `choices` should already prevent this; kept as a guard
+                # for when a new choice is added without a matching case.
+                raise ValueError(f"Invalid HTML report type '{report}'")
+
+        print(indent(doc.getvalue()), file=args.output)
+    except RodsError as err:
+        log.error(err.message, code=err.code)
+        sys.exit(1)
+    except Exception as e:
+        # Top-level boundary of the script: report the failure and exit non-zero.
+        log.error(e)
+        sys.exit(1)
diff --git a/src/npg_irods/html_reports.py b/src/npg_irods/html_reports.py
new file mode 100644
index 00000000..bbf88420
--- /dev/null
+++ b/src/npg_irods/html_reports.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2024 Genome Research Ltd. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# @author Keith James
+
+import calendar
+import re
+from collections import defaultdict
+from datetime import datetime
+from enum import StrEnum
+
+from partisan.icommands import iquest
+from partisan.irods import AC, AVU, Collection, DataObject, RodsItem
+from partisan.metadata import DublinCore
+from structlog import get_logger
+from yattag import Doc, SimpleDoc
+
+from npg_irods.metadata import ont
+from npg_irods.ont import is_minknow_report
+
+log = get_logger(__package__)
+
+
+class Tags(StrEnum):
+ """HTML tag names used when building reports.
+
+ Each member's value is the literal tag name. Because this is a StrEnum, a member
+ can be used wherever a tag name string is expected, including in f-strings.
+ Use to avoid typos, add as necessary.
+ """
+
+ # Document structure and head elements
+ html = "html"
+ head = "head"
+ link = "link"
+ meta = "meta"
+ body = "body"
+ style = "style"
+ title = "title"
+
+ # Headings
+ h1 = "h1"
+ h2 = "h2"
+ h3 = "h3"
+ h4 = "h4"
+ h5 = "h5"
+ h6 = "h6"
+
+ # Generic containers
+ div = "div"
+ span = "span"
+
+ # Anchors and paragraphs
+ a = "a"
+ p = "p"
+
+ # Code and preformatted text
+ code = "code"
+ pre = "pre"
+
+ # Lists
+ ol = "ol"
+ ul = "ul"
+ li = "li"
+
+ # Images
+ img = "img"
+
+
+class Styles(StrEnum):
+ """CSS class names applied to report elements.
+
+ Each member's value is the class name as written in the HTML `class` attribute
+ (hyphenated), while the member name is the Python-friendly underscore form.
+ Use to avoid typos, add as necessary.
+ """
+
+ # Outer page grid
+ container = "container"
+
+ # Cells of the outer page grid
+ main_cell = "main-cell"
+ top_cell = "top-cell"
+ top_left_cell = "top-left-cell"
+ top_right_cell = "top-right-cell"
+
+ # Per-month grid of iRODS paths and the cell holding each path link
+ url_cell = "url-cell"
+ url_grid = "url-grid"
+
+ # Column headings of the per-month grid
+ acl_header = "acl-header"
+ info_header = "info-header"
+ metadata_header = "metadata-header"
+ path_header = "path-header"
+
+ # Access control list: grid cell, the bag wrapping the entries, and one entry
+ acl_bag = "acl-bag"
+ acl_cell = "acl-cell"
+ acl_item = "acl-item"
+
+ # Data object info (created/size): grid cell, wrapping bag, and one entry
+ info_bag = "info-bag"
+ info_cell = "info-cell"
+ info_item = "info-item"
+
+ # Metadata (AVUs): grid cell, wrapping bag, and one entry
+ metadata_bag = "metadata-bag"
+ metadata_cell = "metadata-cell"
+ metadata_item = "metadata-item"
+
+
+def ont_runs_this_year(zone: str | None = None) -> list[tuple[Collection, datetime]]:
+    """Query iRODS to find all ONT runs created in the current calendar year.
+
+    Collections are found by the presence of the ONT experiment name metadata
+    attribute and then filtered locally on their creation time. Result lines that
+    cannot be parsed are logged and skipped rather than aborting the whole query.
+
+    Args:
+        zone: The iRODS zone to query. Optional; when None, no zone argument is
+            passed to iquest.
+
+    Returns:
+        For each run, a tuple of the annotated run-folder collection and the
+        creation timestamp (a naive datetime in UTC).
+    """
+    # Function-scope import keeps this change independent of the module imports.
+    from datetime import timezone
+
+    # The timestamps below are converted from seconds since the epoch to naive UTC
+    # datetimes, so the year boundary is calculated in UTC too.
+    start_of_year = datetime(datetime.now(timezone.utc).year, 1, 1)
+
+    # One would hope that the following would work, but it doesn't; iRODS seems to
+    # ignore the "and" clause on COLL_CREATE_TIME and returns all collections
+    # with the specified metadata.
+    #
+    # For whatever reason, iRODS stores timestamps as varchar left-padded with '0' to
+    # a width of 11 characters, so we need to compare lexically, with the argument
+    # similarly padded.
+    #
+    # Testing with hand-crafted iquest commands shows the COLL_CREATE_TIME is ignored.
+    #
+    # sec_since_epoch = (start_of_year.utcnow() - datetime(1970, 1, 1)).total_seconds()
+    #
+    # args = [
+    #     "%s %s",
+    #     "-z",
+    #     "seq",
+    #     "select COLL_NAME, COLL_CREATE_TIME "
+    #     f"where META_COLL_ATTR_NAME = '{ont.Instrument.EXPERIMENT_NAME}' "
+    #     f"and COLL_CREATE_TIME >= '{sec_since_epoch:011.0f}'",
+    # ]
+    #
+    # Instead, we need to get all collections with the specified metadata and filter.
+    # The physical capacity of the lab limits this number to low hundreds per year, but
+    # we will need to revisit this if the number of collections becomes too large.
+    args = ["%s\t%s"]
+
+    if zone is not None:
+        args.append("-z")
+        args.append(zone)
+
+    query = (
+        "select COLL_NAME, COLL_CREATE_TIME "
+        f"where META_COLL_ATTR_NAME = '{ont.Instrument.EXPERIMENT_NAME}'"
+    )
+
+    log.info("Querying iRODS for ONT runs this year", year=start_of_year.year)
+
+    colls = []
+    for n, line in enumerate(iquest(*args, query).splitlines()):
+        # Skip a leading "Zone is ..." line if iquest emits one.
+        if n == 0 and line.startswith("Zone is"):
+            continue
+
+        try:
+            # The timestamp is the last tab-separated field; splitting from the
+            # right keeps a collection path intact even if it contains a tab.
+            path, timestamp = line.rsplit("\t", 1)
+            coll = Collection(path)
+            # Same value as the deprecated datetime.utcfromtimestamp(): a naive
+            # datetime expressed in UTC.
+            created = datetime.fromtimestamp(int(timestamp), tz=timezone.utc).replace(
+                tzinfo=None
+            )
+            if created >= start_of_year:
+                colls.append((coll, created))
+        except Exception as e:
+            # Deliberately best-effort: one bad line must not lose the others.
+            log.error("Error processing iquest result line", n=n, line=line, error=e)
+            continue
+
+    return colls
+
+
+def ont_runs_html_report_this_year(
+    zone: str | None = None, all_avu: bool = False, all_ac: bool = False
+) -> SimpleDoc:
+    """Generate an HTML report of all ONT runs for the current year.
+
+    Runs are grouped by the month in which their collection was created. For each
+    run the report links to the collection and shows its metadata, followed by one
+    row per MinKNOW report data object found directly in that collection, showing
+    its creation time, size, access control list and metadata.
+
+    The generated page links to a stylesheet named "style.css".
+
+    Args:
+        zone: The zone to query. Optional, defaults to the current zone.
+        all_avu: Report all AVUs, even those that are system-related and not normally
+            relevant to data customers.
+        all_ac: Report all access control entries, even those that are system-related
+            and not normally relevant to data customers.
+    Returns:
+        A yattag SimpleDoc object containing the HTML report.
+    """
+    now = datetime.now()
+    colls_by_month: defaultdict[int, list[Collection]] = defaultdict(list)
+    for coll, created in ont_runs_this_year(zone=zone):
+        colls_by_month[created.month].append(coll)
+
+    # Users whose permissions are hidden unless all_ac is set.
+    system_users = ("irods", "irods-g1", "ont1", "rodsBoot", "srpipe")
+
+    # ONT instrument attributes that are hidden unless all_avu is set. Built once
+    # here rather than on every call to report_avu.
+    hidden_ont_attributes = [
+        term.value
+        for term in (
+            ont.Instrument.DISTRIBUTION_VERSION,
+            ont.Instrument.GUPPY_VERSION,
+            ont.Instrument.HOSTNAME,
+            ont.Instrument.PROTOCOL_GROUP_ID,
+            ont.Instrument.RUN_ID,
+        )
+    ]
+
+    def report_ac(ac: AC) -> bool:
+        """Return True if the AC should be reported."""
+        if all_ac:
+            return True
+        return ac.user not in system_users
+
+    def report_avu(avu: AVU) -> bool:
+        """Return True if the AVU should be reported."""
+        if all_avu:
+            return True
+        if avu.namespace == AVU.IRODS_NAMESPACE:
+            return False
+        if avu.namespace == DublinCore.namespace:
+            return False
+        if (
+            avu.namespace == ont.Instrument.namespace
+            and avu.without_namespace in hidden_ont_attributes
+        ):
+            return False
+        return True
+
+    def do_info_cell(x: DataObject):
+        """Add an info cell (data object size, creation timestamp) to the report."""
+        with tag(Tags.div, klass=Styles.info_cell):
+            with tag(Tags.div, klass=Styles.info_bag):
+                # Use doc.asis to insert non-breaking spaces. They are written as
+                # explicit &nbsp; entities so that they are visible in the source
+                # and cannot be mistaken for, or replaced by, ordinary spaces.
+                with tag(Tags.div, klass=Styles.info_item):
+                    doc.asis(f"{x.created().strftime('%Y-%m-%d&nbsp;%H:%M:%S')}")
+                with tag(Tags.div, klass=Styles.info_item):
+                    doc.asis(f"{x.size()}&nbsp;B")
+
+    def do_acl_cell(x: RodsItem):
+        """Add an ACL cell to the report, if the ACL is not empty."""
+        to_report = [ac for ac in x.acl() if report_ac(ac)]
+        if not to_report:
+            return
+
+        with tag(Tags.div, klass=Styles.acl_cell):
+            with tag(Tags.div, klass=Styles.acl_bag):
+                for ac in to_report:
+                    line(Tags.div, str(ac), klass=Styles.acl_item)
+
+    def do_metadata_cell(x: RodsItem):
+        """Add a metadata cell to the report, if AVUs are present."""
+        to_report = [avu for avu in x.metadata() if report_avu(avu)]
+        if not to_report:
+            return
+
+        with tag(Tags.div, klass=Styles.metadata_cell):
+            with tag(Tags.div, klass=Styles.metadata_bag):
+                for avu in to_report:
+                    with tag(Tags.div, klass=Styles.metadata_item):
+                        text(f"{avu.attribute}={avu.value}")
+
+    def do_contents(c: Collection):
+        """Add a row for each MinKNOW report data object directly within c."""
+        for item in c.contents(acl=True, avu=True):
+            if item.rods_type == DataObject and is_minknow_report(item):
+                with tag(Tags.div, klass=Styles.url_cell):
+                    with tag(Tags.a, href=str(item)):
+                        # Use the collection passed in, not the enclosing loop
+                        # variable, so the label always matches the contents.
+                        text(f"{c.path.name}/{item.name}")
+                do_info_cell(item)
+                do_acl_cell(item)
+                do_metadata_cell(item)
+
+    doc, tag, text, line = Doc().ttl()
+    # Declare the document type so that browsers render in standards mode; an
+    # empty asis() call here would add nothing to the output.
+    doc.asis("<!DOCTYPE html>")
+
+    # Column headings of each month's grid: (cell class, heading, heading class).
+    column_headers = (
+        (Styles.url_cell, "iRODS Path", Styles.path_header),
+        (Styles.info_cell, "Created/Size", Styles.info_header),
+        (Styles.acl_cell, "Access Control List", Styles.acl_header),
+        (Styles.metadata_cell, "Metadata", Styles.metadata_header),
+    )
+
+    with tag(Tags.html):
+        with tag(Tags.head):
+            with tag(Tags.title):
+                text(f"ONT runs for {now.year}")
+
+            doc.asis(f'<{Tags.link} href="style.css" rel="stylesheet" />')
+
+        with tag(Tags.body):
+            with tag(Tags.div, klass=Styles.container):
+                # Top row cells containing title and report metadata
+                with tag(Tags.div, klass=Styles.top_left_cell):
+                    text("")
+                with tag(Tags.div, klass=Styles.top_right_cell):
+                    text(f"Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}")
+                with tag(Tags.div, klass=Styles.top_cell):
+                    line(Tags.h1, "ONT Meta-report")
+
+                # Main cell containing the report content
+                with tag(Tags.div, klass=Styles.main_cell):
+                    for month in sorted(colls_by_month):
+                        colls = colls_by_month[month]
+                        log.debug("Found ONT runs for month", month=month, n=len(colls))
+
+                        with tag(Tags.h2):
+                            text(f"{calendar.month_name[month]} {now.year}")
+
+                        with tag(Tags.div, klass=Styles.url_grid):
+                            for cell_style, heading, heading_style in column_headers:
+                                with tag(Tags.div, klass=cell_style):
+                                    line(Tags.h3, heading, klass=heading_style)
+
+                            for coll in colls:
+                                with tag(Tags.div, klass=Styles.url_cell):
+                                    with tag(Tags.a, href=str(coll)):
+                                        text(coll.path.name)
+
+                                # Don't report on the collection's ACL because it
+                                # can be huge
+                                do_metadata_cell(coll)
+
+                                do_contents(coll)
+
+    return doc
diff --git a/style.css b/style.css
new file mode 100644
index 00000000..c7a72c37
--- /dev/null
+++ b/style.css
@@ -0,0 +1,154 @@
+
+/*
+ * Custom properties: colour palette plus the background colours of the ACL,
+ * metadata and info items. Of these, the rules in this file reference only
+ * --background, --copy, --border and the three --default-*-item-background-color
+ * values.
+ */
+html {
+ --primary: hsl(118, 24%, 53%);
+ --primary-content: hsl(120, 29%, 3%);
+ --primary-dark: hsl(118, 24%, 43%);
+ --primary-light: hsl(119, 23%, 63%);
+
+ --secondary: hsl(188, 24%, 53%);
+ --secondary-content: hsl(195, 29%, 3%);
+ --secondary-dark: hsl(188, 24%, 43%);
+ --secondary-light: hsl(188, 23%, 63%);
+
+ --background: hsl(120, 16%, 94%);
+ --foreground: hsl(120, 14%, 99%);
+ --border: hsl(120, 17%, 88%);
+
+ --copy: hsl(120, 17%, 15%);
+ --copy-light: hsl(118, 18%, 40%);
+ --copy-lighter: hsl(117, 17%, 55%);
+
+ --success: hsl(120, 24%, 53%);
+ --warning: hsl(60, 24%, 53%);
+ --error: hsl(0, 24%, 53%);
+ --success-content: hsl(120, 29%, 3%);
+ --warning-content: hsl(60, 29%, 3%);
+ --error-content: hsl(0, 0%, 100%);
+
+ --default-acl-item-background-color: #8fcb8f;
+ --default-met-item-background-color: #85cad3;
+ --default-inf-item-background-color: #b7e0cd;
+}
+
+body {
+ background-color: var(--background);
+ color: var(--copy);
+ font-family: Arial, sans-serif;
+ font-size: x-small;
+}
+
+/* Outer page grid: a wide centre column (20fr) between two narrow ones (1fr). */
+.container {
+ display: grid;
+ grid-template-columns: 1fr 20fr 1fr;
+ grid-template-rows: auto;
+}
+
+/* Top row: one cell per column of the outer grid, each centred by auto margins. */
+.top-left-cell {
+ grid-column: 1;
+ margin: auto;
+}
+
+.top-cell {
+ grid-column: 2;
+ margin: auto;
+}
+
+.top-right-cell {
+ grid-column: 3;
+ margin: auto;
+}
+
+/*
+ * Lower rows of the outer grid, one class per column.
+ * NOTE(review): .left-cell and .right-cell do not appear to be emitted by the
+ * report generator - confirm before relying on or removing them.
+ */
+.left-cell {
+ grid-column: 1;
+}
+
+.main-cell {
+ grid-column: 2;
+}
+
+.right-cell {
+ grid-column: 3;
+}
+
+/*
+ * Per-month grid of 24 columns, each with a line named "col". The cell classes
+ * below place themselves by named line and span:
+ * url 1-10, info 11-14, acl 15-18, metadata 19-24.
+ */
+.url-grid {
+ display: grid;
+ gap: 0.1rem;
+ grid-template-columns: repeat(24, [col] minmax(0, auto));
+ grid-template-rows: repeat(auto-fit, [row] minmax(0, auto));
+ background-color: var(--background);
+}
+
+.url-cell {
+ text-align: left;
+ grid-column: col 1 / span 10;
+}
+
+.info-cell {
+ font-size: smaller;
+ grid-column: col 11 / span 4;
+}
+
+.acl-cell {
+ font-size: smaller;
+ grid-column: col 15 / span 4;
+}
+
+.metadata-cell {
+ font-size: smaller;
+ grid-column: col 19 / span 6;
+}
+
+/*
+ * "Bags" are rounded flex containers holding rounded "items". The info bag keeps
+ * its items on one line (nowrap); the ACL and metadata bags wrap.
+ */
+.info-bag {
+ padding: 0.2rem;
+ border-radius: calc(0.5rem);
+ gap: 0.2rem;
+ background-color: var(--border);
+ display: flex;
+ flex-wrap: nowrap;
+}
+
+.info-item {
+ padding: 0.4rem;
+ border-radius: calc(0.5rem);
+ text-align: right;
+ font-size: x-small;
+ background-color: var(--default-inf-item-background-color);
+}
+
+.acl-bag {
+ padding: 0.2rem;
+ border-radius: calc(0.5rem);
+ gap: 0.2rem;
+ background-color: var(--border);
+ display: flex;
+ flex-wrap: wrap;
+}
+
+.acl-item {
+ padding: 0.2rem;
+ border-radius: calc(0.5rem);
+ text-align: center;
+ font-size: x-small;
+ background-color: var(--default-acl-item-background-color);
+}
+
+.metadata-bag {
+ padding: 0.2rem;
+ border-radius: calc(0.5rem);
+ gap: 0.2rem;
+ background-color: var(--border);
+ display: flex;
+ flex-wrap: wrap;
+}
+
+/* The only column heading with its own rule in this file. */
+.metadata-header {
+ text-align: right;
+}
+
+.metadata-item {
+ padding: 0.2rem;
+ border-radius: calc(0.5rem);
+ text-align: center;
+ font-size: x-small;
+ background-color: var(--default-met-item-background-color);
+}
diff --git a/tests/ont/test_html_reports.py b/tests/ont/test_html_reports.py
new file mode 100644
index 00000000..53109a1f
--- /dev/null
+++ b/tests/ont/test_html_reports.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2024 Genome Research Ltd. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# @author Keith James
+
+from pytest import mark as m
+from yattag import indent
+
+from npg_irods.html_reports import ont_runs_html_report_this_year
+
+
+@m.describe("HTML Meta-Reports")
+class TestHTMLReports:
+ @m.context("When an ONT metadata report is generated")
+ @m.it("Contains the expected number of links to iRODS objects and collections")
+ def test_ont_runs_html_report(self, ont_synthetic_irods):
+ doc = ont_runs_html_report_this_year(zone="testZone")
+
+ # Uncomment to write the HTML to a file for manual inspection
+ #
+ # with open("ont_meta_report.html", "w") as f:
+ # f.write(indent((doc.getvalue())))
+
+ links = [x for x in doc.result if x.startswith('