add validation headers script (#225)

* add validation headers script * cleanup * cleanup
ecmwf-projects · Oct 10, 2024 · 817060e · 817060e
1 parent d1e3b60
commit 817060e
Show file tree

Hide file tree

Showing 5 changed files with 64 additions and 5 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -41,6 +41,14 @@ repos:
     language: python
     types: [jupyter]
     additional_dependencies: [nbformat]
+- repo: local
+  hooks:
+  - id: validate-headers
+    name: validate-headers
+    entry: python scripts/validate-headers.py
+    language: python
+    types: [jupyter]
+    additional_dependencies: [nbformat]
 - repo: https://github.com/executablebooks/mdformat
   rev: 0.7.17
   hooks:

diff --git a/...and_Biosphere/satellite_satellite-fire-burned-area_climate-and-weather-extremes_q01.ipynb b/...and_Biosphere/satellite_satellite-fire-burned-area_climate-and-weather-extremes_q01.ipynb
@@ -15,7 +15,7 @@
     "\n",
     "Produced by: Rita Cunha (CoLAB +ATLANTIC)\n",
     "\n",
-    "## 🌍 Use Case: Identification of fire-prone areas for monitoring and prevention\n",
+    "## 🌍 Use case: Identification of fire-prone areas for monitoring and prevention\n",
     "\n",
     "## ❓ Quality assessment question\n",
     "* **How well can we disclose what are the spatial patterns of the total burned area per season over the Iberian Peninsula?**\n",

diff --git a/...llite_ECVs/Land_Biosphere/satellite_satellite-fire-burned-area_trend-assessment_q02.ipynb b/...llite_ECVs/Land_Biosphere/satellite_satellite-fire-burned-area_trend-assessment_q02.ipynb
@@ -16,7 +16,7 @@
     "Produced by: Rita Cunha (CoLAB +ATLANTIC)\n",
     "\n",
     "\n",
-    "## 🌍 Use Case: Identification of fire risk trends in fire-prone areas\n",
+    "## 🌍 Use case: Identification of fire risk trends in fire-prone areas\n",
     "\n",
     "## ❓ Quality assessment question\n",
     "\n",

diff --git a/scripts/fix-legacy-urls.py b/scripts/fix-legacy-urls.py
@@ -3,19 +3,21 @@
 
 import nbformat
 
-STRING_MAPPER = {"/cdsapp#!/dataset/": "/datasets/"}
+STRING_MAPPER = {
+    "/cdsapp#!/dataset/": "/datasets/",
+}
 
 
 def fix_legacy_urls(path: Path) -> None:
     notebook = nbformat.read(path, nbformat.NO_CONVERT)
 
     write = False
     for cell in notebook.cells:
-        if "source" not in cell:
+        if cell["cell_type"] != "markdown":
             continue
 
         for old, new in STRING_MAPPER.items():
-            if old in (source := cell["source"]):
+            if old in (source := cell.get("source", "")):
                 cell["source"] = source.replace(old, new)
                 write = True
 

diff --git a/scripts/validate-headers.py b/scripts/validate-headers.py
@@ -0,0 +1,49 @@
+import argparse
+from pathlib import Path
+
+import nbformat
+
+# Section header prefixes that validate_headers counts in markdown cells; each
+# one must be the prefix of exactly one line per notebook.
+HEADERS = (
+    "## 🌍 Use case:",
+    "## ❓ Quality assessment question",
+    "## 📢 Quality assessment statement",
+    "## 📋 Methodology",
+    "## 📈 Analysis and results",
+    "## ℹ️ If you want to know more",
+)
+
+
+def validate_headers(path: Path) -> None:
+    """Check that a notebook has exactly one title and one of each required header.
+
+    Only markdown cells are inspected. Each line is stripped of surrounding
+    whitespace; a line starting with ``"# "`` counts as a title, and any other
+    line is counted against every entry of ``HEADERS`` it starts with.
+
+    Args:
+        path: Notebook file to read with ``nbformat``.
+
+    Raises:
+        AssertionError: If the number of titles, or the number of occurrences
+            of any entry of ``HEADERS``, is different from one.
+    """
+    notebook = nbformat.read(path, nbformat.NO_CONVERT)
+
+    title_count = 0
+    headers_count = dict.fromkeys(HEADERS, 0)
+    for cell in notebook.cells:
+        if cell["cell_type"] != "markdown":
+            continue
+
+        for line in cell.get("source", "").splitlines():
+            line = line.strip()
+            if line.startswith("# "):
+                title_count += 1
+                continue
+
+            for header in headers_count:
+                if line.startswith(header):
+                    headers_count[header] += 1
+
+    # Raise explicitly instead of using ``assert``: assert statements are
+    # removed when Python runs with -O, which would silently disable the check.
+    # AssertionError is kept so the failure type and messages are unchanged.
+    if title_count != 1:
+        raise AssertionError(f"{path=!s}: Invalid {title_count=}")
+    for header, header_count in headers_count.items():
+        if header_count != 1:
+            raise AssertionError(f"{path=!s}: Invalid {header_count=} of {header=}")
+
+
+def main(paths: list[Path]) -> None:
+    """Run ``validate_headers`` on each notebook path, in the order given.
+
+    Nothing is caught here, so the first notebook that fails validation
+    stops the run and its exception propagates to the caller.
+    """
+    for notebook_path in paths:
+        validate_headers(notebook_path)
+
+
+if __name__ == "__main__":
+    # Every positional argument is converted to a Path; zero arguments is
+    # allowed (nargs="*"), in which case nothing is validated.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("paths", action="store", type=Path, nargs="*")
+    main(cli.parse_args().paths)