Skip to content

Commit

Permalink
add validation headers script (#225)
Browse files Browse the repository at this point in the history
* add validation headers script

* cleanup

* cleanup
  • Loading branch information
malmans2 authored Oct 10, 2024
1 parent d1e3b60 commit 817060e
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 5 deletions.
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ repos:
language: python
types: [jupyter]
additional_dependencies: [nbformat]
- repo: local
hooks:
- id: validate-headers
name: validate-headers
entry: python scripts/validate-headers.py
language: python
types: [jupyter]
additional_dependencies: [nbformat]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.17
hooks:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"\n",
"Produced by: Rita Cunha (CoLAB +ATLANTIC)\n",
"\n",
"## 🌍 Use Case: Identification of fire-prone areas for monitoring and prevention\n",
"## 🌍 Use case: Identification of fire-prone areas for monitoring and prevention\n",
"\n",
"## ❓ Quality assessment question\n",
"* **How well can we disclose what are the spatial patterns of the total burned area per season over the Iberian Peninsula?**\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"Produced by: Rita Cunha (CoLAB +ATLANTIC)\n",
"\n",
"\n",
"## 🌍 Use Case: Identification of fire risk trends in fire-prone areas\n",
"## 🌍 Use case: Identification of fire risk trends in fire-prone areas\n",
"\n",
"## ❓ Quality assessment question\n",
"\n",
Expand Down
8 changes: 5 additions & 3 deletions scripts/fix-legacy-urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@

import nbformat

STRING_MAPPER = {"/cdsapp#!/dataset/": "/datasets/"}
STRING_MAPPER = {
"/cdsapp#!/dataset/": "/datasets/",
}


def fix_legacy_urls(path: Path) -> None:
notebook = nbformat.read(path, nbformat.NO_CONVERT)

write = False
for cell in notebook.cells:
if "source" not in cell:
if cell["cell_type"] != "markdown":
continue

for old, new in STRING_MAPPER.items():
if old in (source := cell["source"]):
if old in (source := cell.get("source", "")):
cell["source"] = source.replace(old, new)
write = True

Expand Down
49 changes: 49 additions & 0 deletions scripts/validate-headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import argparse
from pathlib import Path

import nbformat

# Section headers every notebook must contain exactly once. Entries are matched
# as line prefixes, so the first one deliberately stops at the colon.
HEADERS = (
    "## 🌍 Use case:",
    "## ❓ Quality assessment question",
    "## 📢 Quality assessment statement",
    "## 📋 Methodology",
    "## 📈 Analysis and results",
    "## ℹ️ If you want to know more",
)


def validate_headers(path: Path) -> None:
    """Check that a notebook has one title and one of each required header.

    Only markdown cells are inspected. Each line is stripped and then
    classified: a line starting with ``"# "`` counts as a title; any other
    line is counted against every entry of ``HEADERS`` that it starts with.

    Args:
        path: Notebook file, read with ``nbformat`` without version conversion.

    Raises:
        AssertionError: If the number of title lines is not exactly one, or
            if any entry of ``HEADERS`` does not occur exactly once.
    """
    notebook = nbformat.read(path, nbformat.NO_CONVERT)

    title_count = 0
    headers_count = dict.fromkeys(HEADERS, 0)
    for cell in notebook.cells:
        if cell["cell_type"] != "markdown":
            continue

        for line in cell.get("source", "").splitlines():
            line = line.strip()
            if line.startswith("# "):
                title_count += 1
                continue

            for header in headers_count:
                if line.startswith(header):
                    headers_count[header] += 1

    # Raise explicitly rather than via ``assert``: assert statements are removed
    # under ``python -O``, which would make this validation a silent no-op.
    if title_count != 1:
        raise AssertionError(f"{path=!s}: Invalid {title_count=}")
    for header, header_count in headers_count.items():
        if header_count != 1:
            raise AssertionError(f"{path=!s}: Invalid {header_count=} of {header=}")


def main(paths: list[Path]) -> None:
    """Validate the headers of each notebook in ``paths``, in order.

    Nothing is caught here, so the first error raised by
    ``validate_headers`` propagates and the remaining paths are not checked.
    """
    for notebook_path in paths:
        validate_headers(notebook_path)


if __name__ == "__main__":
    # Script entry point: accept zero or more notebook paths as positional
    # arguments and validate each of them.
    cli = argparse.ArgumentParser()
    cli.add_argument("paths", action="store", type=Path, nargs="*")
    main(cli.parse_args().paths)

0 comments on commit 817060e

Please sign in to comment.