WIP

scientific-python · Nov 10, 2023 · a89f0ed · a89f0ed
1 parent 6164346
commit a89f0ed
Show file tree

Hide file tree

Showing 4 changed files with 96 additions and 51 deletions.
diff --git a/README.md b/README.md
@@ -83,7 +83,9 @@ ignored_user_logins = [
 ]
 
 # If this regex matches a pull request's description, the captured content
-# is included instead of the pull request title.
+# is included instead of the pull request title. The regex is allowed to match
+# more than once in which case a single pull request may result in multiple
+# items.
 # E.g. the default regex below is matched by
 #
 # ```release-note
@@ -93,7 +95,7 @@ ignored_user_logins = [
 #
 # If you modify this regex, make sure to match the content with a capture
 # group named "summary".
-pr_summary_regex = "^```release-note\\s*(?P<summary>[\\s\\S]*?\\w[\\s\\S]*?)\\s*^```"
+pr_summary_regex = "^```release-note\\s*((?P<label>[^:]*):)?(?P<summary>[\\s\\S]*?\\w[\\s\\S]*?)\\s*^```"
 
 # If any of a pull request's labels matches one of the regexes on the left side
 # its summary will appear in the appropriate section with the title given on

diff --git a/src/changelist/_cli.py b/src/changelist/_cli.py
@@ -12,7 +12,7 @@
 from tqdm import tqdm
 
 from ._config import add_config_defaults, local_config, remote_config
-from ._format import MdFormatter, RstFormatter
+from ._format import ChangeItem, MdFormatter, RstFormatter
 from ._query import commits_between, contributors, pull_requests_from_commits
 
 logger = logging.getLogger(__name__)
@@ -152,18 +152,22 @@ def main(
         pull_requests=lazy_tqdm(pull_requests, desc="Fetching reviewers"),
     )
 
+    change_items = ChangeItem.prs_to_items(
+        pull_requests,
+        pr_summary_regex=re.compile(config["pr_summary_regex"], flags=re.MULTILINE),
+    )
+
     Formatter = {"md": MdFormatter, "rst": RstFormatter}[format]
     formatter = Formatter(
         repo_name=org_repo.split("/")[-1],
-        pull_requests=pull_requests,
+        change_items=change_items,
         authors=authors,
         reviewers=reviewers,
         version=version,
+        label_section_map=config["label_section_map"],
         title_template=config["title_template"],
         intro_template=config["intro_template"],
         outro_template=config["outro_template"],
-        label_section_map=config["label_section_map"],
-        pr_summary_regex=re.compile(config["pr_summary_regex"], flags=re.MULTILINE),
         ignored_user_logins=config["ignored_user_logins"],
     )
 

diff --git a/src/changelist/_format.py b/src/changelist/_format.py
@@ -3,21 +3,71 @@
 from collections import OrderedDict
 from collections.abc import Iterable
 from dataclasses import dataclass
-from typing import Union
+from datetime import datetime
 
 from github.NamedUser import NamedUser
 from github.PullRequest import PullRequest
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True, slots=True)
+class ChangeItem:
+    content: str
+    reference_name: str
+    reference_url: str
+    labels: tuple[str]
+    timestamp: datetime
+
+    @classmethod
+    def prs_to_items(
+        cls,
+        pull_requests: set[PullRequest],
+        pr_summary_regex: re.Pattern,
+    ) -> "set[ChangeItem]":
+        items = set()
+        for pr in pull_requests:
+            matches = tuple(pr_summary_regex.finditer(pr.body))
+            if not matches:
+                logger.debug("falling back to title for %s", pr.html_url)
+                matches = [{"summary": pr.title, "label": None}]
+            assert len(matches) >= 1
+            for match in matches:
+                summary = match["summary"]
+                if match["label"] is not None:
+                    labels = (match["label"],)
+                else:
+                    labels = tuple(label.name for label in pr.labels)
+                items.add(
+                    cls(
+                        content=summary,
+                        reference_name=f"#{pr.number}",
+                        reference_url=pr.html_url,
+                        labels=labels,
+                        timestamp=pr.merged_at,
+                    )
+                )
+        return items
+
+
+@dataclass
+class UserItem:
+    """A user entry with a display name and a linkable reference.
+
+    NOTE(review): work in progress, nothing visible here uses this class yet.
+    """
+
+    name: str
+    reference_name: str
+    reference_url: str
+
+    @classmethod
+    def users_to_items(cls, users):
+        """Placeholder: not implemented yet, currently returns None."""
+        pass
+
+
 @dataclass(frozen=True, kw_only=True)
 class MdFormatter:
     """Format release notes in Markdown from PRs, authors and reviewers."""
 
     repo_name: str
-    pull_requests: set[PullRequest]
-    authors: set[Union[NamedUser]]
+    change_items: set[ChangeItem]
+    authors: set[NamedUser]
     reviewers: set[NamedUser]
 
     version: str
@@ -27,7 +77,7 @@ class MdFormatter:
 
     # Associate regexes matching PR labels with section titles in the release notes
     label_section_map: dict[str, str]
-    pr_summary_regex: re.Pattern
+
     ignored_user_logins: tuple[str]
 
     def __str__(self) -> str:
@@ -51,43 +101,39 @@ def iter_lines(self) -> Iterable[str]:
         yield from self._format_section_title(title, level=1)
         yield "\n"
         yield from self._format_intro()
-        for title, pull_requests in self._prs_by_section.items():
-            yield from self._format_pr_section(title, pull_requests)
+        for title, items in self._items_by_section.items():
+            yield from self._format_change_section(title, items)
         yield from self._format_contributor_section(self.authors, self.reviewers)
         yield from self._format_outro()
 
     @property
-    def _prs_by_section(self) -> OrderedDict[str, set[PullRequest]]:
-        """Map pull requests to section titles.
-
-        Pull requests whose labels do not match one of the sections given in
-        `regex_section_map`, are sorted into a section named "Other".
-        """
+    def _items_by_section(self) -> OrderedDict[str, set[ChangeItem]]:
+        """Map ChangeItems to section titles."""
         label_section_map = {
-            re.compile(pattern): section_name
+            re.compile(pattern, flags=re.IGNORECASE): section_name
             for pattern, section_name in self.label_section_map.items()
         }
-        prs_by_section = OrderedDict()
+
+        items_by_section = OrderedDict()
         for _, section_name in self.label_section_map.items():
-            prs_by_section[section_name] = set()
-        prs_by_section["Other"] = set()
+            items_by_section[section_name] = set()
+        items_by_section["Other"] = set()
 
-        for pr in self.pull_requests:
+        for item in self.change_items:
             matching_sections = [
                 section_name
                 for regex, section_name in label_section_map.items()
-                if any(regex.match(label.name) for label in pr.labels)
+                if any(regex.match(label) for label in item.labels)
             ]
             for section_name in matching_sections:
-                prs_by_section[section_name].add(pr)
+                items_by_section[section_name].add(item)
             if not matching_sections:
                 logger.warning(
                     "%s without matching label, sorting into section 'Other'",
-                    pr.html_url,
+                    item.reference_url,
                 )
-                prs_by_section["Other"].add(pr)
-
-        return prs_by_section
+                items_by_section["Other"].add(item)
+        return items_by_section
 
     def _sanitize_text(self, text: str) -> str:
         text = text.strip()
@@ -101,34 +147,25 @@ def _format_link(self, name: str, target: str) -> str:
     def _format_section_title(self, title: str, *, level: int) -> Iterable[str]:
         yield f"{'#' * level} {title}\n"
 
-    def _parse_pull_request_summary(self, pr: PullRequest) -> str:
-        if pr.body and (match := self.pr_summary_regex.search(pr.body)):
-            summary = match["summary"]
-        else:
-            logger.debug("falling back to title for %s", pr.html_url)
-            summary = pr.title
-        summary = self._sanitize_text(summary)
-        return summary
-
-    def _format_pull_request(self, pr: PullRequest) -> Iterable[str]:
-        link = self._format_link(f"#{pr.number}", f"{pr.html_url}")
-        summary = self._parse_pull_request_summary(pr).rstrip(".")
+    def _format_change_item(self, item: ChangeItem) -> Iterable[str]:
+        link = self._format_link(item.reference_name, item.reference_url)
+        summary = self._sanitize_text(item.content).rstrip(".")
         summary = f"- {summary} ({link}).\n"
         yield summary
 
-    def _format_pr_section(
-        self, title: str, pull_requests: set[PullRequest]
+    def _format_change_section(
+        self, title: str, items: set[ChangeItem]
     ) -> Iterable[str]:
-        """Format a section title and list its pull requests sorted by merge date."""
-        if pull_requests:
+        """Format a section title and list its items sorted by merge date."""
+        if items:
             yield from self._format_section_title(title, level=2)
             yield "\n"
 
-            for pr in sorted(pull_requests, key=lambda pr: pr.merged_at):
-                yield from self._format_pull_request(pr)
+            for item in sorted(items, key=lambda item: item.timestamp):
+                yield from self._format_change_item(item)
             yield "\n"
 
-    def _format_user_line(self, user: Union[NamedUser]) -> str:
+    def _format_user_line(self, user: NamedUser) -> str:
         line = f"@{user.login}"
         line = self._format_link(line, user.html_url)
         if user.name:
@@ -137,7 +174,7 @@ def _format_user_line(self, user: Union[NamedUser]) -> str:
 
     def _format_contributor_section(
         self,
-        authors: set[Union[NamedUser]],
+        authors: set[NamedUser],
         reviewers: set[NamedUser],
     ) -> Iterable[str]:
         """Format contributor section and list users sorted by login handle."""

diff --git a/src/changelist/default_config.toml b/src/changelist/default_config.toml
@@ -27,7 +27,9 @@ ignored_user_logins = [
 ]
 
 # If this regex matches a pull request's description, the captured content
-# is included instead of the pull request title.
+# is included instead of the pull request title. The regex is allowed to match
+# more than once in which case a single pull request may result in multiple
+# items.
 # E.g. the default regex below is matched by
 #
 # ```release-note
@@ -37,7 +39,7 @@ ignored_user_logins = [
 #
 # If you modify this regex, make sure to match the content with a capture
 # group named "summary".
-pr_summary_regex = "^```release-note\\s*(?P<summary>[\\s\\S]*?\\w[\\s\\S]*?)\\s*^```"
+pr_summary_regex = "^```release-note\\s*((?P<label>[^:]*):)?(?P<summary>[\\s\\S]*?\\w[\\s\\S]*?)\\s*^```"
 
 # If any of a pull request's labels matches one of the regexes on the left side
 # its summary will appear in the appropriate section with the title given on