-
Notifications
You must be signed in to change notification settings - Fork 593
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Label PRs when the json schema changes (#2240)
* label PRs when the json schema changes Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * moderate pr comments Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * be more strict about processing file names Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> --------- Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
- Loading branch information
Showing
6 changed files
with
354 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
from __future__ import annotations | ||
|
||
import sys | ||
import glob | ||
import subprocess | ||
import os | ||
import re | ||
|
||
# When True, run() only prints the command it was given instead of executing it.
# main() switches this on when no PR number is available or when its inputs come
# from local files rather than from the PR itself.
DRY_RUN = False
|
||
|
||
def main(changed_files: str | None = None, merge_base_schema_files: str | None = None):
    """Label (and possibly comment on) a PR based on the JSON schema files it touches.

    ``GITHUB_PR_NUMBER`` and ``CI_COMMENT_FILE`` are read from the environment.

    When ``changed_files`` is given, both arguments are paths to text files holding one
    file path per line (useful for local testing) and the run is forced into dry-run
    mode. Otherwise the changed files are queried from the PR and the merge-base schema
    files are listed from the current checkout.

    Exits with status 1 when ``CI_COMMENT_FILE`` is unset, when a live run is attempted
    outside of CI or without a PR number, or when no merge-base schema files are found.
    """
    global DRY_RUN

    pr_number = os.environ.get("GITHUB_PR_NUMBER")
    comment_file_path = os.environ.get("CI_COMMENT_FILE")

    if not comment_file_path:
        print("CI_COMMENT_FILE not set")
        sys.exit(1)

    # no PR to act on, or inputs come from local files: only print the commands
    if changed_files or not pr_number:
        DRY_RUN = True

    if not changed_files:
        if not is_ci():
            print("Not in CI")
            sys.exit(1)

        if not pr_number:
            print("Not a PR")
            sys.exit(1)

        pr_changed_files = get_pr_changed_files(pr_number)
        # This runs in the context of pull_request_target, so the checkout is the merge base:
        # the main branch of the original repo, NOT the branch in the forked repo (or the branch
        # in the target repo for non-forked PRs). Listing the currently checked-out files is
        # therefore enough to know what the schemas look like before the merge.
        og_json_schema_files = list_json_schema_files()
    else:
        # read lines from file... this is useful for local testing
        with open(changed_files) as changed_fh:
            pr_changed_files = changed_fh.read().splitlines()

        with open(merge_base_schema_files) as base_fh:
            og_json_schema_files = sort_json_schema_files(base_fh.read().splitlines())

    pr_json_schema_files = filter_to_schema_files(pr_changed_files)

    # print("schema files in pr: ", summarize_schema_files(pr_json_schema_files))
    # print("og schema files: ", summarize_schema_files(og_json_schema_files))

    if not og_json_schema_files:
        print("No schema files found in merge base")
        sys.exit(1)

    # the PR's schema files cover added, removed, and changed files alike; anything not
    # already present in the merge base is new, the remainder touches an existing schema
    touched_schemas = set(pr_json_schema_files)
    new_schema_files = touched_schemas - set(og_json_schema_files)
    removed_or_modified_schema_files = touched_schemas - set(new_schema_files)

    print("new schemas: ", summarize_schema_files(new_schema_files))
    print("removed or modified schemas:", summarize_schema_files(removed_or_modified_schema_files))

    # any new or modified schema earns the PR the "json-schema" label...
    if new_schema_files or removed_or_modified_schema_files:
        print("\nAdding json-schema label...")
        add_label(pr_number, "json-schema")
    else:
        remove_label(pr_number, "json-schema")

    # new schema files are compared against the latest merge-base schema: a major version
    # bump is a breaking change and is flagged on the PR with a breaking-change label...
    latest_base_schema = og_json_schema_files[-1]
    if is_breaking_change(new_schema_files, latest_base_schema):
        print("\nBreaking change detected...")
        add_label(pr_number, "breaking-change")
    else:
        remove_label(pr_number, "breaking-change")

    # modifying an existing schema could be a breaking change and removing one should never
    # be allowed; both cases are surfaced on the PR via a warning comment...
    if removed_or_modified_schema_files:
        print("\nRemoved or modified schema detected...")
        ordered_schemas = sort_json_schema_files(list(removed_or_modified_schema_files))
        bullet_list = "\n".join(f" - {schema}" for schema in ordered_schemas)
        add_comment(comment_file_path, f"Detected modification or removal of existing json schemas:\n{bullet_list}", warning=True)
|
||
|
||
def add_comment(comment_file_path: str, comment: str, warning: bool = False, important: bool = False):
    """Write ``comment`` to ``comment_file_path`` and echo it to stdout.

    When ``warning`` or ``important`` is set, every line of the comment is block-quoted
    and prefixed with a ``[!WARNING]`` or ``[!IMPORTANT]`` alert marker; ``warning``
    takes precedence when both are set. Missing parent directories are created. An
    existing file at the path is overwritten.
    """
    if warning or important:
        quoted_body = "\n".join(f"> {line}" for line in comment.splitlines())
        alert = "WARNING" if warning else "IMPORTANT"
        comment = f"> [!{alert}]\n{quoted_body}"

    # create any parent directories if they don't exist; a bare file name has an empty
    # dirname, and os.makedirs("") raises FileNotFoundError, so skip it in that case
    parent_dir = os.path.dirname(comment_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(comment_file_path, "w") as f:
        f.write(comment)

    print(f"Comment file contents: {comment_file_path}")
    print(comment)
|
||
|
||
def add_label(pr_number: str, label: str):
    """Add ``label`` to the given PR by running ``gh pr edit <pr> --add-label <label>``.

    Prints the captured stderr and exits with status 1 if the command fails.
    """
    # text=True so stderr is captured as str and prints readably rather than as a bytes repr
    result = run(f"gh pr edit {pr_number} --add-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        print(f"Unable to add {label!r} label to PR {pr_number}")
        print(str(result.stderr))
        sys.exit(1)
|
||
|
||
def remove_label(pr_number: str, label: str):
    """Remove ``label`` from the given PR by running ``gh pr edit <pr> --remove-label <label>``.

    Prints the captured stderr and exits with status 1 if the command fails.
    """
    # text=True so stderr is captured as str and prints readably rather than as a bytes repr
    result = run(f"gh pr edit {pr_number} --remove-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        print(f"Unable to remove {label!r} label from PR {pr_number}")
        print(str(result.stderr))
        sys.exit(1)
|
||
|
||
def major_version(semver: str) -> int:
    """Return the leading (major) component of a dotted version string as an int."""
    major, _, _ = semver.partition(".")
    return int(major)
|
||
|
||
def is_breaking_change(new_schema_files: set[str], latest_schema_file: str) -> bool:
    """Report whether any new schema file has a higher major version than the latest one.

    Returns False when ``new_schema_files`` is empty.
    """
    baseline_major = major_version(get_semver(latest_schema_file))
    return any(
        major_version(get_semver(candidate)) > baseline_major
        for candidate in new_schema_files
    )
|
||
|
||
def summarize_schema_files(files: list[str]) -> list[str]:
    """Return the version string of each schema file, in iteration order."""
    return list(map(get_semver, files))
|
||
|
||
def is_ci() -> bool:
    """Return True when a ``CI`` environment variable is set, whatever its value."""
    return os.environ.get("CI") is not None
|
||
|
||
def get_pr_changed_files(pr_number: str) -> list[str]:
    """Return the paths of the files changed in the PR, as reported by ``gh pr view``.

    Prints the captured stderr and exits with status 1 if the command fails.
    """
    gh_command = f"gh pr view {pr_number} --json files --jq '.files.[].path'"
    result = run(gh_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode == 0:
        # one path per output line
        return result.stdout.splitlines()

    print("Unable to get list of changed files in PR")
    print(str(result.stderr))
    sys.exit(1)
|
||
|
||
def filter_to_schema_files(list_of_files: list[str]) -> list[str]:
    """Keep only paths of the form ``schema/json/schema-<X>.<Y>.<Z>.json``, sorted by version."""
    # strict on purpose: anything with extra content around the version is rejected
    schema_path = re.compile(r"^schema/json/schema-\d+\.\d+\.\d+\.json$")
    matching = [path for path in list_of_files if schema_path.match(path)]
    return sort_json_schema_files(matching)
|
||
|
||
def list_json_schema_files() -> list[str]:
    """List the ``schema/json/schema-*.json`` files under the working directory, sorted by version."""
    found = glob.glob("schema/json/schema-*.json")
    return sort_json_schema_files(found)
|
||
|
||
def run(command: str, **kwargs) -> subprocess.CompletedProcess:
    """Execute ``command`` via ``subprocess.run``, or only print it when DRY_RUN is set.

    In dry-run mode a successful ``CompletedProcess`` (return code 0, no captured
    output) is returned without running anything.
    """
    if not DRY_RUN:
        print(f"[RUN] {command}")
        return subprocess.run(command, **kwargs)

    print(f"[DRY RUN] {command}")
    return subprocess.CompletedProcess(args=[command], returncode=0)
|
||
|
||
def get_semver(input_file: str) -> str:
    """Extract the version from a schema file path such as ``schema/json/schema-1.2.3.json``.

    Only the file's basename is parsed, so a hyphen in a parent directory name cannot be
    mistaken for the separator in front of the version.

    Raises:
        IndexError: if the basename contains no ``-``.
    """
    basename = os.path.basename(input_file)
    return basename.split("-")[1].split(".json")[0]
|
||
|
||
def sort_json_schema_files(files: list[str]) -> list[str]:
    """Return schema file paths ordered by their embedded version, lowest first.

    Input entries look like "schema/json/schema-1.12.1.json". Ordering uses the numeric
    version components rather than the raw string, so "schema/json/schema-1.2.1.json"
    comes before "schema/json/schema-1.12.1.json". Empty entries are dropped, and each
    returned path is rebuilt as "schema/json/schema-<version>.json".
    """
    def numeric_parts(version: str) -> list[int]:
        return [int(part) for part in version.split(".")]

    ordered_versions = sorted(
        (get_semver(path) for path in files if path),
        key=numeric_parts,
    )
    return [f"schema/json/schema-{version}.json" for version in ordered_versions]
|
||
|
||
# Local testing: each command-line argument names a file holding a line-by-line list of paths, e.g.:
|
||
# .binny.yaml | ||
# .github/actions/bootstrap/action.yaml | ||
# .github/scripts/goreleaser-install.sh | ||
# .github/workflows/release.yaml | ||
# .github/workflows/update-bootstrap-tools.yml | ||
# .github/workflows/update-cpe-dictionary-index.yml | ||
# .github/workflows/update-stereoscope-release.yml | ||
# .github/workflows/validations.yaml | ||
# .gitignore | ||
# .goreleaser.yaml | ||
# Makefile | ||
# Taskfile.yaml | ||
# schema/cyclonedx/Makefile | ||
|
||
if __name__ == "__main__":
    # Each value is the name of a single file that contains a line-separated list of
    # files. They are only taken from the command line when both are supplied.
    if len(sys.argv) > 2:
        changed_files, merge_base_schema_files = sys.argv[1], sys.argv[2]
    else:
        changed_files, merge_base_schema_files = None, None

    main(changed_files, merge_base_schema_files)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import unittest | ||
from unittest.mock import patch | ||
import subprocess | ||
|
||
import labeler | ||
|
||
class Labeler(unittest.TestCase):
    """Unit tests for the helper functions of the ``labeler`` script."""

    def test_major_version(self):
        self.assertEqual(labeler.major_version("1.2.3"), 1)
        self.assertEqual(labeler.major_version("2.0.0"), 2)

    def test_is_breaking_change(self):
        # a major version bump over the latest schema is a breaking change
        new_schema_files = ["schema/json/schema-2.0.0.json"]
        latest_schema_file = "schema/json/schema-1.2.0.json"
        self.assertTrue(labeler.is_breaking_change(new_schema_files, latest_schema_file))

        # a minor version bump is not
        new_schema_files = ["schema/json/schema-1.3.0.json"]
        latest_schema_file = "schema/json/schema-1.2.0.json"
        self.assertFalse(labeler.is_breaking_change(new_schema_files, latest_schema_file))

    def test_summarize_schema_files(self):
        files = ["schema/json/schema-1.0.0.json", "schema/json/schema-2.0.0.json"]
        expected = ["1.0.0", "2.0.0"]
        self.assertEqual(labeler.summarize_schema_files(files), expected)

    def test_is_ci(self):
        # Mock os.environ to simulate CI environment
        with patch.dict("os.environ", {"CI": "true"}):
            self.assertTrue(labeler.is_ci())

        # ...and an environment without the CI variable
        with patch.dict("os.environ", {}, clear=True):
            self.assertFalse(labeler.is_ci())

    def test_get_pr_changed_files(self):
        expected_command = "gh pr view 123 --json files --jq '.files.[].path'"
        expected_output = "file1.json\nfile2.json\n"

        # Build a real result instance; assigning attributes on the CompletedProcess class
        # itself would leak state into every other test in the process.
        completed = subprocess.CompletedProcess(
            args=[expected_command],
            returncode=0,
            stdout=expected_output,
            stderr="",
        )
        with patch("labeler.run", return_value=completed) as mock_run:
            result = labeler.get_pr_changed_files("123")
            mock_run.assert_called_with(expected_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            self.assertEqual(result, ["file1.json", "file2.json"])

    def test_filter_to_schema_files(self):
        input_files = ["schema/json/schema-1.0.0.json", "not_schema.txt", "schema/json/schema-2.0.0.json"]
        expected_files = ["schema/json/schema-1.0.0.json", "schema/json/schema-2.0.0.json"]
        self.assertEqual(labeler.filter_to_schema_files(input_files), expected_files)

        # we should be strict about what files are allowed to be processed
        input_files = ["schema/json/schema-1.0.0extracontent.json", "schema/json/schema-1.0.0.md", "schema/json/schema-1.0.0.json.extracontent"]
        expected_files = []
        self.assertEqual(labeler.filter_to_schema_files(input_files), expected_files)

    def test_get_semver(self):
        input_file = "schema/json/schema-1.0.0.json"
        expected_semver = "1.0.0"
        self.assertEqual(labeler.get_semver(input_file), expected_semver)

    def test_sort_json_schema_files(self):
        # versions sort numerically, so 1.2.1 precedes 1.12.1
        files = ["schema/json/schema-1.12.1.json", "schema/json/schema-1.2.1.json"]
        expected_sorted_files = ["schema/json/schema-1.2.1.json", "schema/json/schema-1.12.1.json"]
        self.assertEqual(labeler.sort_json_schema_files(files), expected_sorted_files)
|
||
|
||
# Run the test cases in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
name: "Detect schema changes" | ||
|
||
on: | ||
# IMPORTANT! This workflow is triggered by the `pull_request_target` event | ||
# which means that forked PRs will run with access secrets from the repo | ||
# it's forked from (the "target" repo). | ||
# | ||
  # For this reason we NEVER checkout the code from the pull request
# (e.g. "ref: ${{ github.event.pull_request.head.sha }}") to prevent | ||
# accidentally running potentially untrusted code. | ||
# | ||
# By default the checkout will be: | ||
# - GITHUB_SHA: Last commit on the PR base branch | ||
# - GITHUB_REF: PR base branch | ||
# | ||
# ...unlike a typical PR where: | ||
# - GITHUB_SHA: Last merge commit on the GITHUB_REF branch | ||
# - GITHUB_REF: PR merge branch refs/pull/:prNumber/merge | ||
pull_request_target: | ||
|
||
env: | ||
# note: this is used within hashFiles() so must be within the GITHUB_WORKSPACE path (or will silently fail) | ||
CI_COMMENT_FILE: .tmp/labeler-comment.txt | ||
# needs to be any string to uniquely identify the comment on a PR across multiple runs | ||
COMMENT_HEADER: "label-commentary" | ||
|
||
jobs: | ||
label: | ||
name: "Label changes" | ||
runs-on: ubuntu-22.04 | ||
steps: | ||
|
||
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 #v4.1.1 | ||
|
||
- run: python .github/scripts/labeler.py | ||
env: | ||
# note: this token has write access to the repo | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
GITHUB_PR_NUMBER: ${{ github.event.number }} | ||
|
||
- name: Delete existing comment | ||
if: ${{ hashFiles( env.CI_COMMENT_FILE ) == '' }} | ||
uses: marocchino/sticky-pull-request-comment@efaaab3fd41a9c3de579aba759d2552635e590fd #v2.8.0 | ||
with: | ||
header: ${{ env.COMMENT_HEADER }} | ||
hide: true | ||
hide_classify: "OUTDATED" | ||
|
||
- name: Add comment | ||
if: ${{ hashFiles( env.CI_COMMENT_FILE ) != '' }} | ||
uses: marocchino/sticky-pull-request-comment@efaaab3fd41a9c3de579aba759d2552635e590fd #v2.8.0 | ||
with: | ||
header: ${{ env.COMMENT_HEADER }} | ||
path: ${{ env.CI_COMMENT_FILE }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters