#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Gather the pull requests merged between two commits for a release.

The script walks the default branch history through the GitHub GraphQL API,
starting at ``--from-commit`` and stopping when ``--to-commit`` is reached,
caches the raw commit nodes in ``out.pkl`` and then writes two CSV files
(``out-large.csv`` and ``out-small.csv``) split by the size of each PR.
"""

import argparse
import csv
import os
import pickle
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List

REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
sys.path.append(str(REPO_ROOT / "tests" / "scripts"))

from git_utils import git, GitHubRepo
from github_tag_teams import tags_from_title

# Read leniently so that --skip-query (which never talks to GitHub) works
# without a token; fetch_pr_data() rejects a missing token before querying.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# Repository whose history is scanned and whose PR URLs are written to the CSVs.
GITHUB_USER = "apache"
GITHUB_REPO = "tvm"


PRS_QUERY = """
query ($owner: String!, $name: String!, $after: String, $pageSize: Int!) {
  repository(owner: $owner, name: $name) {
    defaultBranchRef {
      name
      target {
        ... on Commit {
          oid
          history(after: $after, first: $pageSize) {
            pageInfo {
              hasNextPage
              endCursor
            }
            nodes {
              oid
              committedDate
              associatedPullRequests(first: 1) {
                nodes {
                  number
                  additions
                  changedFiles
                  deletions
                  author {
                    login
                  }
                  title
                  body
                }
              }
            }
          }
        }
      }
    }
  }
}
"""


def append_and_save(items: List[Dict[str, Any]], file: Path) -> None:
    """Append ``items`` to the pickled list stored in ``file``.

    The file is created when it does not exist yet. Existing content is kept,
    so repeated runs against the same cache file accumulate entries.

    Parameters
    ----------
    items : List[Dict[str, Any]]
        Commit nodes to add to the cache.
    file : Path
        Pickle file holding a list of previously saved commit nodes.
    """
    if not file.exists():
        data = []
    else:
        # NOTE: pickle must only be used on trusted files; this cache is
        # expected to have been written by this script.
        with open(file, "rb") as f:
            data = pickle.load(f)

    data += items
    with open(file, "wb") as f:
        pickle.dump(data, f)


def fetch_pr_data(args: argparse.Namespace, cache: Path) -> None:
    """Query GitHub for the commits between two revisions and cache them.

    History is paged through starting at ``args.from_commit``; every commit
    node seen before ``args.to_commit`` is appended to ``cache``. The commit
    matching ``args.to_commit`` itself is not stored.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments; ``from_commit`` and ``to_commit`` are read.
    cache : Path
        Pickle file the fetched commit nodes are appended to.

    Raises
    ------
    SystemExit
        If either commit argument or the ``GITHUB_TOKEN`` variable is missing.
    """
    if args.from_commit is None or args.to_commit is None:
        print("--from-commit and --to-commit must be specified if --skip-query is not used")
        sys.exit(1)

    if GITHUB_TOKEN is None:
        print("The GITHUB_TOKEN environment variable must be set to query GitHub")
        sys.exit(1)

    github = GitHubRepo(user=GITHUB_USER, repo=GITHUB_REPO, token=GITHUB_TOKEN)

    i = 0
    page_size = 80
    # The first cursor is built by hand as "<commit sha> <offset>"; later
    # cursors are taken from the endCursor of each response.
    cursor = f"{args.from_commit} {i}"

    while True:
        response = github.graphql(
            query=PRS_QUERY,
            variables={
                "owner": GITHUB_USER,
                "name": GITHUB_REPO,
                "after": cursor,
                "pageSize": page_size,
            },
        )
        history = response["data"]["repository"]["defaultBranchRef"]["target"]["history"]

        to_add = []
        stop = False
        for commit in history["nodes"]:
            if commit["oid"] == args.to_commit:
                print(f"Found {commit['oid']}, stopping")
                stop = True
                break
            to_add.append(commit)

        oids = [commit["oid"] for commit in to_add]
        print(oids)
        append_and_save(to_add, cache)

        # Check for the end of the history only after the page has been
        # saved, otherwise the commits on the last page would be lost.
        if stop or not history["pageInfo"]["hasNextPage"]:
            break

        cursor = history["pageInfo"]["endCursor"]
        print(i)
        i += page_size


def write_csv(
    filename: str, data: List[Dict[str, Any]], filter: Callable[[Dict[str, Any]], bool]
) -> None:
    """Write one CSV row per commit whose pull request passes ``filter``.

    Commits without an associated pull request are skipped. The ``category``
    column is pre-filled only when the PR title yields exactly one tag, and
    ``description`` is always left empty.

    Parameters
    ----------
    filename : str
        Path of the CSV file to (over)write.
    data : List[Dict[str, Any]]
        Commit nodes as returned by ``PRS_QUERY``.
    filter : Callable[[Dict[str, Any]], bool]
        Predicate called with the pull request node; rows are written only
        when it returns a truthy value. (The name shadows the builtin but is
        kept because callers pass it by keyword.)
    """
    # PR titles may contain non-ASCII characters, so do not rely on the
    # platform default encoding.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, quotechar='"')
        writer.writerow(
            (
                "category",
                "description",
                "date",
                "number",
                "author",
                "tags",
                "title",
                "additions",
                "deletions",
                "changed files",
            )
        )
        for item in data:
            prs = item["associatedPullRequests"]["nodes"]
            if not prs:
                # No pull request is attached to this commit.
                continue
            pr = prs[0]
            if not filter(pr):
                continue

            # A single tag may itself hold a comma separated list; flatten
            # those and normalize everything to lower case.
            tags = [
                part.strip().lower()
                for tag in tags_from_title(pr["title"])
                for part in tag.split(",")
            ]
            category = tags[0] if len(tags) == 1 else ""

            # Guard against a null author node instead of failing on the lookup.
            author = pr["author"]["login"] if pr["author"] else ""

            writer.writerow(
                (
                    category,
                    "",
                    item["committedDate"],
                    f"https://github.com/{GITHUB_USER}/{GITHUB_REPO}/pull/{pr['number']}",
                    author,
                    ", ".join(tags),
                    pr["title"],
                    pr["additions"],
                    pr["deletions"],
                    pr["changedFiles"],
                )
            )


def main() -> None:
    """Parse the command line, refresh the cache if requested and write the CSVs."""
    description = "List out commits with attached PRs since a certain commit"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--from-commit", help="commit to start checking PRs from")
    parser.add_argument("--to-commit", help="commit to stop checking PRs from")
    parser.add_argument(
        "--threshold",
        default=150,
        type=int,
        help="sum of additions + deletions to consider large",
    )
    parser.add_argument(
        "--skip-query", action="store_true", help="don't query GitHub and instead use cache file"
    )
    args = parser.parse_args()
    threshold = args.threshold

    cache = Path("out.pkl")
    if not args.skip_query:
        fetch_pr_data(args, cache)

    if not cache.exists():
        print(f"Cache file {cache} does not exist, run once without --skip-query")
        sys.exit(1)

    # NOTE: pickle must only be used on trusted files; the cache is expected
    # to have been written by this script.
    with open(cache, "rb") as f:
        data = pickle.load(f)

    print(f"Found {len(data)} PRs")

    write_csv(
        filename="out-large.csv",
        data=data,
        filter=lambda pr: pr["additions"] + pr["deletions"] > threshold,
    )
    write_csv(
        filename="out-small.csv",
        data=data,
        filter=lambda pr: pr["additions"] + pr["deletions"] <= threshold,
    )


if __name__ == "__main__":
    main()