Skip to content

Commit

Permalink
Merge scorecard (#4401)
Browse files Browse the repository at this point in the history
* initial commit

* changes to run on cluster

* support multiple repos

put token in Kubernetes secrets

* fixed deployment

* fix ci fq repo name

* updated Makefile

use delete/create for redeploy until we stop using latest image

* added custom user pages

* fixed indentation

* fix treating pulls as issues

* poll Github

* fix import

* fix changes requested reporting

* Small improvements

 - declare language="en" so I don't get translate notifications ?????
 - use defaultdict for great profit
 - use daemon thread so ctrl-C actually kills the server

* fix

* Small improvement 2

* add logging, restart on poll thread

* fix users head/header

* autoformat htmls (#12)

* BANISH THE SERIFS WHENCE THEY CAME (#13)

* add author column to "needs review" table (#14)

* Add a list of failing builds to the user page (#16)

* Slightly better formatting. (#17)

Removed headers on tables in the user page

* import sys so the retry stuff works (#18)

* assertion is going off, log instead (#19)

* reverse reviews (come in chronological order) (#20)

We want the most recent.  From https://developer.github.com/v3/pulls/reviews/:

 > The list of reviews returns in chronological order.

scorecard was categorizing #4328 incorrectly.

* move to project dir to merge into monorepo

* updated .gitignore

* backed off spacing

* make targets phony
  • Loading branch information
cseed authored and danking committed Sep 24, 2018
1 parent f52dc50 commit c0f8585
Show file tree
Hide file tree
Showing 7 changed files with 529 additions and 0 deletions.
13 changes: 13 additions & 0 deletions scorecard/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image for the scorecard web service, built on top of Miniconda.
FROM continuumio/miniconda
# MAINTAINER is deprecated; a `maintainer` label carries the same metadata.
LABEL maintainer="Hail Team <hail@broadinstitute.org>"

# Create the `scorecard` conda environment from the spec, then remove the
# spec file and the package cache.
# NOTE(review): root's home is normally /root, so the /home/root path below
# may not match anything -- confirm, or consider `conda clean`.
COPY environment.yml .
RUN conda env create scorecard -f environment.yml && \
    rm -f environment.yml && \
    rm -rf /home/root/.conda/pkgs/*

# Application sources (scorecard.py is started by CMD below).
COPY scorecard /scorecard

EXPOSE 5000

# `source activate` needs a shell, hence the explicit `bash -c`.
CMD ["bash", "-c", "source activate scorecard; python /scorecard/scorecard.py"]
19 changes: 19 additions & 0 deletions scorecard/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# None of these targets produce a file of the same name.
.PHONY: build push run-docker run deploy

# Build the image locally, tagged `scorecard`.
build:
	docker build . -t scorecard

# Tag the local image for the registry and upload it.
push: build
	docker tag scorecard gcr.io/broad-ctsa/scorecard
	docker push gcr.io/broad-ctsa/scorecard

# Run the container locally, publishing port 5000 and mounting the token at /secrets.
run-docker:
	docker run -i -p 5000:5000 -v secrets:/secrets -t scorecard

# Run the server directly, reading the token from the local secrets directory.
run:
	GITHUB_TOKEN_PATH=secrets/scorecard-github-access-token.txt python scorecard/scorecard.py

# Redeploy by deleting and recreating the resources.
# --ignore-not-found keeps the very first deploy (nothing to delete yet)
# from aborting before the create step.
deploy:
	kubectl delete --ignore-not-found -f deployment.yaml
	sleep 5
	kubectl create -f deployment.yaml
43 changes: 43 additions & 0 deletions scorecard/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Deployment: a single scorecard pod with the GitHub token mounted read-only.
# apps/v1 is the stable API group for Deployment; apps/v1beta2 is deprecated.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: scorecard-deployment
spec:
  selector:
    matchLabels:
      app: scorecard
  replicas: 1
  template:
    metadata:
      labels:
        app: scorecard
    spec:
      containers:
      - name: scorecard
        image: gcr.io/broad-ctsa/scorecard
        ports:
        - containerPort: 5000
        volumeMounts:
        # The token file is read from /secrets by default (see GITHUB_TOKEN_PATH).
        - mountPath: /secrets
          readOnly: true
          name: scorecard-github-access-token
      volumes:
      - name: scorecard-github-access-token
        secret:
          secretName: scorecard-github-access-token
---
# Service: exposes the pod's port 5000 as port 80 on a fixed load-balancer IP.
apiVersion: v1
kind: Service
metadata:
  name: scorecard
  labels:
    app: scorecard
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 5000
  selector:
    app: scorecard
  loadBalancerIP: 35.202.189.158 # scorecard
  type: LoadBalancer
8 changes: 8 additions & 0 deletions scorecard/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Conda environment specification for the scorecard service.
name: scorecard
dependencies:
  - python=3.7
  - flask
  - humanize
  - pip
  # Packages listed under `pip:` are installed with pip instead of conda.
  - pip:
    - PyGithub
253 changes: 253 additions & 0 deletions scorecard/scorecard/scorecard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import time
import collections
import datetime
import os
import sys
from flask import Flask, render_template, request, jsonify, abort, url_for
from github import Github
import random
import threading
import humanize
import logging

# Single record layout shared by the file handler and the console handler.
fmt = logging.Formatter(
    # NB: no space after levelname because WARNING is so long
    '%(levelname)s\t| %(asctime)s \t| %(filename)s \t| %(funcName)s:%(lineno)d | '
    '%(message)s')

# fh writes to scorecard.log in the working directory; ch writes to the console.
fh = logging.FileHandler('scorecard.log')
ch = logging.StreamHandler()

fh.setFormatter(fmt)
fh.setLevel(logging.INFO)
ch.setFormatter(fmt)
ch.setLevel(logging.INFO)

# Attach both handlers to the root logger.
logging.basicConfig(level=logging.INFO, handlers=[fh, ch])

# Logger used throughout this module.
log = logging.getLogger('scorecard')
log.setLevel(logging.INFO)

# Where to find the GitHub access token; the GITHUB_TOKEN_PATH environment
# variable overrides the built-in default path.
GITHUB_TOKEN_PATH = os.environ.get(
    'GITHUB_TOKEN_PATH',
    '/secrets/scorecard-github-access-token.txt',
)

# Read the token once at import time, dropping surrounding whitespace.
with open(GITHUB_TOKEN_PATH, 'r') as f:
    token = f.read().strip()

# Authenticated client shared by every GitHub call in this module.
github = Github(token)

# GitHub logins from which the index page picks its random user.
users = [
    'danking',
    'cseed',
    'tpoterba',
    'jigold',
    'jbloom22',
    'catoverdrive',
    'patrick-schultz',
    'rcownie',
    'chrisvittal',
]

# Items in this repo are shown by bare number; others get a "<repo>/" prefix.
default_repo = 'hail'

# Short display name -> fully qualified "owner/repo" GitHub name.
repos = dict(
    hail='hail-is/hail',
    batch='hail-is/batch',
    ci='hail-is/ci',
    scorecard='hail-is/scorecard',
    cloudtools='Nealelab/cloudtools',
)

# Flask application object; the @app.route handlers below register on it.
app = Flask('scorecard')

# Latest snapshot of PR/issue data, keyed by short repo name; replaced
# wholesale by update_data().
data = None
# time.time() value recorded when `data` was last replaced. (This was
# misspelled `timsetamp`, which left the `timestamp` global read by the
# request handlers undefined until the first refresh.)
timestamp = None

@app.route('/')
def index():
    """Render the overview page.

    Groups the cached snapshot per user: PRs with changes requested are
    filed under the PR author, PRs needing review under each assignee, and
    issues under each assignee. PRs with no assignee are listed separately
    and are not filed under any user. A random entry of ``users`` and a
    human-readable age of the snapshot are also passed to the template.
    """
    # Take local references so one request sees a single snapshot.
    snapshot = data
    snapshot_time = timestamp

    def empty_buckets():
        return {'CHANGES_REQUESTED': [],
                'NEEDS_REVIEW': [],
                'ISSUES': []}

    unassigned = []
    user_data = collections.defaultdict(empty_buckets)

    for repo_data in snapshot.values():
        for pr in repo_data['prs']:
            if len(pr['assignees']) == 0:
                unassigned.append(pr)
                continue

            state = pr['state']
            if state == 'CHANGES_REQUESTED':
                # The ball is in the author's court.
                user_data[pr['user']][state].append(pr)
            elif state == 'NEEDS_REVIEW':
                for reviewer in pr['assignees']:
                    user_data[reviewer][state].append(pr)
            else:
                assert state == 'APPROVED'

        for issue in repo_data['issues']:
            for assignee in issue['assignees']:
                user_data[assignee]['ISSUES'].append(issue)

    random_user = random.choice(users)

    # Express the snapshot's age as "N minutes ago"-style text.
    now = datetime.datetime.now()
    age = datetime.timedelta(seconds=time.time() - snapshot_time)
    updated = humanize.naturaltime(now - age)

    return render_template(
        'index.html',
        unassigned=unassigned,
        user_data=user_data,
        random_user=random_user,
        updated=updated,
    )

@app.route('/users/<user>')
def get_user(user):
    """Render the page for a single user.

    Collects, from the cached snapshot: PRs authored by ``user`` with
    changes requested, PRs assigned to ``user`` that need review, PRs
    authored by ``user`` whose combined status is 'failure', and issues
    assigned to ``user``.
    """
    # Take local references so one request sees a single snapshot.
    snapshot = data
    snapshot_time = timestamp

    user_data = {
        bucket: []
        for bucket in ('CHANGES_REQUESTED', 'NEEDS_REVIEW', 'FAILING', 'ISSUES')
    }

    for repo_data in snapshot.values():
        for pr in repo_data['prs']:
            state = pr['state']
            if state == 'CHANGES_REQUESTED':
                relevant = user == pr['user']
            elif state == 'NEEDS_REVIEW':
                relevant = user in pr['assignees']
            else:
                assert state == 'APPROVED'
                relevant = False
            if relevant:
                user_data[state].append(pr)

            # Failing builds are reported regardless of review state.
            if pr['status'] == 'failure' and user == pr['user']:
                user_data['FAILING'].append(pr)

        user_data['ISSUES'].extend(
            issue for issue in repo_data['issues']
            if user in issue['assignees'])

    # Express the snapshot's age as "N minutes ago"-style text.
    now = datetime.datetime.now()
    age = datetime.timedelta(seconds=time.time() - snapshot_time)
    updated = humanize.naturaltime(now - age)

    return render_template(
        'user.html', user=user, user_data=user_data, updated=updated)

def get_id(repo_name, number):
    """Return the display id for PR/issue ``number`` in ``repo_name``.

    Items in ``default_repo`` are shown as the bare number; items in any
    other repo are shown as ``<repo_name>/<number>``.
    """
    prefix = '' if repo_name == default_repo else f'{repo_name}/'
    return f'{prefix}{number}'

def get_pr_data(repo, repo_name, pr):
    """Summarise one pull request as a plain dict.

    The review state is taken from the most recent review that is not a
    plain comment: 'CHANGES_REQUESTED' or 'APPROVED' are used as-is, while
    a 'DISMISSED' review (or no decisive review at all) leaves the PR at
    'NEEDS_REVIEW'. 'status' is the combined commit status of the PR head.
    """
    assignees = [assignee.login for assignee in pr.assignees]

    state = 'NEEDS_REVIEW'
    # Reviews are listed chronologically; walk them newest-first.
    for review in pr.get_reviews().reversed:
        verdict = review.state
        if verdict in ('CHANGES_REQUESTED', 'APPROVED'):
            state = verdict
            break
        if verdict == 'DISMISSED':
            break
        if verdict != 'COMMENTED':
            log.warning(f'unknown review state {review.state} on review {review} in pr {pr}')

    head_commit = repo.get_commit(sha=pr.head.sha)
    status = head_commit.get_combined_status().state

    return {
        'repo': repo_name,
        'id': get_id(repo_name, pr.number),
        'title': pr.title,
        'user': pr.user.login,
        'assignees': assignees,
        'html_url': pr.html_url,
        'state': state,
        'status': status,
    }

def get_issue_data(repo_name, issue):
    """Summarise one issue as a plain dict (repo, display id, title, assignee logins, URL)."""
    return {
        'repo': repo_name,
        'id': get_id(repo_name, issue.number),
        'title': issue.title,
        'assignees': [assignee.login for assignee in issue.assignees],
        'html_url': issue.html_url,
    }

def update_data():
    """Fetch open PRs and issues for every configured repo and publish them.

    Builds a complete new snapshot first and only then rebinds the
    module-level ``data`` and ``timestamp``, so an exception part-way
    through leaves the previous snapshot in place.
    """
    global data, timestamp

    log.info(f'rate_limit {github.get_rate_limit()}')
    log.info('start updating_data')

    new_data = {name: {'prs': [], 'issues': []} for name in repos}

    for repo_name, fq_repo in repos.items():
        repo = github.get_repo(fq_repo)
        bucket = new_data[repo_name]

        for pr in repo.get_pulls(state='open'):
            bucket['prs'].append(get_pr_data(repo, repo_name, pr))

        for issue in repo.get_issues(state='open'):
            # The issues listing also contains pull requests; skip those.
            if issue.pull_request is not None:
                continue
            bucket['issues'].append(get_issue_data(repo_name, issue))

    log.info('updating_data done')

    data, timestamp = new_data, time.time()

def poll(interval=180):
    """Refresh the cached GitHub data forever.

    Sleeps ``interval`` seconds (default 180, the previously hard-coded
    value) before each call to ``update_data()``. Never returns normally;
    an exception raised by ``update_data()`` propagates to the caller.
    """
    while True:
        time.sleep(interval)
        update_data()

# Fetch once, synchronously, at import time -- before the poll thread is
# started and before the server begins handling requests.
update_data()

def run_forever(target, *args, **kwargs):
    """Call ``target(*args, **kwargs)`` repeatedly, forever.

    Each time the target returns or raises an ``Exception`` the outcome is
    logged and the target is started again after a randomised delay.
    ``SystemExit``, ``KeyboardInterrupt`` and other non-``Exception``
    ``BaseException``s are deliberately not swallowed.

    ``target`` should be a function (its ``__name__`` is used in log
    messages).
    """
    # target should be a function
    target_name = target.__name__

    expected_retry_interval_ms = 15 * 1000  # 15s
    while True:
        start = time.time()
        try:
            log.info(f'run target {target_name}')
            target(*args, **kwargs)
            log.info(f'target {target_name} returned')
        except Exception:
            # Was a bare `except:`, which also trapped interpreter-exit signals.
            log.error(f'target {target_name} threw exception', exc_info=sys.exc_info())
        end = time.time()

        run_time_ms = int((end - start) * 1000 + 0.5)
        # Uniform delay in [0, 2 * expected) ms, minus the time the target
        # itself already ran; skip sleeping when that is not positive.
        t = random.randrange(expected_retry_interval_ms * 2) - run_time_ms
        if t > 0:
            log.debug(f'{target_name}: sleep {t}ms')
            time.sleep(t / 1000.0)

# Background refresher. It is a daemon thread so that it does not keep the
# process alive when the server is interrupted.
poll_thread = threading.Thread(
    target=run_forever,
    args=(poll,),
    daemon=True,
)
poll_thread.start()

if __name__ == "__main__":
    # Bind to all interfaces so the server is reachable from outside the container.
    app.run(host='0.0.0.0')
Loading

0 comments on commit c0f8585

Please sign in to comment.