Skip to content

Commit

Permalink
Merge pull request #62 from populationgenomics/upstream
Browse files Browse the repository at this point in the history
Merge remote-tracking branch 'upstream/main' into upstream
  • Loading branch information
illusional authored Mar 2, 2021
2 parents 0c287f1 + 555f983 commit 771b8d4
Show file tree
Hide file tree
Showing 66 changed files with 1,558 additions and 582 deletions.
4 changes: 2 additions & 2 deletions auth/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ spec:
resources:
requests:
memory: "50M"
cpu: "5m"
cpu: "15m"
limits:
memory: 3750Mi
cpu: "1"
Expand Down Expand Up @@ -232,7 +232,7 @@ spec:
- type: Resource
resource:
name: cpu
targetAverageUtilization: 80
targetAverageUtilization: 95
---
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
Expand Down
45 changes: 44 additions & 1 deletion batch/batch/driver/canceller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import asyncio

from hailtop.utils import (WaitableSharedPool, retry_long_running, run_if_changed,
AsyncWorkerPool, time_msecs)
AsyncWorkerPool, time_msecs, periodically_call)
from hailtop import aiotools, aiogoogle
from gear import Database

Expand Down Expand Up @@ -37,6 +37,8 @@ async def async_init(self):
'cancel_cancelled_running_jobs_loop',
run_if_changed, self.cancel_running_state_changed, self.cancel_cancelled_running_jobs_loop_body))

self.task_manager.ensure_future(periodically_call(60, self.cancel_orphaned_attempts_loop_body))

def shutdown(self):
try:
self.task_manager.shutdown()
Expand Down Expand Up @@ -290,3 +292,44 @@ async def unschedule_with_error_handling(app, record, instance_name, id):
await waitable_pool.wait()

return should_wait

async def cancel_orphaned_attempts_loop_body(self):
    """Unschedule attempts that are still open but no longer own their job.

    An attempt is selected when it has a start time and no end time, its
    instance is in state 'active', and its job is either not 'Running' or
    is currently pointing at a different attempt id.  At most 300 such
    attempts (oldest start time first) are handled per invocation; each one
    is handed to ``unschedule_job`` through the shared worker pool.

    Failures of individual unschedule calls are logged and do not abort the
    pass.
    """
    log.info('cancelling orphaned attempts')
    waitable_pool = WaitableSharedPool(self.async_worker_pool)

    # Defined once, outside the loop: it closes over nothing that changes
    # per record, everything it needs is passed in explicitly.
    async def unschedule_with_error_handling(app, record, instance_name, job_key, attempt_id):
        try:
            await unschedule_job(app, record)
        except Exception:
            # Best effort: log with traceback and let the other attempts proceed.
            log.info(f'unscheduling job {job_key} with orphaned attempt {attempt_id} on instance {instance_name}', exc_info=True)

    # Number of attempts handed to the pool.  Failed unschedules are included
    # because the helper swallows (and logs) their exceptions.
    n_unscheduled = 0

    # NOTE: the `instances.state = 'active'` predicate in the WHERE clause
    # rejects rows where the LEFT JOIN found no instance, so attempts without
    # a matching instance row are never returned.
    async for record in self.db.select_and_fetchall(
            '''
SELECT attempts.*
FROM attempts
INNER JOIN jobs ON attempts.batch_id = jobs.batch_id AND attempts.job_id = jobs.job_id
LEFT JOIN instances ON attempts.instance_name = instances.name
WHERE attempts.start_time IS NOT NULL
AND attempts.end_time IS NULL
AND (jobs.state != 'Running' OR jobs.attempt_id != attempts.attempt_id)
AND instances.`state` = 'active'
ORDER BY attempts.start_time ASC
LIMIT 300;
''',
            timer_description='in cancel_orphaned_attempts'):
        # (batch_id, job_id) pair, used only for the log message.
        job_key = (record['batch_id'], record['job_id'])

        n_unscheduled += 1

        await waitable_pool.call(
            unschedule_with_error_handling,
            self.app,
            record,
            record['instance_name'],
            job_key,
            record['attempt_id'],
        )

    # Block until every call submitted above has finished.
    await waitable_pool.wait()

    log.info(f'cancelled {n_unscheduled} orphaned attempts')
2 changes: 0 additions & 2 deletions batch/batch/driver/job_private.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ async def schedule_with_error_handling(app, record, id, instance):
await waitable_pool.call(
schedule_with_error_handling, self.app, record, id, instance)

n_scheduled += 1

await waitable_pool.wait()

log.info(f'scheduled {n_scheduled} jobs for {self}')
Expand Down
5 changes: 3 additions & 2 deletions batch/batch/front_end/front_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,15 +564,16 @@ async def _query_batches(request, user, q):
WHERE {' AND '.join(where_conditions)}
GROUP BY batches.id
ORDER BY batches.id DESC
LIMIT 50;
LIMIT 51;
'''
sql_args = where_args

batches = [batch_record_to_dict(batch)
async for batch
in db.select_and_fetchall(sql, sql_args)]

if len(batches) == 50:
if len(batches) == 51:
batches.pop()
last_batch_id = batches[-1]['id']
else:
last_batch_id = None
Expand Down
4 changes: 3 additions & 1 deletion benchmark-service/benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,8 @@ async def github_polling_loop(app):

async def on_startup(app):
app['gs_reader'] = ReadGoogleStorage(service_account_key_file='/benchmark-gsa-key/key.json')
app['github_client'] = gidgethub.aiohttp.GitHubAPI(aiohttp.ClientSession(),
app['gh_client_session'] = aiohttp.ClientSession()
app['github_client'] = gidgethub.aiohttp.GitHubAPI(app['gh_client_session'],
'hail-is/hail',
oauth_token=oauth_token)
app['batch_client'] = bc.BatchClient(billing_project='benchmark')
Expand All @@ -449,6 +450,7 @@ async def on_startup(app):


async def on_cleanup(app):
    """Release the resources stored on ``app`` during startup.

    Closes the HTTP client session kept under ``app['gh_client_session']``
    and shuts down ``app['task_manager']``.  The task manager is shut down
    even when closing the session raises, so a failing close cannot leave
    background tasks running; the exception still propagates afterwards.
    """
    try:
        await app['gh_client_session'].close()
    finally:
        app['task_manager'].shutdown()


Expand Down
1 change: 1 addition & 0 deletions benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ cleanup_image:
rm -f image_sha
rm -f pushed_image

PROJECT := hail-vdc
BENCHMARK_DOCKER_TAG := benchmark_$(shell whoami)
BENCHMARK_REPO_BASE = $(DOCKER_PREFIX)/$(BENCHMARK_DOCKER_TAG)

Expand Down
42 changes: 1 addition & 41 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2172,45 +2172,6 @@ steps:
- memory_image
- deploy_memory_sa
- create_certs
- kind: runImage
name: test_query
image:
valueFrom: hail_run_image.image
resources:
memory: "3.75G"
cpu: "1"
script: |
set -ex
cd /io
tar xvf wheel-container.tar
python3 -m pip install --no-dependencies hail-*-py3-none-any.whl
hailctl config set batch/bucket cpg-hail-test
HAIL_BILLING_PROJECT=test HAIL_DONT_RETRY_500=1 HAIL_QUERY_BACKEND=service python3 -m pytest --log-cli-level=INFO -s -vv --instafail --durations=50 /io/test/
timeout: 600
secrets:
- name: gce-deploy-config
namespace:
valueFrom: default_ns.name
mountPath: /deploy-config
- name: test-dev-tokens
namespace:
valueFrom: default_ns.name
mountPath: /user-tokens
- name: ssl-config-query-tests
namespace:
valueFrom: default_ns.name
mountPath: /ssl-config
inputs:
- from: /wheel-container.tar
to: /io/wheel-container.tar
- from: /repo/query/test
to: /io/
dependsOn:
- default_ns
- create_certs
- build_hail
- hail_run_image
- deploy_query
- kind: runImage
name: test_lsm
image:
Expand Down Expand Up @@ -3532,6 +3493,7 @@ steps:
namespace:
valueFrom: default_ns.name
mountPath: /ssl-config
timeout: 1200
dependsOn:
- default_ns
- create_certs
Expand Down Expand Up @@ -3584,7 +3546,6 @@ steps:
- test_hailtop_batch_2
- test_hailtop_batch_3
- test_hailtop_batch_4
- test_query
- kind: runImage
name: delete_batch_instances
image:
Expand Down Expand Up @@ -3622,7 +3583,6 @@ steps:
- test_hailtop_batch_2
- test_hailtop_batch_3
- test_hailtop_batch_4
- test_query
- kind: runImage
name: delete_atgu_tables
image:
Expand Down
8 changes: 7 additions & 1 deletion ci/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ CI_IMAGE = $(DOCKER_PREFIX)/ci:$(shell docker images -q --no-trunc ci | sed -e '
EXTRA_PYTHONPATH := ../batch:../hail/python:../gear:../web_common
PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3

BLACK := $(PYTHON) -m black ci --line-length=120 --skip-string-normalization

.PHONY: check
check:
$(PYTHON) -m flake8 ci
$(PYTHON) -m pylint --rcfile ../pylintrc ci --score=n
$(PYTHON) -m black ci --check --diff --line-length=120 --skip-string-normalization
$(BLACK) --check --diff
../check-sql.sh

.PHONY: blacken
blacken:
$(BLACK)

.PHONY: build-ci-utils
build-ci-utils:
$(MAKE) -C ../docker build
Expand Down
37 changes: 20 additions & 17 deletions ci/ci/constants.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,35 @@
from typing import Optional
from typing import Optional, List

GITHUB_CLONE_URL = 'https://github.com/'
GITHUB_STATUS_CONTEXT = 'ci-test'
TEAMS = ['Services', 'Compiler']


class User:
    """Associates a GitHub username with an optional Hail username and teams.

    Args:
        gh_username: GitHub login of the user.
        hail_username: Matching Hail username, or ``None`` when not given.
        teams: Names of the teams the user belongs to.  When omitted, the
            instance gets its own new empty list.
    """

    def __init__(self, gh_username: str, hail_username: Optional[str] = None, teams: Optional[List[str]] = None):
        self.gh_username = gh_username
        self.hail_username = hail_username
        # A ``None`` sentinel instead of a ``[]`` default: a list default is
        # created once and would be shared by every User built without teams.
        self.teams = [] if teams is None else teams


AUTHORIZED_USERS = [
User('danking', 'dking'),
User('danking', 'dking', ['Services']),
User('cseed', 'cseed'),
User('konradjk', 'konradk'),
User('jigold', 'jigold'),
User('patrick-schultz', 'pschultz'),
User('lfrancioli', None),
User('tpoterba', 'tpoterba'),
User('chrisvittal', 'cvittal'),
User('catoverdrive', 'wang'),
User('johnc1231', 'johnc'),
User('nawatts', None),
User('mkveerapen', None),
User('Dania-Abuhijleh', None),
User('bw2', None),
User('jigold', 'jigold', ['Services']),
User('patrick-schultz', 'pschultz', ['Compiler']),
User('lfrancioli'),
User('tpoterba', 'tpoterba', ['Compiler']),
User('chrisvittal', 'cvittal', ['Compiler']),
User('catoverdrive', 'wang', ['Services', 'Compiler']),
User('johnc1231', 'johnc', ['Compiler']),
User('nawatts'),
User('mkveerapen'),
User('Dania-Abuhijleh'),
User('bw2'),
User('pwc2', 'pcumming'),
User('lgruen', None),
User('CDiaz96', 'carolin'),
User('daniel-goldstein', 'dgoldste'),
User('lgruen'),
User('CDiaz96', 'carolin', ['Services']),
User('daniel-goldstein', 'dgoldste', ['Services']),
]
4 changes: 3 additions & 1 deletion ci/ci/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,14 @@ def clone_or_fetch_script(repo):


class PR(Code):
def __init__(self, number, title, source_branch, source_sha, target_branch, author, labels):
def __init__(self, number, title, source_branch, source_sha, target_branch, author, assignees, labels):
self.number = number
self.title = title
self.source_branch = source_branch
self.source_sha = source_sha
self.target_branch = target_branch
self.author = author
self.assignees = assignees
self.labels = labels

# pending, changes_requested, approve
Expand Down Expand Up @@ -255,6 +256,7 @@ def from_gh_json(gh_json, target_branch):
head['sha'],
target_branch,
gh_json['user']['login'],
{user['login'] for user in gh_json['assignees']},
{label['name'] for label in gh_json['labels']},
)

Expand Down
16 changes: 16 additions & 0 deletions ci/ci/templates/team-table.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% macro team_table(team_member) %}
  {# Renders a two-column table with one row per (team, member) item of the
     `team_member` mapping. Header cells are wrapped in a <tr>, which <thead>
     requires as its direct child. #}
  <table class="data-table">
    <thead>
      <tr>
        <th>component</th>
        <th>reviewer</th>
      </tr>
    </thead>
    <tbody>
      {% for team, member in team_member.items() %}
      <tr>
        <td>{{ team }}</td>
        <td>{{ member }}</td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
{% endmacro %}
50 changes: 38 additions & 12 deletions ci/ci/templates/user.html
Original file line number Diff line number Diff line change
@@ -1,20 +1,46 @@
{% from "pr-table.html" import pr_table with context %}
{% from "dev-deploy-table.html" import dev_deploy_table with context %}
{% from "team-table.html" import team_table with context %}
{% extends "layout.html" %}

{% block title %}User Homepage{% endblock %}

{% block content %}
<h1>Welcome, {{ username }}!</h1>

GitHub username: {{ gh_username }}
<h2>PRs</h2>
{% for wb in watched_branches %}
{% if wb.prs is not none %}
<h2 class="stacked-header">{{ wb.branch }}</h2>
{{ pr_table(wb) }}
{% endif %}
{% endfor %}

<br>
<h1>Welcome, {{ username }}!</h1>

GitHub username: {{ gh_username }}

<div style='display: flex; flex-flow: row wrap;'>
<div style='flex: 40%'>
<div style='margin-bottom: 5%'>
<h2>My PRs</h2>
{% for wb in pr_wbs %}
{% if wb.prs is not none %}
<h3 class="stacked-header">{{ wb.branch }}</h3>
{{ pr_table(wb) }}
{% endif %}
{% endfor %}
</div>

<div style='margin-bottom: 5%'>
<h2>Assigned Reviews</h2>
{% for wb in review_wbs %}
{% if wb.prs is not none %}
<h3 class="stacked-header">{{ wb.branch }}</h3>
{{ pr_table(wb) }}
{% endif %}
{% endfor %}
</div>

<div style='margin-bottom: 5%'>
{{ team_table(team_member) }}
</div>
</div>

<div style='flex: 40%; padding-left: 10px'>
<h2>Dev Deploys</h2>
{{ dev_deploy_table(dev_deploys) }}
</div>
</div>

{% endblock %}
Loading

0 comments on commit 771b8d4

Please sign in to comment.