Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First stab at writing a central utils function for GitHub API calls. #1499

Merged
merged 13 commits into from
May 11, 2022
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
- Linting: Don't allow a `.nf-core.yaml` file, should be `.yml` ([#1515](https://github.com/nf-core/tools/pull/1515)).
- Remove empty JSON schema definition groups to avoid usage errors ([#1419](https://github.com/nf-core/tools/issues/1419))
- Print the include statement to the terminal when running `modules install` ([#1520](https://github.com/nf-core/tools/pull/1520))
- Use [`$XDG_CONFIG_HOME`](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) or `~/.config/nf-core` instead of `~/.nfcore` for the API cache (the old `~/.nfcore` directory can be safely deleted)
- Consolidate GitHub API calls into a shared function that uses authentication from the [`gh` GitHub cli tool](https://cli.github.com/) or `GITHUB_AUTH_TOKEN` to avoid rate limiting ([#1499](https://github.com/nf-core/tools/pull/1499))

### Modules

Expand Down
19 changes: 12 additions & 7 deletions nf_core/modules/module_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import glob
import json
import os
import requests
import logging
import rich
import datetime
Expand All @@ -15,6 +14,8 @@

log = logging.getLogger(__name__)

gh_api = nf_core.utils.gh_api


class ModuleException(Exception):
"""Exception raised when there was an error with module commands"""
Expand All @@ -35,7 +36,7 @@ def module_exist_in_repo(module_name, modules_repo):
api_url = (
f"https://api.github.com/repos/{modules_repo.name}/contents/modules/{module_name}?ref={modules_repo.branch}"
)
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
return not (response.status_code == 404)


Expand Down Expand Up @@ -65,7 +66,7 @@ def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1,
api_url += f"&since={since}"

log.debug(f"Fetching commit history of module '{module_name}' from github API")
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 200:
commits = response.json()

Expand All @@ -80,6 +81,7 @@ def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1,
elif response.status_code == 404:
raise LookupError(f"Module '{module_name}' not found in '{modules_repo.name}'\n{api_url}")
else:
gh_api.log_content_headers(response)
raise LookupError(
f"Unable to fetch commit SHA for module {module_name}. API responded with '{response.status_code}'"
)
Expand All @@ -101,7 +103,7 @@ def get_commit_info(commit_sha, repo_name="nf-core/modules"):
)
api_url = f"https://api.github.com/repos/{repo_name}/commits/{commit_sha}?stats=false"
log.debug(f"Fetching commit metadata for commit at {commit_sha}")
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 200:
commit = response.json()
message = commit["commit"]["message"].partition("\n")[0]
Expand All @@ -115,6 +117,7 @@ def get_commit_info(commit_sha, repo_name="nf-core/modules"):
elif response.status_code == 404:
raise LookupError(f"Commit '{commit_sha}' not found in 'nf-core/modules/'\n{api_url}")
else:
gh_api.log_content_headers(response)
raise LookupError(f"Unable to fetch metadata for commit SHA {commit_sha}")


Expand Down Expand Up @@ -266,10 +269,12 @@ def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_
for i, file in enumerate(files_to_check):
# Download remote copy and compare
api_url = f"{module_base_url}/{file}"
r = requests.get(url=api_url, auth=nf_core.utils.github_api_auto_auth())
r = gh_api.get(api_url)
# TODO: Remove debugging
gh_api.log_content_headers(r)
if r.status_code != 200:
gh_api.log_content_headers(r)
log.debug(f"Could not download remote copy of file module {module_name}/{file}")
log.debug(api_url)
else:
try:
remote_copies[i] = r.content.decode("utf-8")
Expand Down Expand Up @@ -414,7 +419,7 @@ def verify_pipeline_dir(dir):
modules_is_software = False
for repo_name in repo_names:
api_url = f"https://api.github.com/repos/{repo_name}/contents"
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 404:
missing_remote.append(repo_name)
if repo_name == "nf-core/software":
Expand Down
14 changes: 6 additions & 8 deletions nf_core/modules/modules_repo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import os
import requests
import base64
import sys
import logging
import nf_core.utils
from nf_core.utils import gh_api

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -43,7 +41,7 @@ def __init__(self, repo="nf-core/modules", branch=None):
def get_default_branch(self):
"""Get the default branch for a GitHub repo"""
api_url = f"https://api.github.com/repos/{self.name}"
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 200:
self.branch = response.json()["default_branch"]
log.debug(f"Found default branch to be '{self.branch}'")
Expand All @@ -58,7 +56,7 @@ def verify_modules_repo(self):

# Check if repository exist
api_url = f"https://api.github.com/repos/{self.name}/branches"
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 200:
branches = [branch["name"] for branch in response.json()]
if self.branch not in branches:
Expand All @@ -67,7 +65,7 @@ def verify_modules_repo(self):
raise LookupError(f"Repository '{self.name}' is not available on GitHub")

api_url = f"https://api.github.com/repos/{self.name}/contents?ref={self.branch}"
response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
response = gh_api.get(api_url)
if response.status_code == 200:
dir_names = [entry["name"] for entry in response.json() if entry["type"] == "dir"]
if "modules" not in dir_names:
Expand All @@ -86,7 +84,7 @@ def get_modules_file_tree(self):
self.modules_avail_module_names
"""
api_url = "https://api.github.com/repos/{}/git/trees/{}?recursive=1".format(self.name, self.branch)
r = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
r = gh_api.get(api_url)
if r.status_code == 404:
raise LookupError("Repository / branch not found: {} ({})\n{}".format(self.name, self.branch, api_url))
elif r.status_code != 200:
Expand Down Expand Up @@ -157,7 +155,7 @@ def download_gh_file(self, dl_filename, api_url):
os.makedirs(dl_directory)

# Call the GitHub API
r = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
r = gh_api.get(api_url)
if r.status_code != 200:
raise LookupError("Could not fetch {} file: {}\n {}".format(self.name, r.status_code, api_url))
result = r.json()
Expand Down
115 changes: 30 additions & 85 deletions nf_core/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
import json
import logging
import os
import random
import re
import requests
import requests_cache
import rich
import shutil
import time

import nf_core
import nf_core.create
Expand Down Expand Up @@ -79,6 +76,12 @@ def __init__(
self.gh_repo = gh_repo
self.pr_url = ""

self.gh_api = nf_core.utils.gh_api
if self.gh_username and "GITHUB_AUTH_TOKEN" in os.environ:
self.gh_api.auth = requests.auth.HTTPBasicAuth(self.gh_username, os.environ["GITHUB_AUTH_TOKEN"])
self.gh_api.return_ok = [200, 201]
self.gh_api.lazy_init()

def sync(self):
"""Find workflow attributes, create a new template pipeline on TEMPLATE"""

Expand Down Expand Up @@ -316,75 +319,28 @@ def make_pull_request(self):
).format(tag=nf_core.__version__)

# Make new pull-request
pr_content = {
"title": pr_title,
"body": pr_body_text,
"maintainer_can_modify": True,
"head": self.merge_branch,
"base": self.from_branch,
}

stderr = rich.console.Console(stderr=True, force_terminal=nf_core.utils.rich_force_colors())

while True:
log.debug("Submitting PR to GitHub API")
with self.gh_api.cache_disabled():
try:
log.debug("Submitting PR to GitHub API")
returned_data_prettyprint = ""
r_headers_pp = ""
with requests_cache.disabled():
r = requests.post(
url="https://api.github.com/repos/{}/pulls".format(self.gh_repo),
data=json.dumps(pr_content),
auth=requests.auth.HTTPBasicAuth(self.gh_username, os.environ["GITHUB_AUTH_TOKEN"]),
)
try:
self.gh_pr_returned_data = json.loads(r.content)
returned_data_prettyprint = json.dumps(dict(self.gh_pr_returned_data), indent=4)
r_headers_pp = json.dumps(dict(r.headers), indent=4)
except:
self.gh_pr_returned_data = r.content
returned_data_prettyprint = r.content
r_headers_pp = r.headers
log.error("Could not parse JSON response from GitHub API!")
stderr.print_exception()

# Dump the responses to the log just in case..
log.debug(f"PR response from GitHub. Data:\n{returned_data_prettyprint}\n\nHeaders:\n{r_headers_pp}")

# PR worked
if r.status_code == 201:
self.pr_url = self.gh_pr_returned_data["html_url"]
log.debug(f"GitHub API PR worked, return code 201")
log.info(f"GitHub PR created: {self.gh_pr_returned_data['html_url']}")
break

# Returned 403 error - too many simultaneous requests
# https://github.com/nf-core/tools/issues/911
if r.status_code == 403:
log.debug(f"GitHub API PR failed with 403 error")
wait_time = float(re.sub("[^0-9]", "", str(r.headers.get("Retry-After", 0))))
if wait_time == 0:
log.debug("Couldn't find 'Retry-After' header, guessing a length of time to wait")
wait_time = random.randrange(10, 60)
log.warning(
f"Got 403 code - probably the abuse protection. Trying again after {wait_time} seconds.."
)
time.sleep(wait_time)

# Something went wrong
else:
raise PullRequestException(
f"GitHub API returned code {r.status_code}: \n\n{returned_data_prettyprint}\n\n{r_headers_pp}"
)
# Don't catch the PullRequestException that we raised inside
except PullRequestException:
raise
# Do catch any other exceptions that we hit
r = self.gh_api.request_retry(
f"https://api.github.com/repos/{self.gh_repo}/pulls",
post_data={
"title": pr_title,
"body": pr_body_text,
"maintainer_can_modify": True,
"head": self.merge_branch,
"base": self.from_branch,
},
)
except Exception as e:
stderr.print_exception()
raise PullRequestException(
f"Something went badly wrong - {e}: \n\n{returned_data_prettyprint}\n\n{r_headers_pp}"
)
raise PullRequestException(f"Something went badly wrong - {e}")
else:
self.gh_pr_returned_data = r.json()
self.pr_url = self.gh_pr_returned_data["html_url"]
log.debug(f"GitHub API PR worked, return code 201")
log.info(f"GitHub PR created: {self.gh_pr_returned_data['html_url']}")

def close_open_template_merge_prs(self):
"""Get all template merging branches (starting with 'nf-core-template-merge-')
Expand All @@ -395,11 +351,8 @@ def close_open_template_merge_prs(self):

# Look for existing pull-requests
list_prs_url = f"https://api.github.com/repos/{self.gh_repo}/pulls"
with requests_cache.disabled():
list_prs_request = requests.get(
url=list_prs_url,
auth=requests.auth.HTTPBasicAuth(self.gh_username, os.environ["GITHUB_AUTH_TOKEN"]),
)
with self.gh_api.cache_disabled():
list_prs_request = self.gh_api.get(list_prs_url)
try:
list_prs_json = json.loads(list_prs_request.content)
list_prs_pp = json.dumps(list_prs_json, indent=4)
Expand Down Expand Up @@ -437,20 +390,12 @@ def close_open_pr(self, pr):
f"This pull-request is now outdated and has been closed in favour of {self.pr_url}\n\n"
f"Please use {self.pr_url} to merge in the new changes from the nf-core template as soon as possible."
)
with requests_cache.disabled():
comment_request = requests.post(
url=pr["comments_url"],
data=json.dumps({"body": comment_text}),
auth=requests.auth.HTTPBasicAuth(self.gh_username, os.environ["GITHUB_AUTH_TOKEN"]),
)
with self.gh_api.cache_disabled():
self.gh_api.post(url=pr["comments_url"], data=json.dumps({"body": comment_text}))

# Update the PR status to be closed
with requests_cache.disabled():
pr_request = requests.patch(
url=pr["url"],
data=json.dumps({"state": "closed"}),
auth=requests.auth.HTTPBasicAuth(self.gh_username, os.environ["GITHUB_AUTH_TOKEN"]),
)
with self.gh_api.cache_disabled():
pr_request = self.gh_api.patch(url=pr["url"], data=json.dumps({"state": "closed"}))
try:
pr_request_json = json.loads(pr_request.content)
pr_request_pp = json.dumps(pr_request_json, indent=4)
Expand Down
Loading