Skip to content

Commit

Permalink
Merge pull request #1116 from ErikDanielsson/update-modules-install
Browse files Browse the repository at this point in the history
Update modules install to support version control
  • Loading branch information
ErikDanielsson authored Jun 23, 2021
2 parents fa9f2bd + 5b7c79b commit d6f616e
Show file tree
Hide file tree
Showing 7 changed files with 460 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-lint-wf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
run: nf-core --log-file log.txt lint nf-core-testpipeline --fail-ignored --release

- name: nf-core modules install
run: nf-core --log-file log.txt modules install nf-core-testpipeline/ --tool fastqc
run: nf-core --log-file log.txt modules install nf-core-testpipeline/ --tool fastqc --latest

- name: Upload log file artifact
if: ${{ always() }}
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Update comment style of modules `functions.nf` template file [[#1076](https://github.com/nf-core/tools/issues/1076)]
* Changed working directory to temporary directory for `nf-core modules create-test-yml` [[#908](https://github.com/nf-core/tools/issues/908)]
* Use Biocontainers API instead of quay.io API for `nf-core modules create` [[#875](https://github.com/nf-core/tools/issues/875)]
* Update `nf-core modules install` to handle different versions of modules [[#1116](https://github.com/nf-core/tools/pull/1116)]

#### Sync

Expand Down
10 changes: 9 additions & 1 deletion nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,12 @@ def list(ctx, pipeline_dir, json):
@click.pass_context
@click.argument("pipeline_dir", type=click.Path(exists=True), required=True, metavar="<pipeline directory>")
@click.option("-t", "--tool", type=str, metavar="<tool> or <tool/subtool>")
def install(ctx, pipeline_dir, tool):
@click.option("-l", "--latest", is_flag=True, default=False, help="Install the latest version of the module")
@click.option(
"-f", "--force", is_flag=True, default=False, help="Force installation of module if module already exists"
)
@click.option("-s", "--sha", type=str, metavar="<commit sha>", help="Install module at commit SHA")
def install(ctx, pipeline_dir, tool, latest, force, sha):
"""
Add a DSL2 software wrapper module to a pipeline.
Expand All @@ -391,6 +396,9 @@ def install(ctx, pipeline_dir, tool):
mods = nf_core.modules.PipelineModules()
mods.modules_repo = ctx.obj["modules_repo_obj"]
mods.pipeline_dir = pipeline_dir
mods.force = force
mods.latest = latest
mods.sha = sha
mods.install(tool)
except UserWarning as e:
log.critical(e)
Expand Down
186 changes: 186 additions & 0 deletions nf_core/modules/module_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import glob
import json
import os
import requests
import sys
import logging
import questionary
from itertools import count

from requests import api

import nf_core.utils

from .modules_repo import ModulesRepo

log = logging.getLogger(__name__)


def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2020-11-25T00:00:00Z"):
    """
    Retrieve one page of the commit history of a module from the GitHub API.

    The query is made against the `master` branch of `nf-core/modules` and is
    restricted to commits touching `software/<module_name>`.

    Args:
        module_name (str): Name of module
        per_page (int): Number of commits per page returned by the API
        page_nbr (int): Page number of the retrieved commits
        since (str): Only include commits later than this timestamp.
                     Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ.

    Returns:
        [ dict ]: One dict per commit holding `git_sha` and `trunc_message`
                  (the first line of the commit message). The list is empty
                  when the requested page contains no commits.

    Raises:
        SystemError: If the API responds with a status other than 200 or 404.
                     A 404 is logged as an error and ends the process with exit code 1.
    """
    api_url = f"https://api.github.com/repos/nf-core/modules/commits?sha=master&path=software/{module_name}&per_page={per_page}&page={page_nbr}&since={since}"
    log.debug(f"Fetching commit history of module '{module_name}' from github API")
    response = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
    status = response.status_code

    # Deal with the failure cases first so the happy path reads straight through
    if status == 404:
        log.error(f"Module '{module_name}' not found in 'nf-core/modules/'\n{api_url}")
        sys.exit(1)
    if status != 200:
        raise SystemError(f"Unable to fetch commit SHA for module {module_name}")

    commits = response.json()
    if len(commits) == 0:
        log.debug(f"Reached end of commit history for '{module_name}'")
        return []

    # Keep only the SHA and the first line of each commit message
    summaries = []
    for commit in commits:
        sha = commit["sha"]
        first_line = commit["commit"]["message"].partition("\n")[0]
        summaries.append({"git_sha": sha, "trunc_message": first_line})
    return summaries


def create_modules_json(pipeline_dir):
    """
    Create the `modules.json` file of a pipeline.

    The pipeline name and home page are read from the `manifest.name` and
    `manifest.homePage` entries of the pipeline config. For every module found
    below `<pipeline_dir>/modules/nf-core/software/`, the commit history is
    searched page by page for a commit whose files are identical to the local
    ones, and that commit SHA is recorded.

    If the commit history cannot be fetched, or no commit matches the local
    files of a module, the error is logged, no file is written and the process
    exits with code 1.

    Args:
        pipeline_dir (str): The directory where the `modules.json` should be created
    """
    pipeline_config = nf_core.utils.fetch_wf_config(pipeline_dir)
    pipeline_name = pipeline_config["manifest.name"]
    pipeline_url = pipeline_config["manifest.homePage"]
    modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "modules": {}}
    all_module_file_paths = glob.glob(f"{pipeline_dir}/modules/nf-core/software/**/*", recursive=True)

    # Extract the module paths from the file paths: every directory that
    # directly contains a file counts as a module. Sorting keeps the order of
    # the entries in `modules.json` stable between runs.
    module_paths = sorted({os.path.dirname(path) for path in all_module_file_paths if os.path.isfile(path)})
    module_names = [path.replace(f"{pipeline_dir}/modules/nf-core/software/", "") for path in module_paths]
    module_repo = ModulesRepo()
    for module_name, module_path in zip(module_names, module_paths):
        try:
            # Find the correct commit SHA for the local files.
            # We iterate over the commit log pages until we either
            # find a matching commit or we reach the end of the commits
            correct_commit_sha = None
            commit_page_nbr = 1
            while correct_commit_sha is None:
                commit_shas = [
                    commit["git_sha"] for commit in get_module_git_log(module_name, page_nbr=commit_page_nbr)
                ]
                if not commit_shas:
                    # An empty page means the history is exhausted; without
                    # this check the loop would request pages forever.
                    raise SystemError(
                        f"Could not find a commit matching the local files of module '{module_name}'"
                    )
                correct_commit_sha = find_correct_commit_sha(module_name, module_path, module_repo, commit_shas)
                commit_page_nbr += 1

            modules_json["modules"][module_name] = {"git_sha": correct_commit_sha}
        except SystemError as e:
            log.error(e)
            log.error("Will not create 'modules.json' file")
            sys.exit(1)
    modules_json_path = os.path.join(pipeline_dir, "modules.json")
    with open(modules_json_path, "w") as fh:
        json.dump(modules_json, fh, indent=4)


def find_correct_commit_sha(module_name, module_path, modules_repo, commit_shas):
    """
    Returns the SHA for the latest commit where the local files are identical to the remote files

    Args:
        module_name (str): Name of module
        module_path (str): Path to module in local repo
        modules_repo (ModulesRepo): Remote repo for module
        commit_shas ([ str ]): List of commit SHAs for module, sorted in descending order

    Returns:
        commit_sha (str): The first SHA in `commit_shas` for which the local files are
                          identical to the remote files, or `None` if there is no such commit
    """

    files_to_check = ["main.nf", "functions.nf", "meta.yml"]
    local_file_contents = []
    for file in files_to_check:
        file_path = os.path.join(module_path, file)
        try:
            # Read as UTF-8 since the remote copies are compared as UTF-8 decoded text.
            # The context manager makes sure the file handle is closed again.
            with open(file_path, "r", encoding="utf-8") as fh:
                local_file_contents.append(fh.read())
        except FileNotFoundError:
            # A missing local file is represented by `None`
            log.debug(f"Could not open file: {file_path}")
            local_file_contents.append(None)

    for commit_sha in commit_shas:
        if local_module_equal_to_commit(local_file_contents, module_name, modules_repo, commit_sha):
            return commit_sha
    return None


def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_sha):
    """
    Compares the local module files to the module files for the given commit sha

    The files `main.nf`, `functions.nf` and `meta.yml` are downloaded one at a
    time from raw.githubusercontent.com. The comparison stops at the first file
    that differs, so no further downloads are made once the answer is known.

    Args:
        local_files ([ str ]): Contents of local files, in the order `main.nf`,
                               `functions.nf`, `meta.yml`. `None` if a file doesn't exist
        module_name (str): Name of module
        modules_repo (ModulesRepo): Remote repo for module (its `name` is used in the URL)
        commit_sha (str): Commit SHA for remote version to compare against local version

    Returns:
        bool: Whether all local files are identical to remote version
    """

    files_to_check = ["main.nf", "functions.nf", "meta.yml"]

    module_base_url = f"https://raw.githubusercontent.com/{modules_repo.name}/{commit_sha}/software/{module_name}"
    for i, file in enumerate(files_to_check):
        # Download remote copy; it stays `None` if it can't be fetched or decoded
        remote_copy = None
        api_url = f"{module_base_url}/{file}"
        r = requests.get(url=api_url)
        if r.status_code != 200:
            log.debug(f"Could not download remote copy of file module {module_name}/{file}")
            log.debug(api_url)
        else:
            try:
                remote_copy = r.content.decode("utf-8")
            except UnicodeDecodeError:
                log.debug(f"Could not decode remote copy of {file} for the {module_name} module")

        # Compare the contents of the files.
        # If the file is missing from both the local and remote repo
        # we will get the comparison None == None
        if local_files[i] != remote_copy:
            return False

    return True


def prompt_module_version_sha(module, installed_sha=None):
    """
    Interactively ask which commit of a module should be used.

    Commits are listed ten at a time. As long as a further page of commits
    exists, an extra "older commits" entry is offered which shows the next page.

    Args:
        module (str): Name of module
        installed_sha (str): SHA of the currently installed version, if any.
                             The matching entry is highlighted and marked
                             as "(installed version)".

    Returns:
        git_sha (str): The commit SHA of the selected entry
    """
    older_commits_choice = questionary.Choice(
        title=[("fg:ansiyellow", "older commits"), ("class:choice-default", "")], value=""
    )
    git_sha = ""
    page_nbr = 1
    next_page_commits = get_module_git_log(module, per_page=10, page_nbr=page_nbr)
    # The "older commits" entry has the empty string as its value, so an empty
    # answer means "show the next page". Compare by value, not by identity.
    while git_sha == "":
        commits = next_page_commits
        # Look one page ahead to know whether "older commits" should be offered
        next_page_commits = get_module_git_log(module, per_page=10, page_nbr=page_nbr + 1)
        choices = []
        for commit in commits:
            title = commit["trunc_message"]
            sha = commit["git_sha"]

            display_color = "fg:ansiblue" if sha != installed_sha else "fg:ansired"
            message = f"{title} {sha}"
            if installed_sha == sha:
                message += " (installed version)"
            commit_display = [(display_color, message), ("class:choice-default", "")]
            choices.append(questionary.Choice(title=commit_display, value=sha))
        if len(next_page_commits) > 0:
            choices += [older_commits_choice]
        git_sha = questionary.select(
            f"Select '{module}' version", choices=choices, style=nf_core.utils.nfcore_question_style
        ).unsafe_ask()
        page_nbr += 1
    return git_sha
114 changes: 114 additions & 0 deletions nf_core/modules/modules_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import os
import requests
import base64
import sys
import logging
import nf_core.utils

log = logging.getLogger(__name__)


class ModulesRepo(object):
    """
    An object to store details about the repository being used for modules.

    Used by the `nf-core modules` top-level command with -r and -b flags,
    so that this can be used in the same way by all subcommands.
    """

    def __init__(self, repo="nf-core/modules", branch="master"):
        """
        Args:
            repo (str): GitHub repository holding the modules, as `<owner>/<name>`
            branch (str): Branch of the repository to use
        """
        self.name = repo
        self.branch = branch
        self.modules_file_tree = {}
        self.modules_current_hash = None
        self.modules_avail_module_names = []

    def get_modules_file_tree(self):
        """
        Fetch the file list from the repo, using the GitHub API

        Sets self.modules_file_tree
             self.modules_current_hash
             self.modules_avail_module_names

        Raises:
            SystemError: If the API responds with a status other than 200 or 404.
                         A 404 is logged as an error and ends the process with exit code 1.
            AssertionError: If the API reports the returned tree as truncated.
        """
        api_url = "https://api.github.com/repos/{}/git/trees/{}?recursive=1".format(self.name, self.branch)
        r = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
        if r.status_code == 404:
            log.error("Repository / branch not found: {} ({})\n{}".format(self.name, self.branch, api_url))
            sys.exit(1)
        elif r.status_code != 200:
            raise SystemError(
                "Could not fetch {} ({}) tree: {}\n{}".format(self.name, self.branch, r.status_code, api_url)
            )

        result = r.json()
        # Raised explicitly (instead of using `assert`) so that the check
        # is still performed when Python runs with optimisations (-O)
        if result["truncated"]:
            raise AssertionError("File tree of {} ({}) was truncated\n{}".format(self.name, self.branch, api_url))

        self.modules_current_hash = result["sha"]
        self.modules_file_tree = result["tree"]
        # Rebuild the list from scratch so repeated calls don't add duplicates
        self.modules_avail_module_names = [
            # remove software/ and /main.nf
            f["path"][9:-8]
            for f in result["tree"]
            if f["path"].startswith("software/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]
        ]

    def get_module_file_urls(self, module, commit=""):
        """Fetch list of URLs for a specific module

        Takes the name of a module and iterates over the GitHub repo file tree.
        Loops over items that are prefixed with the path 'software/<module_name>/' and ignores
        anything that's not a blob. Also ignores the test/ subfolder.
        Returns a dictionary with keys as filenames and values as GitHub API URLs.
        These can be used to then download file contents.

        Args:
            module (string): Name of module for which to fetch a set of URLs
            commit (string): Commit SHA to fetch the files at. If empty, the blob
                             URLs from the file tree are returned unchanged.

        Returns:
            dict: Set of files and associated URLs as follows:

            {
                'software/fastqc/main.nf': 'https://api.github.com/repos/nf-core/modules/git/blobs/65ba598119206a2b851b86a9b5880b5476e263c3',
                'software/fastqc/meta.yml': 'https://api.github.com/repos/nf-core/modules/git/blobs/0d5afc23ba44d44a805c35902febc0a382b17651'
            }
        """
        # Match on a complete path component, so that e.g. 'bwa' does not
        # also pick up the files of 'bwameth'
        module_prefix = "software/{}".format(module)
        if not module_prefix.endswith("/"):
            module_prefix += "/"

        results = {}
        for f in self.modules_file_tree:
            if not f["path"].startswith(module_prefix):
                continue
            if f["type"] != "blob":
                continue
            if "/test/" in f["path"]:
                continue
            results[f["path"]] = f["url"]
        if commit != "":
            # Point at the contents API of the configured repository for the requested commit
            for path in results:
                results[path] = f"https://api.github.com/repos/{self.name}/contents/{path}?ref={commit}"
        return results

    def download_gh_file(self, dl_filename, api_url):
        """Download a file from GitHub using the GitHub API

        The JSON response is expected to carry the file as base64 in its
        `content` field; the decoded bytes are written to `dl_filename`.

        Args:
            dl_filename (string): Path to save file to
            api_url (string): GitHub API URL for file

        Raises:
            SystemError: If the API responds with a status other than 200
        """

        # Make target directory if it doesn't already exist.
        # A bare filename has no directory part and nothing needs creating.
        dl_directory = os.path.dirname(dl_filename)
        if dl_directory:
            os.makedirs(dl_directory, exist_ok=True)

        # Call the GitHub API
        r = requests.get(api_url, auth=nf_core.utils.github_api_auto_auth())
        if r.status_code != 200:
            raise SystemError("Could not fetch {} file: {}\n {}".format(self.name, r.status_code, api_url))
        result = r.json()
        file_contents = base64.b64decode(result["content"])

        # Write the file contents
        with open(dl_filename, "wb") as fh:
            fh.write(file_contents)
Loading

0 comments on commit d6f616e

Please sign in to comment.