-
Notifications
You must be signed in to change notification settings - Fork 192
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #615 from nf-core/module-import
Module import
- Loading branch information
Showing
3 changed files
with
385 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Code to handle DSL2 module imports from a GitHub repository | ||
""" | ||
|
||
from __future__ import print_function | ||
|
||
import base64 | ||
import logging | ||
import os | ||
import requests | ||
import sys | ||
import tempfile | ||
|
||
|
||
class ModulesRepo(object): | ||
""" | ||
An object to store details about the repository being used for modules. | ||
Used by the `nf-core modules` top-level command with -r and -b flags, | ||
so that this can be used in the same way by all sucommands. | ||
""" | ||
|
||
def __init__(self, repo="nf-core/modules", branch="master"): | ||
self.name = repo | ||
self.branch = branch | ||
|
||
|
||
class PipelineModules(object): | ||
def __init__(self): | ||
""" | ||
Initialise the PipelineModules object | ||
""" | ||
self.modules_repo = ModulesRepo() | ||
self.pipeline_dir = None | ||
self.modules_file_tree = {} | ||
self.modules_current_hash = None | ||
self.modules_avail_module_names = [] | ||
|
||
def list_modules(self): | ||
""" | ||
Get available module names from GitHub tree for repo | ||
and print as list to stdout | ||
""" | ||
self.get_modules_file_tree() | ||
return_str = "" | ||
|
||
if len(self.modules_avail_module_names) > 0: | ||
logging.info("Modules available from {} ({}):\n".format(self.modules_repo.name, self.modules_repo.branch)) | ||
# Print results to stdout | ||
return_str += "\n".join(self.modules_avail_module_names) | ||
else: | ||
logging.info( | ||
"No available modules found in {} ({}):\n".format(self.modules_repo.name, self.modules_repo.branch) | ||
) | ||
return return_str | ||
|
||
def install(self, module): | ||
|
||
# Check that we were given a pipeline | ||
if self.pipeline_dir is None or not os.path.exists(self.pipeline_dir): | ||
logging.error("Could not find pipeline: {}".format(self.pipeline_dir)) | ||
return False | ||
main_nf = os.path.join(self.pipeline_dir, "main.nf") | ||
nf_config = os.path.join(self.pipeline_dir, "nextflow.config") | ||
if not os.path.exists(main_nf) and not os.path.exists(nf_config): | ||
logging.error("Could not find a main.nf or nextfow.config file in: {}".format(self.pipeline_dir)) | ||
return False | ||
|
||
# Get the available modules | ||
self.get_modules_file_tree() | ||
|
||
# Check that the supplied name is an available module | ||
if module not in self.modules_avail_module_names: | ||
logging.error("Module '{}' not found in list of available modules.".format(module)) | ||
logging.info("Use the command 'nf-core modules list' to view available software") | ||
return False | ||
logging.debug("Installing module '{}' at modules hash {}".format(module, self.modules_current_hash)) | ||
|
||
# Check that we don't already have a folder for this module | ||
module_dir = os.path.join(self.pipeline_dir, "modules", "software", module) | ||
if os.path.exists(module_dir): | ||
logging.error("Module directory already exists: {}".format(module_dir)) | ||
logging.info("To update an existing module, use the commands 'nf-core update' or 'nf-core fix'") | ||
return False | ||
|
||
# Download module files | ||
files = self.get_module_file_urls(module) | ||
logging.debug("Fetching module files:\n - {}".format("\n - ".join(files.keys()))) | ||
for filename, api_url in files.items(): | ||
dl_filename = os.path.join(self.pipeline_dir, "modules", filename) | ||
self.download_gh_file(dl_filename, api_url) | ||
|
||
def update(self, module, force=False): | ||
logging.error("This command is not yet implemented") | ||
pass | ||
|
||
def remove(self, module): | ||
logging.error("This command is not yet implemented") | ||
pass | ||
|
||
def check_modules(self): | ||
logging.error("This command is not yet implemented") | ||
pass | ||
|
||
def get_modules_file_tree(self): | ||
""" | ||
Fetch the file list from the repo, using the GitHub API | ||
Sets self.modules_file_tree | ||
self.modules_current_hash | ||
self.modules_avail_module_names | ||
""" | ||
api_url = "https://api.github.com/repos/{}/git/trees/{}?recursive=1".format( | ||
self.modules_repo.name, self.modules_repo.branch | ||
) | ||
r = requests.get(api_url) | ||
if r.status_code == 404: | ||
logging.error( | ||
"Repository / branch not found: {} ({})\n{}".format( | ||
self.modules_repo.name, self.modules_repo.branch, api_url | ||
) | ||
) | ||
sys.exit(1) | ||
elif r.status_code != 200: | ||
raise SystemError( | ||
"Could not fetch {} ({}) tree: {}\n{}".format( | ||
self.modules_repo.name, self.modules_repo.branch, r.status_code, api_url | ||
) | ||
) | ||
|
||
result = r.json() | ||
assert result["truncated"] == False | ||
|
||
self.modules_current_hash = result["sha"] | ||
self.modules_file_tree = result["tree"] | ||
for f in result["tree"]: | ||
if f["path"].startswith("software/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: | ||
# remove software/ and /main.nf | ||
self.modules_avail_module_names.append(f["path"][9:-8]) | ||
|
||
def get_module_file_urls(self, module): | ||
"""Fetch list of URLs for a specific module | ||
Takes the name of a module and iterates over the GitHub repo file tree. | ||
Loops over items that are prefixed with the path 'software/<module_name>' and ignores | ||
anything that's not a blob. Also ignores the test/ subfolder. | ||
Returns a dictionary with keys as filenames and values as GitHub API URIs. | ||
These can be used to then download file contents. | ||
Args: | ||
module (string): Name of module for which to fetch a set of URLs | ||
Returns: | ||
dict: Set of files and associated URLs as follows: | ||
{ | ||
'software/fastqc/main.nf': 'https://api.github.com/repos/nf-core/modules/git/blobs/65ba598119206a2b851b86a9b5880b5476e263c3', | ||
'software/fastqc/meta.yml': 'https://api.github.com/repos/nf-core/modules/git/blobs/0d5afc23ba44d44a805c35902febc0a382b17651' | ||
} | ||
""" | ||
results = {} | ||
for f in self.modules_file_tree: | ||
if not f["path"].startswith("software/{}".format(module)): | ||
continue | ||
if f["type"] != "blob": | ||
continue | ||
if "/test/" in f["path"]: | ||
continue | ||
results[f["path"]] = f["url"] | ||
return results | ||
|
||
def download_gh_file(self, dl_filename, api_url): | ||
"""Download a file from GitHub using the GitHub API | ||
Args: | ||
dl_filename (string): Path to save file to | ||
api_url (string): GitHub API URL for file | ||
Raises: | ||
If a problem, raises an error | ||
""" | ||
|
||
# Make target directory if it doesn't already exist | ||
dl_directory = os.path.dirname(dl_filename) | ||
if not os.path.exists(dl_directory): | ||
os.makedirs(dl_directory) | ||
|
||
# Call the GitHub API | ||
r = requests.get(api_url) | ||
if r.status_code != 200: | ||
raise SystemError("Could not fetch {} file: {}\n {}".format(self.modules_repo.name, r.status_code, api_url)) | ||
result = r.json() | ||
file_contents = base64.b64decode(result["content"]) | ||
|
||
# Write the file contents | ||
with open(dl_filename, "wb") as fh: | ||
fh.write(file_contents) |
Oops, something went wrong.