diff --git a/CHANGELOG.md b/CHANGELOG.md index 286a92a6bf..30e7d12568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ - Update how we interface with git remotes. ([#1626](https://github.com/nf-core/tools/issues/1626)) - Add prompt for module name to `nf-core modules info` ([#1644](https://github.com/nf-core/tools/issues/1644)) - Update docs with example of custom git remote ([#1645](https://github.com/nf-core/tools/issues/1645)) +- Add `--base-path` flag to `nf-core modules` to specify the base path for the modules in a remote. Also refactored `modules.json` code. ([#1643](https://github.com/nf-core/tools/issues/1643)) ## [v2.4.1 - Cobolt Koala Patch](https://github.com/nf-core/tools/releases/tag/2.4) - [2022-05-16] diff --git a/README.md b/README.md index 26b4aebc5f..c22b8ab0ab 100644 --- a/README.md +++ b/README.md @@ -920,6 +920,8 @@ For example, if you want to install the `fastqc` module from the repository `nf- nf-core modules --git-remote git@gitlab.com:nf-core/modules-test.git install fastqc ``` +If the modules in your custom remote are stored in another directory than `modules`, you can specify the path by using the `--base-path <base_path>` flag. This will default to `modules`. + Note that a custom remote must follow a similar directory structure to that of `nf-core/moduleś` for the `nf-core modules` commands to work properly. The modules commands will during initalisation try to pull changes from the remote repositories. 
If you want to disable this, for example diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 5d95913dd9..effb8dec86 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -4,6 +4,7 @@ import os import re import sys +from email.policy import default import rich.console import rich.logging @@ -357,8 +358,14 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow default=False, help="Do not pull in latest changes to local clone of modules repository.", ) +@click.option( + "--base-path", + type=str, + default=None, + help="Specify where the modules are stored in the remote", +) @click.pass_context -def modules(ctx, git_remote, branch, no_pull): +def modules(ctx, git_remote, branch, no_pull, base_path): """ Commands to manage Nextflow DSL2 modules (tool wrappers). """ @@ -370,6 +377,7 @@ def modules(ctx, git_remote, branch, no_pull): ctx.obj["modules_repo_url"] = git_remote ctx.obj["modules_repo_branch"] = branch ctx.obj["modules_repo_no_pull"] = no_pull + ctx.obj["modules_repo_base_path"] = base_path # nf-core modules list subcommands @@ -393,7 +401,12 @@ def remote(ctx, keywords, json): """ try: module_list = nf_core.modules.ModuleList( - None, True, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + None, + True, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) print(module_list.list_modules(keywords, json)) except (UserWarning, LookupError) as e: @@ -419,7 +432,12 @@ def local(ctx, keywords, json, dir): """ try: module_list = nf_core.modules.ModuleList( - dir, False, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + dir, + False, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) print(module_list.list_modules(keywords, json)) except (UserWarning, LookupError) 
as e: @@ -456,6 +474,7 @@ def install(ctx, tool, dir, prompt, force, sha): ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) exit_status = module_install.install(tool) if not exit_status and all: @@ -513,6 +532,7 @@ def update(ctx, tool, dir, force, prompt, sha, all, preview, save_diff): ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) exit_status = module_install.update(tool) if not exit_status and all: @@ -539,7 +559,11 @@ def remove(ctx, dir, tool): """ try: module_remove = nf_core.modules.ModuleRemove( - dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + dir, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) module_remove.remove(tool) except (UserWarning, LookupError) as e: @@ -637,7 +661,11 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): """ try: module_lint = nf_core.modules.ModuleLint( - dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + dir, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) module_lint.lint( module=tool, @@ -683,7 +711,12 @@ def info(ctx, tool, dir): """ try: module_info = nf_core.modules.ModuleInfo( - dir, tool, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + dir, + tool, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) print(module_info.get_module_info()) except (UserWarning, LookupError) as e: @@ -705,7 +738,11 @@ def bump_versions(ctx, tool, dir, all, show_all): """ try: version_bumper = nf_core.modules.bump_versions.ModuleVersionBumper( - dir, 
ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + dir, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ctx.obj["modules_repo_base_path"], ) version_bumper.bump_versions(module=tool, all_modules=all, show_uptodate=show_all) except nf_core.modules.module_utils.ModuleException as e: diff --git a/nf_core/lint/modules_json.py b/nf_core/lint/modules_json.py index 2a4aa1a5b3..cb1f25cbe5 100644 --- a/nf_core/lint/modules_json.py +++ b/nf_core/lint/modules_json.py @@ -3,6 +3,7 @@ from logging import warn from nf_core.modules.modules_command import ModuleCommand +from nf_core.modules.modules_json import ModulesJson def modules_json(self): @@ -19,22 +20,24 @@ def modules_json(self): # Load pipeline modules and modules.json modules_command = ModuleCommand(self.wf_path) - modules_json = modules_command.load_modules_json() + modules_json = ModulesJson(self.wf_path) + modules_json.load_modules_json() + modules_json_dict = modules_json.modules_json if modules_json: modules_command.get_pipeline_modules() all_modules_passed = True - for repo in modules_json["repos"].keys(): + for repo in modules_json_dict["repos"].keys(): # Check if the modules.json has been updated to keep the - if "modules" not in modules_json["repos"][repo] or "git_url" not in modules_json["repos"][repo]: + if "modules" not in modules_json_dict["repos"][repo] or "git_url" not in modules_json_dict["repos"][repo]: failed.append( f"Your `modules.json` file is outdated. 
Please remove it and reinstall it by running any module command" ) continue - for key in modules_json["repos"][repo]["modules"]: + for key in modules_json_dict["repos"][repo]["modules"]: if not key in modules_command.module_names[repo]: failed.append(f"Entry for `{key}` found in `modules.json` but module is not installed in pipeline.") all_modules_passed = False diff --git a/nf_core/modules/__init__.py b/nf_core/modules/__init__.py index 3dd49e3a41..680202e46d 100644 --- a/nf_core/modules/__init__.py +++ b/nf_core/modules/__init__.py @@ -6,6 +6,7 @@ from .list import ModuleList from .module_test import ModulesTest from .module_utils import ModuleException +from .modules_json import ModulesJson from .modules_repo import ModulesRepo from .mulled import MulledImageNameGenerator from .remove import ModuleRemove diff --git a/nf_core/modules/bump_versions.py b/nf_core/modules/bump_versions.py index 1c5fedaf8f..72f4a1ac52 100644 --- a/nf_core/modules/bump_versions.py +++ b/nf_core/modules/bump_versions.py @@ -28,8 +28,8 @@ class ModuleVersionBumper(ModuleCommand): - def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): - super().__init__(pipeline_dir, remote_url, branch, no_pull) + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False, base_path=None): + super().__init__(pipeline_dir, remote_url, branch, no_pull, base_path) self.up_to_date = None self.updated = None diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index ccdd418fc9..62bb2e0c03 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -5,6 +5,7 @@ import nf_core.modules.module_utils import nf_core.utils +from nf_core.modules.modules_json import ModulesJson from .modules_command import ModuleCommand from .modules_repo import NF_CORE_MODULES_NAME @@ -13,8 +14,28 @@ class ModuleInstall(ModuleCommand): - def __init__(self, pipeline_dir, force=False, prompt=False, sha=None, remote_url=None, branch=None, no_pull=False): - 
super().__init__(pipeline_dir, remote_url, branch, no_pull) + def __init__( + self, + pipeline_dir, + force=False, + prompt=False, + sha=None, + remote_url=None, + branch=None, + no_pull=False, + base_path=None, + ): + # Check if we are given a base path, otherwise look in the modules.json + if base_path is None: + try: + modules_json = ModulesJson(pipeline_dir) + repo_name = nf_core.modules.module_utils.path_from_remote(remote_url) + base_path = modules_json.get_base_path(repo_name) + except: + # We don't want to fail yet if the modules.json is not found + pass + + super().__init__(pipeline_dir, remote_url, branch, no_pull, base_path) self.force = force self.prompt = prompt self.sha = sha @@ -28,7 +49,8 @@ def install(self, module): return False # Verify that 'modules.json' is consistent with the installed modules - self.modules_json_up_to_date() + modules_json = ModulesJson(self.dir) + modules_json.modules_json_up_to_date() if self.prompt and self.sha is not None: log.error("Cannot use '--sha' and '--prompt' at the same time!") @@ -53,22 +75,14 @@ def install(self, module): log.info("Use the command 'nf-core modules list' to view available software") return False - # Load 'modules.json' - modules_json = self.load_modules_json() - if not modules_json: - return False - if not self.modules_repo.module_exists(module): warn_msg = ( - f"Module '{module}' not found in remote '{self.modules_repo.fullname}' ({self.modules_repo.branch})" + f"Module '{module}' not found in remote '{self.modules_repo.remote_url}' ({self.modules_repo.branch})" ) log.warning(warn_msg) return False - if self.modules_repo.fullname in modules_json["repos"]: - current_entry = modules_json["repos"][self.modules_repo.fullname]["modules"].get(module) - else: - current_entry = None + current_version = modules_json.get_module_version(module, self.modules_repo.fullname) # Set the install folder based on the repository name install_folder = [self.dir, "modules"] @@ -78,11 +92,11 @@ def install(self, 
module): module_dir = os.path.join(*install_folder, module) # Check that the module is not already installed - if (current_entry is not None and os.path.exists(module_dir)) and not self.force: + if (current_version is not None and os.path.exists(module_dir)) and not self.force: log.error("Module is already installed.") repo_flag = ( - "" if self.modules_repo.fullname == NF_CORE_MODULES_NAME else f"-g {self.modules_repo.fullname} " + "" if self.modules_repo.fullname == NF_CORE_MODULES_NAME else f"-g {self.modules_repo.remote_url} " ) branch_flag = "" if self.modules_repo.branch == "master" else f"-b {self.modules_repo.branch} " @@ -97,7 +111,7 @@ def install(self, module): try: version = nf_core.modules.module_utils.prompt_module_version_sha( module, - installed_sha=current_entry["git_sha"] if not current_entry is None else None, + installed_sha=current_version, modules_repo=self.modules_repo, ) except SystemError as e: @@ -113,7 +127,7 @@ def install(self, module): self.clear_module_dir(module, module_dir) log.info(f"{'Rei' if self.force else 'I'}nstalling '{module}'") - log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.fullname}") + log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.remote_url}") # Download module files if not self.install_module_files(module, version, self.modules_repo, install_folder): @@ -124,5 +138,5 @@ def install(self, module): log.info(f"Include statement: include {{ {module_name} }} from '.{os.path.join(*install_folder, module)}/main'") # Update module.json with newly installed module - self.update_modules_json(modules_json, self.modules_repo, module, version) + modules_json.update_modules_json(self.modules_repo, module, version) return True diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index d7c40c8d09..e678175c5b 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -29,6 +29,7 @@ from 
nf_core.lint.pipeline_todos import pipeline_todos from nf_core.lint_utils import console from nf_core.modules.modules_command import ModuleCommand +from nf_core.modules.modules_json import ModulesJson from nf_core.modules.modules_repo import ModulesRepo from nf_core.modules.nfcore_module import NFCoreModule from nf_core.utils import plural_s as _s @@ -69,7 +70,7 @@ class ModuleLint(ModuleCommand): from .module_todos import module_todos from .module_version import module_version - def __init__(self, dir, remote_url=None, branch=None, no_pull=False): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False, base_path=None): self.dir = dir try: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) @@ -79,7 +80,7 @@ def __init__(self, dir, remote_url=None, branch=None, no_pull=False): self.passed = [] self.warned = [] self.failed = [] - self.modules_repo = ModulesRepo(remote_url, branch, no_pull) + self.modules_repo = ModulesRepo(remote_url, branch, no_pull, base_path) self.lint_tests = self._get_all_lint_tests() # Get lists of modules install in directory self.all_local_modules, self.all_nfcore_modules = self.get_installed_modules() @@ -196,7 +197,8 @@ def lint( def set_up_pipeline_files(self): self.load_lint_config() - self.modules_json = self.load_modules_json() + self.modules_json = ModulesJson(self.dir) + self.modules_json.load_modules_json() # Only continue if a lint config has been loaded if self.lint_config: diff --git a/nf_core/modules/lint/module_version.py b/nf_core/modules/lint/module_version.py index 979a4ae011..ed834a4792 100644 --- a/nf_core/modules/lint/module_version.py +++ b/nf_core/modules/lint/module_version.py @@ -30,24 +30,23 @@ def module_version(module_lint_object, module): modules_json_path = os.path.join(module_lint_object.dir, "modules.json") # Verify that a git_sha exists in the `modules.json` file for this module - try: - module_entry = 
module_lint_object.modules_json["repos"][module_lint_object.modules_repo.fullname]["modules"][ - module.module_name - ] - git_sha = module_entry["git_sha"] - module.git_sha = git_sha - module.passed.append(("git_sha", "Found git_sha entry in `modules.json`", modules_json_path)) - - # Check whether a new version is available - try: - modules_repo = nf_core.modules.modules_repo.ModulesRepo() - module_git_log = modules_repo.get_module_git_log(module.module_name) - if git_sha == next(module_git_log)["git_sha"]: - module.passed.append(("module_version", "Module is the latest version", module.module_dir)) - else: - module.warned.append(("module_version", "New version available", module.module_dir)) - except UserWarning: - module.warned.append(("module_version", "Failed to fetch git log", module.module_dir)) - - except KeyError: + module_version = module_lint_object.modules_json.get_module_version( + module.module_name, module_lint_object.modules_repo.fullname + ) + if module_version is None: module.failed.append(("git_sha", "No git_sha entry in `modules.json`", modules_json_path)) + return + + module.git_sha = module_version + module.passed.append(("git_sha", "Found git_sha entry in `modules.json`", modules_json_path)) + + # Check whether a new version is available + try: + modules_repo = nf_core.modules.modules_repo.ModulesRepo() + module_git_log = modules_repo.get_module_git_log(module.module_name) + if module_version == next(module_git_log)["git_sha"]: + module.passed.append(("module_version", "Module is the latest version", module.module_dir)) + else: + module.warned.append(("module_version", "New version available", module.module_dir)) + except UserWarning: + module.warned.append(("module_version", "Failed to fetch git log", module.module_dir)) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index dc7f6cf91d..aa00f07b43 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -1,20 +1,18 @@ import json import logging -from os import pipe 
import rich -import nf_core.modules.module_utils -from nf_core.modules.modules_repo import ModulesRepo - from .modules_command import ModuleCommand +from .modules_json import ModulesJson +from .modules_repo import ModulesRepo log = logging.getLogger(__name__) class ModuleList(ModuleCommand): - def __init__(self, pipeline_dir, remote=True, remote_url=None, branch=None, no_pull=False): - super().__init__(pipeline_dir, remote_url, branch, no_pull) + def __init__(self, pipeline_dir, remote=True, remote_url=None, branch=None, no_pull=False, base_path=None): + super().__init__(pipeline_dir, remote_url, branch, no_pull, base_path) self.remote = remote def list_modules(self, keywords=None, print_json=False): @@ -67,7 +65,8 @@ def pattern_msg(keywords): return "" # Verify that 'modules.json' is consistent with the installed modules - self.modules_json_up_to_date() + modules_json = ModulesJson(self.dir) + modules_json.modules_json_up_to_date() # Get installed modules self.get_pipeline_modules() @@ -89,7 +88,7 @@ def pattern_msg(keywords): table.add_column("Date") # Load 'modules.json' - modules_json = self.load_modules_json() + modules_json = modules_json.modules_json for repo_name, modules in sorted(repos_with_mods.items()): repo_entry = modules_json["repos"].get(repo_name, {}) @@ -105,9 +104,9 @@ def pattern_msg(keywords): version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = ModulesRepo(remote_url=repo_entry["git_url"]).get_commit_info( - version_sha - ) # NOTE add support for other remotes + message, date = ModulesRepo( + remote_url=repo_entry["git_url"], base_path=repo_entry["base_path"] + ).get_commit_info(version_sha) except LookupError as e: log.warning(e) date = "[red]Not Available" diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 887f64bc12..97346ed169 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -1,19 +1,12 @@ 
-import datetime -import glob -import json import logging import os import urllib -from sys import modules -import git import questionary import rich -from pyrsistent import m import nf_core.utils -from .modules_repo import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE, ModulesRepo from .nfcore_module import NFCoreModule log = logging.getLogger(__name__) @@ -25,33 +18,6 @@ class ModuleException(Exception): pass -def dir_tree_uncovered(modules_dir, repos): - """ - Does a BFS of the modules directory of a pipeline and rapports any directories - that are not found in the list of repos - """ - # Initialise the FIFO queue. Note that we assume the directory to be correctly - # configured, i.e. no files etc. - fifo = [os.path.join(modules_dir, subdir) for subdir in os.listdir(modules_dir) if subdir != "local"] - depth = 1 - dirs_not_covered = [] - while len(fifo) > 0: - temp_queue = [] - repos_at_level = {os.path.join(*os.path.split(repo)[:depth]): len(os.path.split(repo)) for repo in repos} - for dir in fifo: - rel_dir = os.path.relpath(dir, modules_dir) - if rel_dir in repos_at_level.keys(): - # Go the next depth if this directory is not one of the repos - if depth < repos_at_level[rel_dir]: - temp_queue.extend([os.path.join(dir, subdir) for subdir in os.listdir(dir)]) - else: - # Otherwise add the directory to the ones not covered - dirs_not_covered.append(dir) - fifo = temp_queue - depth += 1 - return dirs_not_covered - - def path_from_remote(remote_url): """ Extracts the path from the remote URL @@ -74,148 +40,6 @@ def path_from_remote(remote_url): return path -def get_pipeline_module_repositories(modules_dir): - """ - Finds all module repositories in the modules directory. Ignores the local modules. 
- Args: - modules_dir (str): base directory for the module files - Returns - repos [ (str, str) ]: List of tuples of repo name and repo remote URL - """ - # Check if there are any nf-core modules installed - if os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME)): - repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)] - else: - repos = [] - # Check if there are any untracked repositories - dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) - if len(dirs_not_covered) > 0: - log.info("Found custom module repositories when creating 'modules.json'") - # Loop until all directories in the base directory are covered by a remote - while len(dirs_not_covered) > 0: - log.info( - "The following director{s} in the modules directory are untracked: '{l}'".format( - s="ies" if len(dirs_not_covered) > 0 else "y", l="', '".join(dirs_not_covered) - ) - ) - nrepo_remote = questionary.text( - "Please provide a URL for for one of the repos contained in the untracked directories" - ).unsafe_ask() - # Verify that the remote exists - while True: - try: - git.Git().ls_remote(nrepo_remote) - break - except git.exc.GitCommandError: - nrepo_remote = questionary.text( - "The provided remote does not seem to exist, please provide a new remote." - ).unsafe_ask() - - # Verify that there is a directory corresponding the remote - nrepo_name = path_from_remote(nrepo_remote) - if not os.path.exists(os.path.join(modules_dir, nrepo_name)): - log.info( - "The provided remote does not seem to correspond to a local directory. " - "The directory structure should be the same as in the remote" - ) - dir_name = questionary.text( - "Please provide the correct directory, it will be renamed. 
If left empty, the remote will be ignored" - ).unsafe_ask() - if dir_name: - os.rename(os.path.join(modules_dir, dir_name), os.path.join(modules_dir, nrepo_name)) - else: - continue - repos.append((nrepo_name, nrepo_remote)) - dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) - return repos - - -def create_modules_json(pipeline_dir): - """ - Create the modules.json files - - Args: - pipeline_dir (str): The directory where the `modules.json` should be created - """ - pipeline_config = nf_core.utils.fetch_wf_config(pipeline_dir) - pipeline_name = pipeline_config.get("manifest.name", "") - pipeline_url = pipeline_config.get("manifest.homePage", "") - modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "repos": dict()} - modules_dir = f"{pipeline_dir}/modules" - - if not os.path.exists(modules_dir): - raise UserWarning("Can't find a ./modules directory. Is this a DSL2 pipeline?") - - repos = get_pipeline_module_repositories(modules_dir) - - # Get all module names in the repos - repo_module_names = [ - ( - repo_name, - [ - os.path.relpath(dir_name, os.path.join(modules_dir, repo_name)) - for dir_name, _, file_names in os.walk(os.path.join(modules_dir, repo_name)) - if "main.nf" in file_names - ], - repo_remote, - ) - for repo_name, repo_remote in repos - ] - progress_bar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[magenta]{task.completed} of {task.total}[reset] » [bold yellow]{task.fields[test_name]}", - transient=True, - ) - with progress_bar: - n_total_modules = sum(len(modules) for _, modules, _ in repo_module_names) - file_progress = progress_bar.add_task( - "Creating 'modules.json' file", total=n_total_modules, test_name="module.json" - ) - for repo_name, module_names, remote in sorted(repo_module_names): - try: - # Create a ModulesRepo object without progress bar to not conflict with the other one - modules_repo = 
ModulesRepo(remote_url=remote, no_progress=True) - except LookupError as e: - raise UserWarning(e) - - repo_path = os.path.join(modules_dir, repo_name) - modules_json["repos"][repo_name] = dict() - modules_json["repos"][repo_name]["git_url"] = remote - modules_json["repos"][repo_name]["modules"] = dict() - for module_name in sorted(module_names): - module_path = os.path.join(repo_path, module_name) - progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") - correct_commit_sha = find_correct_commit_sha(module_name, module_path, modules_repo) - - modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": correct_commit_sha} - - modules_json_path = os.path.join(pipeline_dir, "modules.json") - with open(modules_json_path, "w") as fh: - json.dump(modules_json, fh, indent=4) - fh.write("\n") - - -def find_correct_commit_sha(module_name, module_path, modules_repo): - """ - Returns the SHA for the latest commit where the local files are identical to the remote files - Args: - module_name (str): Name of module - module_path (str): Path to module in local repo - module_repo (str): Remote repo for module - Returns: - commit_sha (str): The latest commit SHA where local files are identical to remote files - """ - # Find the correct commit SHA for the local module files. 
- # We iterate over the commit history for the module until we find - # a revision that matches the file contents - commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) - for commit_sha in commit_shas: - if all(modules_repo.module_files_identical(module_name, module_path, commit_sha).values()): - return commit_sha - return None - - def get_installed_modules(dir, repo_type="modules"): """ Make a list of all modules installed in this repository diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 432b162982..008474bf4e 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -1,23 +1,15 @@ -import copy import glob -import json import logging import os import shutil -from datetime import datetime -import git -import questionary import yaml import nf_core.modules.module_utils import nf_core.utils -from nf_core.modules.modules_repo import ( - NF_CORE_MODULES_NAME, - NF_CORE_MODULES_REMOTE, - ModulesRepo, -) -from nf_core.utils import plural_s as _s + +from .modules_json import ModulesJson +from .modules_repo import ModulesRepo log = logging.getLogger(__name__) @@ -27,11 +19,11 @@ class ModuleCommand: Base class for the 'nf-core modules' commands """ - def __init__(self, dir, remote_url=None, branch=None, no_pull=False): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False, base_path=None): """ Initialise the ModulesCommand object """ - self.modules_repo = ModulesRepo(remote_url, branch, no_pull) + self.modules_repo = ModulesRepo(remote_url, branch, no_pull, base_path) self.dir = dir self.module_names = [] try: @@ -95,7 +87,6 @@ def has_valid_directory(self): nf_config = os.path.join(self.dir, "nextflow.config") if not os.path.exists(main_nf) and not os.path.exists(nf_config): raise UserWarning(f"Could not find a 'main.nf' or 'nextflow.config' file in '{self.dir}'") - self.has_modules_file() return True @@ -104,197 +95,7 @@ def 
has_modules_file(self): modules_json_path = os.path.join(self.dir, "modules.json") if not os.path.exists(modules_json_path): log.info("Creating missing 'module.json' file.") - nf_core.modules.module_utils.create_modules_json(self.dir) - - def modules_json_up_to_date(self): - """ - Checks whether the modules installed in the directory - are consistent with the entries in the 'modules.json' file and vice versa. - - If a module has an entry in the 'modules.json' file but is missing in the directory, - we first try to reinstall the module from the remote and if that fails we remove the entry - in 'modules.json'. - - If a module is installed but the entry in 'modules.json' is missing we iterate through - the commit log in the remote to try to determine the SHA. - """ - mod_json = self.load_modules_json() - fresh_mod_json = copy.deepcopy(mod_json) - self.get_pipeline_modules() - missing_from_modules_json = {} - - # Iterate through all installed modules - # and remove all entries in modules_json which - # are present in the directory - for repo, modules in self.module_names.items(): - if repo in mod_json["repos"]: - for module in modules: - repo_modules = mod_json["repos"][repo].get("modules") - if repo_modules is None: - raise UserWarning( - "Your 'modules.json' is not up to date. " - "Please reinstall it by removing it and rerunning the command." 
- ) - if module in mod_json["repos"][repo]["modules"]: - mod_json["repos"][repo]["modules"].pop(module) - else: - if repo not in missing_from_modules_json: - missing_from_modules_json[repo] = ([], mod_json["repos"][repo]["git_url"]) - missing_from_modules_json[repo][0].append(module) - if len(mod_json["repos"][repo]["modules"]) == 0: - mod_json["repos"].pop(repo) - else: - missing_from_modules_json[repo] = (modules, None) - - # If there are any modules left in 'modules.json' after all installed are removed, - # we try to reinstall them - if len(mod_json["repos"]) > 0: - missing_but_in_mod_json = [ - f"'{repo}/{module}'" for repo, contents in mod_json["repos"].items() for module in contents["modules"] - ] - log.info( - f"Reinstalling modules found in 'modules.json' but missing from directory: {', '.join(missing_but_in_mod_json)}" - ) - - remove_from_mod_json = {} - for repo, contents in mod_json["repos"].items(): - modules = contents["modules"] - remote = contents["git_url"] - - modules_repo = ModulesRepo(remote_url=remote) - install_folder = os.path.split(modules_repo.fullname) - - for module, entry in modules.items(): - sha = entry.get("git_sha") - if sha is None: - if repo not in remove_from_mod_json: - remove_from_mod_json[repo] = [] - log.warning( - f"Could not find git SHA for module '{module}' in '{repo}' - removing from modules.json" - ) - remove_from_mod_json[repo].append(module) - continue - module_dir = [self.dir, "modules", *install_folder] - self.install_module_files(module, sha, modules_repo, module_dir) - - # If the reinstall fails, we remove those entries in 'modules.json' - if sum(map(len, remove_from_mod_json.values())) > 0: - uninstallable_mods = [ - f"'{repo}/{module}'" for repo, modules in remove_from_mod_json.items() for module in modules - ] - if len(uninstallable_mods) == 1: - log.info(f"Was unable to reinstall {uninstallable_mods[0]}. Removing 'modules.json' entry") - else: - log.info( - f"Was unable to reinstall some modules. 
Removing 'modules.json' entries: {', '.join(uninstallable_mods)}" - ) - - for repo, modules in remove_from_mod_json.items(): - for module in modules: - fresh_mod_json["repos"][repo].pop(module) - if len(fresh_mod_json["repos"][repo]) == 0: - fresh_mod_json["repos"].pop(repo) - - # If some modules didn't have an entry in the 'modules.json' file - # we try to determine the SHA from the commit log of the remote - dead_repos = [] - sb_local_repos = [] - if sum(map(len, missing_from_modules_json.values())) > 0: - - format_missing = [ - f"'{repo}/{module}'" for repo, contents in missing_from_modules_json.items() for module in contents[0] - ] - if len(format_missing) == 1: - log.info(f"Recomputing commit SHA for module {format_missing[0]} which was missing from 'modules.json'") - else: - log.info( - f"Recomputing commit SHAs for modules which were missing from 'modules.json': {', '.join(format_missing)}" - ) - - failed_to_find_commit_sha = [] - for repo, (modules, remote) in missing_from_modules_json.items(): - if remote is None: - if repo == NF_CORE_MODULES_NAME: - remote = NF_CORE_MODULES_REMOTE - else: - choice = questionary.select( - f"Found untracked files in {repo}. Please select a choice", - choices=[ - questionary.Choice("Provide the remote", value=0), - questionary.Choice("Move the directory to 'local'", value=1), - questionary.Choice("Remove the files", value=2), - ], - ) - if choice == 0: - remote = questionary.text("Please provide the URL of the remote") - # Verify that the remote exists - while True: - try: - git.Git().ls_remote(remote) - break - except git.exc.GitCommandError: - remote = questionary.text( - "The provided remote does not seem to exist, please provide a new remote." 
- ).unsafe_ask() - elif choice == 1: - sb_local_repos.append(repo) - continue - else: - dead_repos.append(repo) - continue - - remote = questionary.text(f"Please provide a remote for these files ") - - modules_repo = ModulesRepo(remote_url=remote) - repo_path = os.path.join(self.dir, "modules", repo) - for module in modules: - module_path = os.path.join(repo_path, module) - correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha( - module, module_path, modules_repo - ) - log.info(correct_commit_sha) - if repo not in fresh_mod_json["repos"]: - fresh_mod_json["repos"][repo] = {} - - fresh_mod_json["repos"][repo][module] = {"git_sha": correct_commit_sha} - - if len(failed_to_find_commit_sha) > 0: - log.info( - f"Could not determine 'git_sha' for module{_s(failed_to_find_commit_sha)}: " - f"{', '.join(failed_to_find_commit_sha)}." - f"\nPlease try to install a newer version of " - f"{'this' if len(failed_to_find_commit_sha) == 1 else 'these'} " - f"module{_s(failed_to_find_commit_sha)}." - ) - - # Remove the requested repos - for repo in dead_repos: - path = os.path.join(self.dir, "modules", repo) - shutil.rmtree(path) - - # Copy the untracked repos to local - for repo in sb_local_repos: - modules_path = os.path.join(self.dir, "modules") - path = os.path.join(modules_path, repo) - local_path = os.path.join(modules_path, "local") - - # Create the local module directory if it doesn't already exist - if not os.path.exists(local_path): - os.makedirs(local_path) - - # Check if there is already a subdirectory with the name - if os.path.exists(os.path.join(local_path, to_path)): - to_path = path - while os.path.exists(os.path.join(local_path, to_path)): - # Add a time suffix to the path to make it unique - # (do it again and again if it didn't work out...) 
- to_path += f"-{datetime.datetime.now().strftime('%y%m%d%H%M%S')}" - shutil.move(path, to_path) - path = to_path - - shutil.move(path, local_path) - - self.dump_modules_json(fresh_mod_json) + ModulesJson(self.dir).create_modules_json() def clear_module_dir(self, module_name, module_dir): """Removes all files in the module directory""" @@ -330,45 +131,6 @@ def install_module_files(self, module_name, module_version, modules_repo, instal """ return modules_repo.install_module(module_name, install_dir, module_version) - def load_modules_json(self): - """ - Loads the modules.json file - - Returns: - (nested dict...): The parsed 'modules.json' file - """ - modules_json_path = os.path.join(self.dir, "modules.json") - try: - with open(modules_json_path, "r") as fh: - modules_json = json.load(fh) - except FileNotFoundError: - log.error("File 'modules.json' is missing") - modules_json = None - return modules_json - - def update_modules_json(self, modules_json, modules_repo, module_name, module_version, write_file=True): - """ - Updates the 'module.json' file with new module info - """ - repo_name = modules_repo.fullname - remote_url = modules_repo.remote_url - if repo_name not in modules_json["repos"]: - modules_json["repos"][repo_name] = {"modules": {}, "git_url": remote_url} - modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": module_version} - # Sort the 'modules.json' repo entries - modules_json["repos"] = nf_core.utils.sort_dictionary(modules_json["repos"]) - if write_file: - self.dump_modules_json(modules_json) - else: - return modules_json - - def dump_modules_json(self, modules_json): - """Build filename for modules.json and write to file.""" - modules_json_path = os.path.join(self.dir, "modules.json") - with open(modules_json_path, "w") as fh: - json.dump(modules_json, fh, indent=4) - fh.write("\n") - def load_lint_config(self): """Parse a pipeline lint config file. 
class ModulesJson:
    """
    An object for handling a 'modules.json' file in a pipeline.

    The 'modules.json' file records, per module repository, the git remote,
    the base path of the modules inside that remote and the git SHA of every
    installed module.
    """

    def __init__(self, pipeline_dir):
        """
        Initialise the object.

        Args:
            pipeline_dir (str): The pipeline directory
        """
        self.dir = pipeline_dir
        self.modules_dir = os.path.join(self.dir, "modules")
        # Parsed content of 'modules.json'; loaded lazily by the accessors
        self.modules_json = None

    def create_modules_json(self):
        """
        Creates the modules.json file from the modules installed in the pipeline directory

        Raises:
            UserWarning: If the creation fails
        """
        pipeline_config = nf_core.utils.fetch_wf_config(self.dir)
        pipeline_name = pipeline_config.get("manifest.name", "")
        pipeline_url = pipeline_config.get("manifest.homePage", "")
        modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "repos": dict()}
        modules_dir = self.modules_dir

        if not os.path.exists(modules_dir):
            raise UserWarning("Can't find a ./modules directory. Is this a DSL2 pipeline?")

        repos = self.get_pipeline_module_repositories(modules_dir)

        # Get all module names in the repos
        # (a module is any directory below the repo directory that holds a 'main.nf')
        repo_module_names = [
            (
                repo_name,
                [
                    os.path.relpath(dir_name, os.path.join(modules_dir, repo_name))
                    for dir_name, _, file_names in os.walk(os.path.join(modules_dir, repo_name))
                    if "main.nf" in file_names
                ],
                repo_remote,
                base_path,
            )
            for repo_name, (repo_remote, base_path) in repos.items()
        ]
        progress_bar = rich.progress.Progress(
            "[bold blue]{task.description}",
            rich.progress.BarColumn(bar_width=None),
            "[magenta]{task.completed} of {task.total}[reset] » [bold yellow]{task.fields[test_name]}",
            transient=True,
        )
        with progress_bar:
            n_total_modules = sum(len(modules) for _, modules, _, _ in repo_module_names)
            file_progress = progress_bar.add_task(
                "Creating 'modules.json' file", total=n_total_modules, test_name="module.json"
            )
            for repo_name, module_names, remote, base_path in sorted(repo_module_names):
                try:
                    # Create a ModulesRepo object without progress bar to not conflict with the other one
                    modules_repo = nf_core.modules.modules_repo.ModulesRepo(
                        remote_url=remote, base_path=base_path, no_progress=True
                    )
                except LookupError as e:
                    raise UserWarning(e) from e

                repo_path = os.path.join(modules_dir, repo_name)
                modules_json["repos"][repo_name] = {
                    "git_url": remote,
                    "modules": dict(),
                    "base_path": base_path,
                }
                for module_name in sorted(module_names):
                    module_path = os.path.join(repo_path, module_name)
                    progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}")
                    correct_commit_sha = self.find_correct_commit_sha(module_name, module_path, modules_repo)

                    modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": correct_commit_sha}

        modules_json_path = os.path.join(self.dir, "modules.json")
        with open(modules_json_path, "w") as fh:
            json.dump(modules_json, fh, indent=4)
            fh.write("\n")

        # Keep the in-memory state in sync with what was just written
        self.modules_json = modules_json

    def get_pipeline_module_repositories(self, modules_dir, repos=None):
        """
        Finds all module repositories in the modules directory. Ignores the local modules.

        Directories that are not covered by a known remote are resolved
        interactively by asking the user for the remote URL and base path.

        Args:
            modules_dir (str): base directory for the module files
            repos (dict): optional, already known repos to start from
        Returns:
            repos ({ str: (str, str) }): Dictionary mapping the repo name to a tuple of
                                         the repo remote URL and the path to the modules in the repo
        """
        if repos is None:
            repos = {}

        # Check if there are any nf-core modules installed
        if os.path.exists(os.path.join(modules_dir, nf_core.modules.modules_repo.NF_CORE_MODULES_NAME)):
            repos[nf_core.modules.modules_repo.NF_CORE_MODULES_NAME] = (
                nf_core.modules.modules_repo.NF_CORE_MODULES_REMOTE,
                nf_core.modules.modules_repo.NF_CORE_MODULES_BASE_PATH,
            )

        # Check if there are any untracked repositories
        dirs_not_covered = self.dir_tree_uncovered(modules_dir, list(repos))
        if len(dirs_not_covered) > 0:
            log.info("Found custom module repositories when creating 'modules.json'")
            # Loop until all directories in the base directory are covered by a remote
            while len(dirs_not_covered) > 0:
                several = len(dirs_not_covered) > 1
                log.info(
                    "The following director{s} in the modules directory {v} untracked: '{l}'".format(
                        s="ies" if several else "y",
                        v="are" if several else "is",
                        l="', '".join(dirs_not_covered),
                    )
                )
                nrepo_remote = questionary.text(
                    "Please provide a URL for one of the repos contained in the untracked directories."
                ).unsafe_ask()
                # Verify that the remote exists
                while True:
                    try:
                        git.Git().ls_remote(nrepo_remote)
                        break
                    except git.exc.GitCommandError:
                        nrepo_remote = questionary.text(
                            "The provided remote does not seem to exist, please provide a new remote."
                        ).unsafe_ask()

                # Verify that there is a directory corresponding the remote
                nrepo_name = nf_core.modules.module_utils.path_from_remote(nrepo_remote)
                if not os.path.exists(os.path.join(modules_dir, nrepo_name)):
                    log.info(
                        "The provided remote does not seem to correspond to a local directory. "
                        "The directory structure should be the same as in the remote."
                    )
                    dir_name = questionary.text(
                        "Please provide the correct directory, it will be renamed. If left empty, the remote will be ignored."
                    ).unsafe_ask()
                    if not dir_name:
                        # The user chose to ignore this remote, ask again
                        continue
                    old_path = os.path.join(modules_dir, dir_name)
                    if not os.path.isdir(old_path):
                        # Renaming a non-existing directory would crash, so ask again instead
                        log.info(f"The directory '{dir_name}' does not exist in the modules directory.")
                        continue
                    os.rename(old_path, os.path.join(modules_dir, nrepo_name))

                # Prompt the user for the modules base path in the remote
                nrepo_base_path = questionary.text(
                    f"Please provide the path of the modules directory in the remote. "
                    f"Will default to '{nf_core.modules.modules_repo.NF_CORE_MODULES_BASE_PATH}' if left empty."
                ).unsafe_ask()
                if not nrepo_base_path:
                    nrepo_base_path = nf_core.modules.modules_repo.NF_CORE_MODULES_BASE_PATH

                repos[nrepo_name] = (nrepo_remote, nrepo_base_path)
                dirs_not_covered = self.dir_tree_uncovered(modules_dir, list(repos))
        return repos

    def find_correct_commit_sha(self, module_name, module_path, modules_repo):
        """
        Returns the SHA for the latest commit where the local files are identical to the remote files

        Args:
            module_name (str): Name of module
            module_path (str): Path to module in local repo
            modules_repo (ModulesRepo): Remote repo for module
        Returns:
            commit_sha (str): The latest commit SHA where local files are identical to remote files,
                              or None if no commit is found
        """
        # Find the correct commit SHA for the local module files.
        # We iterate over the commit history for the module until we find
        # a revision that matches the file contents
        commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000))
        for commit_sha in commit_shas:
            if all(modules_repo.module_files_identical(module_name, module_path, commit_sha).values()):
                return commit_sha
        return None

    def dir_tree_uncovered(self, modules_dir, repos):
        """
        Does a BFS of the modules directory to look for directories that
        are not tracked by a remote. The 'repos' argument contains the
        directories that are currently covered by a remote, and they and their
        subdirectories are therefore ignored. The 'local' directory and
        plain files are ignored as well.

        Args:
            modules_dir (str): Base path of modules in pipeline
            repos ([ str ]): List of repos that are covered by a remote

        Returns:
            dirs_not_covered ([ str ]): A list of directories that are currently not covered by any remote.
        """
        # Split every repo name into ALL of its path components. 'os.path.split' only
        # ever returns (head, tail), which breaks for names with one or 3+ components.
        repo_parts = [os.path.normpath(repo).split(os.sep) for repo in repos]

        # Initialise the FIFO queue. Only directories can be covered by a remote,
        # so stray files (e.g. a README) are skipped instead of being reported.
        fifo = [
            os.path.join(modules_dir, subdir)
            for subdir in os.listdir(modules_dir)
            if subdir != "local" and os.path.isdir(os.path.join(modules_dir, subdir))
        ]
        depth = 1
        dirs_not_covered = []
        while len(fifo) > 0:
            temp_queue = []
            # Prefix of every repo name at the current depth, mapped to the full depth of the repo
            repos_at_level = {os.path.join(*parts[:depth]): len(parts) for parts in repo_parts}
            for directory in fifo:
                rel_dir = os.path.relpath(directory, modules_dir)
                if rel_dir in repos_at_level:
                    # Go the next depth if this directory is not one of the repos
                    if depth < repos_at_level[rel_dir]:
                        temp_queue.extend(
                            os.path.join(directory, subdir)
                            for subdir in os.listdir(directory)
                            if os.path.isdir(os.path.join(directory, subdir))
                        )
                else:
                    # Otherwise add the directory to the ones not covered
                    dirs_not_covered.append(directory)
            fifo = temp_queue
            depth += 1
        return dirs_not_covered

    def modules_json_up_to_date(self):
        """
        Checks whether the modules installed in the directory
        are consistent with the entries in the 'modules.json' file and vice versa.

        If a module has an entry in the 'modules.json' file but is missing in the directory,
        we first try to reinstall the module from the remote and if that fails we remove the entry
        in 'modules.json'.

        If a module is installed but the entry in 'modules.json' is missing we iterate through
        the commit log in the remote to try to determine the SHA.

        Raises:
            UserWarning: If 'modules.json' is missing, or if a repo with missing modules
                         lacks the 'git_url' or 'base_path' entry
        """
        self.load_modules_json()
        old_modules_json = copy.deepcopy(self.modules_json)

        # Compute the difference between the modules in the directory
        # and the modules in the 'modules.json' file
        # This is done by looking at all directories containing
        # a 'main.nf' file. Local modules are never tracked in 'modules.json'.
        dirs = []
        for dir_name, _, file_names in os.walk(self.modules_dir):
            if "main.nf" not in file_names:
                continue
            rel_dir = os.path.relpath(dir_name, start=self.modules_dir)
            if rel_dir == "local" or rel_dir.startswith("local" + os.sep):
                continue
            dirs.append(rel_dir)

        missing_from_modules_json = []
        repo_names = list(old_modules_json["repos"].keys())
        for module_dir in dirs:
            # Check if the module directory belongs to a repo in 'modules.json'
            module_repo_name = next((repo for repo in repo_names if module_dir.startswith(repo + os.sep)), None)
            if module_repo_name is None:
                # If it does not, add it to the list of missing modules
                missing_from_modules_json.append(module_dir)
                continue

            # If it does, check if the module is in the 'modules.json' file.
            # The repo may already have been popped below once all its tracked
            # modules were matched, hence the '.get'.
            modules_path = os.path.relpath(module_dir, start=module_repo_name)
            repo_entry = old_modules_json["repos"].get(module_repo_name)
            if repo_entry is None or modules_path not in repo_entry.get("modules", {}):
                missing_from_modules_json.append(module_dir)
                continue
            repo_entry["modules"].pop(modules_path)
            if len(repo_entry["modules"]) == 0:
                old_modules_json["repos"].pop(module_repo_name)

        # Check which repos are missing the remote url or base path
        for repo, values in old_modules_json.get("repos", {}).items():
            if "git_url" not in values or "base_path" not in values:
                raise UserWarning(
                    "The 'modules.json' file is not up to date. "
                    "Please reinstall it by removing it and rerunning the command."
                )

        # If there are any modules left in 'modules.json' after all installed are removed,
        # we try to reinstall them
        remove_from_mod_json = {}
        if len(old_modules_json["repos"]) > 0:
            missing_but_in_mod_json = [
                f"'{repo}/{module}'"
                for repo, contents in old_modules_json["repos"].items()
                for module in contents.get("modules", {})
            ]
            log.info(
                f"Reinstalling modules found in 'modules.json' but missing from directory: {', '.join(missing_but_in_mod_json)}"
            )

            for repo, contents in old_modules_json["repos"].items():
                modules = contents.get("modules", {})
                remote = contents["git_url"]
                base_path = contents["base_path"]

                modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=remote, base_path=base_path)
                install_folder = os.path.split(modules_repo.fullname)
                install_dir = [self.dir, "modules", *install_folder]

                for module, entry in modules.items():
                    sha = entry.get("git_sha")
                    if sha is None:
                        log.warning(
                            f"Could not find git SHA for module '{module}' in '{repo}' - removing from modules.json"
                        )
                        remove_from_mod_json.setdefault(repo, []).append(module)
                        continue
                    # 'install_module' signals a failed installation by returning False
                    if modules_repo.install_module(module, install_dir, sha) is False:
                        log.warning(f"Could not reinstall module '{module}' in '{repo}' - removing from modules.json")
                        remove_from_mod_json.setdefault(repo, []).append(module)

        # If the reinstall fails, we remove those entries in 'modules.json'
        if sum(map(len, remove_from_mod_json.values())) > 0:
            uninstallable_mods = [
                f"'{repo}/{module}'" for repo, modules in remove_from_mod_json.items() for module in modules
            ]
            if len(uninstallable_mods) == 1:
                log.info(f"Was unable to reinstall {uninstallable_mods[0]}. Removing 'modules.json' entry")
            else:
                log.info(
                    f"Was unable to reinstall some modules. Removing 'modules.json' entries: {', '.join(uninstallable_mods)}"
                )

            for repo, modules in remove_from_mod_json.items():
                for module in modules:
                    self.modules_json["repos"][repo]["modules"].pop(module, None)
                # Only drop the repo once all of its failed modules have been removed
                if len(self.modules_json["repos"][repo]["modules"]) == 0:
                    self.modules_json["repos"].pop(repo)

        # If some modules didn't have an entry in the 'modules.json' file
        # we try to determine the SHA from the commit log of the remote
        dead_dirs = []
        sb_local_dirs = []
        if len(missing_from_modules_json) > 0:
            format_missing = [f"'{module_dir}'" for module_dir in missing_from_modules_json]
            if len(format_missing) == 1:
                log.info(f"Recomputing commit SHA for module {format_missing[0]} which was missing from 'modules.json'")
            else:
                log.info(
                    f"Recomputing commit SHAs for modules which were missing from 'modules.json': {', '.join(format_missing)}"
                )

            for module_dir in missing_from_modules_json:
                choice = questionary.select(
                    f"Found untracked file '{module_dir}'. Please select a choice",
                    choices=[
                        {"name": "Provide the remote", "value": 0},
                        {"name": "Move the directory to 'local'", "value": 1},
                        {"name": "Remove the files", "value": 2},
                    ],
                ).unsafe_ask()
                if choice == 1:
                    # Track the module directory itself, not a (possibly unrelated) repo
                    sb_local_dirs.append(module_dir)
                    continue
                if choice != 0:
                    dead_dirs.append(module_dir)
                    continue

                remote, base_path, repo = self._prompt_for_remote(module_dir)

                modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=remote, base_path=base_path)
                repo_path = os.path.join(self.dir, "modules", repo)
                module = os.path.relpath(module_dir, repo)
                module_path = os.path.join(repo_path, module)
                correct_commit_sha = self.find_correct_commit_sha(module, module_path, modules_repo)
                if correct_commit_sha is not None:
                    if repo not in self.modules_json["repos"]:
                        self.modules_json["repos"][repo] = {"git_url": remote, "base_path": base_path, "modules": {}}

                    self.modules_json["repos"][repo].setdefault("modules", {})[module] = {
                        "git_sha": correct_commit_sha
                    }
                else:
                    choices = [
                        {"name": "Move the directory to local", "value": 0},
                        {"name": "Remove the files", "value": 1},
                    ]
                    choice = questionary.select(
                        f"Could not find commit SHA for {module_dir}", choices=choices
                    ).unsafe_ask()
                    if choice == 0:
                        sb_local_dirs.append(module_dir)
                    else:
                        dead_dirs.append(module_dir)

        # Remove the requested module directories
        for module_dir in dead_dirs:
            shutil.rmtree(os.path.join(self.modules_dir, module_dir))

        # Move the untracked module directories to local
        for module_dir in sb_local_dirs:
            self._move_to_local(module_dir)

        self.dump_modules_json()

    def _prompt_for_remote(self, module_dir):
        """
        Asks the user which remote an untracked module directory belongs to.

        Args:
            module_dir (str): Path of the untracked module, relative to the modules directory

        Returns:
            (str, str, str): The remote URL, the modules base path in the remote and the repo name
        """
        # Ask the user if the module belongs to an installed remote
        choices = [{"name": "No", "value": (None, None)}] + [
            {
                "name": f"'{repo}' ({entry['git_url']})",
                "value": (entry["git_url"], entry.get("base_path")),
            }
            for repo, entry in self.modules_json["repos"].items()
            if entry.get("git_url") is not None
        ]
        remote, base_path = questionary.select(
            "Does the module belong to an installed remote?",
            choices=choices,
            style=nf_core.utils.nfcore_question_style,
        ).unsafe_ask()
        if remote is not None:
            return remote, base_path, nf_core.modules.module_utils.path_from_remote(remote)

        while True:
            remote = questionary.text(
                "Please provide the URL of the remote", style=nf_core.utils.nfcore_question_style
            ).unsafe_ask()
            # Verify that the name is consistent with the untracked file.
            # The separator avoids matching e.g. 'org/repo2/...' against 'org/repo'.
            repo = nf_core.modules.module_utils.path_from_remote(remote)
            if not module_dir.startswith(repo + os.sep):
                log.info("The module name does not match the remote name")
                continue
            # Verify that the remote exists
            try:
                git.Git().ls_remote(remote)
            except git.exc.GitCommandError:
                log.info("The remote does not exist")
                continue
            # Ask the user for the modules base path in the remote
            base_path = questionary.text(
                f"Please provide the path of the modules directory in the remote. "
                f"Will default to '{nf_core.modules.modules_repo.NF_CORE_MODULES_BASE_PATH}' if left empty."
            ).unsafe_ask()
            if not base_path:
                base_path = nf_core.modules.modules_repo.NF_CORE_MODULES_BASE_PATH
            return remote, base_path, repo

    def _move_to_local(self, module_dir):
        """
        Moves an untracked module directory into the 'local' modules directory.

        Args:
            module_dir (str): Path of the module, relative to the modules directory
        """
        source = os.path.join(self.modules_dir, module_dir)
        local_path = os.path.join(self.modules_dir, "local")

        # Create the local module directory if it doesn't already exist
        os.makedirs(local_path, exist_ok=True)

        # Check if there is already a subdirectory with the name
        target_name = os.path.basename(os.path.normpath(module_dir))
        while os.path.exists(os.path.join(local_path, target_name)):
            # Add a time suffix to the name to make it unique
            # (do it again and again if it didn't work out...)
            target_name += f"-{datetime.datetime.now().strftime('%y%m%d%H%M%S')}"

        shutil.move(source, os.path.join(local_path, target_name))

    def load_modules_json(self):
        """
        Loads the modules.json file into the variable 'modules_json'

        Sets the modules_json attribute to the loaded file.

        Raises:
            UserWarning: If the modules.json file is not found
        """
        modules_json_path = os.path.join(self.dir, "modules.json")
        try:
            with open(modules_json_path, "r") as fh:
                self.modules_json = json.load(fh)
        except FileNotFoundError as e:
            raise UserWarning("File 'modules.json' is missing") from e

    def update_modules_json(self, modules_repo, module_name, module_version, write_file=True):
        """
        Updates the 'module.json' file with new module info

        Args:
            modules_repo (ModulesRepo): A ModulesRepo object configured for the new module
            module_name (str): Name of new module
            module_version (str): git SHA for the new module entry
            write_file (bool): whether to write the updated modules.json to a file.
        """
        if self.modules_json is None:
            self.load_modules_json()
        repo_name = modules_repo.fullname
        remote_url = modules_repo.remote_url
        base_path = modules_repo.base_path
        if repo_name not in self.modules_json["repos"]:
            self.modules_json["repos"][repo_name] = {"modules": {}, "git_url": remote_url, "base_path": base_path}
        self.modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": module_version}
        # Sort the 'modules.json' repo entries
        self.modules_json["repos"] = nf_core.utils.sort_dictionary(self.modules_json["repos"])
        if write_file:
            self.dump_modules_json()

    def remove_modules_json_entry(self, module_name, repo_name):
        """
        Removes an entry from the 'modules.json' file.

        Args:
            module_name (str): Name of the module to be removed
            repo_name (str): Name of the repository containing the module
        Returns:
            (bool): True if the removal was successful, False otherwise
        """
        if not self.modules_json:
            return False
        if repo_name in self.modules_json.get("repos", {}):
            repo_entry = self.modules_json["repos"][repo_name]
            if module_name in repo_entry.get("modules", {}):
                repo_entry["modules"].pop(module_name)
            else:
                log.warning(f"Module '{repo_name}/{module_name}' is missing from 'modules.json' file.")
                return False
            # The repo entry always holds 'git_url' etc., so emptiness has to be
            # judged on the modules it contains
            if len(repo_entry["modules"]) == 0:
                self.modules_json["repos"].pop(repo_name)
        else:
            log.warning(f"Module '{repo_name}/{module_name}' is missing from 'modules.json' file.")
            return False

        self.dump_modules_json()
        return True

    def repo_present(self, repo_name):
        """
        Checks if a repo is present in the modules.json file

        Args:
            repo_name (str): Name of the repository
        Returns:
            (bool): Whether the repo exists in the modules.json
        """
        if self.modules_json is None:
            self.load_modules_json()
        return repo_name in self.modules_json.get("repos", {})

    def module_present(self, module_name, repo_name):
        """
        Checks if a module is present in the modules.json file

        Args:
            module_name (str): Name of the module
            repo_name (str): Name of the repository
        Returns:
            (bool): Whether the module is present in the 'modules.json' file
        """
        if self.modules_json is None:
            self.load_modules_json()
        return module_name in self.modules_json.get("repos", {}).get(repo_name, {}).get("modules", {})

    def get_modules_json(self):
        """
        Returns a copy of the loaded modules.json

        Returns:
            (dict): A copy of the loaded modules.json
        """
        if self.modules_json is None:
            self.load_modules_json()
        return copy.deepcopy(self.modules_json)

    def get_module_version(self, module_name, repo_name):
        """
        Returns the version of a module

        Args:
            module_name (str): Name of the module
            repo_name (str): Name of the repository

        Returns:
            (str): The git SHA of the module if it exists, None otherwise
        """
        if self.modules_json is None:
            self.load_modules_json()
        return (
            self.modules_json.get("repos", {})
            .get(repo_name, {})
            .get("modules", {})
            .get(module_name, {})
            .get("git_sha", None)
        )

    def get_git_url(self, repo_name):
        """
        Returns the git url of a repo

        Args:
            repo_name (str): Name of the repository

        Returns:
            (str): The git url of the repository if it exists, None otherwise
        """
        if self.modules_json is None:
            self.load_modules_json()
        return self.modules_json.get("repos", {}).get(repo_name, {}).get("git_url", None)

    def get_base_path(self, repo_name):
        """
        Returns the modules base path of a repo

        Args:
            repo_name (str): Name of the repository

        Returns:
            (str): The base path of the repository if it exists, None otherwise
        """
        if self.modules_json is None:
            self.load_modules_json()
        return self.modules_json.get("repos", {}).get(repo_name, {}).get("base_path", None)

    def dump_modules_json(self):
        """
        Sort the modules.json, and write it to file
        """
        # Sort the modules.json
        self.modules_json["repos"] = nf_core.utils.sort_dictionary(self.modules_json["repos"])
        modules_json_path = os.path.join(self.dir, "modules.json")
        with open(modules_json_path, "w") as fh:
            json.dump(self.modules_json, fh, indent=4)
            fh.write("\n")
directory structure """ dir_names = os.listdir(self.local_repo_dir) - if "modules" not in dir_names: - err_str = f"Repository '{self.fullname}' ({self.branch}) does not contain a 'modules/' directory" - if "software" in dir_names: - err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" + if self.base_path not in dir_names: + err_str = f"Repository '{self.fullname}' ({self.branch}) does not contain the '{self.base_path}' directory" raise LookupError(err_str) def checkout_branch(self): @@ -233,7 +236,7 @@ def checkout(self, commit): """ self.repo.git.checkout(commit) - def module_exists(self, module_name): + def module_exists(self, module_name, checkout=True): """ Check if a module exists in the branch of the repo @@ -243,7 +246,7 @@ def module_exists(self, module_name): Returns: (bool): Whether the module exists in this branch of the repository """ - return module_name in self.get_avail_modules() + return module_name in self.get_avail_modules(checkout=checkout) def get_module_dir(self, module_name): """ @@ -273,7 +276,7 @@ def install_module(self, module_name, install_dir, commit): self.checkout(commit) # Check if the module exists in the branch - if not self.module_exists(module_name): + if not self.module_exists(module_name, checkout=False): log.error(f"The requested module does not exists in the '{self.branch}' of {self.fullname}'") return False @@ -327,7 +330,7 @@ def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00 ( dict ): Iterator of commit SHAs and associated (truncated) message """ self.checkout_branch() - module_path = os.path.join("modules", module_name) + module_path = os.path.join(self.base_path, module_name) commits = self.repo.iter_commits(max_count=depth, paths=module_path) commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) return commits @@ -336,7 +339,7 @@ def sha_exists_on_branch(self, sha): """ Verifies that a given 
commit sha exists on the branch """ - self.checkout() + self.checkout_branch() return sha in (commit.hexsha for commit in self.repo.iter_commits()) def get_commit_info(self, sha): @@ -359,7 +362,7 @@ def get_commit_info(self, sha): return message, date raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") - def get_avail_modules(self): + def get_avail_modules(self, checkout=True): """ Gets the names of the modules in the repository. They are detected by checking which directories have a 'main.nf' file @@ -367,14 +370,15 @@ def get_avail_modules(self): Returns: ([ str ]): The module names """ - if self.avail_module_names is None: - # Module directories are characterized by having a 'main.nf' file - self.avail_module_names = [ - os.path.relpath(dirpath, start=self.modules_dir) - for dirpath, _, file_names in os.walk(self.modules_dir) - if "main.nf" in file_names - ] - return self.avail_module_names + if checkout: + self.checkout_branch() + # Module directories are characterized by having a 'main.nf' file + avail_module_names = [ + os.path.relpath(dirpath, start=self.modules_dir) + for dirpath, _, file_names in os.walk(self.modules_dir) + if "main.nf" in file_names + ] + return avail_module_names def get_meta_yml(self, module_name): """ diff --git a/nf_core/modules/remove.py b/nf_core/modules/remove.py index 5a3748370c..c0b0bbf024 100644 --- a/nf_core/modules/remove.py +++ b/nf_core/modules/remove.py @@ -1,23 +1,22 @@ -import json import logging import os -import sys import questionary import nf_core.utils from .modules_command import ModuleCommand +from .modules_json import ModulesJson log = logging.getLogger(__name__) class ModuleRemove(ModuleCommand): - def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False, base_path=None): """ Initialise the ModulesRemove object and run remove command """ - super().__init__(pipeline_dir, remote_url, branch, 
no_pull) + super().__init__(pipeline_dir, remote_url, branch, no_pull, base_path) def remove(self, module): """ @@ -59,39 +58,23 @@ def remove(self, module): # Get the module directory module_dir = os.path.join(self.dir, "modules", *remove_folder, module) + # Load the modules.json file + modules_json = ModulesJson(self.dir) + modules_json.load_modules_json() + # Verify that the module is actually installed if not os.path.exists(module_dir): log.error(f"Module directory does not exist: '{module_dir}'") - modules_json = self.load_modules_json() - if self.modules_repo.fullname in modules_json["repos"] and module in modules_json["repos"][repo_name]: + if modules_json.module_present(module, repo_name): log.error(f"Found entry for '{module}' in 'modules.json'. Removing...") - self.remove_modules_json_entry(module, repo_name, modules_json) + modules_json.remove_modules_json_entry(module, repo_name) return False log.info(f"Removing {module}") # Remove entry from modules.json - modules_json = self.load_modules_json() - self.remove_modules_json_entry(module, repo_name, modules_json) + modules_json.remove_modules_json_entry(module, repo_name) # Remove the module return self.clear_module_dir(module_name=module, module_dir=module_dir) - - def remove_modules_json_entry(self, module, repo_name, modules_json): - - if not modules_json: - return False - if repo_name in modules_json.get("repos", {}): - repo_entry = modules_json["repos"][repo_name] - if module in repo_entry: - repo_entry.pop(module) - if len(repo_entry) == 0: - modules_json["repos"].pop(repo_name) - else: - log.warning(f"Module '{repo_name}/{module}' is missing from 'modules.json' file.") - return False - - self.dump_modules_json(modules_json) - - return True diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index c96f0b9b92..b4c10d0d50 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -1,4 +1,3 @@ -import copy import difflib import enum import json @@ -8,7 +7,6 @@ import 
tempfile import questionary -from questionary import question from rich.console import Console from rich.syntax import Syntax @@ -16,6 +14,7 @@ import nf_core.utils from .modules_command import ModuleCommand +from .modules_json import ModulesJson from .modules_repo import ModulesRepo log = logging.getLogger(__name__) @@ -34,8 +33,9 @@ def __init__( remote_url=None, branch=None, no_pull=False, + base_path=None, ): - super().__init__(pipeline_dir, remote_url, branch, no_pull) + super().__init__(pipeline_dir, remote_url, branch, no_pull, base_path) self.force = force self.prompt = prompt self.sha = sha @@ -52,7 +52,8 @@ def update(self, module): return False # Verify that 'modules.json' is consistent with the installed modules - self.modules_json_up_to_date() + modules_json = ModulesJson(self.dir) + modules_json.modules_json_up_to_date() tool_config = nf_core.utils.load_tools_config() update_config = tool_config.get("update", {}) @@ -126,9 +127,6 @@ def update(self, module): repos_mods_shas = [(self.modules_repo, module, sha)] - # Load 'modules.json' (loaded here for consistency with the '--all' case) - modules_json = self.load_modules_json() - else: if module: raise UserWarning("You cannot specify a module and use the '--all' flag at the same time") @@ -175,20 +173,21 @@ def update(self, module): log.info(f"Skipping module{'' if len(skipped_modules) == 1 else 's'}: '{skipped_str}'") # Get the git urls from the modules.json - modules_json = self.load_modules_json() repos_mods_shas = [ - (modules_json["repos"][repo_name]["git_url"], mods_shas) + (modules_json.get_git_url(repo_name), modules_json.get_base_path(repo_name), mods_shas) for repo_name, mods_shas in repos_mods_shas.items() ] - repos_mods_shas = [(ModulesRepo(remote_url=repo_url), mods_shas) for repo_url, mods_shas in repos_mods_shas] + repos_mods_shas = [ + (ModulesRepo(remote_url=repo_url, base_path=base_path), mods_shas) + for repo_url, base_path, mods_shas in repos_mods_shas + ] # Flatten the list 
repos_mods_shas = [(repo, mod, sha) for repo, mods_shas in repos_mods_shas for mod, sha in mods_shas] - old_modules_json = copy.deepcopy(modules_json) # Deep copy to avoid mutability - if not modules_json: - return False + # Save the current state of the modules.json + old_modules_json = modules_json.get_modules_json() # If --preview is true, don't save to a patch file if self.show_diff: @@ -241,10 +240,7 @@ def update(self, module): exit_value = False continue - if modules_repo.fullname in modules_json["repos"]: - current_entry = modules_json["repos"][modules_repo.fullname]["modules"].get(module) - else: - current_entry = None + current_version = modules_json.get_module_version(module, modules_repo.fullname) # Set the install folder based on the repository name install_folder = [self.dir, "modules"] @@ -258,9 +254,7 @@ def update(self, module): elif self.prompt: try: version = nf_core.modules.module_utils.prompt_module_version_sha( - module, - modules_repo=modules_repo, - installed_sha=current_entry["git_sha"] if not current_entry is None else None, + module, modules_repo=modules_repo, installed_sha=current_version ) except SystemError as e: log.error(e) @@ -271,9 +265,8 @@ def update(self, module): git_log = list(modules_repo.get_module_git_log(module, depth=1)) version = git_log[0]["git_sha"] - if current_entry is not None and not self.force: + if current_version is not None and not self.force: # Fetch the latest commit for the module - current_version = current_entry["git_sha"] if current_version == version: if self.sha or self.prompt: log.info(f"'{modules_repo.fullname}/{module}' is already installed at {version}") @@ -355,7 +348,7 @@ class DiffEnum(enum.Enum): log.info(f"Writing diff of '{module}' to '{self.save_diff_fn}'") with open(self.save_diff_fn, "a") as fh: fh.write( - f"Changes in module '{module}' between ({current_entry['git_sha'] if current_entry is not None else '?'}) and ({version if version is not None else 'latest'})\n" + f"Changes in module 
'{module}' between ({current_version if current_version is not None else '?'}) and ({version if version is not None else 'latest'})\n" ) for file, d in diffs.items(): @@ -384,7 +377,7 @@ class DiffEnum(enum.Enum): elif self.show_diff: console = Console(force_terminal=nf_core.utils.rich_force_colors()) log.info( - f"Changes in module '{module}' between ({current_entry['git_sha'] if current_entry is not None else '?'}) and ({version if version is not None else 'latest'})" + f"Changes in module '{module}' between ({current_version if current_version is not None else '?'}) and ({version if version is not None else 'latest'})" ) for file, d in diffs.items(): @@ -423,17 +416,17 @@ class DiffEnum(enum.Enum): # Update modules.json with newly installed module if not dry_run: - self.update_modules_json(modules_json, modules_repo, module, version) + modules_json.update_modules_json(modules_repo, module, version) # Don't save to a file, just iteratively update the variable else: - modules_json = self.update_modules_json(modules_json, modules_repo, module, version, write_file=False) + modules_json.update_modules_json(modules_repo, module, version, write_file=False) if self.save_diff_fn: # Compare the new modules.json and build a diff modules_json_diff = difflib.unified_diff( json.dumps(old_modules_json, indent=4).splitlines(keepends=True), - json.dumps(modules_json, indent=4).splitlines(keepends=True), + json.dumps(modules_json.get_modules_json(), indent=4).splitlines(keepends=True), fromfile=os.path.join(self.dir, "modules.json"), tofile=os.path.join(self.dir, "modules.json"), ) diff --git a/nf_core/pipeline-template/modules.json b/nf_core/pipeline-template/modules.json index af2cb416d8..cb1654f29a 100644 --- a/nf_core/pipeline-template/modules.json +++ b/nf_core/pipeline-template/modules.json @@ -3,6 +3,7 @@ "homePage": "https://github.com/{{ name }}", "repos": { "nf-core/modules": { + "base_path": "modules", "git_url": "git@github.com@nf-core/modules.git", "modules": { 
"custom/dumpsoftwareversions": { diff --git a/tests/modules/lint.py b/tests/modules/lint.py index 0f60377d5e..bcd3571838 100644 --- a/tests/modules/lint.py +++ b/tests/modules/lint.py @@ -1,3 +1,5 @@ +import json + import nf_core.modules