From 1cad18663c5b4965fed6dfa6cf7b279c701d673a Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 14:37:53 +0200 Subject: [PATCH 1/7] update 'modules.json' creation for new file structure --- nf_core/modules/module_utils.py | 78 +++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 03dad0375..3b0d34b8b 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -102,13 +102,30 @@ def create_modules_json(pipeline_dir): pipeline_config = nf_core.utils.fetch_wf_config(pipeline_dir) pipeline_name = pipeline_config["manifest.name"] pipeline_url = pipeline_config["manifest.homePage"] - modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "modules": {}} - all_module_file_paths = glob.glob(f"{pipeline_dir}/modules/nf-core/software/**/*", recursive=True) - - # Extract the module paths from the file paths - module_paths = list(set(map(os.path.dirname, filter(os.path.isfile, all_module_file_paths)))) - module_names = [path.replace(f"{pipeline_dir}/modules/nf-core/software/", "") for path in module_paths] - module_repo = ModulesRepo() + modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "modules": dict()} + modules_dir = f"{pipeline_dir}/modules" + + # Extract all modules repos in the pipeline directory + repo_names = [ + f"{user_name}/{repo_name}" + for user_name in os.listdir(modules_dir) + if os.path.isdir(os.path.join(modules_dir, user_name)) and user_name != "local" + for repo_name in os.listdir(os.path.join(modules_dir, user_name)) + ] + log.warning(repo_names) + + # Get all module names in the repos + repo_module_names = { + repo_name: list( + { + os.path.relpath(os.path.dirname(path), os.path.join(modules_dir, repo_name)) + for path in glob.glob(f"{modules_dir}/{repo_name}/**/*", recursive=True) + if os.path.isfile(path) + } + ) + for repo_name 
in repo_names + } + log.warning(repo_module_names) progress_bar = rich.progress.Progress( "[bold blue]{task.description}", @@ -118,28 +135,33 @@ def create_modules_json(pipeline_dir): ) with progress_bar: file_progress = progress_bar.add_task( - "Creating 'modules.json' file", total=len(module_names), test_name="module.json" + "Creating 'modules.json' file", total=sum(map(len, repo_module_names.values())), test_name="module.json" ) - for module_name, module_path in zip(module_names, module_paths): - progress_bar.update(file_progress, advance=1, test_name=module_name) - try: - # Find the correct commit SHA for the local files. - # We iterate over the commit log pages until we either - # find a matching commit or we reach the end of the commits - correct_commit_sha = None - commit_page_nbr = 1 - while correct_commit_sha is None: - - commit_shas = [ - commit["git_sha"] for commit in get_module_git_log(module_name, page_nbr=commit_page_nbr) - ] - correct_commit_sha = find_correct_commit_sha(module_name, module_path, module_repo, commit_shas) - commit_page_nbr += 1 - - modules_json["modules"][module_name] = {"git_sha": correct_commit_sha} - except LookupError as e: - log.error(e) - raise UserWarning("Will not create 'modules.json' file") + for repo_name, module_names in repo_module_names.items(): + module_repo = ModulesRepo(repo=repo_name) + repo_path = os.path.join(modules_dir, repo_name) + modules_json["modules"][repo_name] = dict() + for module_name in module_names: + module_path = os.path.join(repo_path, module_name) + progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") + try: + # Find the correct commit SHA for the local files. 
+ # We iterate over the commit log pages until we either + # find a matching commit or we reach the end of the commits + correct_commit_sha = None + commit_page_nbr = 1 + while correct_commit_sha is None: + + commit_shas = [ + commit["git_sha"] for commit in get_module_git_log(module_name, page_nbr=commit_page_nbr) + ] + correct_commit_sha = find_correct_commit_sha(module_name, module_path, module_repo, commit_shas) + commit_page_nbr += 1 + + modules_json["modules"][repo_name][module_name] = {"git_sha": correct_commit_sha} + except LookupError as e: + log.error(e) + raise UserWarning("Will not create 'modules.json' file") modules_json_path = os.path.join(pipeline_dir, "modules.json") with open(modules_json_path, "w") as fh: json.dump(modules_json, fh, indent=4) From 0bc72606b75240a73199b8abbd14d3fafb341c84 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:04:41 +0200 Subject: [PATCH 2/7] Remove print statements and change default value 'since' in git log function --- nf_core/modules/module_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 3b0d34b8b..0cbf48743 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -25,9 +25,11 @@ class ModuleException(Exception): pass -def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2020-11-25T00:00:00Z"): +def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2021-07-07T00:00:00Z"): """ - Fetches the commit history the of requested module + Fetches the commit history of the requested module since a given date. The default value is + not arbitrary - it is the last time the structure of the nf-core/modules repository had an + update breaking backwards compatibility. 
Args: module_name (str): Name of module per_page (int): Number of commits per page returned by API @@ -35,6 +37,8 @@ def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2020-11-25T0 since (str): Only show commits later than this timestamp. Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ. + + Returns: [ dict ]: List of commit SHAs and associated (truncated) message """ @@ -112,7 +116,6 @@ def create_modules_json(pipeline_dir): if os.path.isdir(os.path.join(modules_dir, user_name)) and user_name != "local" for repo_name in os.listdir(os.path.join(modules_dir, user_name)) ] - log.warning(repo_names) # Get all module names in the repos repo_module_names = { @@ -125,7 +128,6 @@ def create_modules_json(pipeline_dir): ) for repo_name in repo_names } - log.warning(repo_module_names) progress_bar = rich.progress.Progress( "[bold blue]{task.description}", From 56ade789bfc03d8a5aaae39352f6223deb8d6593 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:07:27 +0200 Subject: [PATCH 3/7] Make install work with new dir structure --- nf_core/modules/install.py | 9 +++------ nf_core/modules/modules_command.py | 13 ++++++++----- nf_core/modules/modules_repo.py | 7 ++++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 3bbc46c12..76fdd8a06 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -1,6 +1,4 @@ import os -import sys -import json import questionary import logging @@ -49,10 +47,9 @@ def install(self, module): log.error("Module '{}' not found in list of available modules.".format(module)) log.info("Use the command 'nf-core modules list' to view available software") return False + # Set the install folder based on the repository name - install_folder = ["nf-core", "software"] - if not self.modules_repo.name == "nf-core/modules": - install_folder = ["external"] + install_folder = [self.modules_repo.user, self.modules_repo.repo] # 
Compute the module directory module_dir = os.path.join(self.dir, "modules", *install_folder, module) @@ -130,7 +127,7 @@ def install(self, module): return False # Update module.json with newly installed module - self.update_modules_json(modules_json, module, version) + self.update_modules_json(modules_json, self.modules_repo.name, module, version) return True def check_module_files_installed(self, module_name, module_dir): diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 4a9a4d754..e36df8f90 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -1,3 +1,4 @@ +from nf_core import modules import os import glob import shutil @@ -118,16 +119,18 @@ def load_modules_json(self): modules_json = None return modules_json + def update_modules_json(self, modules_json, repo_name, module_name, module_version): + """Updates the 'module.json' file with new module info""" + if repo_name not in modules_json["modules"]: + modules_json["modules"][repo_name] = dict() + modules_json["modules"][repo_name][module_name] = {"git_sha": module_version} + self.dump_modules_json(modules_json) + def dump_modules_json(self, modules_json): modules_json_path = os.path.join(self.dir, "modules.json") with open(modules_json_path, "w") as fh: json.dump(modules_json, fh, indent=4) - def update_modules_json(self, modules_json, module_name, module_version): - """Updates the 'module.json' file with new module info""" - modules_json["modules"][module_name] = {"git_sha": module_version} - self.dump_modules_json(modules_json) - def load_lint_config(self): """Parse a pipeline lint config file. 
diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index d0686dca9..c5849f62c 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -18,6 +18,7 @@ class ModulesRepo(object): def __init__(self, repo="nf-core/modules", branch="master"): self.name = repo + self.user, self.repo = self.name.split("/") self.branch = branch self.modules_file_tree = {} self.modules_current_hash = None @@ -46,9 +47,9 @@ def get_modules_file_tree(self): self.modules_current_hash = result["sha"] self.modules_file_tree = result["tree"] for f in result["tree"]: - if f["path"].startswith("software/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: + if f["path"].startswith(f"modules/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: # remove software/ and /main.nf - self.modules_avail_module_names.append(f["path"][9:-8]) + self.modules_avail_module_names.append(f["path"].replace("modules/", "").replace("/main.nf", "")) def get_module_file_urls(self, module, commit=""): """Fetch list of URLs for a specific module @@ -73,7 +74,7 @@ def get_module_file_urls(self, module, commit=""): """ results = {} for f in self.modules_file_tree: - if not f["path"].startswith("software/{}".format(module)): + if not f["path"].startswith("modules/{}".format(module)): continue if f["type"] != "blob": continue From fedc6547ed259c0b4932ed4949faea88a204d928 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:13:39 +0200 Subject: [PATCH 4/7] Rename modules.json header 'modules' to 'repos' --- nf_core/modules/install.py | 2 +- nf_core/modules/module_utils.py | 8 +++----- nf_core/modules/modules_command.py | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 76fdd8a06..b7985402f 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -59,7 +59,7 @@ def install(self, module): if not modules_json: return False 
- current_entry = modules_json["modules"].get(module) + current_entry = modules_json["repos"].get(module) if current_entry is not None and self.sha is None: # Fetch the latest commit for the module diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 0cbf48743..948d65408 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -37,8 +37,6 @@ def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2021-07-07T0 since (str): Only show commits later than this timestamp. Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ. - - Returns: [ dict ]: List of commit SHAs and associated (truncated) message """ @@ -106,7 +104,7 @@ def create_modules_json(pipeline_dir): pipeline_config = nf_core.utils.fetch_wf_config(pipeline_dir) pipeline_name = pipeline_config["manifest.name"] pipeline_url = pipeline_config["manifest.homePage"] - modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "modules": dict()} + modules_json = {"name": pipeline_name.strip("'"), "homePage": pipeline_url.strip("'"), "repos": dict()} modules_dir = f"{pipeline_dir}/modules" # Extract all modules repos in the pipeline directory @@ -142,7 +140,7 @@ def create_modules_json(pipeline_dir): for repo_name, module_names in repo_module_names.items(): module_repo = ModulesRepo(repo=repo_name) repo_path = os.path.join(modules_dir, repo_name) - modules_json["modules"][repo_name] = dict() + modules_json["repos"][repo_name] = dict() for module_name in module_names: module_path = os.path.join(repo_path, module_name) progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") @@ -160,7 +158,7 @@ def create_modules_json(pipeline_dir): correct_commit_sha = find_correct_commit_sha(module_name, module_path, module_repo, commit_shas) commit_page_nbr += 1 - modules_json["modules"][repo_name][module_name] = {"git_sha": correct_commit_sha} + modules_json["repos"][repo_name][module_name] = 
{"git_sha": correct_commit_sha} except LookupError as e: log.error(e) raise UserWarning("Will not create 'modules.json' file") diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index e36df8f90..ac3bfd3be 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -121,9 +121,9 @@ def load_modules_json(self): def update_modules_json(self, modules_json, repo_name, module_name, module_version): """Updates the 'module.json' file with new module info""" - if repo_name not in modules_json["modules"]: - modules_json["modules"][repo_name] = dict() - modules_json["modules"][repo_name][module_name] = {"git_sha": module_version} + if repo_name not in modules_json["repos"]: + modules_json["repos"][repo_name] = dict() + modules_json["repos"][repo_name][module_name] = {"git_sha": module_version} self.dump_modules_json(modules_json) def dump_modules_json(self, modules_json): From 683e613f6162fd8ebb1f88a27cb7513716736d73 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:28:17 +0200 Subject: [PATCH 5/7] Remove more references to 'software' --- nf_core/modules/module_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 948d65408..92a0ee62b 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -40,7 +40,7 @@ def get_module_git_log(module_name, per_page=30, page_nbr=1, since="2021-07-07T0 Returns: [ dict ]: List of commit SHAs and associated (truncated) message """ - api_url = f"https://api.github.com/repos/nf-core/modules/commits?sha=master&path=software/{module_name}&per_page={per_page}&page={page_nbr}&since={since}" + api_url = f"https://api.github.com/repos/nf-core/modules/commits?sha=master&path=modules/{module_name}&per_page={per_page}&page={page_nbr}&since={since}" log.debug(f"Fetching commit history of module '{module_name}' from github API") response = 
requests.get(api_url, auth=nf_core.utils.github_api_auto_auth()) if response.status_code == 200: @@ -209,7 +209,7 @@ def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_ files_are_equal = [False, False, False] remote_copies = [None, None, None] - module_base_url = f"https://raw.githubusercontent.com/{modules_repo.name}/{commit_sha}/software/{module_name}" + module_base_url = f"https://raw.githubusercontent.com/{modules_repo.name}/{commit_sha}/modules/{module_name}" for i, file in enumerate(files_to_check): # Download remote copy and compare api_url = f"{module_base_url}/{file}" @@ -277,7 +277,7 @@ def get_installed_modules(dir, repo_type="modules"): local_modules = [] nfcore_modules = [] local_modules_dir = None - nfcore_modules_dir = os.path.join(dir, "modules", "nf-core", "software") + nfcore_modules_dir = os.path.join(dir, "modules", "nf-core", "modules") # Get local modules if repo_type == "pipeline": @@ -290,7 +290,7 @@ def get_installed_modules(dir, repo_type="modules"): # nf-core/modules if repo_type == "modules": - nfcore_modules_dir = os.path.join(dir, "software") + nfcore_modules_dir = os.path.join(dir, "modules") # Get nf-core modules if os.path.exists(nfcore_modules_dir): @@ -328,7 +328,7 @@ def get_repo_type(dir): # Determine repository type if os.path.exists(os.path.join(dir, "main.nf")): return "pipeline" - elif os.path.exists(os.path.join(dir, "software")): + elif os.path.exists(os.path.join(dir, "modules")): return "modules" else: raise LookupError("Could not determine repository type of '{}'".format(dir)) From a8dd99ddff630055df9f7023f75b4db14f9c5b8a Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:30:30 +0200 Subject: [PATCH 6/7] Remove even more references to 'software' --- nf_core/modules/modules_command.py | 4 ++-- nf_core/modules/modules_repo.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 
ac3bfd3be..469b10332 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -36,9 +36,9 @@ def get_pipeline_modules(self): """Get list of modules installed in the current directory""" self.module_names = [] if self.repo_type == "pipeline": - module_base_path = f"{self.dir}/modules/nf-core/software" + module_base_path = f"{self.dir}/modules/nf-core/modules" elif self.repo_type == "modules": - module_base_path = f"{self.dir}/software" + module_base_path = f"{self.dir}/modules" else: log.error("Directory is neither a clone of nf-core/modules nor a pipeline") raise SystemError diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index c5849f62c..6743fa87f 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -48,7 +48,7 @@ def get_modules_file_tree(self): self.modules_file_tree = result["tree"] for f in result["tree"]: if f["path"].startswith(f"modules/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: - # remove software/ and /main.nf + # remove modules/ and /main.nf self.modules_avail_module_names.append(f["path"].replace("modules/", "").replace("/main.nf", "")) def get_module_file_urls(self, module, commit=""): From b2e0236b773ada14df80114078e52ce1f314c7ea Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Wed, 7 Jul 2021 15:42:26 +0200 Subject: [PATCH 7/7] 'fix' tests --- tests/modules/install.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/install.py b/tests/modules/install.py index 399d7f4aa..c9ea65d10 100644 --- a/tests/modules/install.py +++ b/tests/modules/install.py @@ -25,14 +25,14 @@ def test_modules_install_nomodule(self): def test_modules_install_trimgalore(self): """Test installing a module - TrimGalore!""" assert self.mods_install.install("trimgalore") is not False - module_path = os.path.join(self.mods_install.dir, "modules", "nf-core", "software", "trimgalore") + module_path = 
os.path.join(self.mods_install.dir, "modules", "nf-core", "modules", "trimgalore") assert os.path.exists(module_path) def test_modules_install_trimgalore_alternative_source(self): """Test installing a module from a different source repository - TrimGalore!""" assert self.mods_install_alt.install("trimgalore") is not False - module_path = os.path.join(self.mods_install.dir, "modules", "external", "trimgalore") + module_path = os.path.join(self.mods_install.dir, "modules", "ewels", "nf-core-modules", "trimgalore") assert os.path.exists(module_path)