diff --git a/.github/python/find_changed_files.py b/.github/python/find_changed_files.py index 6dc73449..c0c61752 100644 --- a/.github/python/find_changed_files.py +++ b/.github/python/find_changed_files.py @@ -1,15 +1,17 @@ #!/usr/bin/env python -## This script is used to generate scan *.nf.test files for function/process/workflow name and return as a JSON list -# It is functionally similar to nf-test list but fills a gap until feature https://github.com/askimed/nf-test/issues/196 is added +# This script is used to identify *.nf.test files for changed functions/processes/workflows/pipelines and *.nf.test files +# with changed dependencies, then return as a JSON list import argparse import json import logging import re +import yaml from itertools import chain from pathlib import Path +from git import Repo def parse_args() -> argparse.Namespace: @@ -23,11 +25,40 @@ def parse_args() -> argparse.Namespace: description="Scan *.nf.test files for function/process/workflow name and return as a JSON list" ) parser.add_argument( - "-p", - "--paths", + "-r", + "--head_ref", + required=True, + help="Head reference branch (Source branch for a PR).", + ) + parser.add_argument( + "-b", + "--base_ref", + required=True, + help="Base reference branch (Target branch for a PR).", + ) + parser.add_argument( + "-x", + "--ignored_files", nargs="+", - default=["."], - help="List of directories or files to scan", + default=[ + ".git/*", + ".gitpod.yml", + ".prettierignore", + ".prettierrc.yml", + "*.md", + "*.png", + "modules.json", + "pyproject.toml", + "tower.yml", + ], + help="List of files or file substrings to ignore.", + ) + parser.add_argument( + "-i", + "--include", + type=Path, + default=".github/python/include.yaml", + help="Path to an include file containing a YAML of key value pairs to include in changed files. 
I.e., return the current directory if an important file is changed.", ) parser.add_argument( "-l", @@ -47,26 +78,109 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def find_files(paths: list[str]) -> list[Path]: +def read_yaml_inverted(file_path: str) -> dict: """ - Find all files matching pattern *.nf.test recursively from a list of paths. + Read a YAML file and return its contents as a dictionary but reversed, i.e. the values become the keys and the keys become the values. Args: - paths (list): List of directories or files to scan. + file_path (str): The path to the YAML file. Returns: - list: List of files matching the pattern *.nf.test. + dict: The contents of the YAML file as a dictionary inverted (empty if the YAML document is empty). """ - # this is a bit clunky - result = [] - for path in paths: - path_obj = Path(path) + with open(file_path, "r") as f: + data = yaml.safe_load(f) + + # Invert dictionary of lists: list items become keys, the original keys become values (safe_load gives None for an empty file) + # { "key": ["item1", "item2"] } --> { "item1": "key", "item2": "key" } + return {value: key for key, values in (data or {}).items() for value in values} + + +def find_changed_files( + branch1: str, + branch2: str, + ignore: list[str], +) -> list[Path]: + """ + Find all files that have been changed between two specified branches, excluding ignored files. + + Args: + branch1 (str) : The first branch being compared + branch2 (str) : The second branch being compared + ignore (list) : List of glob patterns (matched with Path.match) for files to ignore. + + Returns: + list: Unique paths of the changed files (of any type) between branch1 and branch2 that match no ignore pattern. 
+ """ + # create repo + repo = Repo(".") + # identify commit on branch1 + branch1_commit = repo.commit(branch1) + # identify commit on branch2 + branch2_commit = repo.commit(branch2) + # compare two branches + diff_index = branch1_commit.diff(branch2_commit) + + # Start empty list of changed files + changed_files = [] + + # For every file that has changed between commits + for file in diff_index: + # Get pathlib.Path object + filepath = Path(file.a_path) + # If file does not match any pattern in the ignore list, add it to changed_files + if not any(filepath.match(ignored_path) for ignored_path in ignore): + changed_files.append(filepath) + + # Deduplicate the results before returning for efficiency + return list(set(changed_files)) + + +def detect_include_files( + changed_files: list[Path], include_files: dict[str, str] +) -> list[Path]: + """ + Detects the include files based on the changed files. + + Args: + changed_files (list[Path]): List of paths to the changed files. + include_files (dict[str, str]): Mapping of file glob pattern to the path to return when a changed file matches that pattern. + + Returns: + list[Path]: List of paths built from the values of the include_files dictionary whose pattern (key) matched a path in changed_files. + """ + new_changed_files = [] + for filepath in changed_files: + # If file matches an include pattern, we return the mapped path instead of the file + for include_path, include_key in include_files.items(): + if filepath.match(include_path): + new_changed_files.append(Path(include_key)) + return new_changed_files + + +def detect_nf_test_files(changed_files: list[Path]) -> list[Path]: + """ + Detects and returns a list of nf-test files from the given list of changed files. + + Args: + changed_files (list[Path]): A list of file or directory paths. + + Returns: + list[Path]: A list of nf-test file paths. 
+ """ + result: list[Path] = [] + for path in changed_files: # If Path is the exact nf-test file add to list: - if path_obj.match("*.nf.test"): - result.append(path_obj) + if path.match("*.nf.test") and path.exists(): + result.append(path) # Else recursively search for nf-test files: else: - for file in path_obj.rglob("*.nf.test"): + # Search a directory itself, or a file's enclosing dir so files in the same dir can be found. + # e.g. + # dir/ + # ├─ main.nf + # ├─ main.nf.test + for file in (path if path.is_dir() else path.parent).rglob("*.nf.test"): result.append(file) return result @@ -102,7 +216,7 @@ def process_files(files: list[Path]) -> list[str]: return result -def generate( +def convert_nf_test_files_to_test_types( lines: list[str], types: list[str] = ["function", "process", "workflow", "pipeline"] ) -> dict[str, list[str]]: """ @@ -115,15 +229,12 @@ def generate( Returns: dict: Dictionary with function, process and workflow lists. """ - result: dict[str, list[str]] = { - "function": [], - "process": [], - "workflow": [], - "pipeline": [], - } + # Populate empty dict from types + result: dict[str, list[str]] = {key: [] for key in types} + for line in lines: words = line.split() - if len(words) == 2: + if len(words) == 2 and re.match(r"""^(["']).*\1$""", words[1]): keyword = words[0] name = words[1].strip("'\"") # Strip both single and double quotes if keyword in types: @@ -131,16 +242,62 @@ def generate( return result +def find_changed_dependencies(paths: list[Path], tags: list[str]) -> list[Path]: + """ + Find all *.nf.test files with changed dependencies from a list of paths. + + Args: + paths (list): List of directories or files to scan. + tags (list): List of tags identified as having changes. + + Returns: + list: List of *.nf.test files with changed dependencies. 
+ """ + + result: list[Path] = [] + + nf_test_files = detect_nf_test_files(paths) + + # find nf-test files with changed dependencies + for nf_test_file in nf_test_files: + with open(nf_test_file, "r") as f: + lines = f.readlines() + # Get all tags from nf-test file + # Make case insensitive with .casefold() + tags_in_nf_test_file = [ + tag.casefold().replace("/", "_") + for tag in convert_nf_test_files_to_test_types(lines, types=["tag"])[ + "tag" + ] + ] + # Check if any changed tag appears among the tags in the nf-test file. + # Use .casefold() to be case insensitive + if any( + tag.casefold().replace("/", "_") in tags_in_nf_test_file for tag in tags + ): + result.append(nf_test_file) + + return result + + if __name__ == "__main__": # Utility stuff args = parse_args() logging.basicConfig(level=args.log_level) - # Parse nf-test files for targets of tests - files = find_files(args.paths) - lines = process_files(files) - result = generate(lines) + # Parse nf-test files for target test tags + changed_files = find_changed_files(args.head_ref, args.base_ref, args.ignored_files) + + # If an additional include YAML is added, we detect additional changed dirs to include + if args.include: + include_files = read_yaml_inverted(args.include) + changed_files = changed_files + detect_include_files( + changed_files, include_files + ) + nf_test_files = detect_nf_test_files(changed_files) + lines = process_files(nf_test_files) + result = convert_nf_test_files_to_test_types(lines) # Get only relevant results (specified by -t) # Unique using a set @@ -148,5 +305,13 @@ def generate( {item for sublist in map(result.get, args.types) for item in sublist} ) + # Parse files to identify nf-tests with changed dependencies + changed_dep_files = find_changed_dependencies([Path(".")], target_results) + + # Combine target nf-test files and nf-test files with changed dependencies (sorted for a stable output order) + all_nf_tests = [ + str(test_path) for test_path in sorted(set(changed_dep_files + nf_test_files)) + ] + 
# Print to stdout - 
print(json.dumps(target_results)) + print(json.dumps(all_nf_tests)) diff --git a/.github/python/include.yaml b/.github/python/include.yaml new file mode 100644 index 00000000..a3629f4c --- /dev/null +++ b/.github/python/include.yaml @@ -0,0 +1,10 @@ +".": + - ./.github/workflows/** + - ./nf-test.config + - ./nextflow.config +tests: + - ./assets/* + - ./bin/* + - ./conf/* + - ./main.nf + - ./nextflow_schema.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a8d0935..527048fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,8 +27,7 @@ jobs: name: Check for changes runs-on: ubuntu-latest outputs: - changes: ${{ steps.changed_files.outputs.any_modified }} - tags: ${{ steps.list.outputs.tags }} + nf_test_files: ${{ steps.list.outputs.nf_test_files }} steps: - uses: actions/setup-python@v4 with: @@ -39,60 +38,29 @@ jobs: with: fetch-depth: 0 - - uses: tj-actions/changed-files@v42 - id: changed_files - with: - dir_names: "true" - output_renamed_files_as_deleted_and_added: "true" - # Define list of additional rules for testing paths - # Mostly, we define additional 'pipeline' or 'all' tests here - files_yaml: | - ".": - - .github/workflows/** - - nf-test.config - - nextflow.config - tests: - - assets/* - - bin/* - - conf/* - - main.nf - - nextflow_schema.json - - files_ignore: | - .git* - .gitpod.yml - .prettierignore - .prettierrc.yml - **.md - **.png - modules.json - pyproject.toml - tower.yml - - - name: debug + - name: Install gitpython find changed files run: | - echo ${{ steps.changed_files.outputs.any_modified }} - echo ${{ steps.changed_files.outputs.all_changed_files }} - echo ${{ steps.changed_files.outputs.changed_keys }} + python -m pip install --upgrade pip + pip install gitpython pyyaml - - name: nf-test list tags + - name: List nf-test files id: list - if: ${{ steps.changed_files.outputs.any_modified }} run: | - echo tags=$(python \ + echo nf_test_files=$(python \ .github/python/find_changed_files.py \ -t 
pipeline workflow process \ - -p ${{ steps.changed_files.outputs.all_changed_files }} ${{ steps.changed_files.outputs.changed_keys }} \ + --head_ref ${{ github.sha }} \ + --base_ref origin/${{ github.base_ref }} \ ) >> $GITHUB_OUTPUT - - name: debug2 + - name: print list of nf-test files run: | - echo ${{ steps.list.outputs.tags }} + echo ${{ steps.list.outputs.nf_test_files }} test: - name: ${{ matrix.tags }} ${{ matrix.profile }} NF-${{ matrix.NXF_VER }} + name: ${{ matrix.nf_test_files }} ${{ matrix.profile }} NF-${{ matrix.NXF_VER }} needs: [changes] - if: needs.changes.outputs.changes + if: needs.changes.outputs.nf_test_files != '[]' runs-on: ubuntu-latest strategy: fail-fast: false @@ -100,16 +68,16 @@ jobs: NXF_VER: - "latest-everything" - "23.04" - tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + nf_test_files: ["${{ fromJson(needs.changes.outputs.nf_test_files) }}"] profile: - "docker" steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" @@ -140,7 +108,7 @@ jobs: - name: Run nf-test run: | - nf-test test --verbose --tag ${{ matrix.tags }} --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap + nf-test test --verbose ${{ matrix.nf_test_files }} --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap - uses: pcolby/tap-summary@v1 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bba6e9d..b9232710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes - [PR #299](https://github.com/nf-core/fetchngs/pull/299) - Template update for nf-core/tools v2.13.1 +- [PR #300](https://github.com/nf-core/fetchngs/pull/300) - Use file paths instead of tags for testing matrix, should make matrices more efficient 
## [[1.12.0](https://github.com/nf-core/fetchngs/releases/tag/1.12.0)] - 2024-02-29