Merge pull request #42 from CenterForMedicalGeneticsGhent/template-v2.8

Template merge v2.8
nf-cmgg · May 11, 2023 · dbb159b · dbb159b
2 parents 0cd4c3d + 48bed31
commit dbb159b
Show file tree

Hide file tree

Showing 60 changed files with 353 additions and 217 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js,cff}]
+[*.{md,yml,yaml,html,css,scss,js}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -45,7 +45,10 @@ body:
 
         * Hardware _(eg. HPC, Desktop, Cloud)_
         * Executor _(eg. slurm, local, awsbatch)_
-        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_
+
+        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
+        or Apptainer)_
+
         * OS _(eg. CentOS Linux, macOS, Linux Mint)_
 
         * Version of CenterForMedicalGeneticsGhent/nf-cmgg-structural _(eg. 1.1, 1.5,

diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Check PRs
         if: github.repository == 'CenterForMedicalGeneticsGhent/nf-cmgg-structural'
         run: |
-          { [[ ${{github.event.pull_request.head.repo.full_name }} == CenterForMedicalGeneticsGhent/nf-cmgg-structural ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == CenterForMedicalGeneticsGhent/nf-cmgg-structural ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
 
       # If the above check failed, post a comment on the PR explaining the failure
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets

diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
@@ -0,0 +1,24 @@
+name: "Close user-tagged issues and PRs"
+on:
+  schedule:
+    - cron: "0 0 * * 0" # Once a week
+
+jobs:
+  clean-up:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v7
+        with:
+          stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
+          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
+          close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity."
+          days-before-stale: 30
+          days-before-close: 20
+          days-before-pr-close: -1
+          any-of-labels: "awaiting-changes,awaiting-feedback"
+          exempt-issue-labels: "WIP"
+          exempt-pr-labels: "WIP"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -37,6 +37,36 @@ jobs:
       - name: Run Prettier --check
         run: prettier --check ${GITHUB_WORKSPACE}
 
+  PythonBlack:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Check code lints with Black
+        uses: psf/black@stable
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@v1
+        with:
+          message: |
+            ## Python linting (`black`) is failing
+
+            To keep the code consistent with lots of contributors, we run automated code consistency checks.
+            To fix this CI test, please run:
+
+            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
+            * Fix formatting errors in your pipeline: `black .`
+
+            Once you push these changes the test should pass, and you can hide this comment :+1:
+
+            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
+
   nf-core:
     runs-on: ubuntu-latest
     steps:
@@ -48,7 +78,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.7"
+          python-version: "3.8"
           architecture: "x64"
 
       - name: Install dependencies

diff --git a/.nf-core.yml b/.nf-core.yml
@@ -5,6 +5,9 @@ lint:
     - assets/nf-core-nf-cmgg-structural_logo_light.png
     - docs/images/nf-core-nf-cmgg-structural_logo_light.png
     - docs/images/nf-core-nf-cmgg-structural_logo_dark.png
+    - assets/nf-core-nf-cmgg-structural_logo_light.png
+    - docs/images/nf-core-nf-cmgg-structural_logo_light.png
+    - docs/images/nf-core-nf-cmgg-structural_logo_dark.png
     - .github/ISSUE_TEMPLATE/config.yml
     - .github/workflows/awstest.yml
     - .github/workflows/awsfulltest.yml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,5 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v2.7.1"
+    hooks:
+      - id: prettier
diff --git a/README.md b/README.md
@@ -12,34 +12,28 @@
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
 
-## Pipeline summary
-
 ![metro map](docs/images/metro_map.png)
 
-## Quick Start
-
-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.5`)
-
-2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
-
-3. Download the pipeline and test it on a minimal dataset with a single command:
-
-   ```bash
-   nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-structural -profile test,YOURPROFILE --outdir <OUTDIR>
-   ```
+## Usage
 
-   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
+> **Note**
+> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
+> to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
+> with `-profile test` before running the workflow on actual data.
 
-   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
-   > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-   > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
-   > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
+Now, you can run the pipeline using:
 
-4. Start running your own analysis!
+```bash
+nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-structural \
+   -profile <docker/singularity/.../institute> \
+   --input samplesheet.csv \
+   --outdir <OUTDIR>
+```
 
-   ```bash
-   nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-structural --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-   ```
+> **Warning**
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
+> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
+> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
 
 ## Documentation
 

diff --git a/bin/viola_standardize.py b/bin/viola_standardize.py
@@ -8,10 +8,10 @@
 if __name__ == "__main__":
     # Setting up argparser
     parser = argparse.ArgumentParser(description="A script to standardize VCFs using Viola-SV")
-    parser.add_argument('vcf', metavar='FILE', type=str, help="The called VCF")
-    parser.add_argument('caller', metavar='STRING', type=str, help="The caller used to call the VCF")
-    parser.add_argument('out_file', metavar='FILE', type=str, help="The standardized VCF")
-    parser.add_argument('patient_name', metavar='STRING', type=str, help="The name of the patient in the VCF file")
+    parser.add_argument("vcf", metavar="FILE", type=str, help="The called VCF")
+    parser.add_argument("caller", metavar="STRING", type=str, help="The caller used to call the VCF")
+    parser.add_argument("out_file", metavar="FILE", type=str, help="The standardized VCF")
+    parser.add_argument("patient_name", metavar="STRING", type=str, help="The name of the patient in the VCF file")
 
     args = parser.parse_args()
 
@@ -20,18 +20,21 @@
     out_file = args.out_file
     patient_name = args.patient_name
 
-    if caller == "smoove": caller = "lumpy"
+    if caller == "smoove":
+        caller = "lumpy"
 
     if caller == "gridss":
         svlen_not_added = True
-        old_vcf = f'old_{vcf}'
+        old_vcf = f"old_{vcf}"
         os.rename(vcf, old_vcf)
-        with open(old_vcf, 'r') as old:
-            with open(vcf, 'w') as new:
+        with open(old_vcf, "r") as old:
+            with open(vcf, "w") as new:
                 for line in old.readlines():
                     if line.startswith("##INFO") and svlen_not_added:
                         svlen_not_added = False
-                        new.write("##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"The length of the structural variant.\">\n")
+                        new.write(
+                            '##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="The length of the structural variant.">\n'
+                        )
                     new.write(line.replace("CIRPOS", "CIEND"))
 
     viola.read_vcf(vcf, variant_caller=caller, patient_name=patient_name).breakend2breakpoint().to_vcf(out_file)
diff --git a/conf/base.config b/conf/base.config
@@ -15,7 +15,7 @@ process {
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time   = { check_max( 4.h  * task.attempt, 'time'   ) }
 
-    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
 

diff --git a/conf/igenomes.config b/conf/igenomes.config
@@ -36,6 +36,14 @@ params {
             macs_gsize  = "2.7e9"
             blacklist   = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
         }
+        'CHM13' {
+            fasta       = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
+            bwa         = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/"
+            bwamem2     = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/"
+            gtf         = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf"
+            gff         = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz"
+            mito_name   = "chrM"
+        }
         'GRCm38' {
             fasta       = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa"
             bwa         = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/"

diff --git a/conf/test_full.config b/conf/test_full.config
@@ -10,6 +10,8 @@
 ----------------------------------------------------------------------------------------
 */
 
+cleanup = true
+
 params {
     config_profile_name        = 'Full test profile'
     config_profile_description = 'Full test dataset to check pipeline function'