diff --git a/.editorconfig b/.editorconfig index 95549501..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,12 +8,9 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 -[*.json] -insert_final_newline = unset - # These files are edited and tested upstream in nf-core/modules [/modules/nf-core/**] charset = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c56ffccf..31d4faf7 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,8 +15,7 @@ Contributions to the code are even more welcome ;) If you'd like to write some code for nf-core/hicar, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/hicar issues](https://github.com/nf-core/hicar/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/hicar issues](https://github.com/nf-core/hicar/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/hicar repository](https://github.com/nf-core/hicar) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) 4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). @@ -49,9 +48,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). 
-* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. ## Getting help @@ -73,7 +72,7 @@ If you wish to contribute a new step, please use the following coding standards: 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values @@ -92,8 +91,8 @@ The process resources can be passed on to the tool dynamically within the proces Please use the following naming schemes, to make it easy to understand what is going where. 
-* initial process channel: `ch_output_from_` -* intermediate and terminal channels: `ch__for_` +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` ### Nextflow version bumping diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 7c93c0d3..876a4274 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,7 +2,6 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: - - type: markdown attributes: value: | diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 95bf501f..cd7ffb3d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,10 +16,10 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/hica - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/hicar/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/hicar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. + - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/hicar/tree/master/.github/CONTRIBUTING.md) + - [ ] If necessary, also make a PR on the nf-core/hicar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker` --outdir `). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
- [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 31a1af10..810b2c07 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -10,9 +10,9 @@ jobs: if: github.repository == 'nf-core/hicar' runs-on: ubuntu-latest steps: + # Launch workflow using Tower CLI tool action - name: Launch workflow via tower uses: nf-core/tower-action@v3 - with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 343499cc..ccf0a8b3 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,8 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/hicar' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/hicar ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/hicar ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f65aec9..67de199c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,19 +16,19 @@ jobs: test: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hicar') }} + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hicar') }}" runs-on: ubuntu-latest strategy: matrix: # Nextflow versions - profile: ['test', 'test_full', 'test_hipeak', 'test_hichip'] + 
profile: ["test", "test_full", "test_hipeak", "test_hichip"] include: # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' + - NXF_VER: "21.10.3" + NXF_EDGE: "" # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' + - NXF_VER: "" + NXF_EDGE: "1" steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -46,3 +46,5 @@ jobs: - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results + +# diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 29088e5b..e9cf5de3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,7 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: push: pull_request: @@ -8,42 +9,6 @@ on: types: [published] jobs: - Markdown: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint . - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## Markdown linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `markdownlint-cli` - * On Mac: `brew install markdownlint-cli` - * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) - * Fix the markdown errors - * Automatically: `markdownlint . 
--fix` - * Manually resolve anything left from `markdownlint .` - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false - EditorConfig: runs-on: ubuntu-latest steps: @@ -55,49 +20,24 @@ jobs: run: npm install -g editorconfig-checker - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - YAML: + Prettier: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@master - - name: 'Yamllint' - uses: karancode/yamllint-github-action@master - with: - yamllint_file_or_dir: '.' - yamllint_config_filepath: '.yamllint.yml' - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## YAML linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `yamllint` - * Install `yamllint` following [this](https://yamllint.readthedocs.io/en/stable/quickstart.html#installing-yamllint) - instructions or alternative install it in your [conda environment](https://anaconda.org/conda-forge/yamllint) - * Fix the markdown errors - * Run the test locally: `yamllint $(find . 
-type f -name "*.yml" -o -name "*.yaml") -c ./.yamllint.yml` - * Fix any reported errors in your YAML files + - uses: actions/checkout@v2 - Once you push these changes the test should pass, and you can hide this comment :+1: + - uses: actions/setup-node@v2 - We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install Prettier + run: npm install -g prettier - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code uses: actions/checkout@v2 @@ -110,8 +50,8 @@ jobs: - uses: actions/setup-python@v1 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.6" + architecture: "x64" - name: Install dependencies run: | @@ -138,3 +78,5 @@ jobs: lint_log.txt lint_results.md PR_number.txt + +# diff --git a/.github/workflows/local_modules.yml b/.github/workflows/local_modules.yml index 83835153..bfdf2f0f 100644 --- a/.github/workflows/local_modules.yml +++ b/.github/workflows/local_modules.yml @@ -13,7 +13,7 @@ jobs: - uses: dorny/paths-filter@v2 id: filter with: - filters: 'tests/config/pytest_software.yml' + filters: "tests/config/pytest_software.yml" test: runs-on: ubuntu-latest @@ -23,9 +23,9 @@ jobs: strategy: fail-fast: false matrix: - nxf_version: ['21.04.0'] - tags: ['${{ fromJson(needs.changes.outputs.modules) }}'] - profile: ['docker', 'singularity'] ## 'conda' + nxf_version: ["21.04.0"] + tags: ["${{ fromJson(needs.changes.outputs.modules) }}"] + profile: ["docker", "singularity"] ## 'conda' env: NXF_ANSI_LOG: false steps: @@ -34,7 +34,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: "3.x" - uses: actions/cache@v2 with: @@ -46,7 +46,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 
with: - python-version: '3.x' + python-version: "3.x" - name: Install Python dependencies run: python -m pip install --upgrade pip pytest-workflow @@ -98,3 +98,5 @@ jobs: /home/runner/pytest_workflow_*/*/.nextflow.log /home/runner/pytest_workflow_*/*/log.out /home/runner/pytest_workflow_*/*/log.err + +# diff --git a/.gitpod.yml b/.gitpod.yml index b7d4cee1..c452ee93 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -2,13 +2,13 @@ image: nfcore/gitpod:latest vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files + - codezombiech.gitignore # Language support for .gitignore files # - cssho.vscode-svgviewer # SVG viewer - - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git 
a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index 9e605fcf..00000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -ul-indent: - indent: 4 -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.yamllint.yml b/.yamllint.yml deleted file mode 100644 index d466deec..00000000 --- a/.yamllint.yml +++ /dev/null @@ -1,6 +0,0 @@ -extends: default - -rules: - document-start: disable - line-length: disable - truthy: disable diff --git a/CHANGELOG.md b/CHANGELOG.md index 43687542..c6e118b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.0dev - [01/25/2022] +- add filters to chromosome names for `hipeak`. - add parameter `anchor_peaks`. - Update `MAPS` for new version of `VGAM`. - add parameter `publish_dir_mode`. diff --git a/CITATIONS.md b/CITATIONS.md index 0c923231..f5003fbf 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,55 +10,70 @@ ## Pipeline tools -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 
2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. * [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) - > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + + > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. * [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. * [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. 
* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) - > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. * [cooler](https://pubmed.ncbi.nlm.nih.gov/31290943/) - > Abdennur N, Mirny LA. Cooler: scalable storage for Hi-C data and other genomically labeled arrays. Bioinformatics. 2020 Jan 1;36(1):311-316. doi: 10.1093/bioinformatics/btz540. PMID: 31290943. + + > Abdennur N, Mirny LA. Cooler: scalable storage for Hi-C data and other genomically labeled arrays. Bioinformatics. 2020 Jan 1;36(1):311-316. doi: 10.1093/bioinformatics/btz540. PMID: 31290943. * [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) - > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. * [MAPS](https://pubmed.ncbi.nlm.nih.gov/30986246/) - > Juric I, Yu M, Abnousi A, Raviram R, Fang R, Zhao Y, Zhang Y, Qiu Y, Yang Y, Li Y, Ren B, Hu M. MAPS: Model-based analysis of long-range chromatin interactions from PLAC-seq and HiChIP experiments. PLoS Comput Biol. 
2019 Apr 15;15(4):e1006982. doi: 10.1371/journal.pcbi.1006982. PMID: 30986246; PMCID: PMC6483256. + + > Juric I, Yu M, Abnousi A, Raviram R, Fang R, Zhao Y, Zhang Y, Qiu Y, Yang Y, Li Y, Ren B, Hu M. MAPS: Model-based analysis of long-range chromatin interactions from PLAC-seq and HiChIP experiments. PLoS Comput Biol. 2019 Apr 15;15(4):e1006982. doi: 10.1371/journal.pcbi.1006982. PMID: 30986246; PMCID: PMC6483256. * [GenMap](https://pubmed.ncbi.nlm.nih.gov/32246826/) - > Pockrandt C, Alzamel M, Iliopoulos CS, Reinert K. GenMap: ultra-fast computation of genome mappability. Bioinformatics. 2020 Jun 1;36(12):3687-3692. doi: 10.1093/bioinformatics/btaa222. PMID: 32246826; PMCID: PMC7320602. + + > Pockrandt C, Alzamel M, Iliopoulos CS, Reinert K. GenMap: ultra-fast computation of genome mappability. Bioinformatics. 2020 Jun 1;36(12):3687-3692. doi: 10.1093/bioinformatics/btaa222. PMID: 32246826; PMCID: PMC7320602. * [edgeR](https://pubmed.ncbi.nlm.nih.gov/19910308/) - > Robinson MD, McCarthy DJ, Smyth GK. edgeR: a Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics. 2010 Jan 1;26(1):139-40. doi: 10.1093/bioinformatics/btp616. Epub 2009 Nov 11. PMID: 19910308; PMCID: PMC2796818. + + > Robinson MD, McCarthy DJ, Smyth GK. edgeR: a Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics. 2010 Jan 1;26(1):139-40. doi: 10.1093/bioinformatics/btp616. Epub 2009 Nov 11. PMID: 19910308; PMCID: PMC2796818. * [ChIPpeakAnno](https://pubmed.ncbi.nlm.nih.gov/20459804/) - > Zhu LJ, Gazin C, Lawson ND, Pagès H, Lin SM, Lapointe DS, Green MR. ChIPpeakAnno: a Bioconductor package to annotate ChIP-seq and ChIP-chip data. BMC Bioinformatics. 2010 May 11;11:237. doi: 10.1186/1471-2105-11-237. PMID: 20459804; PMCID: PMC3098059. + + > Zhu LJ, Gazin C, Lawson ND, Pagès H, Lin SM, Lapointe DS, Green MR. ChIPpeakAnno: a Bioconductor package to annotate ChIP-seq and ChIP-chip data. 
BMC Bioinformatics. 2010 May 11;11:237. doi: 10.1186/1471-2105-11-237. PMID: 20459804; PMCID: PMC3098059. * [clusterProfiler](https://pubmed.ncbi.nlm.nih.gov/22455463/) - > Yu G, Wang LG, Han Y, He QY. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS. 2012 May;16(5):284-7. doi: 10.1089/omi.2011.0118. Epub 2012 Mar 28. PMID: 22455463; PMCID: PMC3339379. + + > Yu G, Wang LG, Han Y, He QY. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS. 2012 May;16(5):284-7. doi: 10.1089/omi.2011.0118. Epub 2012 Mar 28. PMID: 22455463; PMCID: PMC3339379. * [trackViewer](https://pubmed.ncbi.nlm.nih.gov/31133757/) - > Ou J, Zhu LJ. trackViewer: a Bioconductor package for interactive and integrative visualization of multi-omics data. Nat Methods. 2019 Jun;16(6):453-454. doi: 10.1038/s41592-019-0430-y. PMID: 31133757. + + > Ou J, Zhu LJ. trackViewer: a Bioconductor package for interactive and integrative visualization of multi-omics data. Nat Methods. 2019 Jun;16(6):453-454. doi: 10.1038/s41592-019-0430-y. PMID: 31133757. * [circos](https://pubmed.ncbi.nlm.nih.gov/19541911/) - > Krzywinski M, Schein J, Birol I, Connors J, Gascoyne R, Horsman D, Jones SJ, Marra MA. Circos: an information aesthetic for comparative genomics. Genome Res. 2009 Sep;19(9):1639-45. doi: 10.1101/gr.092759.109. Epub 2009 Jun 18. PMID: 19541911; PMCID: PMC2752132. + + > Krzywinski M, Schein J, Birol I, Connors J, Gascoyne R, Horsman D, Jones SJ, Marra MA. Circos: an information aesthetic for comparative genomics. Genome Res. 2009 Sep;19(9):1639-45. doi: 10.1101/gr.092759.109. Epub 2009 Jun 18. PMID: 19541911; PMCID: PMC2752132. * [Juicer_tools](https://pubmed.ncbi.nlm.nih.gov/27467249/) - > Durand NC, Shamim MS, Machol I, Rao SS, Huntley MH, Lander ES, Aiden EL. Juicer Provides a One-Click System for Analyzing Loop-Resolution Hi-C Experiments. Cell Syst. 2016 Jul;3(1):95-8. doi: 10.1016/j.cels.2016.07.002. 
PMID: 27467249; PMCID: PMC5846465. + + > Durand NC, Shamim MS, Machol I, Rao SS, Huntley MH, Lander ES, Aiden EL. Juicer Provides a One-Click System for Analyzing Loop-Resolution Hi-C Experiments. Cell Syst. 2016 Jul;3(1):95-8. doi: 10.1016/j.cels.2016.07.002. PMID: 27467249; PMCID: PMC5846465. * [igv.js](https://pubmed.ncbi.nlm.nih.gov/22517427/) - > Thorvaldsdóttir H, Robinson JT, Mesirov JP. Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration. Brief Bioinform. 2013 Mar;14(2):178-92. doi: 10.1093/bib/bbs017. Epub 2012 Apr 19. PMID: 22517427; PMCID: PMC3603213. + + > Thorvaldsdóttir H, Robinson JT, Mesirov JP. Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration. Brief Bioinform. 2013 Mar;14(2):178-92. doi: 10.1093/bib/bbs017. Epub 2012 Apr 19. PMID: 22517427; PMCID: PMC3603213. * [pairsqc](https://github.com/4dn-dcic/pairsqc) @@ -66,16 +81,19 @@ ## Software packaging/containerisation tools -* [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. -* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. 
+- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) -* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. -* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) -* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index 363d1503..99817ae3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ![nf-core/hicar](docs/images/nf-core-hicar_logo_light.png#gh-light-mode-only) ![nf-core/hicar](docs/images/nf-core-hicar_logo_dark.png#gh-dark-mode-only) +# ![nf-core/hicar](docs/images/nf-core/hicar_logo_light.png#gh-light-mode-only) ![nf-core/hicar](docs/images/nf-core/hicar_logo_dark.png#gh-dark-mode-only) [![GitHub Actions CI Status](https://github.com/nf-core/hicar/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/hicar/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/hicar/actions?query=workflow%3A%22nf-core+linting%22) @@ -44,39 +44,39 @@ On release, automated continuous integration tests run the pipeline on a full-si 1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 3. Download the pipeline and test it on a minimal dataset with a single command: - ```console - nextflow run nf-core/hicar -profile test,YOURPROFILE --outdir - ``` + ```console + nextflow run nf-core/hicar -profile test,YOURPROFILE --outdir + ``` - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. + Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. - > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. 
This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > * If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. + > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. + > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. 
+ > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. 4. Start running your own analysis! - ```console - nextflow run nf-core/hicar -profile \ - --input samples.csv \ # Input data - --qval_thresh 0.01 \ # Cut-off q-value for MACS2 - --genome GRCh38 \ # Genome Reference - --mappability /path/mappability/bigWig/file # Provide mappability to avoid memory intensive calculation - ``` + ```console + nextflow run nf-core/hicar -profile \ + --input samples.csv \ # Input data + --qval_thresh 0.01 \ # Cut-off q-value for MACS2 + --genome GRCh38 \ # Genome Reference + --mappability /path/mappability/bigWig/file # Provide mappability to avoid memory intensive calculation + ``` - Run it on cluster. + Run it on cluster. - First prepare a profile config file named as [profile.config](https://nf-co.re/hicar/usage) and a [samplesheet](https://nf-co.re/hicar/usage). - Then run: + First prepare a profile config file named as [profile.config](https://nf-co.re/hicar/usage) and a [samplesheet](https://nf-co.re/hicar/usage). + Then run: - ```console - nextflow run nf-core/hicar --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` + ```console + nextflow run nf-core/hicar --input samplesheet.csv --outdir --genome GRCh37 -profile + ``` ## Documentation @@ -96,7 +96,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.5618247](https://doi.org/10.5281/zenodo.5618247) +If you use nf-core/hicar for your analysis, please cite it using the following doi: [10.5281/zenodo.5618247](https://doi.org/10.5281/zenodo.5618247) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
diff --git a/assets/email_template.html b/assets/email_template.html index 817eed8b..62174c9a 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,53 +1,111 @@ - - - - + + + + - - nf-core/hicar Pipeline Report - - -
+ + + nf-core/hicar Pipeline Report + + +
+ - +

nf-core/hicar v${version}

+

Run Name: $runName

-

nf-core/hicar v${version}

-

Run Name: $runName

- -<% if (!success){ - out << """ -
-

nf-core/hicar execution completed unsuccessfully!

+ <% if (!success){ out << """ +
+

nf-core/hicar execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ -} else { - out << """ -
+
${errorReport}
+
+ """ } else { out << """ +
nf-core/hicar execution completed successfully! -
- """ -} -%> +
+ """ } %> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
+$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> "" }.join("\n") %> - -
$k
$v
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> " + + + + + " }.join("\n") %> + +
+ $k + +
$v
+
-

nf-core/hicar

-

https://github.com/nf-core/hicar

- -
- - +

nf-core/hicar

+

https://github.com/nf-core/hicar

+
+ diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index da24f387..eec48250 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -1,27 +1,27 @@ report_comment: > - This report has been generated by the nf-core/hicar - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/hicar + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: - software_versions: - order: -1000 - nf-core-hicar-summary: - order: -1001 + software_versions: + order: -1000 + nf-core-hicar-summary: + order: -1001 export_plots: true # Run only these modules run_modules: - - custom_content - - fastqc - - cutadapt - - samtools + - custom_content + - fastqc + - cutadapt + - samtools # Hiding General Statistics Proper Pairs columns table_columns_visible: - Samtools: - reads_properly_paired_percent: False - non-primary_alignments: False + Samtools: + reads_properly_paired_percent: False + non-primary_alignments: False # Customise the module search patterns to speed up execution time # - Skip module sub-tools that we are not interested in @@ -29,125 +29,125 @@ table_columns_visible: # - Don't add anything that is the same as the MultiQC default # See https://multiqc.info/docs/#optimise-file-search-patterns for details sp: - pairs_reads_stats: - fn: reads_summary.csv - pairs_reads_detailed_summary: - fn: pairsqc_summary_out.csv - pairs_reads_proportion: - fn: dist.prop.qc.json - atac_peaks_qc: - fn: TSSEscore_FRiP.csv - maps_qc: - fn: MAPS_summary_out.csv - sample_pca: - fn: Multidimensional.scaling.qc.json - pairs_peaks_genomic_dist: - fn: genomicElementDistribuitonOfEachPeakList.*.png - other_side_genomic_dist: - fn: genomicElementDistribuitonOfremoteInteractionPeaks.*.png + pairs_reads_stats: + fn: reads_summary.csv + pairs_reads_detailed_summary: + fn: pairsqc_summary_out.csv + 
pairs_reads_proportion: + fn: dist.prop.qc.json + atac_peaks_qc: + fn: TSSEscore_FRiP.csv + maps_qc: + fn: MAPS_summary_out.csv + sample_pca: + fn: Multidimensional.scaling.qc.json + pairs_peaks_genomic_dist: + fn: genomicElementDistribuitonOfEachPeakList.*.png + other_side_genomic_dist: + fn: genomicElementDistribuitonOfremoteInteractionPeaks.*.png ignore_images: false # See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml custom_data: - pairs_reads_stats: - parent_id: 'pairs_qc' - parent_name: 'PAIRS QC' - parent_description: "Pairs QC report" - section_name: 'PAIRS reads stats' - description: 'Table of mapped reads stats. The duplication rate should be lower than 50%. 20%~30% is a good level for 60M PE reads for human genome. For shallow seq, <30% duplication rate should be the cutting value for further deep sequence. Too low duplication rate (<5%) may indicate low specific chromatin interacts. Those number is estimated by several hypothesis. We set the actual mappable genome size is 70% of the original genome size. And the open chromatin interacts region is about 2% of the genome. Take human genome as an example, the possible unique reads for 60M will be, (3G * 70% * 2%)/60M, about 70%. That is about 30% duplication rate. Those hypotheses are consistent with real data of ChIP-seq.' - file_format: "csv" - plot_type: 'table' - pconfig: - id: 'pairs_reads_stats_table' - table_title: 'PAIRS reads stats' - namespace: 'PAIRS reads stats' - pairs_reads_detailed_summary: - parent_id: 'pairs_qc' - section_name: 'PAIRS reads detailed summary' - description: "Cis-to-trans ratio is computed as the ratio of long-range cis reads (>20kb) to trans reads plus long-range cis reads. Typically, cis-to-trans ratio higher than 40% is required. Percentage of long-range cis reads is the ratio of long-range cis reads to total number of reads. 
Minimum 15% is required and 40% or higher suggests a good library(doi:10.1016/j.cell.2014.11.021). Convergence is determined as standard deviation of proportions of four read orientations to be <0.002 (Very good) or <0.05 (Good) (See section Proportion of read orientation versus genomic separation in the pairsqc_report file under pairs/QC/). The slope of log10 contact probability vs distance between 10kb ~ 300kb representing TAD is also provided as well. (See section Contact probability versus genomic separation in the pairsqc_report file under pairs/QC/)" - file_format: "csv" - plot_type: 'table' - pconfig: - id: 'pairs_reads_detailed_summary_table' - table_title: 'PAIRS reads detailed summary' - namespace: 'PAIRS reads detailed summary' - format: '{:.2f}' - pairs_reads_proportion: - parent_id: 'pairs_qc' - section_name: 'Proportion of read orientation versus genomic separation' - description: "Contact proportion of reads are shown, stratified by read orientation. Good four cutter samples would show no bias in propotion >3kb(10^3.5)." - file_format: "json" - plot_type: 'scatter' - pconfig: - id: 'pairs_reads_proportion_vs_distance' - title: 'Reads distance vs proportions' - xlab: 'distance (kb)' - ylab: 'Propotion' - ymin: 0 - ymax: 1 - marker_size: 3 - marker_line_colour: '#999999' - atac_peaks_qc: - parent_id: 'pairs_qc' - section_name: 'ATAC R2 reads quality control' - description: 'ATAC reads quality control including FRiP, TSS enrichment score and so on.' - helptext: | - *Fraction of reads in peaks (FRiP)* – Fraction of all mapped reads that fall into the called peak regions, - i.e. usable reads in significantly enriched peaks divided by all usable reads. - In general, FRiP scores correlate positively with the number of regions. - ([Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831](https://pubmed.ncbi.nlm.nih.gov/22955991/). 
- *FRiP should be greater than 1%* + pairs_reads_stats: + parent_id: "pairs_qc" + parent_name: "PAIRS QC" + parent_description: "Pairs QC report" + section_name: "PAIRS reads stats" + description: 'Table of mapped reads stats. The duplication rate should be lower than 50%. 20%~30% is a good level for 60M PE reads for human genome. For shallow seq, <30% duplication rate should be the cutting value for further deep sequence. Too low duplication rate (<5%) may indicate low specific chromatin interacts. Those number is estimated by several hypothesis. We set the actual mappable genome size is 70% of the original genome size. And the open chromatin interacts region is about 2% of the genome. Take human genome as an example, the possible unique reads for 60M will be, (3G * 70% * 2%)/60M, about 70%. That is about 30% duplication rate. Those hypotheses are consistent with real data of ChIP-seq.' + file_format: "csv" + plot_type: "table" + pconfig: + id: "pairs_reads_stats_table" + table_title: "PAIRS reads stats" + namespace: "PAIRS reads stats" + pairs_reads_detailed_summary: + parent_id: "pairs_qc" + section_name: "PAIRS reads detailed summary" + description: "Cis-to-trans ratio is computed as the ratio of long-range cis reads (>20kb) to trans reads plus long-range cis reads. Typically, cis-to-trans ratio higher than 40% is required. Percentage of long-range cis reads is the ratio of long-range cis reads to total number of reads. Minimum 15% is required and 40% or higher suggests a good library(doi:10.1016/j.cell.2014.11.021). Convergence is determined as standard deviation of proportions of four read orientations to be <0.002 (Very good) or <0.05 (Good) (See section Proportion of read orientation versus genomic separation in the pairsqc_report file under pairs/QC/). The slope of log10 contact probability vs distance between 10kb ~ 300kb representing TAD is also provided as well. 
(See section Contact probability versus genomic separation in the pairsqc_report file under pairs/QC/)" + file_format: "csv" + plot_type: "table" + pconfig: + id: "pairs_reads_detailed_summary_table" + table_title: "PAIRS reads detailed summary" + namespace: "PAIRS reads detailed summary" + format: "{:.2f}" + pairs_reads_proportion: + parent_id: "pairs_qc" + section_name: "Proportion of read orientation versus genomic separation" + description: "Contact proportion of reads are shown, stratified by read orientation. Good four cutter samples would show no bias in propotion >3kb(10^3.5)." + file_format: "json" + plot_type: "scatter" + pconfig: + id: "pairs_reads_proportion_vs_distance" + title: "Reads distance vs proportions" + xlab: "distance (kb)" + ylab: "Propotion" + ymin: 0 + ymax: 1 + marker_size: 3 + marker_line_colour: "#999999" + atac_peaks_qc: + parent_id: "pairs_qc" + section_name: "ATAC R2 reads quality control" + description: "ATAC reads quality control including FRiP, TSS enrichment score and so on." + helptext: | + *Fraction of reads in peaks (FRiP)* – Fraction of all mapped reads that fall into the called peak regions, + i.e. usable reads in significantly enriched peaks divided by all usable reads. + In general, FRiP scores correlate positively with the number of regions. + ([Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831](https://pubmed.ncbi.nlm.nih.gov/22955991/). + *FRiP should be greater than 1%* - *TSS enrichment score* is a ratio between aggregate distribution of reads centered on TSSs - and that flanking the corresponding TSSs. - TSS score = the depth of TSS (each 100bp window within 1000 bp each side) / the depth of end flanks (100bp each end). - TSSE score = max(mean(TSS score in each window)). - TSS enrichment score is calculated according to the definition at [ENCODE TSSE score](https://www.encodeproject.org/data-standards/terms/#enrichment). 
- Transcription start site (TSS) enrichment values are dependent on the reference files used; - cutoff values for high quality data are listed in the following table from - [ENCODE ATAC-seq QC](https://www.encodeproject.org/atac-seq/). - For HiCAR, only R2 reads is used in TSSE score calculation and reads enrichment is following - exponential ratio, we adjusted the TSSE score range. - TSSEscore < 2.5: concerning; - TSSEscore 2.5-3.5: acceptable; - TSSEscore > 3.5: Ideal. + *TSS enrichment score* is a ratio between aggregate distribution of reads centered on TSSs + and that flanking the corresponding TSSs. + TSS score = the depth of TSS (each 100bp window within 1000 bp each side) / the depth of end flanks (100bp each end). + TSSE score = max(mean(TSS score in each window)). + TSS enrichment score is calculated according to the definition at [ENCODE TSSE score](https://www.encodeproject.org/data-standards/terms/#enrichment). + Transcription start site (TSS) enrichment values are dependent on the reference files used; + cutoff values for high quality data are listed in the following table from + [ENCODE ATAC-seq QC](https://www.encodeproject.org/atac-seq/). + For HiCAR, only R2 reads is used in TSSE score calculation and reads enrichment is following + exponential ratio, we adjusted the TSSE score range. + TSSEscore < 2.5: concerning; + TSSEscore 2.5-3.5: acceptable; + TSSEscore > 3.5: Ideal. - Promoter/Transcript body proportions is test for the R2 reads enriched in promoters or in transcript body. - file_format: 'csv' - plot_type: 'table' - pconfig: - id: 'atac_peaks_qc_table' - table_title: 'ATAC R2 reads quality control' - namespace: 'ATAC R2 reads quality control' - maps_qc: - parent_id: 'pairs_qc' - section_name: 'MAPS peak calling summary' - description: 'MAPS summary is output of MAPS.
"AND" set: Bin pairs with both ends overlapping two anchors of interaction.
"XOR" set: Bin pairs with one end overlapping one anchor of the interaction.
Singletons are defined as isolated significant bin pairs without adjacent ones.' - file_format: 'csv' - plot_type: 'table' - pconfig: - id: 'maps_peaks_qc_table' - table_title: 'MAPS peak calling summary' - namespace: 'MAPS peak calling summary' - sample_pca: - parent_id: 'pairs_qc' - section_name: 'PCA analysis' - description: 'PCA analysis by edgeR.' - file_format: 'json' - plot_type: 'scatter' - pconfig: - id: 'sample_pca' - title: 'PCA analysis by edgeR' - xlab: 'PC1' - ylab: 'PC2' - marker_size: 3 - pairs_peaks_genomic_dist: - parent_id: 'pairs_qc' - section_name: 'Peaks genomic element distribution' - description: "The genomic element distribution of the peaks. Promoter region is defined as upstream 2000 and downstream 500 from TSS. geneDownstream is defeined as downstream 2000 from TES." - file_format: "png" - other_side_genomic_dist: - parent_id: 'pairs_qc' - section_name: 'The genomeic element distribution for the peaks interacting with promoters' - description: "The genomic element distribution of the other sites of promoter peaks. Promoter region is defined as upstream 2000 and downstream 500 from TSS. geneDownstream is defeined as downstream 2000 from TES." - file_format: "png" + Promoter/Transcript body proportions is test for the R2 reads enriched in promoters or in transcript body. + file_format: "csv" + plot_type: "table" + pconfig: + id: "atac_peaks_qc_table" + table_title: "ATAC R2 reads quality control" + namespace: "ATAC R2 reads quality control" + maps_qc: + parent_id: "pairs_qc" + section_name: "MAPS peak calling summary" + description: 'MAPS summary is output of MAPS.
"AND" set: Bin pairs with both ends overlapping two anchors of interaction.
"XOR" set: Bin pairs with one end overlapping one anchor of the interaction.
Singletons are defined as isolated significant bin pairs without adjacent ones.' + file_format: "csv" + plot_type: "table" + pconfig: + id: "maps_peaks_qc_table" + table_title: "MAPS peak calling summary" + namespace: "MAPS peak calling summary" + sample_pca: + parent_id: "pairs_qc" + section_name: "PCA analysis" + description: 'PCA analysis by edgeR.' + file_format: "json" + plot_type: "scatter" + pconfig: + id: "sample_pca" + title: "PCA analysis by edgeR" + xlab: "PC1" + ylab: "PC2" + marker_size: 3 + pairs_peaks_genomic_dist: + parent_id: "pairs_qc" + section_name: "Peaks genomic element distribution" + description: "The genomic element distribution of the peaks. Promoter region is defined as upstream 2000 and downstream 500 from TSS. geneDownstream is defeined as downstream 2000 from TES." + file_format: "png" + other_side_genomic_dist: + parent_id: "pairs_qc" + section_name: "The genomeic element distribution for the peaks interacting with promoters" + description: "The genomic element distribution of the other sites of promoter peaks. Promoter region is defined as upstream 2000 and downstream 500 from TSS. geneDownstream is defeined as downstream 2000 from TES." + file_format: "png" diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..e7a0030d --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,11 @@ +report_comment: > + This report has been generated by the nf-core/hicar + analysis pipeline. For information about how to interpret these results, please see the + documentation. 
+report_section_order: + software_versions: + order: -1000 + "nf-core-hicar-summary": + order: -1001 + +export_plots: true diff --git a/assets/schema_input.json b/assets/schema_input.json index 79d37df9..700c6c08 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -34,18 +34,13 @@ } ] }, - "md5_1" { + "md5_1": { "type": "string" }, - "md5_2" { + "md5_2": { "type": "string" } }, - "required": [ - "group", - "replicate", - "fastq_1", - "fastq_2" - ] + "required": ["group", "replicate", "fastq_1", "fastq_2"] } } diff --git a/assets/test_full_aws_samplesheet.csv b/assets/test_full_aws_samplesheet.csv index 04c007d4..2603910b 100644 --- a/assets/test_full_aws_samplesheet.csv +++ b/assets/test_full_aws_samplesheet.csv @@ -1,5 +1,5 @@ group,replicate,fastq_1,fastq_2,md5_1,md5_2 -WT,1,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/WT_rep1_R1.fastq.gz,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/WT_rep1_R2.fastq.gz,, -WT,2,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/WT_rep2_R1.fastq.gz,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/WT_rep2_R2.fastq.gz,, -KD,1,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/KD_rep1_R1.fastq.gz,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/KD_rep1_R2.fastq.gz,, -KD,2,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/KD_rep2_R1.fastq.gz,https://dusom-cellbio.eso.duhs.duke.edu/dusom-cellbio-regeneromics-public/test/KD_rep2_R2.fastq.gz,, +WT,1,s3://nf-core-awsmegatests/hicar/input_data/WT_rep1_R1.fastq.gz,s3://nf-core-awsmegatests/hicar/input_data/WT_rep1_R2.fastq.gz,82314987c8576698bd81ea9ff60c5920,f4002d59af7fe0d9bc7bf83d20d6c8b8 
+WT,2,s3://nf-core-awsmegatests/hicar/input_data/WT_rep2_R1.fastq.gz,s3://nf-core-awsmegatests/hicar/input_data/WT_rep2_R2.fastq.gz,23f159ab88a028ee73672ad3bea2be7b,2757df620aed0e1003e3d18c951b9ef7 +KD,1,s3://nf-core-awsmegatests/hicar/input_data/KD_rep1_R1.fastq.gz,s3://nf-core-awsmegatests/hicar/input_data/KD_rep1_R2.fastq.gz,cec77add4ab56e2acdb64aa02f4e0fe6,2c109d4493862e177872c42b5fd12508 +KD,2,s3://nf-core-awsmegatests/hicar/input_data/KD_rep2_R1.fastq.gz,s3://nf-core-awsmegatests/hicar/input_data/KD_rep2_R2.fastq.gz,226198081c40f3e35790a05979d494dd,4a7c59d3dc4b5d3f01d1aaf83f8d433a diff --git a/docs/README.md b/docs/README.md index c0783f6f..23ac670c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/hicar documentation is split into the following pages: -* [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. 
You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 035f71d2..ec20e3f5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,31 +10,52 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -* [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -* [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -* [genome](#genome) - The genome related files used in the pipeline. -* [checksums](#checksums) - The checksums (md5) of decompressed fastq files. -* [FastQC](#fastqc) - Raw read QC -* [bwa](#alignment) - The alignments of trimmed reads. -* [pairs](#pairs) - The interaction pairs and their quality control reports. -* [cooler](#cooler) - Cooler files for visualization -* [ATACpeak](#call-peaks-for-r2-reads) - The peaks called for ATAC reads (R2 reads). -* [MAPSpeak](#chromatin-interactions) - The chromatin interactions determined by MAPS. -* [DifferentialAnalyis](#differential-analysis) - Differential analysis for chromatin interactions. -* [fragmentPeak](#call-peaks-for-fragment-reads) - The peaks called for R1 reads. -* [HiCARpeak](#high-resolution-interactions) - The high resolution loops called for R1 and R2 peaks for motif analysis. -* [circos](#circos) - The circos plots. -* [igv.js](#igv) - The track files which can be viewed by web-server. +- [FastQC](#fastqc) - Raw read QC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [genome](#genome) - The genome related files used in the pipeline. 
+- [checksums](#checksums) - The checksums (md5) of decompressed fastq files. +- [bwa](#alignment) - The alignments of trimmed reads. +- [pairs](#pairs) - The interaction pairs and their quality control reports. +- [cooler](#cooler) - Cooler files for visualization +- [ATACpeak](#call-peaks-for-r2-reads) - The peaks called for ATAC reads (R2 reads). +- [MAPSpeak](#chromatin-interactions) - The chromatin interactions determined by MAPS. +- [DifferentialAnalyis](#differential-analysis) - Differential analysis for chromatin interactions. +- [fragmentPeak](#call-peaks-for-fragment-reads) - The peaks called for R1 reads. +- [HiCARpeak](#high-resolution-interactions) - The high resolution loops called for R1 and R2 peaks for motif analysis. +- [circos](#circos) - The circos plots. +- [igv.js](#igv) - The track files which can be viewed by web-server. + +### FastQC + +
+Output files + +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. ### MultiQC
Output files -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: directory containing static images from the report in various formats. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -47,10 +68,10 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
@@ -61,7 +82,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -* `checksums/*.txt`: md5 checksums. +- `checksums/*.txt`: md5 checksums.
@@ -72,44 +93,23 @@ The md5 checksums of decompressed fastq files. This information can be used for
Output files -* `genome/` - * `genome.fa.sizes`: chromosome sizes file - * `genome.fa.fai`: genome index file - * `filtered/genome.include_regions.bed`: filtered genome by blacklist - * `digest/*`: genomic features files digested by given restriction enzyme +- `genome/` + - `genome.fa.sizes`: chromosome sizes file + - `genome.fa.fai`: genome index file + - `filtered/genome.include_regions.bed`: filtered genome by blacklist + - `digest/*`: genomic features files digested by given restriction enzyme
If the mappability file is not provided in the profile file, the [GenMap](https://pubmed.ncbi.nlm.nih.gov/32246826/) will be used to create the mappability file with read length set to 50. -### FastQC - -
-Output files - -* `fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. - -
- -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. - ### Alignment
Output files -* `bwa/mapped/bam/*.(bam|bai)`: The files resulting from the alignment of individual libraries will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. -* `bwa/mapped/QC/*`: The stats of mapping results. +- `bwa/mapped/bam/*.(bam|bai)`: The files resulting from the alignment of individual libraries will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. +- `bwa/mapped/QC/*`: The stats of mapping results.
@@ -120,10 +120,10 @@ Adapter-trimmed reads are mapped to the reference assembly using [BWA::mem](http
Output files -* `pairs/` - * `raw/*`: The raw reads pairs for each sample and stats for the pairs. - * `filtered/*`: The filtered files in hdf5 format. The hdf5 files in the folder can be used to generate virtual 4C plots. - * `QC/*`: The quality analysis results. +- `pairs/` + - `raw/*`: The raw reads pairs for each sample and stats for the pairs. + - `filtered/*`: The filtered files in hdf5 format. The hdf5 files in the folder can be used to generate virtual 4C plots. + - `QC/*`: The quality analysis results.
@@ -132,23 +132,23 @@ The quality analysis for filtered pairs were done by [pairsqc](https://github.co The hdf5 filtered pairs contains following groups: -* `header/`, including total reads, chromosome name and sizes, tile width for index. - * `header/chrom_sizes`, COMPOUND - * `header/header`, STRING - * `header/tile_width`, INTEGER - * `header/total`, INTEGER -* `data/`, pairs in path `data/chr1_for_R1_reads/chr2_for_R2_reads/tileIndex1_tileIndex2/`, the tile index is calculated by the ceiling of posistion divided by tile width. - * `position`, genomic location saved in path `data/chr1/chr2/tileIndex1_tileIndex2/position` - * `strand`, strand info saved in `path data/chr1/chr2/tileIndex1_tileIndex2/strand` +- `header/`, including total reads, chromosome name and sizes, tile width for index. + - `header/chrom_sizes`, COMPOUND + - `header/header`, STRING + - `header/tile_width`, INTEGER + - `header/total`, INTEGER +- `data/`, pairs in path `data/chr1_for_R1_reads/chr2_for_R2_reads/tileIndex1_tileIndex2/`, the tile index is calculated by the ceiling of position divided by tile width. + - `position`, genomic location saved in path `data/chr1/chr2/tileIndex1_tileIndex2/position` + - `strand`, strand info saved in `path data/chr1/chr2/tileIndex1_tileIndex2/strand` ### Cooler
Output files -* `cooler/mcool/*`: The mcool files for each group. -* `cooler/hic/*`: The .hic files for each group. -* `cooler/MAPS/*`: The mcool and .hic files normalized by MAPS fitting model for each group. +- `cooler/mcool/*`: The mcool files for each group. +- `cooler/hic/*`: The .hic files for each group. +- `cooler/MAPS/*`: The mcool and .hic files normalized by MAPS fitting model for each group.
@@ -168,10 +168,10 @@ Visualizing the `.hic` file with Juicebox is an alternative when `higlass-manage
Output files -* `ATACpeak/` - * `R2_bigwig/*`: The bigWig files of R2_reads. - * `peaks_per_Group/*`: The called peaks for each group. - * `merged_peaks/*`: The merged peaks for all groups. +- `ATACpeak/` + - `R2_bigwig/*`: The bigWig files of R2_reads. + - `peaks_per_Group/*`: The called peaks for each group. + - `merged_peaks/*`: The merged peaks for all groups.
@@ -184,17 +184,17 @@ By default, the peaks are called by `--no-model` parameter for the R2 reads in B
Output files -* `MAPSpeak/` - * `bin*/*`: The annotated TADs and loops. - * `*`: The tables of chromatin interactions with p-value and fdr. +- `MAPSpeak/` + - `bin*/*`: The annotated TADs and loops. + - `*`: The tables of chromatin interactions with p-value and fdr.
The chromatin interactions are called by [MAPS](https://pubmed.ncbi.nlm.nih.gov/30986246/). MAPS is a model-based analysis of long-range chromatin interactions for `AND` or `XOR` reads. -* `AND` set: bin pairs with `both` ends overlapping two anchors of interaction. -* `XOR` set: bin pairs with one end overlapping one anchor of the interaction. +- `AND` set: bin pairs with `both` ends overlapping two anchors of interaction. +- `XOR` set: bin pairs with one end overlapping one anchor of the interaction. It will remove the systematic bias by accessibility. By default, the interactions are called by `positive poisson` regression model @@ -205,7 +205,7 @@ and then filtered by coverage (default is 12 per bin), fold change (default is 2
Output files -* `DifferentialAnalysis/*`: Differential analysis results. +- `DifferentialAnalysis/*`: Differential analysis results.
@@ -220,12 +220,12 @@ for the overlap features (gene level) or nearest features.
Output files -* `fragmentPeak/` - * `R1_bigwig/` - * `byGroup/*`: The bigWig files of R1_reads for each group. - * `pos1/*`: The bigWig files of 5' ends of R1_reads. - * `merged_peaks/merged_peak.bed`: The bed file with merged R1 peaks from all samples. - * `peaks_per_Group/*`: The MACS2 output for each group. +- `fragmentPeak/` + - `R1_bigwig/` + - `byGroup/*`: The bigWig files of R1_reads for each group. + - `pos1/*`: The bigWig files of 5' ends of R1_reads. + - `merged_peaks/merged_peak.bed`: The bed file with merged R1 peaks from all samples. + - `peaks_per_Group/*`: The MACS2 output for each group.
@@ -237,9 +237,9 @@ The fragment reads will be more noisy compare to ATAC (R2) reads and the p value
Output files -* `HiPeak` - * `distalpair_in_peak_bams_4_igv/*`: The psuedo-bam file for visualization. - * `*/`: The high resolution interactions in bedpe format and tab delimited format. +- `HiPeak` + - `distalpair_in_peak_bams_4_igv/*`: The psuedo-bam file for visualization. + - `*/`: The high resolution interactions in bedpe format and tab delimited format.
@@ -255,7 +255,7 @@ The output of HiPeak is designed for motif enrichment analysis.
Output files -* `circos/*`: circos plots. +- `circos/*`: circos plots.
@@ -267,7 +267,7 @@ circos is used to produce the genomic view of chromatin interactions for HiPeak.
Output files -* `igv.js/*`: index.html, readme.txt and track_files.txt. +- `igv.js/*`: index.html, readme.txt and track_files.txt.
diff --git a/docs/usage.md b/docs/usage.md index 24b6e584..421c6316 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -50,14 +50,14 @@ TREATMENT,3,AEG588A6_S6_L003_R1_001.fastq.gz,AEG588A6_S6_L003_R2_001.fastq.gz,, TREATMENT,3,AEG588A6_S6_L004_R1_001.fastq.gz,AEG588A6_S6_L004_R2_001.fastq.gz,, ``` -| Column | Description | -|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `group` | Custom group name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `replicate` | Biological replicates of the samples. | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `md5_1` | Checksum for fastq_1. The checksums of the files will be check to make sure the file is not truncated if provided. | -| `md5_2` | Checksum for fastq_2. The checksums of the files will be check to make sure the file is not truncated if provided. | +| Column | Description | +| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `group` | Custom group name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `replicate` | Biological replicates of the samples. | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". 
| +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `md5_1` | Checksum for fastq_1. The checksums of the files will be check to make sure the file is not truncated if provided. | +| `md5_2` | Checksum for fastq_2. The checksums of the files will be check to make sure the file is not truncated if provided. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -66,7 +66,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```console -nextflow run nf-core/hicar --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/hicar --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -74,9 +74,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: ```console -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -115,25 +115,25 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles. If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. 
-* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -* `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters ### `-resume` -Specify this when restarting a pipeline. 
Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -183,6 +183,7 @@ process { ``` > **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. ### Updating containers @@ -193,35 +194,35 @@ The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementatio 2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) 3. 
Create the custom config accordingly: - * For Docker: + - For Docker: - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` - * For Singularity: + - For Singularity: - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` - * For Conda: + - For Conda: - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } + } + ``` > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. @@ -253,36 +254,35 @@ NXF_OPTS='-Xms1g -Xmx4g' ## Troubleshooting -* Error: `The exit status of the task that caused the workflow execution to fail was: null.` +- Error: `The exit status of the task that caused the workflow execution to fail was: null.` Check the files are readable for the workflow. -* Error: `Session aborted -- Cause: Unable to execute HTTP request: ngi-igenomes.s3.amazonaws.com` +- Error: `Session aborted -- Cause: Unable to execute HTTP request: ngi-igenomes.s3.amazonaws.com` The internet connection reached the limitation. Try to resume the analysis one hour later. -* Error: `PaddingError: Placeholder of length '80' too short in package` +- Error: `PaddingError: Placeholder of length '80' too short in package` There is no easy answer here. 
The new `conda` packages should having a longer prefix (255 characters). The possible solution now is that try to run the pipeline in a shorter folder path, if at all possible. -* Error: `Not a conda environment` or `command not found` +- Error: `Not a conda environment` or `command not found` There is something going wrong with the conda environment building. Just try to remove the conda environment folder and resume the run. -* Error: `unable to load shared object 'work/conda/env-xxxxxx/lib/R/library/rtracklayer/libs/rtracklayer.dylib', dlopen(rtracklayer.dylib, 6) Library not loaded: @rpath/libssl.1.1.dylib` +- Error: `unable to load shared object 'work/conda/env-xxxxxx/lib/R/library/rtracklayer/libs/rtracklayer.dylib', dlopen(rtracklayer.dylib, 6) Library not loaded: @rpath/libssl.1.1.dylib` The openssl installation have issues for `conda`. Try to reinstall it by `conda activate work/conda/env-xxxxxx && conda install --force-reinstall -y openssl` -* Error: `error Can't locate Statistics/Basic.pm` +- Error: `error Can't locate Statistics/Basic.pm` The perl-statistics-basic installed in wrong location. Try to reinstall it by `conda activate work/conda/env-xxxxx && perl -MCPAN -e 'CPAN::install(Statistics::Basic)'` -* `Error in result[[njob]] <- value : - attempt to select less than one element in OneIndex` +- `Error in result[[njob]] <- value : attempt to select less than one element in OneIndex` The error may caused by out of memory (although the error message seems to be unrelated to memory). Try to set `--peak_pair_block` to a smaller number less than 1e9. 
diff --git a/modules.json b/modules.json index fefa4627..de879c3c 100644 --- a/modules.json +++ b/modules.json @@ -13,7 +13,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bwa/mem": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "950700bcdc0e9a2b6883d40d2c51c6fc435cd714" }, "cooler/digest": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -43,7 +43,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "macs2/callpeak": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "f0800157544a82ae222931764483331a81812012" }, "multiqc": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -70,25 +70,25 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "samtools/flagstat": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/idxstats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/merge": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/sort": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/stats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "samtools/view": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" }, "ucsc/bedclip": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -104,4 +104,4 @@ } } } -} \ No newline at end of file +} diff --git a/modules/local/cooler/cload/meta.yml b/modules/local/cooler/cload/meta.yml index 8ac75911..ddb0443b 100644 --- a/modules/local/cooler/cload/meta.yml +++ 
b/modules/local/cooler/cload/meta.yml @@ -9,7 +9,7 @@ tools: documentation: https://cooler.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" - licence: ['BSD-3-clause'] + licence: ["BSD-3-clause"] input: - meta: diff --git a/modules/local/cooler/load/meta.yml b/modules/local/cooler/load/meta.yml index 231a969e..3d08aca5 100644 --- a/modules/local/cooler/load/meta.yml +++ b/modules/local/cooler/load/meta.yml @@ -9,7 +9,7 @@ tools: documentation: https://cooler.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" - licence: ['BSD-3-clause'] + licence: ["BSD-3-clause"] input: - meta: diff --git a/modules/local/cooler/merge/meta.yml b/modules/local/cooler/merge/meta.yml index eb496bff..db6e530e 100644 --- a/modules/local/cooler/merge/meta.yml +++ b/modules/local/cooler/merge/meta.yml @@ -9,7 +9,7 @@ tools: documentation: https://cooler.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" - licence: ['BSD-3-clause'] + licence: ["BSD-3-clause"] input: - meta: diff --git a/modules/local/cooler/zoomify/meta.yml b/modules/local/cooler/zoomify/meta.yml index 5445f4bb..b4f0a83d 100644 --- a/modules/local/cooler/zoomify/meta.yml +++ b/modules/local/cooler/zoomify/meta.yml @@ -9,7 +9,7 @@ tools: documentation: https://cooler.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" - licence: ['BSD-3-clause'] + licence: ["BSD-3-clause"] input: - meta: diff --git a/modules/nf-core/modules/bwa/mem/main.nf b/modules/nf-core/modules/bwa/mem/main.nf index 9a91c77f..27ea6f42 100644 --- a/modules/nf-core/modules/bwa/mem/main.nf +++ b/modules/nf-core/modules/bwa/mem/main.nf @@ -2,10 +2,10 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 
"bioconda::bwa=0.7.17 bioconda::samtools=1.12" : null) + conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:66ed1b38d280722529bb8a0167b0cf02f8a0b488-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:66ed1b38d280722529bb8a0167b0cf02f8a0b488-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:c56a3aabc8d64e52d5b9da1e8ecec2031668596d-0' : + 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:c56a3aabc8d64e52d5b9da1e8ecec2031668596d-0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/modules/macs2/callpeak/meta.yml b/modules/nf-core/modules/macs2/callpeak/meta.yml index 974ea33a..982bc5b2 100644 --- a/modules/nf-core/modules/macs2/callpeak/meta.yml +++ b/modules/nf-core/modules/macs2/callpeak/meta.yml @@ -28,8 +28,7 @@ input: description: The control file - macs2_gsize: type: string - description: - Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), + description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for C. elegans (9e7) and 'dm' for fruitfly (1.2e8) output: diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf index c267922b..9e3440ac 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? 
"bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/modules/samtools/idxstats/main.nf index 8a057413..7d5cee17 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/modules/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/modules/samtools/index/main.nf index dfe0234f..e41cdcc8 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/modules/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf index be6fe32e..7b771677 100644 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ b/modules/nf-core/modules/samtools/merge/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(input_files) diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf index 0f2237cc..0e2de8ba 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/modules/samtools/stats/main.nf index f6fe3bfe..6efc9d9a 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/modules/samtools/stats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf index aee21a4e..75aad063 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" input: tuple val(meta), path(input) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c0fd3e8..92c66812 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -294,10 +291,7 @@ "hidden": false, "help_text": "pospoisson for positive poisson regression, negbinom for negative binomial. default is pospoisson.", "default": "pospoisson", - "enum": [ - "pospoisson", - "negbinom" - ] + "enum": ["pospoisson", "negbinom"] }, "merge_map_py_source": { "type": "string", @@ -525,14 +519,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { diff --git a/subworkflows/local/hipeak.nf b/subworkflows/local/hipeak.nf index c20805f2..c2f7f555 100644 --- a/subworkflows/local/hipeak.nf +++ b/subworkflows/local/hipeak.nf @@ -28,7 +28,7 @@ workflow HI_PEAK { main: //create count table //input=val(meta), path(r2peak), path(r1peak), path(distalpair), val(chrom1) - chrom1 = chrom_size.splitCsv(sep:"\t", header: false, strip: true).map{it[0]} + chrom1 = chrom_size.splitCsv(sep:"\t", header: false, strip: true).filter{ !(it[0] =~ /[_M]/) }.map{it[0]} ch_version = PREPARE_COUNTS(peaks.combine(chrom1)).versions counts = PREPARE_COUNTS.out.counts.map{[it[0].id, it[1]]} .groupTuple() diff --git a/workflows/hicar.nf b/workflows/hicar.nf index ea22670a..ef6f394e 100644 --- a/workflows/hicar.nf +++ b/workflows/hicar.nf @@ -44,13 +44,9 @@ if(params.anchor_peaks){ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() -ch_circos_config = file("$projectDir/assets/circos.conf", checkIfExists: true) +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_circos_config = file("$projectDir/assets/circos.conf", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~