From 004f56db2819441c05e52d8bcbc0275862b2e6ed Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 4 Mar 2024 18:55:31 +0100 Subject: [PATCH 1/5] update sortmerna functionalities --- main.nf | 8 ++++- modules/nf-core/sortmerna/nextflow.config | 2 +- nextflow_schema.json | 8 +++++ subworkflows/local/prepare_genome/main.nf | 35 +++++++++++++++++++ .../local/prepare_genome/nextflow.config | 13 +++++++ workflows/rnaseq/main.nf | 23 ++++++++++-- 6 files changed, 85 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 59001a4f1..1b98f2341 100755 --- a/main.nf +++ b/main.nf @@ -37,6 +37,7 @@ params.gtf = getGenomeAttribute('gtf') params.gff = getGenomeAttribute('gff') params.gene_bed = getGenomeAttribute('bed12') params.bbsplit_index = getGenomeAttribute('bbsplit') +params.sortmerna_index = getGenomeAttribute('sortmerna') params.star_index = getGenomeAttribute('star') params.hisat2_index = getGenomeAttribute('hisat2') params.rsem_index = getGenomeAttribute('rsem') @@ -70,18 +71,21 @@ workflow NFCORE_RNASEQ { params.gene_bed, params.splicesites, params.bbsplit_fasta_list, + params.ribo_database_manifest, params.star_index, params.rsem_index, params.salmon_index, params.kallisto_index, params.hisat2_index, params.bbsplit_index, + params.sortmerna_index, params.gencode, params.featurecounts_group_type, params.aligner, params.pseudo_aligner, params.skip_gtf_filter, params.skip_bbsplit, + !params.remove_ribo_rna, params.skip_alignment, params.skip_pseudo_alignment ) @@ -114,7 +118,9 @@ workflow NFCORE_RNASEQ { PREPARE_GENOME.out.salmon_index, PREPARE_GENOME.out.kallisto_index, PREPARE_GENOME.out.bbsplit_index, - PREPARE_GENOME.out.splicesites + PREPARE_GENOME.out.sortmerna_index, + PREPARE_GENOME.out.splicesites, + !params.sortmerna_index && params.remove_ribo_rna ) ch_versions = ch_versions.mix(RNASEQ.out.versions) diff --git a/modules/nf-core/sortmerna/nextflow.config b/modules/nf-core/sortmerna/nextflow.config index 8771660ce..8322435dc 100644 --- 
a/modules/nf-core/sortmerna/nextflow.config +++ b/modules/nf-core/sortmerna/nextflow.config @@ -1,7 +1,7 @@ if (params.remove_ribo_rna) { process { withName: 'SORTMERNA' { - ext.args = '--num_alignments 1 -v' + ext.args = '--num_alignments 1 -v --index 0' publishDir = [ [ path: { "${params.outdir}/sortmerna" }, diff --git a/nextflow_schema.json b/nextflow_schema.json index e5195cade..5a93d7060 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -267,6 +267,14 @@ "description": "Path to directory or tar.gz archive for pre-built BBSplit index.", "help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs." }, + "sortmerna_index": { + "type": "string", + "format": "path", + "exists": true, + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built SortMeRNA index.", + "help_text": "The SortMeRNA index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--sortmerna_index` for future runs." 
+ }, "remove_ribo_rna": { "type": "boolean", "fa_icon": "fas fa-trash-alt", diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 58a9b293c..f8fb8f2d1 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -10,6 +10,7 @@ include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gun include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip' include { UNTAR as UNTAR_BBSPLIT_INDEX } from '../../../modules/nf-core/untar' +include { UNTAR as UNTAR_SORTMERNA_INDEX } from '../../../modules/nf-core/untar' include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar' include { UNTAR as UNTAR_RSEM_INDEX } from '../../../modules/nf-core/untar' include { UNTAR as UNTAR_HISAT2_INDEX } from '../../../modules/nf-core/untar' @@ -20,6 +21,7 @@ include { CUSTOM_CATADDITIONALFASTA } from '../../../modules/nf-core/cus include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes' include { GFFREAD } from '../../../modules/nf-core/gffread' include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit' +include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna' include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate' include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites' include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build' @@ -43,18 +45,21 @@ workflow PREPARE_GENOME { gene_bed // file: /path/to/gene.bed splicesites // file: /path/to/splicesites.txt bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt + sortmerna_fasta_list // file: /path/to/sortmerna_fasta_list.txt star_index // directory: /path/to/star/index/ rsem_index // directory: /path/to/rsem/index/ salmon_index // directory: /path/to/salmon/index/ kallisto_index // directory: /path/to/kallisto/index/ hisat2_index // directory: 
/path/to/hisat2/index/ bbsplit_index // directory: /path/to/rsem/index/ + sortmerna_index // directory: /path/to/sortmerna/index/ gencode // boolean: whether the genome is from GENCODE featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2' pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner' skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads + skip_sortmerna // boolean: Skip sortmerna for removal of non-reference genome reads skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline @@ -188,6 +193,7 @@ workflow PREPARE_GENOME { // def prepare_tool_indices = [] if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' } + if (!skip_sortmerna) { prepare_tool_indices << 'sortmerna' } if (!skip_alignment) { prepare_tool_indices << aligner } if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner } @@ -218,6 +224,34 @@ workflow PREPARE_GENOME { } } + // + // Uncompress sortmerna index or generate from scratch if required + // + ch_sortmerna_index = Channel.empty() + if ('sortmerna' in prepare_tool_indices) { + if (sortmerna_index) { + if (sortmerna_index.endsWith('.tar.gz')) { + ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_SORTMERNA_INDEX.out.versions) + } else { + ch_sortmerna_index = Channel.value(file(sortmerna_index)) + } + } else { + ch_sortmerna_fastas = Channel.from(file(sortmerna_fasta_list).readLines()) + .map 
{ row -> file(row, checkIfExists: true) } + .collect() + .map{ ['rrna_refs', it] } + + SORTMERNA_INDEX ( + Channel.of([[],[]]), + ch_sortmerna_fastas, + Channel.of([[],[]]) + ) + ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() + ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions) + } + } + // // Uncompress STAR index or generate from scratch if required // @@ -336,6 +370,7 @@ workflow PREPARE_GENOME { chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes) splicesites = ch_splicesites // channel: path(genome.splicesites.txt) bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/) + sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/) star_index = ch_star_index // channel: path(star/index/) rsem_index = ch_rsem_index // channel: path(rsem/index/) hisat2_index = ch_hisat2_index // channel: path(hisat2/index/) diff --git a/subworkflows/local/prepare_genome/nextflow.config b/subworkflows/local/prepare_genome/nextflow.config index e02648197..cb78cb9e3 100644 --- a/subworkflows/local/prepare_genome/nextflow.config +++ b/subworkflows/local/prepare_genome/nextflow.config @@ -112,3 +112,16 @@ if (!params.skip_bbsplit && params.bbsplit_fasta_list) { } } } + +if (params.remove_ribo_rna && params.ribo_database_manifest) { + process { + withName: 'SORTMERNA_INDEX' { + ext.args = '--index 1' + publishDir = [ + path: { params.save_reference ? "${params.outdir}/genome/sortmerna" : params.outdir }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? 
filename : null } + ] + } + } +} diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 5c7c2b964..0a8f2f9e9 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -44,6 +44,7 @@ include { SAMTOOLS_SORT } from '../../mod include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap' include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq' include { SORTMERNA } from '../../modules/nf-core/sortmerna' +include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main' include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie' include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts' include { MULTIQC } from '../../modules/nf-core/multiqc' @@ -97,7 +98,9 @@ workflow RNASEQ { ch_salmon_index // channel: path(salmon/index/) ch_kallisto_index // channel: [ meta, path(kallisto/index/) ] ch_bbsplit_index // channel: path(bbsplit/index/) + ch_sortmerna_index // channel: path(sortmerna/index/) ch_splicesites // channel: path(genome.splicesites.txt) + make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna main: @@ -225,13 +228,29 @@ workflow RNASEQ { // // MODULE: Remove ribosomal RNA reads // + // Check rRNA databases for sortmerna if (params.remove_ribo_rna) { ch_ribo_db = file(params.ribo_database_manifest) - ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() + if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} + + ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) + .map { row -> file(row, checkIfExists: true) } + .collect() + .map{ ['rrna_refs', it] } + + if (make_sortmerna_index) { + SORTMERNA_INDEX ( + [[],[]], + ch_sortmerna_fastas, + [[],[]] + ) + ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() + } SORTMERNA ( ch_filtered_reads, - ch_sortmerna_fastas + 
ch_sortmerna_fastas, + ch_sortmerna_index ) .reads .set { ch_filtered_reads } From 1e043da1144b90952c94ef4eda7af35e016fe11f Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Mon, 4 Mar 2024 19:06:37 +0100 Subject: [PATCH 2/5] Update modules/nf-core/sortmerna/nextflow.config --- modules/nf-core/sortmerna/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/sortmerna/nextflow.config b/modules/nf-core/sortmerna/nextflow.config index 8322435dc..953da5a39 100644 --- a/modules/nf-core/sortmerna/nextflow.config +++ b/modules/nf-core/sortmerna/nextflow.config @@ -1,7 +1,7 @@ if (params.remove_ribo_rna) { process { withName: 'SORTMERNA' { - ext.args = '--num_alignments 1 -v --index 0' + ext.args = '--num_alignments 1 -v --index 0' publishDir = [ [ path: { "${params.outdir}/sortmerna" }, From d02c6f662ff12dc59228ae10d83c9a54c5a16ab2 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 4 Mar 2024 20:11:32 +0100 Subject: [PATCH 3/5] update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 581304379..c24a063e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,9 +21,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [PR #1220](https://github.com/nf-core/rnaseq/pull/1220) - Initialise nf-test and add pipeline level test - [PR #1226](https://github.com/nf-core/rnaseq/pull/1226) - Reuse bbsplit index and don't keep overwriting ([#1225](https://github.com/nf-core/rnaseq/issues/1225)) - [PR #1229](https://github.com/nf-core/rnaseq/pull/1229) - Template update for nf-core/tools v2.13.1 +- [PR #1231](https://github.com/nf-core/rnaseq/pull/1231) - Add sortmerna index possibilities ### Parameters +| Old parameter | New parameter | +| ------------- | ------------------- | +| | `--sortmerna_index` | + ### Software dependencies | Dependency | Old version | New version | From d0941f139eec26e355fff13ceb18503b0ded2dde Mon Sep 17 00:00:00 2001 
From: maxulysse Date: Tue, 5 Mar 2024 10:12:49 +0100 Subject: [PATCH 4/5] fix path --- workflows/rnaseq/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 0a8f2f9e9..8549f802e 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -44,7 +44,7 @@ include { SAMTOOLS_SORT } from '../../mod include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap' include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq' include { SORTMERNA } from '../../modules/nf-core/sortmerna' -include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main' +include { SORTMERNA as SORTMERNA_INDEX } from '../../modules/nf-core/sortmerna' include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie' include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts' include { MULTIQC } from '../../modules/nf-core/multiqc' From 04b3581dcdc50326833fc65ae8539783bf820180 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Tue, 5 Mar 2024 10:17:51 +0100 Subject: [PATCH 5/5] Update subworkflows/local/prepare_genome/main.nf Co-authored-by: Jonathan Manning --- subworkflows/local/prepare_genome/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index f8fb8f2d1..ba2ee14dc 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -59,7 +59,7 @@ workflow PREPARE_GENOME { pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. 
Runs in addition to '--aligner' skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads - skip_sortmerna // boolean: Skip sortmerna for removal of non-reference genome reads + skip_sortmerna // boolean: Skip sortmerna for removal of reads mapping to sequences in sortmerna_fasta_list skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline