Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sortmerna usage #1231

Merged
merged 6 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [PR #1220](https://github.com/nf-core/rnaseq/pull/1220) - Initialise nf-test and add pipeline level test
- [PR #1226](https://github.com/nf-core/rnaseq/pull/1226) - Reuse bbsplit index and don't keep overwriting ([#1225](https://github.com/nf-core/rnaseq/issues/1225))
- [PR #1229](https://github.com/nf-core/rnaseq/pull/1229) - Template update for nf-core/tools v2.13.1
- [PR #1231](https://github.com/nf-core/rnaseq/pull/1231) - Add sortmerna index possibilities

### Parameters

| Old parameter | New parameter |
| ------------- | ------------------- |
| | `--sortmerna_index` |

### Software dependencies

| Dependency | Old version | New version |
Expand Down
8 changes: 7 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ params.gtf = getGenomeAttribute('gtf')
params.gff = getGenomeAttribute('gff')
params.gene_bed = getGenomeAttribute('bed12')
params.bbsplit_index = getGenomeAttribute('bbsplit')
params.sortmerna_index = getGenomeAttribute('sortmerna')
params.star_index = getGenomeAttribute('star')
params.hisat2_index = getGenomeAttribute('hisat2')
params.rsem_index = getGenomeAttribute('rsem')
Expand Down Expand Up @@ -70,18 +71,21 @@ workflow NFCORE_RNASEQ {
params.gene_bed,
params.splicesites,
params.bbsplit_fasta_list,
params.ribo_database_manifest,
params.star_index,
params.rsem_index,
params.salmon_index,
params.kallisto_index,
params.hisat2_index,
params.bbsplit_index,
params.sortmerna_index,
params.gencode,
params.featurecounts_group_type,
params.aligner,
params.pseudo_aligner,
params.skip_gtf_filter,
params.skip_bbsplit,
!params.remove_ribo_rna,
params.skip_alignment,
params.skip_pseudo_alignment
)
Expand Down Expand Up @@ -114,7 +118,9 @@ workflow NFCORE_RNASEQ {
PREPARE_GENOME.out.salmon_index,
PREPARE_GENOME.out.kallisto_index,
PREPARE_GENOME.out.bbsplit_index,
PREPARE_GENOME.out.splicesites
PREPARE_GENOME.out.sortmerna_index,
PREPARE_GENOME.out.splicesites,
!params.remove_ribo_rna && params.remove_ribo_rna
)
ch_versions = ch_versions.mix(RNASEQ.out.versions)

Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/sortmerna/nextflow.config

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,14 @@
"description": "Path to directory or tar.gz archive for pre-built BBSplit index.",
"help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs."
},
"sortmerna_index": {
"type": "string",
"format": "path",
"exists": true,
"fa_icon": "fas fa-bezier-curve",
"description": "Path to directory or tar.gz archive for pre-built sortmerna index.",
"help_text": "The sortmerna index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--sortmerna_index` for future runs."
},
"remove_ribo_rna": {
"type": "boolean",
"fa_icon": "fas fa-trash-alt",
Expand Down
35 changes: 35 additions & 0 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gun
include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip'

include { UNTAR as UNTAR_BBSPLIT_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_SORTMERNA_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_RSEM_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_HISAT2_INDEX } from '../../../modules/nf-core/untar'
Expand All @@ -20,6 +21,7 @@ include { CUSTOM_CATADDITIONALFASTA } from '../../../modules/nf-core/cus
include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes'
include { GFFREAD } from '../../../modules/nf-core/gffread'
include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna'
include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate'
include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites'
include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build'
Expand All @@ -43,18 +45,21 @@ workflow PREPARE_GENOME {
gene_bed // file: /path/to/gene.bed
splicesites // file: /path/to/splicesites.txt
bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
sortmerna_fasta_list // file: /path/to/sortmerna_fasta_list.txt
star_index // directory: /path/to/star/index/
rsem_index // directory: /path/to/rsem/index/
salmon_index // directory: /path/to/salmon/index/
kallisto_index // directory: /path/to/kallisto/index/
hisat2_index // directory: /path/to/hisat2/index/
bbsplit_index // directory: /path/to/bbsplit/index/
sortmerna_index // directory: /path/to/sortmerna/index/
gencode // boolean: whether the genome is from GENCODE
featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts
aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'
pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'
skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs
skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads
skip_sortmerna // boolean: Skip sortmerna for removal of ribosomal RNA reads
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline
skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline

Expand Down Expand Up @@ -188,6 +193,7 @@ workflow PREPARE_GENOME {
//
def prepare_tool_indices = []
if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' }
if (!skip_sortmerna) { prepare_tool_indices << 'sortmerna' }
if (!skip_alignment) { prepare_tool_indices << aligner }
if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner }

Expand Down Expand Up @@ -218,6 +224,34 @@ workflow PREPARE_GENOME {
}
}

//
// Uncompress sortmerna index or generate from scratch if required
//
// Resolution order when 'sortmerna' is among the requested tool indices:
//   1. `sortmerna_index` ends with '.tar.gz' -> unpack it with UNTAR_SORTMERNA_INDEX
//   2. `sortmerna_index` is any other path   -> use that path directly as a value channel
//   3. no `sortmerna_index` supplied         -> run SORTMERNA_INDEX on the FASTA files
//      listed (one path per line) in `sortmerna_fasta_list`
// When 'sortmerna' is not requested, the channel stays empty.
//
ch_sortmerna_index = Channel.empty()
if ('sortmerna' in prepare_tool_indices) {
    if (sortmerna_index) {
        if (sortmerna_index.endsWith('.tar.gz')) {
            // Input is wrapped as [ meta, archive ]; keep only the second element of the emitted tuple
            ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { it[1] }
            ch_versions = ch_versions.mix(UNTAR_SORTMERNA_INDEX.out.versions)
        } else {
            ch_sortmerna_index = Channel.value(file(sortmerna_index))
        }
    } else {
        // Channel.fromList replaces the deprecated Channel.from factory; it emits one item per
        // manifest line. Each line is resolved to a file (failing if it does not exist), then all
        // files are gathered into a single [ 'rrna_refs', [ files ] ] tuple.
        ch_sortmerna_fastas = Channel.fromList(file(sortmerna_fasta_list).readLines())
            .map { row -> file(row, checkIfExists: true) }
            .collect()
            .map{ ['rrna_refs', it] }

        // First and third inputs are empty placeholder tuples; only the FASTA references are supplied.
        // NOTE(review): presumably these are the reads and pre-built index slots of the module - confirm against the module's input block.
        SORTMERNA_INDEX (
            Channel.of([[],[]]),
            ch_sortmerna_fastas,
            Channel.of([[],[]])
        )
        // first() turns the output into a value channel
        ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
        ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions)
    }
}

//
// Uncompress STAR index or generate from scratch if required
//
Expand Down Expand Up @@ -336,6 +370,7 @@ workflow PREPARE_GENOME {
chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
splicesites = ch_splicesites // channel: path(genome.splicesites.txt)
bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/)
sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/)
star_index = ch_star_index // channel: path(star/index/)
rsem_index = ch_rsem_index // channel: path(rsem/index/)
hisat2_index = ch_hisat2_index // channel: path(hisat2/index/)
Expand Down
13 changes: 13 additions & 0 deletions subworkflows/local/prepare_genome/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,16 @@ if (!params.skip_bbsplit && params.bbsplit_fasta_list) {
}
}
}

// Process settings for SORTMERNA_INDEX.
// Applied only when both params.remove_ribo_rna and params.ribo_database_manifest are set.
if (params.remove_ribo_rna && params.ribo_database_manifest) {
process {
withName: 'SORTMERNA_INDEX' {
// NOTE(review): presumably restricts sortmerna to building the index only - confirm against the sortmerna CLI documentation
ext.args = '--index 1'
publishDir = [
// Publish under <outdir>/genome/sortmerna when params.save_reference is set, otherwise the path falls back to <outdir>
path: { params.save_reference ? "${params.outdir}/genome/sortmerna" : params.outdir },
mode: params.publish_dir_mode,
// A null result suppresses publishing: versions.yml is never published, and other files only when params.save_reference is set
saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null }
]
}
}
}
24 changes: 21 additions & 3 deletions workflows/rnaseq/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ include { SAMTOOLS_SORT } from '../../mod
include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap'
include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq'
include { SORTMERNA } from '../../modules/nf-core/sortmerna'
// Second alias of the sortmerna module, used for index building. Include paths are relative to this
// script (workflows/rnaseq/main.nf), so the modules directory is two levels up, as in the sibling includes.
include { SORTMERNA as SORTMERNA_INDEX } from '../../modules/nf-core/sortmerna'
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie'
include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts'
include { MULTIQC } from '../../modules/nf-core/multiqc'
Expand Down Expand Up @@ -97,7 +98,9 @@ workflow RNASEQ {
ch_salmon_index // channel: path(salmon/index/)
ch_kallisto_index // channel: [ meta, path(kallisto/index/) ]
ch_bbsplit_index // channel: path(bbsplit/index/)
ch_sortmerna_index // channel: path(sortmerna/index/)
ch_splicesites // channel: path(genome.splicesites.txt)
make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna

main:

Expand Down Expand Up @@ -225,14 +228,29 @@ workflow RNASEQ {
//
// MODULE: Remove ribosomal RNA reads
//
// Check rRNA databases for sortmerna
if (params.remove_ribo_rna) {
ch_ribo_db = file(params.ribo_database_manifest)
ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}

ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
.map { row -> file(row, checkIfExists: true) }
.collect()
.map{ ['rrna_refs', it] }

if (make_sortmerna_index) {
SORTMERNA_INDEX (
[[],[]],
ch_sortmerna_fastas,
[[],[]]
)
ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
}

SORTMERNA (
ch_filtered_reads,
ch_sortmerna_fastas.map{ it -> [ [ id:'fastas' ], it ] },
[[:],[]]
ch_sortmerna_fastas,
ch_sortmerna_index
)
.reads
.set { ch_filtered_reads }
Expand Down
Loading