Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RNA-seq references #14

Merged
merged 6 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 45 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,51 @@ include { fromSamplesheet } from 'plugin/nf-validation'

include { BOWTIE_BUILD } from "./modules/nf-core/bowtie/build/main"
include { BOWTIE2_BUILD } from "./modules/nf-core/bowtie2/build/main"
// RNASEQ
include { STAR_GENOMEGENERATE } from "./modules/nf-core/star/genomegenerate/main"
include { HISAT2_EXTRACTSPLICESITES } from "./modules/nf-core/hisat2/extractsplicesites"
include { HISAT2_BUILD } from "./modules/nf-core/hisat2/build"
include { RSEM_PREPAREREFERENCE as MAKE_TRANSCRIPTS_FASTA } from "./modules/nf-core/rsem/preparereference"
include { SALMON_INDEX } from "./modules/nf-core/salmon/index"
include { KALLISTO_INDEX } from "./modules/nf-core/kallisto/index"
include { RSEM_PREPAREREFERENCE as RSEM_PREPAREREFERENCE_GENOME } from "./modules/nf-core/rsem/preparereference"

// Build the RNA-seq reference assets (STAR, HISAT2, Salmon, Kallisto and RSEM
// indices plus a transcript FASTA) for every reference record received.
workflow RNASEQ {
    take:
    reference // tuple: meta, fasta, gtf, bed, readme, mito, size

    main:
    // Split each reference record into per-file channels that all carry `meta`,
    // so they can be re-associated by key further down.
    reference
        .multiMap { meta, fasta, gtf, bed, readme, mito, size ->
            fasta: tuple(meta, fasta)
            gtf:   tuple(meta, gtf)
            bed:   tuple(meta, bed)
        }
        .set { input }

    STAR_GENOMEGENERATE ( input.fasta, input.gtf )

    HISAT2_EXTRACTSPLICESITES ( input.gtf )

    // Process outputs are emitted in task-completion order, not input order.
    // Re-pair fasta / gtf / splice sites on `meta` so that, with more than one
    // reference, HISAT2_BUILD never combines files from different references.
    input.fasta
        .join( input.gtf )
        .join( HISAT2_EXTRACTSPLICESITES.out.txt )
        .multiMap { meta, fasta, gtf, splicesites ->
            fasta:       tuple(meta, fasta)
            gtf:         tuple(meta, gtf)
            splicesites: tuple(meta, splicesites)
        }
        .set { hisat2_input }

    HISAT2_BUILD ( hisat2_input.fasta, hisat2_input.gtf, hisat2_input.splicesites )

    // NOTE(review): rsem/preparereference and salmon/index are locally patched
    // (see modules.json), so their exact input/output shapes are not visible
    // here. The calls below are kept unchanged; the same ordering hazard may
    // apply to `ch_transcript_fasta` when several references are processed —
    // confirm against the patched modules.
    ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( input.fasta, input.gtf ).transcript_fasta

    SALMON_INDEX ( input.fasta, ch_transcript_fasta )

    KALLISTO_INDEX ( ch_transcript_fasta.map{[ [:], it]} )

    RSEM_PREPAREREFERENCE_GENOME ( input.fasta, input.gtf )

    emit:
    star_index       = STAR_GENOMEGENERATE.out.index
    hisat2_index     = HISAT2_BUILD.out.index
    transcript_fasta = ch_transcript_fasta
    salmon_index     = SALMON_INDEX.out.index
    kallisto_index   = KALLISTO_INDEX.out.index
    rsem_index       = RSEM_PREPAREREFERENCE_GENOME.out.index
}

// TODO: add a SAREK workflow

workflow INDEX {
take:
Expand All @@ -19,17 +63,16 @@ workflow INDEX {

BOWTIE_BUILD ( input.fasta )
BOWTIE2_BUILD ( input.fasta )
STAR_GENOMEGENERATE ( input.fasta, input.gtf )

emit:
// bowtie_index = BOWTIE_BUILD.out.index
bowtie2_index = BOWTIE2_BUILD.out.index
star_index = STAR_GENOMEGENERATE.out.index
}


// Entry point: read the reference samplesheet once and feed the same channel
// to both index-building sub-workflows.
workflow {
    // "input" is the argument handed to nf-validation's fromSamplesheet.
    ch_references = Channel.fromSamplesheet("input")

    INDEX(ch_references)
    RNASEQ(ch_references)
}
27 changes: 27 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,33 @@
"git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d",
"installed_by": ["modules"]
},
"hisat2/build": {
"branch": "master",
"git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1",
"installed_by": ["modules"]
},
"hisat2/extractsplicesites": {
"branch": "master",
"git_sha": "400037f54de4b0c42712ec5a499d9fd9e66250d1",
"installed_by": ["modules"]
},
"kallisto/index": {
"branch": "master",
"git_sha": "de5811dd9ca15af1e131806001bcaae909e42021",
"installed_by": ["modules"]
},
"rsem/preparereference": {
"branch": "master",
"git_sha": "301b088c7e9e00c4c80686411383f07173b54d69",
"installed_by": ["modules"],
"patch": "modules/nf-core/rsem/preparereference/rsem-preparereference.diff"
},
"salmon/index": {
"branch": "master",
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
"installed_by": ["modules"],
"patch": "modules/nf-core/salmon/index/salmon-index.diff"
},
"star/genomegenerate": {
"branch": "master",
"git_sha": "0e98289b5bec6e3f8f588a8a9d05e8aacc1179a0",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/hisat2/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the hisat2/build module: pins HISAT2 2.2.1 from bioconda.
name: hisat2_build
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Keep in sync with the container tag and VERSION string in this module's main.nf.
- bioconda::hisat2=2.2.1
64 changes: 64 additions & 0 deletions modules/nf-core/hisat2/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Build a HISAT2 index from a reference FASTA. Splice sites and exons from the
// GTF are added to the index only when the task's memory meets the
// `params.hisat2_build_memory` threshold.
process HISAT2_BUILD {
    tag "$fasta"
    label 'process_high'
    label 'process_high_memory'

    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
        'biocontainers/hisat2:2.2.1--h1b792b2_3' }"

    input:
    tuple val(meta), path(fasta)
    tuple val(meta2), path(gtf)
    tuple val(meta3), path(splicesites)

    output:
    tuple val(meta), path("hisat2"), emit: index
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    def args = task.ext.args ?: ''

    // Memory granted to this task in GB; stays 0 when no memory directive is set.
    def granted_gb = 0
    if (task.memory) {
        log.info "[HISAT2 index build] Available memory: ${task.memory}"
        granted_gb = task.memory.toGiga()
    } else {
        log.info "[HISAT2 index build] Available memory not known - defaulting to 0. Specify process memory requirements to change this."
    }

    // Threshold (GB) required to include splice sites and exons; 0 when the parameter is unset.
    def required_gb = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0

    // Optional command fragments; left empty when the annotation is not used.
    def exon_cmd = ''
    def ss_opt   = ''
    def exon_opt = ''
    if (granted_gb < required_gb) {
        log.info "[HISAT2 index build] Less than ${required_gb} GB available, so NOT using splice sites and exons to build HISAT2 index."
        log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
    } else {
        log.info "[HISAT2 index build] At least ${required_gb} GB available, so using splice sites and exons to build HISAT2 index"
        exon_cmd = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : ""
        ss_opt   = splicesites ? "--ss $splicesites" : ""
        exon_opt = gtf ? "--exon ${gtf.baseName}.exons.txt" : ""
    }
    """
    mkdir hisat2
    $exon_cmd
    hisat2-build \\
        -p $task.cpus \\
        $ss_opt \\
        $exon_opt \\
        $args \\
        $fasta \\
        hisat2/${fasta.baseName}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hisat2: $VERSION
    END_VERSIONS
    """
}
61 changes: 61 additions & 0 deletions modules/nf-core/hisat2/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
name: hisat2_build
description: Builds HISAT2 index for reference genome
keywords:
- build
- index
- fasta
- genome
- reference
tools:
- hisat2:
description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome.
homepage: https://daehwankimlab.github.io/hisat2/
documentation: https://daehwankimlab.github.io/hisat2/manual/
doi: "10.1038/s41587-019-0201-4"
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: Reference fasta file
pattern: "*.{fa,fasta,fna}"
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- gtf:
type: file
description: Reference gtf annotation file
pattern: "*.{gtf}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- splicesites:
type: file
description: Splice sites extracted from the gtf file
pattern: "*.{txt}"
output:
- meta:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- index:
type: file
description: HISAT2 genome index file
pattern: "*.ht2"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@ntoda03"
maintainers:
- "@ntoda03"
53 changes: 53 additions & 0 deletions modules/nf-core/hisat2/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// nf-test specification for the HISAT2_BUILD process defined in ../main.nf.
nextflow_process {

name "Test Process HISAT2_BUILD"
script "../main.nf"
process "HISAT2_BUILD"
tag "modules"
tag "modules_nfcore"
tag "hisat2"
tag "hisat2/build"
tag "hisat2/extractsplicesites"

test("Should run without failures") {

// Run HISAT2_EXTRACTSPLICESITES on the sarscov2 GTF first, so that its `txt`
// output can be used as input[2] of the process under test.
setup {
run("HISAT2_EXTRACTSPLICESITES") {
script "../../extractsplicesites/main.nf"
process {
"""
input[0] = Channel.of([
[id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
])
"""
}
}
}

// Inputs: sarscov2 genome FASTA (input[0]), the matching GTF (input[1]) and the
// splice-site file produced in the setup step (input[2]).
when {
params {
outdir = "$outputDir"
}
process {
"""
input[0] = Channel.of([
[id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
])
input[1] = Channel.of([ [id:'genome'],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
])
input[2] = HISAT2_EXTRACTSPLICESITES.out.txt
"""
}
}

// The process must succeed and its full output must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
49 changes: 49 additions & 0 deletions modules/nf-core/hisat2/build/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"Should run without failures": {
"content": [
{
"0": [
[
{
"id": "genome"
},
[
"genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803",
"genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf",
"genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777",
"genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626",
"genome.5.ht2:md5,91198831aaba993acac1734138c5f173",
"genome.6.ht2:md5,265e1284ce85686516fae5d35540994a",
"genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f",
"genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb"
]
]
],
"1": [
"versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0"
],
"index": [
[
{
"id": "genome"
},
[
"genome.1.ht2:md5,057cfa8a22b97ee9cff4c8d342498803",
"genome.2.ht2:md5,47b153cd1319abc88dda532462651fcf",
"genome.3.ht2:md5,4ed93abba181d8dfab2e303e33114777",
"genome.4.ht2:md5,c25be5f8b0378abf7a58c8a880b87626",
"genome.5.ht2:md5,91198831aaba993acac1734138c5f173",
"genome.6.ht2:md5,265e1284ce85686516fae5d35540994a",
"genome.7.ht2:md5,9013eccd91ad614d7893c739275a394f",
"genome.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb"
]
]
],
"versions": [
"versions.yml:md5,e36ef3cd73d19ccf2378c9358fe942c0"
]
}
],
"timestamp": "2023-10-16T14:42:22.381609786"
}
}
3 changes: 3 additions & 0 deletions modules/nf-core/hisat2/build/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Paths associated with the hisat2/build tag.
# NOTE(review): presumably used by CI to decide when to run this module's tests — confirm.
hisat2/build:
- modules/nf-core/hisat2/build/**
- modules/nf-core/hisat2/extractsplicesites/**
7 changes: 7 additions & 0 deletions modules/nf-core/hisat2/extractsplicesites/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the hisat2/extractsplicesites module: pins HISAT2 2.2.1 from bioconda.
name: hisat2_extractsplicesites
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Keep in sync with the container tag and VERSION string in this module's main.nf.
- bioconda::hisat2=2.2.1
31 changes: 31 additions & 0 deletions modules/nf-core/hisat2/extractsplicesites/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Extract splice sites from a GTF annotation with HISAT2's
// hisat2_extract_splice_sites.py and write them to <gtf basename>.splice_sites.txt.
process HISAT2_EXTRACTSPLICESITES {
    tag "$gtf"
    label 'process_medium'

    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hisat2:2.2.1--h1b792b2_3' :
        'biocontainers/hisat2:2.2.1--h1b792b2_3' }"

    input:
    tuple val(meta), path(gtf)

    output:
    tuple val(meta), path("*.splice_sites.txt"), emit: txt
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options from `task.ext.args`. They were previously read
    // but never used; they are now forwarded to the extractor (empty by default,
    // so the command is unchanged unless ext.args is configured).
    def args = task.ext.args ?: ''
    def VERSION = '2.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    hisat2_extract_splice_sites.py $args $gtf > ${gtf.baseName}.splice_sites.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        hisat2: $VERSION
    END_VERSIONS
    """
}
Loading