From 0976781105c68535589e61c0c562d09ad236dd5f Mon Sep 17 00:00:00 2001
From: Edmund Miller
Date: Mon, 4 Nov 2024 23:54:03 +0100
Subject: [PATCH] style: First pass with lsp

---
 conf/test.config                             |  20 +-
 conf/test_copro.config                       |  14 +-
 conf/test_full.config                        |   6 +-
 conf/test_grocap.config                      |  19 +-
 modules/local/bed2saf.nf                     |  10 +-
 modules/local/dreg_prep/main.nf              |  20 +-
 modules/local/grohmm/parametertuning/main.nf |  20 +-
 .../local/grohmm/transcriptcalling/main.nf   |  24 +-
 modules/local/gtf2bed.nf                     |  15 +-
 subworkflows/local/align_bwamem2/main.nf     |  28 +-
 subworkflows/local/align_dragmap/main.nf     |  28 +-
 subworkflows/local/coverage_graphs.nf        |  34 +-
 subworkflows/local/dreg_prep/main.nf         |   1 -
 subworkflows/local/prepare_genome.nf         | 140 ++++----
 subworkflows/local/quality_control.nf        |  35 +-
 .../local/transcript_identification/main.nf  |   6 +-
 .../utils_nfcore_nascent_pipeline/main.nf    |  99 +++---
 workflows/nascent.nf                         | 302 +++++++++---------
 18 files changed, 418 insertions(+), 403 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index c7635451..3b306c17 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,21 +24,22 @@ params {
 
     // Input data
     // TODO params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
-    input = "${projectDir}/assets/samplesheet.csv"
+    input = "${projectDir}/assets/samplesheet.csv"
 
     // Genome references
-    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa'
-    gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gtf'
-    hisat2_index = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21_hisat2.tar.gz'
+    fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa'
+    gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gtf'
+    hisat2_index = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21_hisat2.tar.gz'
 
-    assay_type = "GROseq"
-    skip_grohmm = true // FIXME Fails due to higher memory requirements
+    assay_type = "GROseq"
+    // FIXME Fails due to higher memory requirements
+    skip_grohmm = true
     grohmm_min_uts = 5
     grohmm_max_uts = 10
     grohmm_min_ltprobb = -100
     grohmm_max_ltprobb = -150
-    filter_bed = "${projectDir}/tests/config/unwanted_region.bed"
-    intersect_bed = "${projectDir}/tests/config/wanted_region.bed"
+    filter_bed = "${projectDir}/tests/config/unwanted_region.bed"
+    intersect_bed = "${projectDir}/tests/config/wanted_region.bed"
 }
 
 process {
@@ -46,8 +47,7 @@ process {
         ext.args = '--genomeSAindexNbases 9'
     }
 
-    withName: 'PINTS_CALLER' {
-        // HACK Tests fail after latest modules update
+    withName: PINTS_CALLER {
         ext.args = { "--disable-small" }
     }
 }

diff --git a/conf/test_copro.config b/conf/test_copro.config
index 27d33607..42c41cdd 100644
--- a/conf/test_copro.config
+++ b/conf/test_copro.config
@@ -15,17 +15,17 @@ params {
     config_profile_description = 'Test dataset to check PINTS pipeline function(https://pints.yulab.org/tre_calling#part-iv-case-2)'
 
     // Input data
-    input = "${projectDir}/tests/config/samplesheets/copro.csv"
+    input = "${projectDir}/tests/config/samplesheets/copro.csv"
 
-    genome = 'hg38'
-    assay_type = 'CoPRO'
-    filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz"
-    with_umi = true
-    umitools_dedup_stats = true
+    genome = 'hg38'
+    assay_type = 'CoPRO'
+    filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz"
+    with_umi = true
+    umitools_dedup_stats = true
 }
 
 process {
-    withName: NFCORE_NASCENT:NASCENT:FASTP {
+    withName: FASTP {
         ext.args = [
             "--adapter_sequence TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC",
             "--adapter_sequence_r2 GATCGTCGGACTGTAGAACTCTGAAC",

diff --git a/conf/test_full.config b/conf/test_full.config
index f5f1c5e6..5b464fe2 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -16,9 +16,9 @@ params {
 
     // Input data for full size test
     // TODO params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
-    input = "${projectDir}/assets/samplesheet_full.csv"
+    input = "${projectDir}/assets/samplesheet_full.csv"
 
     // Genome references
-    genome = 'hg38'
-    assay_type = 'GROseq'
+    genome = 'hg38'
+    assay_type = 'GROseq'
 }

diff --git a/conf/test_grocap.config b/conf/test_grocap.config
index 53916d5f..55a1823b 100644
--- a/conf/test_grocap.config
+++ b/conf/test_grocap.config
@@ -15,21 +15,22 @@ params {
     config_profile_description = 'Test dataset to check PINTS pipeline function(https://pints.yulab.org/tre_calling#part-iii-case-1)'
 
     // Input data
-    input = "${projectDir}/tests/config/samplesheets/grocap.csv"
+    input = "${projectDir}/tests/config/samplesheets/grocap.csv"
 
-    genome = 'hg38'
-    assay_type = 'GROcap'
-    filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz"
+    genome = 'hg38'
+    assay_type = 'GROcap'
+    filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz"
 }
 
 process {
-    withName: NFCORE_NASCENT:NASCENT:FASTP {
+    // only keep reads longer than 14nts after trimming
+    // This library was polyadenylated,
+    // so we are trimming the last 20nts per reads (with --trim_tail1).
+    // For more recent single-end PRO/GRO-cap libraries, this may not be necessary.
+    withName: 'NFCORE_NASCENT:NASCENT:FASTP' {
         ext.args = [
             "--adapter_sequence TGGAATTCTCGGGTGCCAAGG",
-            "-l 14", // only keep reads longer than 14nts after trimming
-            // This library was polyadenylated,
-            // so we are trimming the last 20nts per reads (with --trim_tail1).
-            // For more recent single-end PRO/GRO-cap libraries, this may not be necessary.
+            "-l 14",
             "--trim_tail1 20",
             "--low_complexity_filter",
             "-w 8"

diff --git a/modules/local/bed2saf.nf b/modules/local/bed2saf.nf
index f23b7750..bcc23c3e 100644
--- a/modules/local/bed2saf.nf
+++ b/modules/local/bed2saf.nf
@@ -1,11 +1,11 @@
 process BED2SAF {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_single'
 
     conda "conda-forge::gawk=5.1.0"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
-        'nf-core/ubuntu:20.04' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04'
+        : 'nf-core/ubuntu:20.04'}"
 
     input:
     tuple val(meta), path(bed)
@@ -20,7 +20,7 @@ process BED2SAF {
     script:
     """
     awk 'OFS="\\t" {print \$1"."\$2"."\$3, \$1, \$2, \$3, "."}' \\
-        $bed \\
+        ${bed} \\
        > ${bed.baseName}.saf

     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/dreg_prep/main.nf b/modules/local/dreg_prep/main.nf
index 7c640cf4..403bdfdb 100644
--- a/modules/local/dreg_prep/main.nf
+++ b/modules/local/dreg_prep/main.nf
@@ -1,16 +1,16 @@
 process DREG_PREP {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_low'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-f01e242bdea19948f0576fdca94777242fe4c2cb:4238fb992d2a93e648108c86e3a9f51348e834a9-0' :
-        'biocontainers/mulled-v2-f01e242bdea19948f0576fdca94777242fe4c2cb:4238fb992d2a93e648108c86e3a9f51348e834a9-0' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/mulled-v2-f01e242bdea19948f0576fdca94777242fe4c2cb:4238fb992d2a93e648108c86e3a9f51348e834a9-0'
+        : 'biocontainers/mulled-v2-f01e242bdea19948f0576fdca94777242fe4c2cb:4238fb992d2a93e648108c86e3a9f51348e834a9-0'}"
 
     input:
     tuple val(meta), path(bam_file), val(index)
-    path  sizes
+    path sizes
     val assay_type
 
     output:
@@ -78,10 +78,11 @@ process DREG_PREP {
         echo "bedGraph to bigwig done"
         """
-    } else {
+    }
+    else {
         if (forwardStranded) {
             """
-            samtools view -@ $task.cpus -bf 0x2 ${bam_file} | samtools sort -n -@ $task.cpus \\
+            samtools view -@ ${task.cpus} -bf 0x2 ${bam_file} | samtools sort -n -@ ${task.cpus} \\
                 > ${prefix}.dreg.bam
 
             bedtools bamtobed -bedpe -mate1 -i ${prefix}.dreg.bam \\
@@ -118,9 +119,10 @@ process DREG_PREP {
                 ${prefix}.unsorted.bedGraph \\
                 > ${prefix}.bedGraph
             """
-        } else {
+        }
+        else {
             """
-            samtools view -@ $task.cpus -bf 0x2 ${bam_file} | samtools sort -n -@ $task.cpus \\
+            samtools view -@ ${task.cpus} -bf 0x2 ${bam_file} | samtools sort -n -@ ${task.cpus} \\
                 > ${prefix}.dreg.bam
 
             bedtools bamtobed -bedpe -mate1 -i ${prefix}.dreg.bam \\

diff --git a/modules/local/grohmm/parametertuning/main.nf b/modules/local/grohmm/parametertuning/main.nf
index ac56402f..b19810ce 100644
--- a/modules/local/grohmm/parametertuning/main.nf
+++ b/modules/local/grohmm/parametertuning/main.nf
@@ -1,12 +1,12 @@
 process GROHMM_PARAMETERTUNING {
-    tag "$meta.id|$UTS|$LtProbB"
+    tag "${meta.id}|${UTS}|${LtProbB}"
     label 'process_high'
     // array 10
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b929af5662486ba6ce2d27eb501e5c7ec71ca7dd8e333fe5d3dcf2803d87cf67/data' :
-        'community.wave.seqera.io/library/grohmm:833aa94cad4202ac' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b929af5662486ba6ce2d27eb501e5c7ec71ca7dd8e333fe5d3dcf2803d87cf67/data'
+        : 'community.wave.seqera.io/library/grohmm:833aa94cad4202ac'}"
 
     input:
     tuple val(meta), path(bams), path(bais), val(UTS), val(LtProbB)
@@ -15,7 +15,7 @@ process GROHMM_PARAMETERTUNING {
     output:
     tuple val(meta), path("*.tuning.csv"), emit: tuning
     tuple val(meta), path("*.tuning.consensus.bed"), emit: bed
-    path "versions.yml", emit: versions
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -27,12 +27,12 @@ process GROHMM_PARAMETERTUNING {
     grohmm_parametertuning.R \\
         --bam_file ${bams} \\
         --outprefix ${prefix} \\
-        --gxf $gxf \\
-        --uts $UTS \\
-        --ltprobb $LtProbB \\
+        --gxf ${gxf} \\
+        --uts ${UTS} \\
+        --ltprobb ${LtProbB} \\
         --outdir ./ \\
-        --cores $task.cpus \\
-        $args
+        --cores ${task.cpus} \\
+        ${args}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/grohmm/transcriptcalling/main.nf b/modules/local/grohmm/transcriptcalling/main.nf
index 984d1800..95dca60c 100644
--- a/modules/local/grohmm/transcriptcalling/main.nf
+++ b/modules/local/grohmm/transcriptcalling/main.nf
@@ -1,12 +1,12 @@
 process GROHMM_TRANSCRIPTCALLING {
-    tag "$meta.id"
+    tag "${meta.id}"
     label 'process_high'
     label 'process_long'
 
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b929af5662486ba6ce2d27eb501e5c7ec71ca7dd8e333fe5d3dcf2803d87cf67/data' :
-        'community.wave.seqera.io/library/grohmm:833aa94cad4202ac' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b9/b929af5662486ba6ce2d27eb501e5c7ec71ca7dd8e333fe5d3dcf2803d87cf67/data'
+        : 'community.wave.seqera.io/library/grohmm:833aa94cad4202ac'}"
 
     input:
     tuple val(meta), path(bams), path(bais), path(tuning_file)
@@ -14,12 +14,12 @@ process GROHMM_TRANSCRIPTCALLING {
     output:
     tuple val(meta), path("*.transcripts.txt"), emit: transcripts
-    tuple val(meta), path("*.eval.txt")       , emit: eval
+    tuple val(meta), path("*.eval.txt"), emit: eval
     tuple val(meta), path("*.transcripts.bed"), emit: transcripts_bed
-    tuple val(meta), path("*.tdFinal.txt")    , emit: td
-    tuple val(meta), path("*.tdplot_mqc.png") , emit: td_plot
-    tuple val(meta), path("*.tdFinal_mqc.csv") , emit: mqc_csv
-    path "versions.yml"                       , emit: versions
+    tuple val(meta), path("*.tdFinal.txt"), emit: td
+    tuple val(meta), path("*.tdplot_mqc.png"), emit: td_plot
+    tuple val(meta), path("*.tdFinal_mqc.csv"), emit: mqc_csv
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -32,11 +32,11 @@ process GROHMM_TRANSCRIPTCALLING {
         --bam_file ${bams} \\
         --tuning_file ${tuning_file} \\
         --outprefix ${prefix} \\
-        --gxf $gxf \\
+        --gxf ${gxf} \\
         --outdir ./ \\
-        --cores $task.cpus \\
+        --cores ${task.cpus} \\
         --memory ${task.memory.toMega()} \\
-        $args
+        ${args}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf
index 07df95fb..7b34be8b 100644
--- a/modules/local/gtf2bed.nf
+++ b/modules/local/gtf2bed.nf
@@ -1,26 +1,27 @@
 process GTF2BED {
-    tag "$gtf"
+    tag "${gtf}"
     label 'process_low'
 
     conda "conda-forge::perl=5.26.2"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/perl:5.26.2' :
-        'biocontainers/perl:5.26.2' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/perl:5.26.2'
+        : 'biocontainers/perl:5.26.2'}"
 
     input:
     path gtf
 
     output:
-    path '*.bed'       , emit: bed
+    path '*.bed', emit: bed
     path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
-    script: // This script is bundled with the pipeline, in nf-core/nascent/bin/
+    script:
+    // This script is bundled with the pipeline, in nf-core/nascent/bin/
     """
     gtf2bed \\
-        $gtf \\
+        ${gtf} \\
        > ${gtf.baseName}.bed

     cat <<-END_VERSIONS > versions.yml

diff --git a/subworkflows/local/align_bwamem2/main.nf b/subworkflows/local/align_bwamem2/main.nf
index beb6d863..f1ffb959 100644
--- a/subworkflows/local/align_bwamem2/main.nf
+++ b/subworkflows/local/align_bwamem2/main.nf
@@ -2,15 +2,15 @@
 // Alignment with BWAMEM2
 //
 
-include { BWAMEM2_MEM             } from '../../../modules/nf-core/bwamem2/mem/main'
+include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main'
 include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main'
 
 workflow ALIGN_BWAMEM2 {
     take:
-    ch_reads     // channel (mandatory): [ val(meta), [ path(reads) ] ]
-    ch_index     // channel (mandatory): [ val(meta2), path(index) ]
-    val_sort_bam // boolean (mandatory): true or false
-    ch_fasta     // channel (optional) : [ val(meta3), path(fasta) ]
+    ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ]
+    ch_index // channel (mandatory): [ val(meta2), path(index) ]
+    val_sort_bam // boolean (mandatory): true or false
+    ch_fasta // channel (optional) : [ val(meta3), path(fasta) ]
 
     main:
     ch_versions = Channel.empty()
@@ -19,25 +19,23 @@ workflow ALIGN_BWAMEM2 {
     // Map reads with BWA
     //
 
-    BWAMEM2_MEM ( ch_reads, ch_index, ch_fasta, val_sort_bam )
+    BWAMEM2_MEM(ch_reads, ch_index, ch_fasta, val_sort_bam)
     ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first())
 
     //
     // Sort, index BAM file and run samtools stats, flagstat and idxstats
     //
-    BAM_SORT_STATS_SAMTOOLS ( BWAMEM2_MEM.out.bam, ch_fasta )
+    BAM_SORT_STATS_SAMTOOLS(BWAMEM2_MEM.out.bam, ch_fasta)
     ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
     emit:
-    bam_orig = BWAMEM2_MEM.out.bam // channel: [ val(meta), path(bam) ]
-
-    bam      = BAM_SORT_STATS_SAMTOOLS.out.bam      // channel: [ val(meta), path(bam) ]
-    bai      = BAM_SORT_STATS_SAMTOOLS.out.bai      // channel: [ val(meta), path(bai) ]
-    csi      = BAM_SORT_STATS_SAMTOOLS.out.csi      // channel: [ val(meta), path(csi) ]
-    stats    = BAM_SORT_STATS_SAMTOOLS.out.stats    // channel: [ val(meta), path(stats) ]
+    bam_orig = BWAMEM2_MEM.out.bam // channel: [ val(meta), path(bam) ]
+    bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ]
+    bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ]
+    csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ]
+    stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
     flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
     idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
-
-    versions = ch_versions // channel: [ path(versions.yml) ]
+    versions = ch_versions // channel: [ path(versions.yml) ]
 }

diff --git a/subworkflows/local/align_dragmap/main.nf b/subworkflows/local/align_dragmap/main.nf
index 17a74ec6..73a181b8 100644
--- a/subworkflows/local/align_dragmap/main.nf
+++ b/subworkflows/local/align_dragmap/main.nf
@@ -2,15 +2,15 @@
 // Alignment with dragmap
 //
 
-include { DRAGMAP_ALIGN           } from '../../../modules/nf-core/dragmap/align/main'
+include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main'
 include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main'
 
 workflow ALIGN_DRAGMAP {
     take:
-    ch_reads     // channel (mandatory): [ val(meta), [ path(reads) ] ]
-    ch_index     // channel (mandatory): [ val(meta2), path(index) ]
-    val_sort_bam // boolean (mandatory): true or false
-    ch_fasta     // channel (optional) : [ val(meta3), path(fasta) ]
+    ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ]
+    ch_index // channel (mandatory): [ val(meta2), path(index) ]
+    val_sort_bam // boolean (mandatory): true or false
+    ch_fasta // channel (optional) : [ val(meta3), path(fasta) ]
 
     main:
     ch_versions = Channel.empty()
@@ -19,25 +19,23 @@ workflow ALIGN_DRAGMAP {
     // Map reads with dragmap
     //
 
-    DRAGMAP_ALIGN ( ch_reads, ch_index, ch_fasta, val_sort_bam )
+    DRAGMAP_ALIGN(ch_reads, ch_index, ch_fasta, val_sort_bam)
     ch_versions = ch_versions.mix(DRAGMAP_ALIGN.out.versions.first())
 
     //
     // Sort, index BAM file and run samtools stats, flagstat and idxstats
     //
-    BAM_SORT_STATS_SAMTOOLS ( DRAGMAP_ALIGN.out.bam, ch_fasta )
+    BAM_SORT_STATS_SAMTOOLS(DRAGMAP_ALIGN.out.bam, ch_fasta)
     ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
 
     emit:
-    bam_orig = DRAGMAP_ALIGN.out.bam // channel: [ val(meta), path(bam) ]
-
-    bam      = BAM_SORT_STATS_SAMTOOLS.out.bam      // channel: [ val(meta), path(bam) ]
-    bai      = BAM_SORT_STATS_SAMTOOLS.out.bai      // channel: [ val(meta), path(bai) ]
-    csi      = BAM_SORT_STATS_SAMTOOLS.out.csi      // channel: [ val(meta), path(csi) ]
-    stats    = BAM_SORT_STATS_SAMTOOLS.out.stats    // channel: [ val(meta), path(stats) ]
+    bam_orig = DRAGMAP_ALIGN.out.bam // channel: [ val(meta), path(bam) ]
+    bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ]
+    bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ]
+    csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ]
+    stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
     flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
     idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
-
-    versions = ch_versions // channel: [ path(versions.yml) ]
+    versions = ch_versions // channel: [ path(versions.yml) ]
 }

diff --git a/subworkflows/local/coverage_graphs.nf b/subworkflows/local/coverage_graphs.nf
index a1628c16..ac8c46db 100644
--- a/subworkflows/local/coverage_graphs.nf
+++ b/subworkflows/local/coverage_graphs.nf
@@ -2,15 +2,13 @@
  * Create bigWig and bedGraph files
  */
 
-include {
-    BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_PLUS
-    BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_MINUS } from '../../modules/nf-core/bedtools/genomecov/main'
+include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_PLUS } from '../../modules/nf-core/bedtools/genomecov/main'
+include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_MINUS } from '../../modules/nf-core/bedtools/genomecov/main'
 
-include {
-    DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_PLUS
-    DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_MINUS } from '../../modules/nf-core/deeptools/bamcoverage/main'
+include { DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_PLUS } from '../../modules/nf-core/deeptools/bamcoverage/main'
+include { DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_MINUS } from '../../modules/nf-core/deeptools/bamcoverage/main'
 
-include { DREG_PREP } from '../../modules/local/dreg_prep/main'
+include { DREG_PREP } from '../../modules/local/dreg_prep/main'
 
 workflow COVERAGE_GRAPHS {
     take:
@@ -21,13 +19,13 @@ workflow COVERAGE_GRAPHS {
 
     main:
 
-    bam = bam_bai.map{ [ it[0], it[1] ] }
+    bam = bam_bai.map { [it[0], it[1]] }
 
     ch_versions = Channel.empty()
 
     ch_genomecov_bam = bam.combine(Channel.from(1))
 
-    BEDTOOLS_GENOMECOV_PLUS (
+    BEDTOOLS_GENOMECOV_PLUS(
         ch_genomecov_bam,
         sizes,
         'bedGraph',
@@ -35,7 +33,7 @@ workflow COVERAGE_GRAPHS {
     )
     ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV_PLUS.out.versions.first())
 
-    BEDTOOLS_GENOMECOV_MINUS (
+    BEDTOOLS_GENOMECOV_MINUS(
         ch_genomecov_bam,
         sizes,
         'bedGraph',
@@ -43,14 +41,14 @@ workflow COVERAGE_GRAPHS {
     )
     ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV_MINUS.out.versions.first())
 
-    DEEPTOOLS_BAMCOVERAGE_PLUS (
+    DEEPTOOLS_BAMCOVERAGE_PLUS(
         bam_bai,
         fasta,
         fai
     )
     ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE_PLUS.out.versions.first())
 
-    DEEPTOOLS_BAMCOVERAGE_MINUS (
+    DEEPTOOLS_BAMCOVERAGE_MINUS(
         bam_bai,
         fasta,
         fai
@@ -59,17 +57,15 @@ workflow COVERAGE_GRAPHS {
 
     ch_plus_minus = DEEPTOOLS_BAMCOVERAGE_PLUS.out.bigwig.join(DEEPTOOLS_BAMCOVERAGE_MINUS.out.bigwig)
 
-    DREG_PREP (
+    DREG_PREP(
         bam_bai,
         sizes,
-        params.assay_type,
+        params.assay_type
     )
 
     emit:
-    plus_bedGraph  = BEDTOOLS_GENOMECOV_PLUS.out.genomecov
+    plus_bedGraph = BEDTOOLS_GENOMECOV_PLUS.out.genomecov
     minus_bedGraph = BEDTOOLS_GENOMECOV_MINUS.out.genomecov
-
-    plus_minus = ch_plus_minus
-
-    versions = ch_versions
+    plus_minus = ch_plus_minus
+    versions = ch_versions
 }

diff --git a/subworkflows/local/dreg_prep/main.nf b/subworkflows/local/dreg_prep/main.nf
index 54c0bff8..942d97ec 100644
--- a/subworkflows/local/dreg_prep/main.nf
+++ b/subworkflows/local/dreg_prep/main.nf
@@ -10,5 +10,4 @@ workflow DREG_PREP {
     BEDTOOLS_BAMTOBED(
         bam
     )
-
 }

diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index cc5bab21..1e802c27 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -2,22 +2,24 @@
 // Uncompress and prepare reference genome files
 //
 
-include { GTF2BED } from '../../modules/local/gtf2bed'
+include { GTF2BED } from '../../modules/local/gtf2bed'
 include {
-    GUNZIP as GUNZIP_FASTA
-    GUNZIP as GUNZIP_GTF
-    GUNZIP as GUNZIP_GFF
-    GUNZIP as GUNZIP_GENE_BED } from '../../modules/nf-core/gunzip/main'
+    GUNZIP as GUNZIP_FASTA ;
+    GUNZIP as GUNZIP_GTF ;
+    GUNZIP as GUNZIP_GFF ;
+    GUNZIP as GUNZIP_GENE_BED
+} from '../../modules/nf-core/gunzip/main'
 include {
-    UNTAR as UNTAR_BWA_INDEX
-    UNTAR as UNTAR_DRAGMAP } from '../../modules/nf-core/untar/main'
-include { GFFREAD } from '../../modules/nf-core/gffread/main'
-include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
-include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main'
-include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/dragmap/hashtable/main'
-include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
-include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
+    UNTAR as UNTAR_BWA_INDEX ;
+    UNTAR as UNTAR_DRAGMAP
+} from '../../modules/nf-core/untar/main'
+include { GFFREAD } from '../../modules/nf-core/gffread/main'
+include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
+include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main'
+include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/dragmap/hashtable/main'
+include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
+include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
 
 workflow PREPARE_GENOME {
     take:
@@ -40,9 +42,10 @@ workflow PREPARE_GENOME {
     // Uncompress genome fasta file if required
     //
     if (fasta.endsWith('.gz')) {
-        ch_fasta    = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
+        ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] }
         ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
-    } else {
+    }
+    else {
         ch_fasta = Channel.value(file(fasta))
     }
 
@@ -52,19 +55,22 @@ workflow PREPARE_GENOME {
     if (gtf || gff) {
         if (gtf) {
             if (gtf.endsWith('.gz')) {
-                ch_gtf      = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
+                ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
                 ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
-            } else {
+            }
+            else {
                 ch_gtf = Channel.value(file(gtf))
             }
-        } else if (gff) {
+        }
+        else if (gff) {
             if (gff.endsWith('.gz')) {
-                ch_gff      = GUNZIP_GFF ( [ [:], gff ] ).gunzip
+                ch_gff = GUNZIP_GFF([[:], gff]).gunzip
                 ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
-            } else {
-                ch_gff = [ [:], file(gff)]
             }
-            ch_gtf      = GFFREAD ( ch_gff, ch_fasta ).gtf.map { it[1] }
+            else {
+                ch_gff = [[:], file(gff)]
+            }
+            ch_gtf = GFFREAD(ch_gff, ch_fasta).gtf.map { it[1] }
             ch_versions = ch_versions.mix(GFFREAD.out.versions)
         }
     }
@@ -74,23 +80,25 @@ workflow PREPARE_GENOME {
     //
     if (gene_bed) {
         if (gene_bed.endsWith('.gz')) {
-            ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
+            ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] }
             ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
-        } else {
+        }
+        else {
             ch_gene_bed = file(gene_bed)
         }
-    } else {
-        ch_gene_bed = GTF2BED ( ch_gtf ).bed
+    }
+    else {
+        ch_gene_bed = GTF2BED(ch_gtf).bed
         ch_versions = ch_versions.mix(GTF2BED.out.versions)
     }
 
     //
     // Create chromosome sizes file
     //
-    CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
-    ch_fai         = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
+    CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] })
+    ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
     ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
-    ch_versions    = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
+    ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
 
     //
     // Uncompress BWA index or generate from scratch if required
@@ -102,66 +110,76 @@ workflow PREPARE_GENOME {
     if ('bwa' in prepare_tool_indices) {
         if (bwa_index) {
             if (bwa_index.endsWith('.tar.gz')) {
-                ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwa_index ] ).untar
+                ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar
                 ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
-            } else {
+            }
+            else {
                 // TODO Give the meta from basename or genome?
-                ch_bwa_index = [ [meta: "Genome"], file(bwa_index) ]
+                ch_bwa_index = [[meta: "Genome"], file(bwa_index)]
             }
-        } else {
-            ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index
+        }
+        else {
+            ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index
             ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
         }
-    } else if ('bwamem2' in prepare_tool_indices) {
+    }
+    else if ('bwamem2' in prepare_tool_indices) {
         if (bwamem2_index) {
             if (bwamem2_index.endsWith('.tar.gz') || bwamem2_index.endsWith('.tgz')) {
-                ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwamem2_index ] ).untar
+                ch_bwa_index = UNTAR_BWA_INDEX([[:], bwamem2_index]).untar
                 ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
-            } else {
+            }
+            else {
                 // TODO Give the meta from basename or genome?
-                ch_bwa_index = [ [meta: "Genome"], file(bwamem2_index) ]
+                ch_bwa_index = [[meta: "Genome"], file(bwamem2_index)]
             }
-        } else {
-            ch_bwa_index = BWAMEM2_INDEX ( ch_fasta.map { [ [:], it ] } ).index
+        }
+        else {
+            ch_bwa_index = BWAMEM2_INDEX(ch_fasta.map { [[:], it] }).index
             ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions)
         }
-    } else if ('dragmap' in prepare_tool_indices) {
+    }
+    else if ('dragmap' in prepare_tool_indices) {
         if (dragmap) {
             if (dragmap.endsWith('.tar.gz')) {
-                ch_dragmap = UNTAR_DRAGMAP_INDEX ( dragmap ).untar
+                ch_dragmap = UNTAR_DRAGMAP_INDEX(dragmap).untar
                 ch_versions = ch_versions.mix(UNTAR_DRAGMAP_INDEX.out.versions)
-            } else {
+            }
+            else {
                 // TODO Give the meta from basename or genome?
-                ch_dragmap = [ [meta: "Genome"], file(dragmap) ]
+                ch_dragmap = [[meta: "Genome"], file(dragmap)]
             }
-        } else {
-            ch_dragmap = DRAGMAP_HASHTABLE( ch_fasta.map { [ [:], it ] } ).hashmap
+        }
+        else {
+            ch_dragmap = DRAGMAP_HASHTABLE(ch_fasta.map { [[:], it] }).hashmap
             ch_versions = ch_versions.mix(DRAGMAP_HASHTABLE.out.versions)
         }
-    } else if ('bowtie2' in prepare_tool_indices) {
+    }
+    else if ('bowtie2' in prepare_tool_indices) {
         if (bowtie2_index) {
             if (bowtie2_index.endsWith('.tar.gz')) {
-                ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( bowtie2_index ).untar
+                ch_bowtie2_index = UNTAR_BOWTIE2_INDEX(bowtie2_index).untar
                 ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
-            } else {
+            }
+            else {
                 // TODO Give the meta from basename or genome?
-                ch_bowtie2_index = [ [meta: "Genome"], file(bowtie2_index) ]
+                ch_bowtie2_index = [[meta: "Genome"], file(bowtie2_index)]
             }
-        } else {
-            ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index
+        }
+        else {
+            ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index
             ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
         }
     }
 
     emit:
-    fasta         = ch_fasta
-    fai           = ch_fai
-    gtf           = ch_gtf
-    gene_bed      = ch_gene_bed
-    chrom_sizes   = ch_chrom_sizes
-    bwa_index     = ch_bwa_index
-    dragmap       = ch_dragmap
+    fasta = ch_fasta
+    fai = ch_fai
+    gtf = ch_gtf
+    gene_bed = ch_gene_bed
+    chrom_sizes = ch_chrom_sizes
+    bwa_index = ch_bwa_index
+    dragmap = ch_dragmap
     bowtie2_index = ch_bowtie2_index
-
-    versions = ch_versions.ifEmpty(null)
+    versions = ch_versions.ifEmpty(null)
 }

diff --git a/subworkflows/local/quality_control.nf b/subworkflows/local/quality_control.nf
index ad24c7d9..9169d4cd 100644
--- a/subworkflows/local/quality_control.nf
+++ b/subworkflows/local/quality_control.nf
@@ -1,8 +1,8 @@
-include { PRESEQ_CCURVE   } from '../../modules/nf-core/preseq/ccurve/main'
+include { PRESEQ_CCURVE } from '../../modules/nf-core/preseq/ccurve/main'
 include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap/main'
-include { BBMAP_PILEUP    } from '../../modules/nf-core/bbmap/pileup/main'
+include { BBMAP_PILEUP } from '../../modules/nf-core/bbmap/pileup/main'
 
-include { BAM_RSEQC       } from '../../subworkflows/nf-core/bam_rseqc'
+include { BAM_RSEQC } from '../../subworkflows/nf-core/bam_rseqc'
 
 workflow QUALITY_CONTROL {
     take:
@@ -11,37 +11,34 @@ workflow QUALITY_CONTROL {
 
     main:
 
-    bam = bam_bai.map{ [ it[0], it[1] ] }
+    bam = bam_bai.map { [it[0], it[1]] }
 
     ch_versions = Channel.empty()
 
-    PRESEQ_CCURVE ( bam )
+    PRESEQ_CCURVE(bam)
     ch_versions = ch_versions.mix(PRESEQ_CCURVE.out.versions.first())
 
-    PRESEQ_LCEXTRAP ( bam )
+    PRESEQ_LCEXTRAP(bam)
     ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
 
     // TODO Set this in a param?
     rseqc_modules = ['read_duplication', 'read_distribution', 'infer_experiment']
-    BAM_RSEQC ( bam_bai, bed, rseqc_modules )
+    BAM_RSEQC(bam_bai, bed, rseqc_modules)
     ch_versions = ch_versions.mix(BAM_RSEQC.out.versions)
 
-    BBMAP_PILEUP ( bam )
+    BBMAP_PILEUP(bam)
     ch_versions = ch_versions.mix(BBMAP_PILEUP.out.versions.first())
 
     emit:
-    preseq_ccurve   = PRESEQ_CCURVE.out.c_curve
-    preseq_lcextrap = PRESEQ_LCEXTRAP.out.lc_extrap
-
-    inferexperiment_txt  = BAM_RSEQC.out.inferexperiment_txt
-    readdistribution_txt = BAM_RSEQC.out.readdistribution_txt
+    preseq_ccurve = PRESEQ_CCURVE.out.c_curve
+    preseq_lcextrap = PRESEQ_LCEXTRAP.out.lc_extrap
+    inferexperiment_txt = BAM_RSEQC.out.inferexperiment_txt
+    readdistribution_txt = BAM_RSEQC.out.readdistribution_txt
     readduplication_seq_xls = BAM_RSEQC.out.readduplication_seq_xls
     readduplication_pos_xls = BAM_RSEQC.out.readduplication_pos_xls
-    readduplication_pdf = BAM_RSEQC.out.readduplication_pdf
+    readduplication_pdf = BAM_RSEQC.out.readduplication_pdf
     readduplication_rscript = BAM_RSEQC.out.readduplication_rscript
-
-    pileup_stats = BBMAP_PILEUP.out.covstats
-    pileup_hist  = BBMAP_PILEUP.out.hist
-
-    versions = ch_versions
+    pileup_stats = BBMAP_PILEUP.out.covstats
+    pileup_hist = BBMAP_PILEUP.out.hist
+    versions = ch_versions
 }

diff --git a/subworkflows/local/transcript_identification/main.nf b/subworkflows/local/transcript_identification/main.nf
index 890f5579..537c8552 100644
--- a/subworkflows/local/transcript_identification/main.nf
+++ b/subworkflows/local/transcript_identification/main.nf
@@ -37,7 +37,7 @@ workflow TRANSCRIPT_INDENTIFICATION {
     homer_peaks = Channel.empty()
     homer_tagdir = Channel.empty()
     if (params.assay_type == "GROseq") {
-        group_bam = group_bam_bai.map { meta, bam, bai -> [meta, bam] }
+        group_bam = group_bam_bai.map { meta, bam, _bai -> [meta, bam] }
         HOMER_GROSEQ(group_bam, fasta, uniqmap)
         ch_identification_bed = ch_identification_bed.mix(HOMER_GROSEQ.out.bed)
         homer_peaks = HOMER_GROSEQ.out.peaks
@@ -78,7 +78,7 @@ workflow TRANSCRIPT_INDENTIFICATION {
         // TODO Tests don't seem to hit this because there's no bidirectional_TREs
         // Need to collect all of the beds for each chromosome/sample and concatenate them
         // Nextflow makes this super easy
-        def ch_bidirectional_tres = PINTS_CALLER.out.unidirectional_TREs.groupTuple(by: [0]).map { meta, beds ->
+        ch_bidirectional_tres = PINTS_CALLER.out.unidirectional_TREs.groupTuple(by: [0]).map { meta, beds ->
             [meta, beds.flatten()]
         }
@@ -106,7 +106,7 @@ workflow TRANSCRIPT_INDENTIFICATION {
     }
 
     ch_identification_bed
-        .filter { meta, bed -> bed.size() > 0 }
+        .filter { _meta, bed -> bed.size() > 0 }
         .set { ch_identification_bed_clean }
 
     emit:

diff --git a/subworkflows/local/utils_nfcore_nascent_pipeline/main.nf b/subworkflows/local/utils_nfcore_nascent_pipeline/main.nf
index bf28cfc1..087fca19 100644
--- a/subworkflows/local/utils_nfcore_nascent_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nascent_pipeline/main.nf
@@ -8,14 +8,14 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { UTILS_NFSCHEMA_PLUGIN   } from '../../nf-core/utils_nfschema_plugin'
-include { paramsSummaryMap        } from 'plugin/nf-schema'
-include { samplesheetToList       } from 'plugin/nf-schema'
-include { completionEmail         } from '../../nf-core/utils_nfcore_pipeline'
-include { completionSummary       } from '../../nf-core/utils_nfcore_pipeline'
-include { imNotification          } from '../../nf-core/utils_nfcore_pipeline'
-include { UTILS_NFCORE_PIPELINE   } from '../../nf-core/utils_nfcore_pipeline'
-include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
+include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin'
+include { paramsSummaryMap } from 'plugin/nf-schema'
+include { samplesheetToList } from 'plugin/nf-schema'
+include { completionEmail } from '../../nf-core/utils_nfcore_pipeline'
+include { completionSummary } from '../../nf-core/utils_nfcore_pipeline'
+include { imNotification } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -24,7 +24,6 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin
 */
 
 workflow PIPELINE_INITIALISATION {
-
     take:
     version // boolean: Display version and exit
     validate_params // boolean: Boolean whether to validate parameters against the schema at runtime
@@ -40,7 +39,7 @@ workflow PIPELINE_INITIALISATION {
     //
     // Print version and exit if required and dump pipeline parameters to JSON file
     //
-    UTILS_NEXTFLOW_PIPELINE (
+    UTILS_NEXTFLOW_PIPELINE(
         version,
         true,
         outdir,
@@ -50,7 +49,7 @@ workflow PIPELINE_INITIALISATION {
     //
     // Validate parameters and generate parameter summary to stdout
     //
-    UTILS_NFSCHEMA_PLUGIN (
+    UTILS_NFSCHEMA_PLUGIN(
         workflow,
         validate_params,
         null
@@ -59,7 +58,7 @@ workflow PIPELINE_INITIALISATION {
     //
     // Check config provided to the pipeline
     //
-    UTILS_NFCORE_PIPELINE (
+    UTILS_NFCORE_PIPELINE(
         nextflow_cli_args
     )
 
@@ -74,21 +73,20 @@ workflow PIPELINE_INITIALISATION {
 
     Channel
         .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
-        .map {
-            meta, fastq_1, fastq_2 ->
-                if (!fastq_2) {
-                    return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
-                } else {
-                    return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
-                }
+        .map { meta, fastq_1, fastq_2 ->
+            if (!fastq_2) {
+                return [meta.id, meta + [single_end: true], [fastq_1]]
+            }
+            else {
+                return [meta.id, meta + [single_end: false], [fastq_1, fastq_2]]
+            }
         }
         .groupTuple()
         .map { samplesheet ->
             validateInputSamplesheet(samplesheet)
         }
-        .map {
-            meta, fastqs ->
-                return [ meta, fastqs.flatten() ]
+        .map { meta, fastqs ->
+            return [meta, fastqs.flatten()]
         }
         .set { ch_samplesheet }
 
@@ -104,7 +102,6 @@ workflow PIPELINE_INITIALISATION {
 */
 
 workflow PIPELINE_COMPLETION {
-
     take:
     email // string: email address
     email_on_fail // string: email address sent on pipeline failure
@@ -141,7 +138,7 @@ workflow PIPELINE_COMPLETION {
     }
 
     workflow.onError {
-        log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting"
+        log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting")
     }
 }
 
@@ -164,20 +161,20 @@ def validateInputSamplesheet(input) {
     def (metas, fastqs) = input[1..2]
 
     // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
-    def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1
+    def endedness_ok = metas.collect { meta -> meta.single_end }.unique().size == 1
     if (!endedness_ok) {
         error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
     }
 
-    return [ metas[0], fastqs ]
+    return [metas[0], fastqs]
 }
 
 //
 // Get attribute from genome config file e.g. fasta
 //
 def getGenomeAttribute(attribute) {
     if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
-        if (params.genomes[ params.genome ].containsKey(attribute)) {
-            return params.genomes[ params.genome ][ attribute ]
+        if (params.genomes[params.genome].containsKey(attribute)) {
+            return params.genomes[params.genome][attribute]
         }
     }
     return null
@@ -188,11 +185,7 @@ def getGenomeAttribute(attribute) {
 //
 def genomeExistsError() {
     if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-        def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-            "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-            "  Currently, the available genome keys are:\n" +
-            "  ${params.genomes.keySet().join(", ")}\n" +
-            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + "  Currently, the available genome keys are:\n" + "  ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
         error(error_string)
     }
 }
@@ -205,24 +198,24 @@ def toolCitationText() {
     def citation_text = "Tools used in the workflow included: " + [
         "BEDTools (Quinlan 2010)",
         "Bowtie 2 (Langmead 2012)",
-        "BWA-MEM (Li 2013)", // TODO if bwamem
-        "BWA-MEM2 (Vasimuddin 2019)", // TODO if bwamem2
+        "BWA-MEM (Li 2013)",
+        "BWA-MEM2 (Vasimuddin 2019)",
         "deepTools (Ramírez 2016)",
         "FastQC (Andrews 2010)",
-        "FastP (Chen 2018)", // TODO if trimming
+        "FastP (Chen 2018)",
         "featureCounts (Liao 2013)",
         "GffRead (Pertea 2013)",
-        "HISAT2 (Kim 2019)", // TODO if hisat2
-        "HOMER (Heinz 2010)", // TODO if homer
+        "HISAT2 (Kim 2019)",
+        "HOMER (Heinz 2010)",
         "MultiQC (Ewels et al. 2016)",
-        "PINTS (Yao 2022)", // TODO if pints
+        "PINTS (Yao 2022)",
         "preseq (Daley 2013)",
         "RSeQC (Wang 2012)",
         "SAMTools (Li 2009)",
-        "STAR (Dobin 2013)", // TODO if STAR
+        "STAR (Dobin 2013)",
         "UMI-tools (Li 2009)",
         "Genomic Alignments (Lawrence 2013)",
-        "groHMM (Chae 2015)", // TODO if grohmm
+        "groHMM (Chae 2015)",
         "."
     ].join(', ').trim()
 
     return citation_text
 }
 
 def toolBibliographyText() {
  • " + [ "Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824.", "Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359 (2012). doi: 10.1038/nmeth.1923.", - "Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 2013. doi: 10.48550/arXiv.1303.3997", // TODO if bwamem - "M. Vasimuddin, S. Misra, H. Li and S. Aluru, Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems, 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 2019, pp. 314-324. doi: 10.1109/IPDPS.2019.00041.", // TODO if bwamem2 + "Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 2013. doi: 10.48550/arXiv.1303.3997", + "M. Vasimuddin, S. Misra, H. Li and S. Aluru, Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems, 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 2019, pp. 314-324. doi: 10.1109/IPDPS.2019.00041.", "Ramírez, Fidel, Devon P. Ryan, Björn Grüning, Vivek Bhardwaj, Fabian Kilpert, Andreas S. Richter, Steffen Heyne, Friederike Dündar, and Thomas Manke. deepTools2: A next Generation Web Server for Deep-Sequencing Data Analysis. Nucleic Acids Research (2016). doi:10.1093/nar/gkw257.", "Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086. PubMed Central PMCID: PMC6129281", "Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].", "Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677.", "Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: 10.12688/f1000research.23297.2. eCollection 2020. PubMed PMID: 32489650; PubMed Central PMCID: PMC7222033.", - "Kim D, Paggi JM, Park C, Bennett C, Salzberg SL. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. 2019 Aug;37(8):907-915. doi: 10.1038/s41587-019-0201-4. Epub 2019 Aug 2. PubMed PMID: 31375807.", // TODO if hisat2 - "Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities. Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432", // TODO if homer + "Kim D, Paggi JM, Park C, Bennett C, Salzberg SL. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. 2019 Aug;37(8):907-915. doi: 10.1038/s41587-019-0201-4. Epub 2019 Aug 2. PubMed PMID: 31375807.", + "Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities. Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432", "Ewels P, Magnusson M, Lundin S, Käller M. 
MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.", - "Yao, L., Liang, J., Ozer, A. et al. A comparison of experimental assays and analytical methods for genome-wide identification of active enhancers. Nat Biotechnol 40, 1056–1065 (2022). https://doi.org/10.1038/s41587-022-01211-7", // TODO if pints + "Yao, L., Liang, J., Ozer, A. et al. A comparison of experimental assays and analytical methods for genome-wide identification of active enhancers. Nat Biotechnol 40, 1056–1065 (2022). https://doi.org/10.1038/s41587-022-01211-7", "Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374.", "Wang L, Wang S, Li W. RSeQC: quality control of RNA-seq experiments Bioinformatics. 2012 Aug 15;28(16):2184-5. doi: 10.1093/bioinformatics/bts356. Epub 2012 Jun 27. PubMed PMID: 22743226.", "Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002.", - "Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905.", // TODO if STAR + "Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905.", "Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976.", "Lawrence M, Huber W, Pagès H, Aboyoun P, Carlson M, Gentleman R, Morgan M, Carey V (2013). “Software for Computing and Annotating Genomic Ranges.” PLoS Computational Biology, 9. doi: 10.1371/journal.pcbi.1003118, http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003118.", - "Chae M, Danko CG, Kraus WL (2015). “groHMM: a computational tool for identifying unannotated and cell type-specific transcription units from global run-on sequencing data.” BMC Bioinformatics, 16(222).", // TODO if grohmm + "Chae M, Danko CG, Kraus WL (2015). “groHMM: a computational tool for identifying unannotated and cell type-specific transcription units from global run-on sequencing data.” BMC Bioinformatics, 16(222)." ].join('
  • ').trim() return reference_text @@ -272,10 +265,13 @@ def methodsDescriptionText(mqc_methods_yaml) { def temp_doi_ref = "" def manifest_doi = meta.manifest_map.doi.tokenize(",") manifest_doi.each { doi_ref -> - temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references @@ -288,9 +284,8 @@ def methodsDescriptionText(mqc_methods_yaml) { def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() } - diff --git a/workflows/nascent.nf b/workflows/nascent.nf index bb1eb1b9..724bf1bb 100644 --- a/workflows/nascent.nf +++ b/workflows/nascent.nf @@ -4,40 +4,38 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { BED2SAF } from '../modules/local/bed2saf' - -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { ALIGN_BWAMEM2 } from '../subworkflows/local/align_bwamem2/main' -include { ALIGN_DRAGMAP } from '../subworkflows/local/align_dragmap/main' -include { QUALITY_CONTROL } from '../subworkflows/local/quality_control.nf' -include { COVERAGE_GRAPHS } from '../subworkflows/local/coverage_graphs.nf' -include { TRANSCRIPT_INDENTIFICATION } from '../subworkflows/local/transcript_identification' - -include { FASTP } from '../modules/nf-core/fastp/main' -include { - UNTAR as UNTAR_HISAT2_INDEX - UNTAR as UNTAR_STAR_INDEX -} from '../modules/nf-core/untar/main' -include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main' -include { - SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_GENE - SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_PREDICTED } from '../modules/nf-core/subread/featurecounts/main' - -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nascent_pipeline' +include { BED2SAF } from '../modules/local/bed2saf' + +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { ALIGN_BWAMEM2 } from '../subworkflows/local/align_bwamem2/main' +include { ALIGN_DRAGMAP } from '../subworkflows/local/align_dragmap/main' +include { QUALITY_CONTROL } from '../subworkflows/local/quality_control.nf' +include { COVERAGE_GRAPHS } from '../subworkflows/local/coverage_graphs.nf' +include { TRANSCRIPT_INDENTIFICATION } from '../subworkflows/local/transcript_identification' + +include { FASTP } from '../modules/nf-core/fastp/main' +include { UNTAR as UNTAR_HISAT2_INDEX } from '../modules/nf-core/untar/main' +include { UNTAR as UNTAR_STAR_INDEX } from '../modules/nf-core/untar/main' +include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main' +include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_GENE } from '../modules/nf-core/subread/featurecounts/main' +include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_PREDICTED } from '../modules/nf-core/subread/featurecounts/main' + + +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nascent_pipeline' // // SUBWORKFLOW: Consisting entirely 
of nf-core/modules // -include { FASTQ_ALIGN_BWA } from '../subworkflows/nf-core/fastq_align_bwa/main' -include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' -include { FASTQ_ALIGN_HISAT2 } from '../subworkflows/nf-core/fastq_align_hisat2/main' -include { FASTQ_ALIGN_STAR } from '../subworkflows/nf-core/fastq_align_star/main' -include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' +include { FASTQ_ALIGN_BWA } from '../subworkflows/nf-core/fastq_align_bwa/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { FASTQ_ALIGN_HISAT2 } from '../subworkflows/nf-core/fastq_align_hisat2/main' +include { FASTQ_ALIGN_STAR } from '../subworkflows/nf-core/fastq_align_star/main' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +44,8 @@ include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../subworkflows/nf-core/bam_ */ workflow NASCENT { - take: - ch_samplesheet // channel: samplesheet read in from --input + ch_samplesheet // channel: samplesheet read in from --input ch_fasta ch_gtf ch_gff @@ -61,7 +58,6 @@ workflow NASCENT { ch_star_index ch_uniqmap - main: ch_versions = Channel.empty() @@ -72,8 +68,10 @@ workflow NASCENT { // HACK Rework this because of nf-validation def prepareToolIndices = [] - if (!params.skip_alignment) { prepareToolIndices << params.aligner } - PREPARE_GENOME ( + if (!params.skip_alignment) { + prepareToolIndices << params.aligner + } + PREPARE_GENOME( prepareToolIndices, ch_fasta, ch_gtf, @@ -83,26 +81,27 @@ workflow NASCENT { ch_bwamem2_index, ch_dragmap, ch_bowtie2_index, - ch_hisat2_index, + ch_hisat2_index ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - ch_fasta = PREPARE_GENOME.out.fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } + ch_fasta = PREPARE_GENOME.out.fasta.map { fasta -> [[id: fasta.baseName], fasta] } // // MODULE: Run FastQC // - FASTQC ( + FASTQC( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) ch_reads = Channel.empty() - if(!params.skip_trimming) { - FASTP ( ch_samplesheet, [], false, false, false ) + if (!params.skip_trimming) { + FASTP(ch_samplesheet, [], false, false, false) ch_reads = FASTP.out.reads ch_versions = ch_versions.mix(FASTP.out.versions.first()) - } else { + } + else { ch_reads = ch_samplesheet } @@ -119,11 +118,11 @@ workflow NASCENT { ch_aligner_clustering_multiqc = Channel.empty() ch_bowtie2_multiqc = Channel.empty() if (!params.skip_alignment && params.aligner == 'bwa') { - FASTQ_ALIGN_BWA ( + FASTQ_ALIGN_BWA( ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta, + ch_fasta ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam ch_genome_bai = FASTQ_ALIGN_BWA.out.bai @@ -132,12 +131,13 @@ workflow NASCENT { ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions.first()) - } else if (!params.skip_alignment && params.aligner == 'bwamem2') { - ALIGN_BWAMEM2 ( + } + else if (!params.skip_alignment && params.aligner == 'bwamem2') { + ALIGN_BWAMEM2( ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta, + ch_fasta ) ch_genome_bam = ALIGN_BWAMEM2.out.bam ch_genome_bai = ALIGN_BWAMEM2.out.bai 
@@ -146,12 +146,13 @@ workflow NASCENT { ch_samtools_idxstats = ALIGN_BWAMEM2.out.idxstats ch_versions = ch_versions.mix(ALIGN_BWAMEM2.out.versions) - } else if (!params.skip_alignment && params.aligner == 'dragmap') { - ALIGN_DRAGMAP ( + } + else if (!params.skip_alignment && params.aligner == 'dragmap') { + ALIGN_DRAGMAP( ch_reads, PREPARE_GENOME.out.dragmap, false, - ch_fasta, + ch_fasta ) ch_genome_bam = ALIGN_DRAGMAP.out.bam ch_genome_bai = ALIGN_DRAGMAP.out.bai @@ -160,13 +161,14 @@ workflow NASCENT { ch_samtools_idxstats = ALIGN_DRAGMAP.out.idxstats ch_versions = ch_versions.mix(ALIGN_DRAGMAP.out.versions) - } else if (!params.skip_alignment && params.aligner == 'bowtie2') { - FASTQ_ALIGN_BOWTIE2 ( + } + else if (!params.skip_alignment && params.aligner == 'bowtie2') { + FASTQ_ALIGN_BOWTIE2( ch_reads, PREPARE_GENOME.out.bowtie2_index, false, false, - ch_fasta, + ch_fasta ) ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam ch_genome_bai = FASTQ_ALIGN_BOWTIE2.out.bai @@ -176,20 +178,22 @@ workflow NASCENT { ch_bowtie2_multiqc = FASTQ_ALIGN_BOWTIE2.out.log_out ch_versions = ch_versions.mix(FASTQ_ALIGN_BOWTIE2.out.versions) - } else if (!params.skip_alignment && params.aligner == 'hisat2') { + } + else if (!params.skip_alignment && params.aligner == 'hisat2') { if (ch_hisat2_index.endsWith('.tar.gz')) { - ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], ch_hisat2_index ] ).untar + ch_hisat2_index = UNTAR_HISAT2_INDEX([[:], ch_hisat2_index]).untar ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions) - } else { + } + else { // TODO Give the meta from basename or genome? - ch_hisat2_index = [ [meta: "Genome"], file(ch_hisat2_index) ] + ch_hisat2_index = [[meta: "Genome"], file(ch_hisat2_index)] } - FASTQ_ALIGN_HISAT2 ( + FASTQ_ALIGN_HISAT2( ch_reads, ch_hisat2_index, - [[:],[]], - ch_fasta, + [[:], []], + ch_fasta ) ch_genome_bam = FASTQ_ALIGN_HISAT2.out.bam ch_genome_bai = FASTQ_ALIGN_HISAT2.out.bai @@ -199,42 +203,45 @@ workflow NASCENT { ch_HISAT2_multiqc = FASTQ_ALIGN_HISAT2.out.summary ch_versions = ch_versions.mix(FASTQ_ALIGN_HISAT2.out.versions) - } else if (!params.skip_alignment && params.aligner == 'star') { - if(!ch_star_index) { - ch_star_index = STAR_GENOMEGENERATE ( + } + else if (!params.skip_alignment && params.aligner == 'star') { + if (!ch_star_index) { + ch_star_index = STAR_GENOMEGENERATE( ch_fasta, PREPARE_GENOME.out.gtf.map { [[:], it] } ).index - } else if (ch_star_index.endsWith('.tar.gz')) { - ch_star_index = UNTAR_STAR_INDEX ( [ [:], ch_star_index ] ).untar + } + else if (ch_star_index.endsWith('.tar.gz')) { + ch_star_index = UNTAR_STAR_INDEX([[:], ch_star_index]).untar ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) - } else { + } + else { // TODO Give the meta from basename or genome? 
- ch_star_index = [ [meta: "Genome"], file(ch_star_index) ]
+ ch_star_index = [[meta: "Genome"], file(ch_star_index)]
 }
- FASTQ_ALIGN_STAR (
+ FASTQ_ALIGN_STAR(
 ch_reads,
 ch_star_index,
- PREPARE_GENOME.out.gtf.map { [ [:], it ] },
+ PREPARE_GENOME.out.gtf.map { [[:], it] },
 false,
 '',
- '', // TODO params.seq_center ?:
+ '',
 ch_fasta,
- Channel.of([[:], []]),
+ Channel.of([[:], []])
 )
- ch_genome_bam = FASTQ_ALIGN_STAR.out.bam
- ch_genome_bai = FASTQ_ALIGN_STAR.out.bai
+ ch_genome_bam = FASTQ_ALIGN_STAR.out.bam
+ ch_genome_bai = FASTQ_ALIGN_STAR.out.bai
 ch_transcriptome_bam = FASTQ_ALIGN_STAR.out.bam_transcript
- ch_star_log = FASTQ_ALIGN_STAR.out.log_final
- ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.stats.collect{it[1]})
- ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.flagstat.collect{it[1]})
- ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.idxstats.collect{it[1]})
- ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect{it[1]})
+ ch_star_log = FASTQ_ALIGN_STAR.out.log_final
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.stats.collect { it[1] })
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.flagstat.collect { it[1] })
+ ch_multiqc_files = ch_multiqc_files.mix(FASTQ_ALIGN_STAR.out.idxstats.collect { it[1] })
+ ch_multiqc_files = ch_multiqc_files.mix(ch_star_log.collect { it[1] })
 }
- if(params.with_umi) {
- BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS (
+ if (params.with_umi) {
+ BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS(
 ch_genome_bam.join(ch_genome_bai, by: [0]),
 params.umitools_dedup_stats
 )
@@ -249,13 +256,13 @@ workflow NASCENT {
 ch_genome_bam_bai = ch_genome_bam.join(ch_genome_bai, by: [0], remainder: true)
- QUALITY_CONTROL (
+ QUALITY_CONTROL(
 ch_genome_bam_bai,
 PREPARE_GENOME.out.gene_bed
 )
 ch_versions = ch_versions.mix(QUALITY_CONTROL.out.versions)
- COVERAGE_GRAPHS (
+ COVERAGE_GRAPHS(
 ch_genome_bam_bai,
 PREPARE_GENOME.out.chrom_sizes,
 PREPARE_GENOME.out.fasta,
@@ -266,31 +273,33 @@ workflow NASCENT {
 //
 // SUBWORKFLOW: Transcript identification
 //
- ch_genome_bam.map {
- meta, bam ->
- fmeta = meta.findAll { it.key != 'read_group' }
- // Split and take the first element
- fmeta.id = fmeta.id.split('_')[0]
- [ fmeta, bam ] }
+ ch_genome_bam
+ .map { meta, bam ->
+ fmeta = meta.findAll { it.key != 'read_group' }
+ // Split and take the first element
+ fmeta.id = fmeta.id.split('_')[0]
+ [fmeta, bam]
+ }
 .groupTuple(by: [0])
- .map { it -> [ it[0], it[1].flatten() ] }
+ .map { it -> [it[0], it[1].flatten()] }
 .set { ch_group_bam }
 // Group the index files with bams
- ch_genome_bai.map {
- meta, bai ->
- fmeta = meta.findAll { it.key != 'read_group' }
- // Split and take the first element
- fmeta.id = fmeta.id.split('_')[0]
- [ fmeta, bai ] }
+ ch_genome_bai
+ .map { meta, bai ->
+ fmeta = meta.findAll { it.key != 'read_group' }
+ // Split and take the first element
+ fmeta.id = fmeta.id.split('_')[0]
+ [fmeta, bai]
+ }
 .groupTuple(by: [0])
- .map { it -> [ it[0], it[1].flatten() ] }
+ .map { it -> [it[0], it[1].flatten()] }
 .set { ch_group_bai }
 ch_group_bam_bai = ch_group_bam.join(ch_group_bai, by: [0])
 ch_gxf = ch_gff ?
ch_gff : PREPARE_GENOME.out.gtf - TRANSCRIPT_INDENTIFICATION ( + TRANSCRIPT_INDENTIFICATION( ch_group_bam_bai, ch_gxf, PREPARE_GENOME.out.fasta, @@ -302,16 +311,16 @@ workflow NASCENT { ch_homer_multiqc = ch_homer_multiqc.mix(TRANSCRIPT_INDENTIFICATION.out.homer_tagdir) ch_versions = ch_versions.mix(TRANSCRIPT_INDENTIFICATION.out.versions) - SUBREAD_FEATURECOUNTS_PREDICTED ( + SUBREAD_FEATURECOUNTS_PREDICTED( ch_group_bam.combine( - BED2SAF ( + BED2SAF( TRANSCRIPT_INDENTIFICATION.out.transcript_beds ).saf.map { it[1] } ) ) ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS_PREDICTED.out.versions.first()) - SUBREAD_FEATURECOUNTS_GENE ( + SUBREAD_FEATURECOUNTS_GENE( ch_group_bam.combine(PREPARE_GENOME.out.gtf) ) ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS_GENE.out.versions.first()) @@ -322,34 +331,41 @@ workflow NASCENT { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'nascent_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_' + 'nascent_software_' + 'mqc_' + 'versions.yml', sort: true, newLine: true - ).set { ch_collated_versions } + ) + .set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? 
file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( @@ -359,38 +375,32 @@ workflow NASCENT { ) ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_bowtie2_multiqc.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_stats.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_flagstat.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_idxstats.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_ccurve.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_lcextrap.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readdistribution_txt.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_seq_xls.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_pos_xls.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.inferexperiment_txt.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_grohmm_multiqc.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_homer_multiqc.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_PREDICTED.out.summary.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_GENE.out.summary.collect{it[1]}.ifEmpty([])) - - MULTIQC ( + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_bowtie2_multiqc.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_stats.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_flagstat.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_idxstats.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_ccurve.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_lcextrap.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readdistribution_txt.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_seq_xls.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_pos_xls.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.inferexperiment_txt.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_grohmm_multiqc.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_homer_multiqc.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_PREDICTED.out.summary.collect { it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_GENE.out.summary.collect { it[1] }.ifEmpty([])) + + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), 
ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [], + [] ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/
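The replicate-grouping hunks rewritten above depend on Groovy maps comparing by value: once the replicate suffix is stripped from `meta.id`, meta maps that reduce to the same base id collapse into a single `groupTuple` key. A standalone sketch of that pattern, runnable on its own (the sample ids 'GM_rep1'/'GM_rep2' and BAM paths are assumptions for illustration, not pipeline inputs):

    // Strip the replicate suffix from meta.id, then gather every BAM
    // that shares the resulting base id into one grouped tuple.
    workflow {
        Channel
            .of(
                [[id: 'GM_rep1'], file('GM_rep1.bam')], // hypothetical ids/paths
                [[id: 'GM_rep2'], file('GM_rep2.bam')]
            )
            .map { meta, bam ->
                def fmeta = meta.findAll { it.key != 'read_group' }
                fmeta.id = fmeta.id.split('_')[0] // 'GM_rep1' -> 'GM'
                [fmeta, bam]
            }
            .groupTuple(by: [0]) // -> [[id:'GM'], [GM_rep1.bam, GM_rep2.bam]]
            .map { it -> [it[0], it[1].flatten()] }
            .view()
    }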