nf-core · maxulysse · Dec 27, 2024 · Dec 27, 2024 · Dec 27, 2024
@@ -58,6 +58,7 @@ Initial release of nf-core/references, created with the [nf-core](https://nf-co.
 - [63](https://github.com/nf-core/references/pull/63) - Unpack gff even when gtf is present
 - [64](https://github.com/nf-core/references/pull/64) - Improve documentation
 - [68](https://github.com/nf-core/references/pull/68) - Minor refactoring
+- [69](https://github.com/nf-core/references/pull/69) - Better comments
 
 ### Fixed
 

@@ -3,32 +3,48 @@ workflow ASSET_TO_CHANNEL {
     asset // channel: [meta, fasta]
 
     main:
-
     // All the files and meta data are contained in the meta map (except for fasta)
     // They are extracted out of the meta map in their own channel in this subworkflow
     // When adding a new field in the assets/schema_input.json, also add it in the meta map
-    // And in this scrip, add a new map operation and a new output corresponding to this input
-
-    // If any of the asset does not exist, then we return null
-    // That way, the channel will be empty and does not trigger anything
+    // And in this script, add a new branch and a new output corresponding to this input
+    // And in the emit, add the new output to the channel
 
+    // Only keep the actual meta data in the meta map
+    // Add a field here if it is a relevant meta data
     def reduce = { meta -> meta.subMap(['genome', 'id', 'source', 'source_vcf', 'source_version', 'species']) }
 
     intervals_bed_branch = asset.branch { meta, _fasta ->
         file: meta.intervals_bed
         return [reduce(meta), meta.intervals_bed]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     intervals_bed = intervals_bed_branch.file
 
-
-    // If ends with .gz, decompress it
-    // If any of the asset exists, then adding run_tools to false and skip the asset creation from the fasta file
     fasta_branch = asset.branch { meta, fasta_ ->
         file: fasta_
-        return [reduce(meta) + [decompress_fasta: fasta_.endsWith('.gz') ?: false] + [run_bowtie1: meta.bowtie1_index ? false : true] + [run_bowtie2: meta.bowtie2_index ? false : true] + [run_bwamem1: meta.bwamem1_index ? false : true] + [run_bwamem2: meta.bwamem2_index ? false : true] + [run_dragmap: meta.dragmap_hashtable ? false : true] + [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true] + [run_gatkdict: meta.fasta_dict ? false : true] + [run_hisat2: meta.hisat2_index ? false : true] + [run_intervals: meta.intervals_bed ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_msisenpro: meta.msisensorpro_list ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], fasta_]
+        // If the fasta path ends with .gz, flag it for decompression
+        def meta_extra = [decompress_fasta: fasta_.endsWith('.gz') ?: false]
+        // If an asset already exists, set its run_<tool> flag to false to skip creating it from the fasta file (keys must match the meta.run_* lookups downstream)
+        meta_extra += [run_bowtie1: meta.bowtie1_index ? false : true]
+        meta_extra += [run_bowtie2: meta.bowtie2_index ? false : true]
+        meta_extra += [run_bwamem1: meta.bwamem1_index ? false : true]
+        meta_extra += [run_bwamem2: meta.bwamem2_index ? false : true]
+        meta_extra += [run_dragmap: meta.dragmap_hashtable ? false : true]
+        meta_extra += [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true]
+        meta_extra += [run_createsequencedictionary: meta.fasta_dict ? false : true]
+        meta_extra += [run_hisat2: meta.hisat2_index ? false : true]
+        meta_extra += [run_intervals: meta.intervals_bed ? false : true]
+        meta_extra += [run_kallisto: meta.kallisto_index ? false : true]
+        meta_extra += [run_msisensorpro: meta.msisensorpro_list ? false : true]
+        meta_extra += [run_rsem: meta.rsem_index ? false : true]
+        meta_extra += [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true]
+        meta_extra += [run_salmon: meta.salmon_index ? false : true]
+        meta_extra += [run_star: meta.star_index ? false : true]
+        return [reduce(meta) + meta_extra, fasta_]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     fasta = fasta_branch.file
@@ -38,6 +54,7 @@ workflow ASSET_TO_CHANNEL {
         file: meta.fasta_dict
         return [reduce(meta), meta.fasta_dict]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     fasta_dict = fasta_dict_branch.file
@@ -46,8 +63,11 @@ workflow ASSET_TO_CHANNEL {
     // If we have intervals_bed, then we don't need to run faidx
     fasta_fai_branch = asset.branch { meta, _fasta ->
         file: meta.fasta_fai
-        return [reduce(meta) + [run_intervals: meta.intervals_bed ? false : true], meta.fasta_fai]
+        // If we have intervals_bed, then we don't need to build intervals from the fasta_fai
+        def meta_extra = [run_intervals: meta.intervals_bed ? false : true]
+        return [reduce(meta) + meta_extra, meta.fasta_fai]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     fasta_fai = fasta_fai_branch.file
@@ -57,28 +77,38 @@ workflow ASSET_TO_CHANNEL {
         file: meta.fasta_sizes
         return [reduce(meta), meta.fasta_sizes]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     fasta_sizes = fasta_sizes_branch.file
 
 
-    // If ends with .gz, decompress it
-    // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
     gff_branch = asset.branch { meta, fasta_ ->
         file: meta.gff
-        return [reduce(meta) + [decompress_gff: meta.gff.endsWith('.gz') ?: false] + [run_gffread: fasta_ && !meta.gtf ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gff]
+        // If ends with .gz, decompress it
+        def meta_extra = [decompress_gff: meta.gff.endsWith('.gz') ?: false]
+        // If a derived asset already exists, set its run_<tool> flag to false to skip creating it from the annotation derived file
+        // (gff, gtf or transcript_fasta)
+        meta_extra += [run_gffread: fasta_ && !meta.gtf ?: false]
+        meta_extra += [run_hisat2: meta.splice_sites ? false : true]
+        return [reduce(meta) + meta_extra, meta.gff]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     gff = gff_branch.file
 
 
-    // If ends with .gz, decompress it
-    // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
     gtf_branch = asset.branch { meta, _fasta ->
         file: meta.gtf
-        return [reduce(meta) + [decompress_gtf: meta.gtf.endsWith('.gz') ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gtf]
+        // If ends with .gz, decompress it
+        def meta_extra = [decompress_gtf: meta.gtf.endsWith('.gz') ?: false]
+        // If a derived asset already exists, set its run_<tool> flag to false to skip creating it from the annotation derived file
+        // (gff, gtf or transcript_fasta)
+        meta_extra += [run_hisat2: meta.splice_sites ? false : true]
+        return [reduce(meta) + meta_extra, meta.gtf]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     gtf = gtf_branch.file
@@ -88,30 +118,39 @@ workflow ASSET_TO_CHANNEL {
         file: meta.splice_sites
         return [reduce(meta), meta.splice_sites]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     splice_sites = splice_sites_branch.file
 
-
-    // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
     transcript_fasta_branch = asset.branch { meta, _fasta ->
         file: meta.transcript_fasta
-        return [reduce(meta) + [run_hisat2: meta.hisat2_index ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], meta.transcript_fasta]
+        // If a derived asset already exists, set its run_<tool> flag to false to skip creating it from the annotation derived file
+        // (gff, gtf or transcript_fasta)
+        def meta_extra = [run_hisat2: meta.hisat2_index ? false : true]
+        meta_extra += [run_kallisto: meta.kallisto_index ? false : true]
+        meta_extra += [run_rsem: meta.rsem_index ? false : true]
+        meta_extra += [run_salmon: meta.salmon_index ? false : true]
+        meta_extra += [run_star: meta.star_index ? false : true]
+        return [reduce(meta) + meta_extra, meta.transcript_fasta]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
     transcript_fasta = transcript_fasta_branch.file
 
 
-    // Using transpose here because we want to catch vcf with globs in the path because of nf-core/Sarek
-    // return a file, because we can catch globs this way, but it create issues with publishing
-    // If we already have the vcf_tbi, then we don't need to index the vcf
     vcf_branch = asset.branch { meta, _fasta ->
         file: meta.vcf
-        return [reduce(meta) + [run_tabix: meta.vcf_tbi ? false : true], file(meta.vcf)]
+        // If we already have the vcf_tbi, then we don't need to index the vcf
+        def meta_extra = [run_tabix: meta.vcf_tbi ? false : true]
+        // Return a file, because we can catch globs this way, but it creates issues with publishing
+        return [reduce(meta) + meta_extra, file(meta.vcf)]
         other: true
+        // If the asset doesn't exist, then we return nothing
         return null
     }
+    // Using transpose here because we want to catch vcf with globs in the path because of nf-core/Sarek
     vcf = vcf_branch.file.transpose()
 
     emit:

@@ -32,11 +32,12 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
     versions = Channel.empty()
 
     if (run_hisat2 || run_kallisto || run_rsem || run_rsem_make_transcript_fasta || run_salmon || run_star) {
+        // Do not run GFFREAD if the condition is false
         gff_gffread = gff.map { meta, gff_ -> meta.run_gffread ? [meta, gff_] : null }
 
         GFFREAD(
             gff_gffread,
-            []
+            [],
         )
 
         versions = versions.mix(GFFREAD.out.versions)
@@ -47,6 +48,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
             .map { meta, gtf_ -> gtf_[1] ? [meta, gtf_[1]] : [meta, gtf_] }
 
         if (run_hisat2 || run_hisat2_extractsplicesites) {
+            // Do not run HISAT2_EXTRACTSPLICESITES if the condition is false
             gtf_hisat2 = gtf.map { meta, gtf_ -> meta.run_hisat2 ? [meta, gtf_] : null }
 
             HISAT2_EXTRACTSPLICESITES(gtf_hisat2)
@@ -55,12 +57,13 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
             splice_sites = splice_sites.mix(HISAT2_EXTRACTSPLICESITES.out.txt)
 
             if (run_hisat2) {
+                // Do not run HISAT2_BUILD if the condition is false
                 fasta_hisat2 = fasta.map { meta, fasta_ -> meta.run_hisat2 ? [meta, fasta_] : null }
 
                 HISAT2_BUILD(
                     fasta_hisat2,
                     gtf,
-                    splice_sites
+                    splice_sites,
                 )
 
                 hisat2_index = HISAT2_BUILD.out.index
@@ -70,17 +73,19 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
         }
 
         if (run_kallisto || run_rsem_make_transcript_fasta || run_salmon) {
+            // Do not run MAKE_TRANSCRIPTS_FASTA if the condition is false
             fasta_make_transcripts_fasta = fasta.map { meta, fasta_ -> meta.run_rsem_make_transcript_fasta ? [meta, fasta_] : null }
 
             MAKE_TRANSCRIPTS_FASTA(
                 fasta_make_transcripts_fasta,
-                gtf
+                gtf,
             )
             versions = versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)
 
             transcript_fasta = transcript_fasta.mix(MAKE_TRANSCRIPTS_FASTA.out.transcript_fasta)
 
             if (run_kallisto) {
+                // Do not run KALLISTO_INDEX if the condition is false
                 transcript_fasta_kallisto = transcript_fasta.map { meta, transcript_fasta_ -> meta.run_kallisto ? [meta, transcript_fasta_] : null }
 
                 KALLISTO_INDEX(transcript_fasta_kallisto)
@@ -90,13 +95,15 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
             }
 
             if (run_salmon) {
+                // Do not run SALMON_INDEX if the condition is false
                 fasta_salmon = fasta.map { meta, fasta_ -> meta.run_salmon ? [meta, fasta_] : null }
 
+                // Do not run SALMON_INDEX if the condition is false
                 transcript_fasta_salmon = transcript_fasta.map { meta, transcript_fasta_ -> meta.run_salmon ? [meta, transcript_fasta_] : null }
 
                 SALMON_INDEX(
                     fasta_salmon,
-                    transcript_fasta_salmon
+                    transcript_fasta_salmon,
                 )
 
                 salmon_index = SALMON_INDEX.out.index
@@ -105,6 +112,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
         }
 
         if (run_rsem) {
+            // Do not run RSEM_PREPAREREFERENCE_GENOME if the condition is false
             fasta_rsem = fasta.map { meta, fasta_ -> meta.run_rsem ? [meta, fasta_] : null }
 
             RSEM_PREPAREREFERENCE_GENOME(fasta_rsem, gtf)
@@ -114,6 +122,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
         }
 
         if (run_star) {
+            // Do not run STAR_GENOMEGENERATE if the condition is false
             fasta_star = fasta.map { meta, fasta_ -> meta.run_star ? [meta, fasta_] : null }
 
             STAR_GENOMEGENERATE(fasta_star, gtf)

@@ -37,6 +37,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     versions = Channel.empty()
 
     if (run_bowtie1) {
+        // Do not run BOWTIE1_BUILD if the condition is false
         fasta_bowtie1 = fasta.map { meta, fasta_ -> meta.run_bowtie1 ? [meta, fasta_] : null }
 
         BOWTIE1_BUILD(fasta_bowtie1)
@@ -46,6 +47,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_bowtie2) {
+        // Do not run BOWTIE2_BUILD if the condition is false
         fasta_bowtie2 = fasta.map { meta, fasta_ -> meta.run_bowtie2 ? [meta, fasta_] : null }
 
         BOWTIE2_BUILD(fasta_bowtie2)
@@ -55,6 +57,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_bwamem1) {
+        // Do not run BWAMEM1_INDEX if the condition is false
         fasta_bwamem1 = fasta.map { meta, fasta_ -> meta.run_bwamem1 ? [meta, fasta_] : null }
 
         BWAMEM1_INDEX(fasta_bwamem1)
@@ -64,6 +67,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_bwamem2) {
+        // Do not run BWAMEM2_INDEX if the condition is false
         fasta_bwamem2 = fasta.map { meta, fasta_ -> meta.run_bwamem2 ? [meta, fasta_] : null }
 
         BWAMEM2_INDEX(fasta_bwamem2)
@@ -73,6 +77,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_dragmap) {
+        // Do not run DRAGMAP_HASHTABLE if the condition is false
         fasta_dragmap = fasta.map { meta, fasta_ -> meta.run_dragmap ? [meta, fasta_] : null }
 
         DRAGMAP_HASHTABLE(fasta_dragmap)
@@ -82,7 +87,8 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_createsequencedictionary) {
-        fasta_gat4kdict = fasta.map { meta, fasta_ -> meta.run_gat4kdict ? [meta, fasta_] : null }
+        // Do not run GATK4_CREATESEQUENCEDICTIONARY if the condition is false
+        fasta_gat4kdict = fasta.map { meta, fasta_ -> meta.run_createsequencedictionary ? [meta, fasta_] : null }
 
         GATK4_CREATESEQUENCEDICTIONARY(fasta_gat4kdict)
 
@@ -91,19 +97,21 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_faidx || run_intervals || run_sizes) {
+        // Do not run SAMTOOLS_FAIDX if the condition is false
         fasta_samtools = fasta.map { meta, fasta_ -> meta.run_faidx ? [meta, fasta_] : null }
 
         SAMTOOLS_FAIDX(
             fasta_samtools,
             [[id: 'no_fai'], []],
-            run_sizes
+            run_sizes,
         )
 
         fasta_fai = fasta_fai.mix(SAMTOOLS_FAIDX.out.fai)
         fasta_sizes = SAMTOOLS_FAIDX.out.sizes
         versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
 
         if (run_intervals) {
+            // Do not run BUILD_INTERVALS if the condition is false
             fasta_fai_intervals = fasta_fai.map { meta, fasta_fai_ -> meta.run_intervals ? [meta, fasta_fai_] : null }
 
             BUILD_INTERVALS(fasta_fai_intervals, [])
@@ -113,6 +121,7 @@ workflow CREATE_FROM_FASTA_ONLY {
     }
 
     if (run_msisensorpro) {
+        // Do not run MSISENSORPRO_SCAN if the condition is false
         fasta_msisensorpro = fasta.map { meta, fasta_ -> meta.run_msisensorpro ? [meta, fasta_] : null }
 
         MSISENSORPRO_SCAN(fasta_msisensorpro)

@@ -11,6 +11,7 @@ workflow INDEX_VCF {
 
 
     if (run_tabix) {
+        // Do not run TABIX_TABIX if the condition is false
         vcf_tabix = vcf.map { meta, vcf_ -> meta.run_tabix ? [meta, vcf_] : null }
 
         TABIX_TABIX(vcf_tabix)

@@ -15,6 +15,7 @@ workflow UNCOMPRESS_ASSET {
     main:
     versions = Channel.empty()
 
+    // Do not run GUNZIP_* if the condition is false
     fasta = fasta.map { meta, fasta_ -> meta.decompress_fasta ? [meta, fasta_] : null }
     gff = gff.map { meta, gff_ -> meta.decompress_gff ? [meta, gff_] : null }
     gtf = gtf.map { meta, gtf_ -> meta.decompress_gtf ? [meta, gtf_] : null }