From 1c775aeff24b4e738c5910f46379a58df4d76493 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Sun, 22 Dec 2024 11:33:53 +0100 Subject: [PATCH] Generate index out of all the generated references (#66) * add index.json * fix path * add run_tabix * file() * mix versions * update CHANGELOG * fix publish * cursor :shake_fist: * Apply suggestions from code review * update snapshot because input was changed * use branch * file() * no file() --- CHANGELOG.md | 2 +- main.nf | 202 +++++++++----------- subworkflows/local/asset_to_channel/main.nf | 89 ++++++++- subworkflows/local/index_vcf/main.nf | 2 +- tests/.nftignore | 3 +- tests/hisat2.nf.test.snap | 4 +- tests/kallisto.nf.test.snap | 4 +- tests/multiple.nf.test.snap | 4 +- tests/rnaseq.nf.test.snap | 3 +- tests/rsem.nf.test.snap | 4 +- tests/salmon.nf.test.snap | 4 +- tests/samtools.nf.test.snap | 3 + tests/sarek.nf.test.snap | 3 +- tests/tabix.nf.test.snap | 4 +- tests/wbcel235.nf.test.snap | 63 +++--- workflows/references/main.nf | 81 ++++---- 16 files changed, 261 insertions(+), 214 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37af8c05..f73b3119 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,7 +30,7 @@ Initial release of nf-core/references, created with the [nf-core](https://nf-co. - [56](https://github.com/nf-core/references/pull/56) - Add new params: kallisto_make_unique to use the --make-unique option for kallisto - [56](https://github.com/nf-core/references/pull/56) - New file assets/genomes/Caenorhabditis_elegans/NCBI/WBcel235_updated.yml, build from assets/genomes/Caenorhabditis_elegans/NCBI/WBcel235.yml - [62](https://github.com/nf-core/references/pull/62) - Added comments to the code -- [68](https://github.com/nf-core/references/pull/68) - Output vcf asset +- [66](https://github.com/nf-core/references/pull/66) - Output index ### Changed diff --git a/main.nf b/main.nf index 66340e9c..2edd4e92 100644 --- a/main.nf +++ b/main.nf @@ -100,105 +100,99 @@ workflow { } output { - 'bowtie1_index' { - path { meta, _bowtie1_index -> { _file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/BowtieIndex/version1.3.1" } } - } - 'bowtie2_index' { - path { meta, _bowtie2_index -> { _file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/Bowtie2Index/version2.5.2" } } - } - 'bwamem1_index' { - path { meta, _bwamem1_index -> { _file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/BWAIndex/version0.7.18" } } - } - 'bwamem2_index' { - path { meta, _bwamem2_index -> { _file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/BWAmem2Index/version2.2.1" } } - } - 'dragmap_hashmap' { - path { meta, _index -> { _file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/dragmap/version1.2.1" } } - } - 'fasta' { - path { meta, _fasta -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" } } - } - 'fasta_dict' { - path { meta, _fasta_dict -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" } } - } - 'fasta_fai' { - path { meta, _fasta_fai -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" } } - } - 'fasta_sizes' { - path { meta, _fasta_sizes -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" } } - } - 'gff' { - path { meta, _gff -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/Genes/${file}" } } - } - 'gtf' { - path { meta, _gtf -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/Genes/${file}" } } - } - 'hisat2_index' { - path { meta, _hisat2_index -> - { _file -> - meta.source_version == "unknown" - ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/Hisat2Index/version2.2.1" - : "${meta.species}/${meta.source}/${meta.genome}/Sequence/Hisat2Index/${meta.source_version}/version2.2.1" - } - } - } - 'intervals_bed' { - path { meta, _intervals_bed -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/intervals/${file}" } } - } - 'kallisto_index' { - path { meta, _kallisto_index -> - { file -> - meta.source_version == "unknown" - ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/KallistoIndex/version0.51.1/${file}" - : "${meta.species}/${meta.source}/${meta.genome}/Sequence/KallistoIndex/${meta.source_version}/version0.51.1/${file}" - } - } - } - 'msisensorpro_list' { - path { meta, _msisensorpro_list -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/msisensorpro/${file}" } } - } 'multiqc' { path "multiqc" } - 'rsem_index' { - path { meta, _rsem_index -> - { _file -> - meta.source_version == "unknown" - ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/RSEMIndex/version1.3.1" - : "${meta.species}/${meta.source}/${meta.genome}/Sequence/RSEMIndex/${meta.source_version}/version1.3.1" - } - } - } - 'salmon_index' { - path { meta, _salmon_index -> - { _file -> - meta.source_version == "unknown" - ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/SalmonIndex/version1.10.3" - : "${meta.species}/${meta.source}/${meta.genome}/Sequence/SalmonIndex/${meta.source_version}/version1.10.3" + 'reference' { + path { meta, _file -> + { file -> + if (meta.file == "bowtie1_index") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/BowtieIndex/version1.3.1" + } + else if (meta.file == "bowtie2_index") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/Bowtie2Index/version2.5.2" + } + else if (meta.file == "bwamem1_index") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/BWAIndex/version0.7.18" + } + else if (meta.file == "bwamem2_index") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/BWAmem2Index/version2.2.1" + } + else if (meta.file == "dragmap_hashmap") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/dragmap/version1.2.1" + } + else if (meta.file == "fasta") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" + } + else if (meta.file == "fasta_dict") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" + } + else if (meta.file == "fasta_fai") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" + } + else if (meta.file == "fasta_sizes") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/WholeGenomeFasta/${file}" + } + else if (meta.file == "gff") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/Genes/${file}" + } + else if (meta.file == "gtf") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/Genes/${file}" + } + else if (meta.file == "hisat2_index") { + meta.source_version == "unknown" + ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/Hisat2Index/version2.2.1" + : "${meta.species}/${meta.source}/${meta.genome}/Sequence/Hisat2Index/${meta.source_version}/version2.2.1" + } + else if (meta.file == "intervals_bed") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/intervals/${file}" + } + else if (meta.file == "kallisto_index") { + meta.source_version == "unknown" + ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/KallistoIndex/version0.51.1/${file}" + : "${meta.species}/${meta.source}/${meta.genome}/Sequence/KallistoIndex/${meta.source_version}/version0.51.1/${file}" + } + else if (meta.file == "msisensorpro_list") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/msisensorpro/${file}" + } + else if (meta.file == "rsem_index") { + meta.source_version == "unknown" + ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/RSEMIndex/version1.3.1/" + : "${meta.species}/${meta.source}/${meta.genome}/Sequence/RSEMIndex/${meta.source_version}/version1.3.1/" + } + else if (meta.file == "salmon_index") { + meta.source_version == "unknown" + ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/SalmonIndex/version1.10.3/" + : "${meta.species}/${meta.source}/${meta.genome}/Sequence/SalmonIndex/${meta.source_version}/version1.10.3/" + } + else if (meta.file == "splice_sites") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/SpliceSites/${file}" + } + else if (meta.file == "star_index") { + meta.source_version == "unknown" + ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/STARIndex/version2.7.11b/" + : "${meta.species}/${meta.source}/${meta.genome}/Sequence/STARIndex/${meta.source_version}/version2.7.11b/" + } + else if (meta.file == "transcript_fasta") { + "${meta.species}/${meta.source}/${meta.genome}/Sequence/TranscriptFasta/${file}" + } + else if (meta.file == "vcf") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/${meta.source_vcf}/${file}" + } + else if (meta.file == "vcf_tbi") { + "${meta.species}/${meta.source}/${meta.genome}/Annotation/${meta.source_vcf}/${file}" + } + else { + null + } } } - } - 'splice_sites' { - path { meta, _splice_sites -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/SpliceSites/${file}" } } - } - 'star_index' { - path { meta, _star_index -> - { _file -> - meta.source_version == "unknown" - ? "${meta.species}/${meta.source}/${meta.genome}/Sequence/STARIndex/version2.7.11b" - : "${meta.species}/${meta.source}/${meta.genome}/Sequence/STARIndex/${meta.source_version}/version2.7.11b" - } + + index { + path "index.json" + mapper { meta, reference -> ["${meta.file}:${reference}"] } } } - 'transcript_fasta' { - path { meta, _transcript_fasta -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Sequence/TranscriptFasta/${file}" } } - } - // 'vcf' { - // path { meta, _vcf -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/${meta.source_vcf}/${file}" } } - // } - 'vcf_tbi' { - path { meta, _vcf_tbi -> { file -> "${meta.species}/${meta.source}/${meta.genome}/Annotation/${meta.source_vcf}/${file}" } } - } } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -219,26 +213,6 @@ workflow NFCORE_REFERENCES { REFERENCES(input, tools) emit: - bowtie1_index = REFERENCES.out.bowtie1_index - bowtie2_index = REFERENCES.out.bowtie2_index - bwamem1_index = REFERENCES.out.bwamem1_index - bwamem2_index = REFERENCES.out.bwamem2_index - dragmap_hashmap = REFERENCES.out.dragmap_hashmap - fasta = REFERENCES.out.fasta - fasta_dict = REFERENCES.out.fasta_dict - fasta_fai = REFERENCES.out.fasta_fai - fasta_sizes = REFERENCES.out.fasta_sizes - gtf = REFERENCES.out.gtf - hisat2_index = REFERENCES.out.hisat2_index - intervals_bed = REFERENCES.out.intervals_bed - kallisto_index = REFERENCES.out.kallisto_index - msisensorpro_list = REFERENCES.out.msisensorpro_list - rsem_index = REFERENCES.out.rsem_index - salmon_index = REFERENCES.out.salmon_index - splice_sites = REFERENCES.out.splice_sites - star_index = REFERENCES.out.star_index - transcript_fasta = REFERENCES.out.transcript_fasta - // vcf = REFERENCES.out.vcf - vcf_tbi = REFERENCES.out.vcf_tbi - versions = REFERENCES.out.versions + reference = REFERENCES.out.reference + versions = REFERENCES.out.versions } diff --git a/subworkflows/local/asset_to_channel/main.nf b/subworkflows/local/asset_to_channel/main.nf index ee6bd64b..5e6de403 100644 --- a/subworkflows/local/asset_to_channel/main.nf +++ b/subworkflows/local/asset_to_channel/main.nf @@ -14,36 +14,105 @@ workflow ASSET_TO_CHANNEL { def reduce = { meta -> meta.subMap(['genome', 'id', 'source', 'source_vcf', 'source_version', 'species']) } - intervals_bed = asset.map { meta, _fasta -> meta.intervals_bed ? [reduce(meta), meta.intervals_bed] : null } + intervals_bed_branch = asset.branch { meta, _fasta -> + file: meta.intervals_bed + return [reduce(meta), meta.intervals_bed] + other: true + return null + } + intervals_bed = intervals_bed_branch.file + // If ends with .gz, decompress it // If any of the asset exists, then adding run_tools to false and skip the asset creation from the fasta file - fasta = asset.map { meta, fasta_ -> fasta_ ? [reduce(meta) + [decompress_fasta: fasta_.endsWith('.gz') ?: false] + [run_bowtie1: meta.bowtie1_index ? false : true] + [run_bowtie2: meta.bowtie2_index ? false : true] + [run_bwamem1: meta.bwamem1_index ? false : true] + [run_bwamem2: meta.bwamem2_index ? false : true] + [run_dragmap: meta.dragmap_hashtable ? false : true] + [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true] + [run_gatkdict: meta.fasta_dict ? false : true] + [run_hisat2: meta.hisat2_index ? false : true] + [run_intervals: meta.intervals_bed ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_msisenpro: meta.msisensorpro_list ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], fasta_] : null } + fasta_branch = asset.branch { meta, fasta_ -> + file: fasta_ + return [reduce(meta) + [decompress_fasta: fasta_.endsWith('.gz') ?: false] + [run_bowtie1: meta.bowtie1_index ? false : true] + [run_bowtie2: meta.bowtie2_index ? false : true] + [run_bwamem1: meta.bwamem1_index ? false : true] + [run_bwamem2: meta.bwamem2_index ? false : true] + [run_dragmap: meta.dragmap_hashtable ? false : true] + [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true] + [run_gatkdict: meta.fasta_dict ? false : true] + [run_hisat2: meta.hisat2_index ? false : true] + [run_intervals: meta.intervals_bed ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_msisenpro: meta.msisensorpro_list ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], fasta_] + other: true + return null + } + fasta = fasta_branch.file + + + fasta_dict_branch = asset.branch { meta, _fasta -> + file: meta.fasta_dict + return [reduce(meta), meta.fasta_dict] + other: true + return null + } + fasta_dict = fasta_dict_branch.file - fasta_dict = asset.map { meta, _fasta -> meta.fasta_dict ? [reduce(meta), meta.fasta_dict] : null } // If we have intervals_bed, then we don't need to run faidx - fasta_fai = asset.map { meta, _fasta -> meta.fasta_fai ? [reduce(meta) + [run_intervals: meta.intervals_bed ? false : true], meta.fasta_fai] : null } + fasta_fai_branch = asset.branch { meta, _fasta -> + file: meta.fasta_fai + return [reduce(meta) + [run_intervals: meta.intervals_bed ? false : true], meta.fasta_fai] + other: true + return null + } + fasta_fai = fasta_fai_branch.file + + + fasta_sizes_branch = asset.branch { meta, _fasta -> + file: meta.fasta_sizes + return [reduce(meta), meta.fasta_sizes] + other: true + return null + } + fasta_sizes = fasta_sizes_branch.file - fasta_sizes = asset.map { meta, _fasta -> meta.fasta_sizes ? [reduce(meta), meta.fasta_sizes] : null } // If ends with .gz, decompress it // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta) - gff = asset.map { meta, fasta_ -> meta.gff ? [reduce(meta) + [decompress_gff: meta.gff.endsWith('.gz') ?: false] + [run_gffread: fasta_ && !meta.gtf ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gff] : null } + gff_branch = asset.branch { meta, fasta_ -> + file: meta.gff + return [reduce(meta) + [decompress_gff: meta.gff.endsWith('.gz') ?: false] + [run_gffread: fasta_ && !meta.gtf ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gff] + other: true + return null + } + gff = gff_branch.file + // If ends with .gz, decompress it // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta) - gtf = asset.map { meta, _fasta -> meta.gtf ? [reduce(meta) + [decompress_gtf: meta.gtf.endsWith('.gz') ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gtf] : null } + gtf_branch = asset.branch { meta, _fasta -> + file: meta.gtf + return [reduce(meta) + [decompress_gtf: meta.gtf.endsWith('.gz') ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gtf] + other: true + return null + } + gtf = gtf_branch.file + + + splice_sites_branch = asset.branch { meta, _fasta -> + file: meta.splice_sites + return [reduce(meta), meta.splice_sites] + other: true + return null + } + splice_sites = splice_sites_branch.file - splice_sites = asset.map { meta, _fasta -> meta.splice_sites ? [reduce(meta), meta.splice_sites] : null } // If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta) - transcript_fasta = asset.map { meta, _fasta -> meta.transcript_fasta ? [reduce(meta) + [run_hisat2: meta.hisat2_index ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], meta.transcript_fasta] : null } + transcript_fasta_branch = asset.branch { meta, _fasta -> + file: meta.transcript_fasta + return [reduce(meta) + [run_hisat2: meta.hisat2_index ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], meta.transcript_fasta] + other: true + return null + } + transcript_fasta = transcript_fasta_branch.file + // Using transpose here because we want to catch vcf with globs in the path because of nf-core/Sarek // return a file, because we can catch globs this way, but it create issues with publishing // If we already have the vcf_tbi, then we don't need to index the vcf - vcf = asset.map { meta, _fasta -> meta.vcf ? [reduce(meta) + [run_tabix: meta.vcf_tbi ? false : true], file(meta.vcf)] : null }.transpose() + vcf_branch = asset.branch { meta, _fasta -> + file: meta.vcf + return [reduce(meta) + [run_tabix: meta.vcf_tbi ? false : true], file(meta.vcf)] + other: true + return null + } + vcf = vcf_branch.file.transpose() emit: intervals_bed // channel: [meta, *.bed] diff --git a/subworkflows/local/index_vcf/main.nf b/subworkflows/local/index_vcf/main.nf index fbd7262d..547f2917 100644 --- a/subworkflows/local/index_vcf/main.nf +++ b/subworkflows/local/index_vcf/main.nf @@ -16,7 +16,7 @@ workflow INDEX_VCF { TABIX_TABIX(vcf_tabix) vcf_tbi = TABIX_TABIX.out.tbi - versions = TABIX_TABIX.out.versions + versions = versions.mix(TABIX_TABIX.out.versions) } emit: diff --git a/tests/.nftignore b/tests/.nftignore index 9bd0a0d1..fdecbc0c 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,4 +1,3 @@ -**/kallisto **/RSEMIndex/**/Log.out **/STARIndex/**/Log.out **/SalmonIndex/**/ctable.bin @@ -9,7 +8,9 @@ **/dragmap/*/hash_table.cfg **/dragmap/*/hash_table.cfg.bin **/dragmap/*/hash_table_stats.txt +**/kallisto .DS_Store +index.json multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_general_stats.txt diff --git a/tests/hisat2.nf.test.snap b/tests/hisat2.nf.test.snap index cd3ebb81..38208d31 100644 --- a/tests/hisat2.nf.test.snap +++ b/tests/hisat2.nf.test.snap @@ -27,6 +27,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/Hisat2Index/CUSTOM/version2.2.1/GRCh38_chr21.6.ht2", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/Hisat2Index/CUSTOM/version2.2.1/GRCh38_chr21.7.ht2", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/Hisat2Index/CUSTOM/version2.2.1/GRCh38_chr21.8.ht2", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -94,6 +95,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/Hisat2Index/version2.2.1/GRCh38_chr21.8.ht2", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SpliceSites", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SpliceSites/GRCh38_chr21.splice_sites.txt", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -125,4 +127,4 @@ }, "timestamp": "2024-12-06T11:33:17.476760832" } -} \ No newline at end of file +} diff --git a/tests/kallisto.nf.test.snap b/tests/kallisto.nf.test.snap index 8e4d8083..2be7f5b8 100644 --- a/tests/kallisto.nf.test.snap +++ b/tests/kallisto.nf.test.snap @@ -31,6 +31,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/KallistoIndex/version0.51.1/kallisto", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta/genome.transcripts.fa", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -75,6 +76,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/KallistoIndex/CUSTOM", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/KallistoIndex/CUSTOM/version0.51.1", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/KallistoIndex/CUSTOM/version0.51.1/kallisto", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -96,4 +98,4 @@ }, "timestamp": "2024-12-06T11:50:22.597389596" } -} \ No newline at end of file +} diff --git a/tests/multiple.nf.test.snap b/tests/multiple.nf.test.snap index 85ebe983..f0438780 100644 --- a/tests/multiple.nf.test.snap +++ b/tests/multiple.nf.test.snap @@ -23,6 +23,7 @@ "Homo_sapiens/GATK/GRCh38/Annotation/GATK_BUNDLE", "Homo_sapiens/GATK/GRCh38/Annotation/GATK_BUNDLE/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi", "Homo_sapiens/GATK/GRCh38/Annotation/GATK_BUNDLE/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -97,6 +98,7 @@ "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/BWAIndex/version0.7.18/genome.sa", "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/WholeGenomeFasta", "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/WholeGenomeFasta/genome.fasta.fai", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -132,4 +134,4 @@ }, "timestamp": "2024-12-10T13:44:39.079626708" } -} \ No newline at end of file +} diff --git a/tests/rnaseq.nf.test.snap b/tests/rnaseq.nf.test.snap index 90c20df0..168c10a1 100644 --- a/tests/rnaseq.nf.test.snap +++ b/tests/rnaseq.nf.test.snap @@ -70,6 +70,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta/GRCh38_chr21.fa.fai", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta/GRCh38_chr21.fa.sizes", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -121,4 +122,4 @@ }, "timestamp": "2024-12-06T11:37:48.115918724" } -} \ No newline at end of file +} diff --git a/tests/rsem.nf.test.snap b/tests/rsem.nf.test.snap index d7912587..a0595adc 100644 --- a/tests/rsem.nf.test.snap +++ b/tests/rsem.nf.test.snap @@ -44,6 +44,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/RSEMIndex/CUSTOM/version1.3.1/sjdbList.fromGTF.out.tab", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/RSEMIndex/CUSTOM/version1.3.1/sjdbList.out.tab", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/RSEMIndex/CUSTOM/version1.3.1/transcriptInfo.tab", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -144,6 +145,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/RSEMIndex/version1.3.1/transcriptInfo.tab", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta/genome.transcripts.fa", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -190,4 +192,4 @@ }, "timestamp": "2024-12-06T11:41:35.396864169" } -} \ No newline at end of file +} diff --git a/tests/salmon.nf.test.snap b/tests/salmon.nf.test.snap index 99988322..c10c93b4 100644 --- a/tests/salmon.nf.test.snap +++ b/tests/salmon.nf.test.snap @@ -45,6 +45,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SalmonIndex/version1.10.3/versionInfo.json", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/TranscriptFasta/genome.transcripts.fa", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -113,6 +114,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SalmonIndex/CUSTOM/version1.10.3/refseq.bin", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SalmonIndex/CUSTOM/version1.10.3/seq.bin", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/SalmonIndex/CUSTOM/version1.10.3/versionInfo.json", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -144,4 +146,4 @@ }, "timestamp": "2024-12-06T11:57:24.351180092" } -} \ No newline at end of file +} diff --git a/tests/samtools.nf.test.snap b/tests/samtools.nf.test.snap index dfdca6ff..67172dd9 100644 --- a/tests/samtools.nf.test.snap +++ b/tests/samtools.nf.test.snap @@ -8,6 +8,7 @@ } }, [ + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -55,6 +56,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta/GRCh38_chr21.fa.fai", "Homo_sapiens/nf-core/references/GRCh38_chr21/Sequence/WholeGenomeFasta/GRCh38_chr21.fa.sizes", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -98,6 +100,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation", "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation/intervals", "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation/intervals/GRCh38_chr21.bed", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", diff --git a/tests/sarek.nf.test.snap b/tests/sarek.nf.test.snap index a0cf67e4..5a0991e2 100644 --- a/tests/sarek.nf.test.snap +++ b/tests/sarek.nf.test.snap @@ -64,6 +64,7 @@ "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/dragmap/version1.2.1/reference.bin", "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/dragmap/version1.2.1/repeat_mask.bin", "Homo_sapiens/nf-core/references/testdata.GRCh38_chr22/Sequence/dragmap/version1.2.1/str_table.bin", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -105,4 +106,4 @@ }, "timestamp": "2024-12-10T13:45:37.140557415" } -} \ No newline at end of file +} diff --git a/tests/tabix.nf.test.snap b/tests/tabix.nf.test.snap index 3688d116..c29dd74c 100644 --- a/tests/tabix.nf.test.snap +++ b/tests/tabix.nf.test.snap @@ -8,6 +8,7 @@ } }, [ + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -50,6 +51,7 @@ "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation/GATK_BUNDLE/dbsnp_146.hg38.vcf.gz.tbi", "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation/GATK_BUNDLE/gnomAD.r2.1.1.vcf.gz.tbi", "Homo_sapiens/nf-core/references/GRCh38_chr21/Annotation/GATK_BUNDLE/mills_and_1000G.indels.vcf.gz.tbi", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -74,4 +76,4 @@ }, "timestamp": "2024-12-02T11:21:55.801035083" } -} \ No newline at end of file +} diff --git a/tests/wbcel235.nf.test.snap b/tests/wbcel235.nf.test.snap index bb0d7a88..3ef72daa 100644 --- a/tests/wbcel235.nf.test.snap +++ b/tests/wbcel235.nf.test.snap @@ -101,6 +101,7 @@ "Caenorhabditis_elegans/NCBI/WBcel235/Sequence/WholeGenomeFasta", "Caenorhabditis_elegans/NCBI/WBcel235/Sequence/WholeGenomeFasta/GCF_000002985.6_WBcel235_genomic.fna", "Caenorhabditis_elegans/NCBI/WBcel235/Sequence/WholeGenomeFasta/GCF_000002985.6_WBcel235_genomic.fna.fai", + "index.json", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -113,8 +114,8 @@ "pipeline_info/nf_core_references_software_mqc_versions.yml" ], [ - "GCF_000002985.6_WBcel235_genomic.gff:md5,4ecdb82b6f15e9223aa3197f5f38be29", - "GCF_000002985.6_WBcel235_genomic.gtf:md5,7bdf9e5149fba51317db2cb2c5ba5736", + "GCF_000002985.6_WBcel235_genomic.gff:md5,1d9345a72d94695b2538afa1bdf0aa0d", + "GCF_000002985.6_WBcel235_genomic.gtf:md5,56a068b7ad02e260b3225726745109a3", "WBcel235.bed:md5,84cb3d6e8ed7c8475fc9735b19b66908", "GCF_000002985.6_WBcel235_genomic.amb:md5,a0741fa25fb4e5a52ac61ecc5b679ac2", "GCF_000002985.6_WBcel235_genomic.ann:md5,7e46a49f983e0ca1fb6ee14be12251d3", @@ -122,43 +123,43 @@ "GCF_000002985.6_WBcel235_genomic.pac:md5,1f0196045942ef7b2d94fb80b43e9c2a", "GCF_000002985.6_WBcel235_genomic.sa:md5,9bd628829e7f38cce39a4fd82f6634b2", "GCF_000002985.6_WBcel235_genomic.fna:md5,2fa2b1575d9e722f076bafcf3b755fed", - "Genome:md5,99fd8885ee27659e37e1354e1cbb71f9", - "SA:md5,a3dc27d3ad7ecc6cc88a33d9eb63fbc1", - "SAindex:md5,715dec1f872236355d0d1d0cba760b89", + "Genome:md5,1db8a7b9fb0ce216f9b488e2a4077b95", + "SA:md5,d52322ae559fe15bd61ab643d98bbc8d", + "SAindex:md5,d59bb768e3f7583d626238150db784e1", "chrLength.txt:md5,f05097b2e861d6c4ba7aabd9ab3adb57", "chrName.txt:md5,d20e5340b7c16270e610785b20af9cce", "chrNameLength.txt:md5,7ca5bd1d9d220af81b785a965ed6b839", "chrStart.txt:md5,e913b6ededd36d63662b25a20fb48a4e", - "exonGeTrInfo.tab:md5,c01bd0878faa926ccc732bd1141dbb2f", - "exonInfo.tab:md5,38e9760960405972086beb2160795294", + "exonGeTrInfo.tab:md5,6073b61cbe2f1dd04b723c18058a23e0", + "exonInfo.tab:md5,e3e9f4a742069534803fb557b4acc137", "geneInfo.tab:md5,7ca7a86a201eb23993ac3de5e46e669c", "genome.chrlist:md5,46094719dedb5edac18cbeeccd5295e8", - "genome.grp:md5,010a2500cc2318aae253b1bfd8139cae", - "genome.idx.fa:md5,c88dcb3b4a73e2d0d2c2b6327c109960", - "genome.n2g.idx.fa:md5,c88dcb3b4a73e2d0d2c2b6327c109960", - "genome.seq:md5,8500f2e8db4f3957e14ad63ebbf71fd8", - "genome.ti:md5,d4315d98c08c595d5d26d97a236a63bc", - "genome.transcripts.fa:md5,9b7c6dfcc1bb4414e7b35ebc8bcca1d8", - "genomeParameters.txt:md5,829e42c3586a4b01360d02a5975b7116", - "sjdbInfo.txt:md5,19eaa1f30bccff2e41b884da62e51786", - "sjdbList.fromGTF.out.tab:md5,f66e7cd2c8d0986b2972986164f22323", - "sjdbList.out.tab:md5,142172b0f54743f76115dc4c656dca62", - "transcriptInfo.tab:md5,96afa780b277a11bb619e37fea325fbc", - "Genome:md5,99fd8885ee27659e37e1354e1cbb71f9", - "SA:md5,a3dc27d3ad7ecc6cc88a33d9eb63fbc1", - "SAindex:md5,45af68e0cb6db30c2375b1b0babf28cb", + "genome.grp:md5,d7a8e45851f5ce8e26893ea303dc0949", + "genome.idx.fa:md5,fba07c31ec614f72cb5ba47091238037", + "genome.n2g.idx.fa:md5,fba07c31ec614f72cb5ba47091238037", + "genome.seq:md5,0da6d1c107dd0965e6c2ce85dbf127ba", + "genome.ti:md5,850c53a59585b964a3cc09538d22ea3b", + "genome.transcripts.fa:md5,059947bbc5eadbb7a22667abff2c7403", + "genomeParameters.txt:md5,4a277195f42a94802c31b448012a61d8", + "sjdbInfo.txt:md5,99c6235b60cdbaedeec24a016b4d6786", + "sjdbList.fromGTF.out.tab:md5,4f2624ef95a62a02428294eeb4e5a00a", + "sjdbList.out.tab:md5,c2fef538a9a54c9391fd4e47fc71e21e", + "transcriptInfo.tab:md5,b3355cc97ab9381c53ebb02684d26350", + "Genome:md5,1db8a7b9fb0ce216f9b488e2a4077b95", + "SA:md5,d52322ae559fe15bd61ab643d98bbc8d", + "SAindex:md5,a5dc808391e14614708f3c46c3006987", "chrLength.txt:md5,f05097b2e861d6c4ba7aabd9ab3adb57", "chrName.txt:md5,d20e5340b7c16270e610785b20af9cce", "chrNameLength.txt:md5,7ca5bd1d9d220af81b785a965ed6b839", "chrStart.txt:md5,e913b6ededd36d63662b25a20fb48a4e", - "exonGeTrInfo.tab:md5,c01bd0878faa926ccc732bd1141dbb2f", - "exonInfo.tab:md5,38e9760960405972086beb2160795294", + "exonGeTrInfo.tab:md5,6073b61cbe2f1dd04b723c18058a23e0", + "exonInfo.tab:md5,e3e9f4a742069534803fb557b4acc137", "geneInfo.tab:md5,7ca7a86a201eb23993ac3de5e46e669c", - "genomeParameters.txt:md5,7ed29e43e0a254c4f932b6724dcd7f19", - "sjdbInfo.txt:md5,19eaa1f30bccff2e41b884da62e51786", - "sjdbList.fromGTF.out.tab:md5,f66e7cd2c8d0986b2972986164f22323", - "sjdbList.out.tab:md5,142172b0f54743f76115dc4c656dca62", - "transcriptInfo.tab:md5,96afa780b277a11bb619e37fea325fbc", + "genomeParameters.txt:md5,4d276b3058026c1af299eee4a4af0576", + "sjdbInfo.txt:md5,99c6235b60cdbaedeec24a016b4d6786", + "sjdbList.fromGTF.out.tab:md5,4f2624ef95a62a02428294eeb4e5a00a", + "sjdbList.out.tab:md5,c2fef538a9a54c9391fd4e47fc71e21e", + "transcriptInfo.tab:md5,b3355cc97ab9381c53ebb02684d26350", "GCF_000002985.6_WBcel235_genomic.fna:md5,2fa2b1575d9e722f076bafcf3b755fed", "GCF_000002985.6_WBcel235_genomic.fna.fai:md5,5765b3ad41f8ad61dc582fba226214bf", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" @@ -166,8 +167,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.2" + "nextflow": "24.10.3" }, - "timestamp": "2024-12-10T13:41:27.246445713" + "timestamp": "2024-12-20T17:06:25.711035545" } -} +} \ No newline at end of file diff --git a/workflows/references/main.nf b/workflows/references/main.nf index ecf5234e..158a3ae5 100644 --- a/workflows/references/main.nf +++ b/workflows/references/main.nf @@ -62,7 +62,7 @@ workflow REFERENCES { tools.split(',').contains('faidx'), tools.split(',').contains('intervals'), tools.split(',').contains('msisensorpro'), - tools.split(',').contains('sizes') + tools.split(',').contains('sizes'), ) // Create reference assets from fasta and annotation (gff derived (so gff, gtf and transcript_fasta)) @@ -78,13 +78,13 @@ workflow REFERENCES { tools.split(',').contains('rsem'), tools.split(',').contains('rsem_make_transcript_fasta'), tools.split(',').contains('salmon'), - tools.split(',').contains('star') + tools.split(',').contains('star'), ) // Index VCF INDEX_VCF( vcf, - tools.split(',').contains('tabix') + tools.split(',').contains('tabix'), ) // This works with a mixture of input and computed assets @@ -116,52 +116,37 @@ workflow REFERENCES { versions = versions.mix(INDEX_VCF.out.versions) versions = versions.mix(UNCOMPRESS_ASSET.out.versions) + reference = Channel + .empty() + .mix( + bowtie1_index.map { meta, reference_ -> [meta + [file: 'bowtie1_index'], reference_] }, + bowtie2_index.map { meta, reference_ -> [meta + [file: 'bowtie2_index'], reference_] }, + bwamem1_index.map { meta, reference_ -> [meta + [file: 'bwamem1_index'], reference_] }, + bwamem2_index.map { meta, reference_ -> [meta + [file: 'bwamem2_index'], reference_] }, + dragmap_hashmap.map { meta, reference_ -> [meta + [file: 'dragmap_hashmap'], reference_] }, + fasta.map { meta, reference_ -> [meta + [file: 'fasta'], reference_] }, + fasta_dict.map { meta, reference_ -> [meta + [file: 'fasta_dict'], reference_] }, + fasta_fai.map { meta, reference_ -> [meta + [file: 'fasta_fai'], reference_] }, + fasta_sizes.map { meta, reference_ -> [meta + [file: 'fasta_sizes'], reference_] }, + gff.map { meta, reference_ -> [meta + [file: 'gff'], reference_] }, + gtf.map { meta, reference_ -> [meta + [file: 'gtf'], reference_] }, + hisat2_index.map { meta, reference_ -> [meta + [file: 'hisat2_index'], reference_] }, + intervals_bed.map { meta, reference_ -> [meta + [file: 'intervals_bed'], reference_] }, + kallisto_index.map { meta, reference_ -> [meta + [file: 'kallisto_index'], reference_] }, + msisensorpro_list.map { meta, reference_ -> [meta + [file: 'msisensorpro_list'], reference_] }, + rsem_index.map { meta, reference_ -> [meta + [file: 'rsem_index'], reference_] }, + salmon_index.map { meta, reference_ -> [meta + [file: 'salmon_index'], reference_] }, + splice_sites.map { meta, reference_ -> [meta + [file: 'splice_sites'], reference_] }, + star_index.map { meta, reference_ -> [meta + [file: 'star_index'], reference_] }, + transcript_fasta.map { meta, reference_ -> [meta + [file: 'transcript_fasta'], reference_] }, + // vcf.map { meta, reference_ -> [meta + [file: 'vcf'], reference_] }, + vcf_tbi.map { meta, reference_ -> [meta + [file: 'vcf_tbi'], reference_] }, + ) + emit: - bowtie1_index // channel: [meta, BowtieIndex/] - bowtie2_index // channel: [meta, Bowtie2Index/] - bwamem1_index // channel: [meta, BWAmemIndex/] - bwamem2_index // channel: [meta, BWAmem2memIndex/] - dragmap_hashmap // channel: [meta, DragmapHashtable/] - fasta // channel: [meta, *.f(ast|n)?a] - fasta_dict // channel: [meta, *.f(ast|n)?a.dict] - fasta_fai // channel: [meta, *.f(ast|n)?a.fai] - fasta_sizes // channel: [meta, *.f(ast|n)?a.sizes] - gff // channel: [meta, gff] - gtf // channel: [meta, gtf] - hisat2_index // channel: [meta, Hisat2Index/] - intervals_bed // channel: [meta, *.bed] - kallisto_index // channel: [meta, KallistoIndex] - msisensorpro_list // channel: [meta, *.list] - rsem_index // channel: [meta, RSEMIndex/] - salmon_index // channel: [meta, SalmonIndex/] - splice_sites // channel: [meta, *.splice_sites.txt] - star_index // channel: [meta, STARIndex/] - transcript_fasta // channel: [meta, *.transcripts.fasta] - // vcf // channel: [meta, *.vcf.gz] - vcf_tbi // channel: [meta, *.vcf.gz.tbi] - versions // channel: [versions.yml] + reference // channel: [meta, *] + versions // channel: [versions.yml] publish: - bowtie1_index >> 'bowtie1_index' - bowtie2_index >> 'bowtie2_index' - bwamem1_index >> 'bwamem1_index' - bwamem2_index >> 'bwamem2_index' - dragmap_hashmap >> 'dragmap_hashmap' - fasta >> 'fasta' - fasta_dict >> 'fasta_dict' - fasta_fai >> 'fasta_fai' - fasta_sizes >> 'fasta_sizes' - gff >> 'gff' - gtf >> 'gtf' - hisat2_index >> 'hisat2_index' - intervals_bed >> 'intervals_bed' - kallisto_index >> 'kallisto_index' - msisensorpro_list >> 'msisensorpro_list' - rsem_index >> 'rsem_index' - salmon_index >> 'salmon_index' - splice_sites >> 'splice_sites' - star_index >> 'star_index' - transcript_fasta >> 'transcript_fasta' - // vcf >> 'vcf' - vcf_tbi >> 'vcf_tbi' + reference >> 'reference' }