Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better comments pre-release 0.1 #69

Merged
merged 2 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Initial release of nf-core/references, created with the [nf-core](https://nf-co.
- [63](https://github.com/nf-core/references/pull/63) - Unpack gff even when gtf is present
- [64](https://github.com/nf-core/references/pull/64) - Improve documentation
- [68](https://github.com/nf-core/references/pull/68) - Minor refactoring
- [69](https://github.com/nf-core/references/pull/69) - Better comments

### Fixed

Expand Down
85 changes: 62 additions & 23 deletions subworkflows/local/asset_to_channel/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,48 @@ workflow ASSET_TO_CHANNEL {
asset // channel: [meta, fasta]

main:

// All the files and meta data are contained in the meta map (except for fasta)
// They are extracted out of the meta map in their own channel in this subworkflow
// When adding a new field in the assets/schema_input.json, also add it in the meta map
// And in this script, add a new map operation and a new output corresponding to this input

// If any of the assets does not exist, then we return null
// That way, the channel will be empty and does not trigger anything
// And in this script, add a new branch and a new output corresponding to this input
// And in the emit, add the new output to the channel

// Only keep the actual meta data in the meta map
// Add a field here if it is a relevant meta data
def reduce = { meta -> meta.subMap(['genome', 'id', 'source', 'source_vcf', 'source_version', 'species']) }

intervals_bed_branch = asset.branch { meta, _fasta ->
file: meta.intervals_bed
return [reduce(meta), meta.intervals_bed]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
intervals_bed = intervals_bed_branch.file


// If ends with .gz, decompress it
// If an asset already exists, set its run_* flag to false to skip creating that asset from the fasta file
fasta_branch = asset.branch { meta, fasta_ ->
file: fasta_
return [reduce(meta) + [decompress_fasta: fasta_.endsWith('.gz') ?: false] + [run_bowtie1: meta.bowtie1_index ? false : true] + [run_bowtie2: meta.bowtie2_index ? false : true] + [run_bwamem1: meta.bwamem1_index ? false : true] + [run_bwamem2: meta.bwamem2_index ? false : true] + [run_dragmap: meta.dragmap_hashtable ? false : true] + [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true] + [run_gatkdict: meta.fasta_dict ? false : true] + [run_hisat2: meta.hisat2_index ? false : true] + [run_intervals: meta.intervals_bed ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_msisenpro: meta.msisensorpro_list ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], fasta_]
// If ends with .gz, decompress it
def meta_extra = [decompress_fasta: fasta_.endsWith('.gz') ?: false]
// If an asset already exists, set its run_* flag to false to skip creating that asset from the fasta file
meta_extra += [run_bowtie1: meta.bowtie1_index ? false : true]
meta_extra += [run_bowtie2: meta.bowtie2_index ? false : true]
meta_extra += [run_bwamem1: meta.bwamem1_index ? false : true]
meta_extra += [run_bwamem2: meta.bwamem2_index ? false : true]
meta_extra += [run_dragmap: meta.dragmap_hashtable ? false : true]
meta_extra += [run_faidx: meta.fasta_fai && meta.fasta_sizes ? false : true]
meta_extra += [run_gatkdict: meta.fasta_dict ? false : true]
meta_extra += [run_hisat2: meta.hisat2_index ? false : true]
meta_extra += [run_intervals: meta.intervals_bed ? false : true]
meta_extra += [run_kallisto: meta.kallisto_index ? false : true]
meta_extra += [run_msisenpro: meta.msisensorpro_list ? false : true]
meta_extra += [run_rsem: meta.rsem_index ? false : true]
meta_extra += [run_rsem_make_transcript_fasta: meta.transcript_fasta ? false : true]
meta_extra += [run_salmon: meta.salmon_index ? false : true]
meta_extra += [run_star: meta.star_index ? false : true]
return [reduce(meta) + meta_extra, fasta_]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
fasta = fasta_branch.file
Expand All @@ -38,6 +54,7 @@ workflow ASSET_TO_CHANNEL {
file: meta.fasta_dict
return [reduce(meta), meta.fasta_dict]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
fasta_dict = fasta_dict_branch.file
Expand All @@ -46,8 +63,11 @@ workflow ASSET_TO_CHANNEL {
// If we have intervals_bed, then we don't need to run faidx
fasta_fai_branch = asset.branch { meta, _fasta ->
file: meta.fasta_fai
return [reduce(meta) + [run_intervals: meta.intervals_bed ? false : true], meta.fasta_fai]
// If we have intervals_bed, then we don't need to run faidx
def meta_extra = [run_intervals: meta.intervals_bed ? false : true]
return [reduce(meta) + meta_extra, meta.fasta_fai]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
fasta_fai = fasta_fai_branch.file
Expand All @@ -57,28 +77,38 @@ workflow ASSET_TO_CHANNEL {
file: meta.fasta_sizes
return [reduce(meta), meta.fasta_sizes]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
fasta_sizes = fasta_sizes_branch.file


// If ends with .gz, decompress it
// If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
gff_branch = asset.branch { meta, fasta_ ->
file: meta.gff
return [reduce(meta) + [decompress_gff: meta.gff.endsWith('.gz') ?: false] + [run_gffread: fasta_ && !meta.gtf ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gff]
// If ends with .gz, decompress it
def meta_extra = [decompress_gff: meta.gff.endsWith('.gz') ?: false]
// If an asset already exists, set the matching run_* flag to false to skip creating it from the
// annotation-derived file (gff, gtf or transcript_fasta)
meta_extra += [run_gffread: fasta_ && !meta.gtf ?: false]
meta_extra += [run_hisat2: meta.splice_sites ? false : true]
return [reduce(meta) + meta_extra, meta.gff]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
gff = gff_branch.file


// If ends with .gz, decompress it
// If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
gtf_branch = asset.branch { meta, _fasta ->
file: meta.gtf
return [reduce(meta) + [decompress_gtf: meta.gtf.endsWith('.gz') ?: false] + [run_hisat2: meta.splice_sites ? false : true], meta.gtf]
// If ends with .gz, decompress it
def meta_extra = [decompress_gtf: meta.gtf.endsWith('.gz') ?: false]
// If an asset already exists, set the matching run_* flag to false to skip creating it from the
// annotation-derived file (gff, gtf or transcript_fasta)
meta_extra += [run_hisat2: meta.splice_sites ? false : true]
return [reduce(meta) + meta_extra, meta.gtf]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
gtf = gtf_branch.file
Expand All @@ -88,30 +118,39 @@ workflow ASSET_TO_CHANNEL {
file: meta.splice_sites
return [reduce(meta), meta.splice_sites]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
splice_sites = splice_sites_branch.file


// If any of the asset exists, then adding run_tools to false and skip the asset creation from the annotation derived file (gff, gtf or transcript_fasta)
transcript_fasta_branch = asset.branch { meta, _fasta ->
file: meta.transcript_fasta
return [reduce(meta) + [run_hisat2: meta.hisat2_index ? false : true] + [run_kallisto: meta.kallisto_index ? false : true] + [run_rsem: meta.rsem_index ? false : true] + [run_salmon: meta.salmon_index ? false : true] + [run_star: meta.star_index ? false : true], meta.transcript_fasta]
// If an asset already exists, set the matching run_* flag to false to skip creating it from the
// annotation-derived file (gff, gtf or transcript_fasta)
def meta_extra = [run_hisat2: meta.hisat2_index ? false : true]
meta_extra += [run_kallisto: meta.kallisto_index ? false : true]
meta_extra += [run_rsem: meta.rsem_index ? false : true]
meta_extra += [run_salmon: meta.salmon_index ? false : true]
meta_extra += [run_star: meta.star_index ? false : true]
return [reduce(meta) + meta_extra, meta.transcript_fasta]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
transcript_fasta = transcript_fasta_branch.file


// Using transpose here because we want to catch vcf with globs in the path because of nf-core/Sarek
// Return a file, because we can catch globs this way, but it creates issues with publishing
// If we already have the vcf_tbi, then we don't need to index the vcf
vcf_branch = asset.branch { meta, _fasta ->
file: meta.vcf
return [reduce(meta) + [run_tabix: meta.vcf_tbi ? false : true], file(meta.vcf)]
// If we already have the vcf_tbi, then we don't need to index the vcf
def meta_extra = [run_tabix: meta.vcf_tbi ? false : true]
// Return a file, because we can catch globs this way, but it creates issues with publishing
return [reduce(meta) + meta_extra, file(meta.vcf)]
other: true
// If the asset doesn't exist, then we return nothing
return null
}
// Using transpose here because we want to catch vcf with globs in the path because of nf-core/Sarek
vcf = vcf_branch.file.transpose()

emit:
Expand Down
17 changes: 13 additions & 4 deletions subworkflows/local/create_from_fasta_and_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
versions = Channel.empty()

if (run_hisat2 || run_kallisto || run_rsem || run_rsem_make_transcript_fasta || run_salmon || run_star) {
// Do not run GFFREAD if the condition is false
gff_gffread = gff.map { meta, gff_ -> meta.run_gffread ? [meta, gff_] : null }

GFFREAD(
gff_gffread,
[]
[],
)

versions = versions.mix(GFFREAD.out.versions)
Expand All @@ -47,6 +48,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
.map { meta, gtf_ -> gtf_[1] ? [meta, gtf_[1]] : [meta, gtf_] }

if (run_hisat2 || run_hisat2_extractsplicesites) {
// Do not run HISAT2_EXTRACTSPLICESITES if the condition is false
gtf_hisat2 = gtf.map { meta, gtf_ -> meta.run_hisat2 ? [meta, gtf_] : null }

HISAT2_EXTRACTSPLICESITES(gtf_hisat2)
Expand All @@ -55,12 +57,13 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
splice_sites = splice_sites.mix(HISAT2_EXTRACTSPLICESITES.out.txt)

if (run_hisat2) {
// Do not run HISAT2_BUILD if the condition is false
fasta_hisat2 = fasta.map { meta, fasta_ -> meta.run_hisat2 ? [meta, fasta_] : null }

HISAT2_BUILD(
fasta_hisat2,
gtf,
splice_sites
splice_sites,
)

hisat2_index = HISAT2_BUILD.out.index
Expand All @@ -70,17 +73,19 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
}

if (run_kallisto || run_rsem_make_transcript_fasta || run_salmon) {
// Do not run MAKE_TRANSCRIPTS_FASTA if the condition is false
fasta_make_transcripts_fasta = fasta.map { meta, fasta_ -> meta.run_rsem_make_transcript_fasta ? [meta, fasta_] : null }

MAKE_TRANSCRIPTS_FASTA(
fasta_make_transcripts_fasta,
gtf
gtf,
)
versions = versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions)

transcript_fasta = transcript_fasta.mix(MAKE_TRANSCRIPTS_FASTA.out.transcript_fasta)

if (run_kallisto) {
// Do not run KALLISTO_INDEX if the condition is false
transcript_fasta_kallisto = transcript_fasta.map { meta, transcript_fasta_ -> meta.run_kallisto ? [meta, transcript_fasta_] : null }

KALLISTO_INDEX(transcript_fasta_kallisto)
Expand All @@ -90,13 +95,15 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
}

if (run_salmon) {
// Do not run SALMON_INDEX if the condition is false
fasta_salmon = fasta.map { meta, fasta_ -> meta.run_salmon ? [meta, fasta_] : null }

// Do not run SALMON_INDEX if the condition is false
transcript_fasta_salmon = transcript_fasta.map { meta, transcript_fasta_ -> meta.run_salmon ? [meta, transcript_fasta_] : null }

SALMON_INDEX(
fasta_salmon,
transcript_fasta_salmon
transcript_fasta_salmon,
)

salmon_index = SALMON_INDEX.out.index
Expand All @@ -105,6 +112,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
}

if (run_rsem) {
// Do not run RSEM_PREPAREREFERENCE_GENOME if the condition is false
fasta_rsem = fasta.map { meta, fasta_ -> meta.run_rsem ? [meta, fasta_] : null }

RSEM_PREPAREREFERENCE_GENOME(fasta_rsem, gtf)
Expand All @@ -114,6 +122,7 @@ workflow CREATE_FROM_FASTA_AND_ANNOTATION {
}

if (run_star) {
// Do not run STAR_GENOMEGENERATE if the condition is false
fasta_star = fasta.map { meta, fasta_ -> meta.run_star ? [meta, fasta_] : null }

STAR_GENOMEGENERATE(fasta_star, gtf)
Expand Down
13 changes: 11 additions & 2 deletions subworkflows/local/create_from_fasta_only/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ workflow CREATE_FROM_FASTA_ONLY {
versions = Channel.empty()

if (run_bowtie1) {
// Do not run BOWTIE1_BUILD if the condition is false
fasta_bowtie1 = fasta.map { meta, fasta_ -> meta.run_bowtie1 ? [meta, fasta_] : null }

BOWTIE1_BUILD(fasta_bowtie1)
Expand All @@ -46,6 +47,7 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_bowtie2) {
// Do not run BOWTIE2_BUILD if the condition is false
fasta_bowtie2 = fasta.map { meta, fasta_ -> meta.run_bowtie2 ? [meta, fasta_] : null }

BOWTIE2_BUILD(fasta_bowtie2)
Expand All @@ -55,6 +57,7 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_bwamem1) {
// Do not run BWAMEM1_INDEX if the condition is false
fasta_bwamem1 = fasta.map { meta, fasta_ -> meta.run_bwamem1 ? [meta, fasta_] : null }

BWAMEM1_INDEX(fasta_bwamem1)
Expand All @@ -64,6 +67,7 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_bwamem2) {
// Do not run BWAMEM2_INDEX if the condition is false
fasta_bwamem2 = fasta.map { meta, fasta_ -> meta.run_bwamem2 ? [meta, fasta_] : null }

BWAMEM2_INDEX(fasta_bwamem2)
Expand All @@ -73,6 +77,7 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_dragmap) {
// Do not run DRAGMAP_HASHTABLE if the condition is false
fasta_dragmap = fasta.map { meta, fasta_ -> meta.run_dragmap ? [meta, fasta_] : null }

DRAGMAP_HASHTABLE(fasta_dragmap)
Expand All @@ -82,7 +87,8 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_createsequencedictionary) {
fasta_gat4kdict = fasta.map { meta, fasta_ -> meta.run_gat4kdict ? [meta, fasta_] : null }
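// Do not run GATK4_CREATESEQUENCEDICTIONARY if the condition is false
// NOTE(review): ASSET_TO_CHANNEL adds the meta key `run_gatkdict`; confirm the key read below is actually set on the meta map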
fasta_gat4kdict = fasta.map { meta, fasta_ -> meta.run_createsequencedictionary ? [meta, fasta_] : null }

GATK4_CREATESEQUENCEDICTIONARY(fasta_gat4kdict)

Expand All @@ -91,19 +97,21 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_faidx || run_intervals || run_sizes) {
// Do not run SAMTOOLS_FAIDX if the condition is false
fasta_samtools = fasta.map { meta, fasta_ -> meta.run_faidx ? [meta, fasta_] : null }

SAMTOOLS_FAIDX(
fasta_samtools,
[[id: 'no_fai'], []],
run_sizes
run_sizes,
)

fasta_fai = fasta_fai.mix(SAMTOOLS_FAIDX.out.fai)
fasta_sizes = SAMTOOLS_FAIDX.out.sizes
versions = versions.mix(SAMTOOLS_FAIDX.out.versions)

if (run_intervals) {
// Do not run BUILD_INTERVALS if the condition is false
fasta_fai_intervals = fasta_fai.map { meta, fasta_fai_ -> meta.run_intervals ? [meta, fasta_fai_] : null }

BUILD_INTERVALS(fasta_fai_intervals, [])
Expand All @@ -113,6 +121,7 @@ workflow CREATE_FROM_FASTA_ONLY {
}

if (run_msisensorpro) {
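// Do not run MSISENSORPRO_SCAN if the condition is false
// NOTE(review): ASSET_TO_CHANNEL adds the meta key `run_msisenpro`, not `run_msisensorpro`; confirm the key names match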
fasta_msisensorpro = fasta.map { meta, fasta_ -> meta.run_msisensorpro ? [meta, fasta_] : null }

MSISENSORPRO_SCAN(fasta_msisensorpro)
Expand Down
1 change: 1 addition & 0 deletions subworkflows/local/index_vcf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ workflow INDEX_VCF {


if (run_tabix) {
// Do not run TABIX_TABIX if the condition is false
vcf_tabix = vcf.map { meta, vcf_ -> meta.run_tabix ? [meta, vcf_] : null }

TABIX_TABIX(vcf_tabix)
Expand Down
1 change: 1 addition & 0 deletions subworkflows/local/uncompress_asset/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ workflow UNCOMPRESS_ASSET {
main:
versions = Channel.empty()

// Do not run GUNZIP_* if the condition is false
fasta = fasta.map { meta, fasta_ -> meta.decompress_fasta ? [meta, fasta_] : null }
gff = gff.map { meta, gff_ -> meta.decompress_gff ? [meta, gff_] : null }
gtf = gtf.map { meta, gtf_ -> meta.decompress_gtf ? [meta, gtf_] : null }
Expand Down
Loading
Loading