NOTE(review): this patch was re-flowed from a copy whose newlines and indentation
had been collapsed. Column alignment on the whitespace-only -/+ pairs and the
position of blank context lines are reconstructed guesses; confirm against the
working tree before applying. The main.nf hunk additionally fixes the precedence
of the subworkflow validity check (see the comment added there).

diff --git a/config/test.config b/config/test.config
index 23f22fb..db80180 100644
--- a/config/test.config
+++ b/config/test.config
@@ -1,36 +1,36 @@
 if ( params.subworkflow == 'abinitio_training' ) {
     params {
         maker_evidence_gff = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gff'
-        genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
-        species_label = 'test_species' // e.g. 'asecodes_parviclava'
-        flank_region_size = 500
-        aed_value = [ 0.3, 0.2 ]
-        locus_distance = [ 500 ]
+        genome             = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
+        species_label      = 'test_species' // e.g. 'asecodes_parviclava'
+        flank_region_size  = 500
+        aed_value          = [ 0.3, 0.2 ]
+        locus_distance     = [ 500 ]
     }
     process {
         // Trick: Fully qualified process name has higher priority than simple name
         // Otherwise settings are overridden by those in modules.config loaded after this
         withName: 'ABINITIO_TRAINING:GBK2AUGUSTUS' {
-            ext.args = '10'
+            ext.args   = '10'
         }
     }
 }
 if ( params.subworkflow == 'annotation_preprocessing' ) {
     params {
-        genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
+        genome         = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
     }
 }
 if ( params.subworkflow == 'functional_annotation' ) {
     params {
-        genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
-        gff_annotation = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gff'
+        genome         = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
+        gff_annotation = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gff'
         // blast_db_fasta = 'https://www.uniprot.org/uniprot/%3Fquery%3Dorganism%3A4932%26format%3Dfasta'
-        blast_db_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/proteomics/database/yeast_UPS.fasta'
+        blast_db_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/proteomics/database/yeast_UPS.fasta'
     }
     process {
         withName: 'FUNCTIONAL_ANNOTATION:INTERPROSCAN' {
-            cpus = 2
-            ext.args = [
+            cpus       = 2
+            ext.args   = [
                 // '--iprlookup',
                 // '--goterms',
                 // '-pa',
@@ -41,9 +41,16 @@ if ( params.subworkflow == 'functional_annotation' ) {
 }
 
 if ( params.subworkflow == 'transcript_assembly' ) {
     params {
-        reads = 'https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/SRR4238351_subsamp.fastq.gz'
-        genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
-        single_end = true
+        reads      = 'https://github.com/nf-core/test-datasets/raw/rnaseq/testdata/SRR4238351_subsamp.fastq.gz'
+        genome     = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
+        single_end = true
+    }
+}
+
+if ( params.subworkflow == 'format_validation' ) {
+    params {
+        genome         = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
+        gff_annotation = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gff'
     }
 }
diff --git a/main.nf b/main.nf
index a2eac73..b79db24 100644
--- a/main.nf
+++ b/main.nf
@@ -18,7 +18,8 @@ workflow {
 
     '''
 
-    def valid_subworkflows = [ 'abinitio_training', 'annotation_preprocessing', 'functional_annotation', 'transcript_assembly' ]
-    if( ! params.subworkflow in valid_subworkflows ){
+    def valid_subworkflows = [ 'abinitio_training', 'annotation_preprocessing', 'functional_annotation', 'transcript_assembly', 'format_validation' ]
+    // '!' binds tighter than 'in': without the parentheses this tests (!params.subworkflow) in the list and never errors
+    if( ! ( params.subworkflow in valid_subworkflows ) ){
         error """
         The parameter 'subworkflow' (value: ${params.subworkflow}) is not a valid subworkflow.
diff --git a/modules/local/embl/apivalidator.nf b/modules/local/embl/apivalidator.nf
new file mode 100644
index 0000000..d909b5e
--- /dev/null
+++ b/modules/local/embl/apivalidator.nf
@@ -0,0 +1,63 @@
+// Runs embl-api-validator on one input file. Emits the validator's exit code as
+// 'status', plus the optional outputs declared below (GFF3, 'diagnose' directory,
+// *_good.txt / *_bad.txt); which of these exist presumably depends on ext.args.
+process EMBL_APIVALIDATOR {
+    tag "$meta.id"
+    label 'process_single'
+
+    // Conda match specs pin the version with '=' (channel::package=version), not ':'
+    conda "bioconda::embl-api-validator=1.1.180"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/embl-api-validator:1.1.180--py36_0':
+        'biocontainers/embl-api-validator:1.1.180--py36_0' }"
+
+    input:
+    tuple val(meta), path(file), val(extension)
+
+    output:
+    tuple val(meta), env(status)                  , emit: status
+    tuple val(meta), path("*.gff3")               , emit: gff3         , optional: true
+    tuple val(meta), path("diagnose", type: 'dir'), emit: diagnosis    , optional: true
+    tuple val(meta), path("*_good.txt")           , emit: filtered_good, optional: true
+    tuple val(meta), path("*_bad.txt")            , emit: filtered_bad , optional: true
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Keep the validator's exit code in 'status' (picked up by env(status))
+    # instead of letting a non-zero exit abort the task.
+    status=0
+    embl-api-validator \\
+        -f $extension \\
+        $args \\
+        -p ${prefix} \\
+        $file \\
+        || status=\$?
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        embl-api-validator: \$(embl-api-validator -version)
+    END_VERSIONS
+    """
+
+    stub:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def mkdir_diagnose = args.contains('-fix_diagnose')? 'mkdir diagnose' : ''
+    """
+    # env(status) is a non-optional output, so the stub must define it as well
+    status=0
+    touch ${prefix}.gff3
+    $mkdir_diagnose
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        embl-api-validator: \$(embl-api-validator -version)
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/format_validation/main.nf b/subworkflows/format_validation/main.nf
index e69de29..8636f00 100644
--- a/subworkflows/format_validation/main.nf
+++ b/subworkflows/format_validation/main.nf
@@ -0,0 +1,20 @@
+include { EMBL_APIVALIDATOR } from "$projectDir/modules/local/embl/apivalidator"
+
+// Runs EMBL_APIVALIDATOR on the annotation file given by params.gff_annotation.
+workflow FORMAT_VALIDATION {
+
+    main:
+    log.info """
+        Format validation workflow
+        ===================================================
+    """
+    Channel.fromPath( params.gff_annotation, checkIfExists: true )
+        .map { gff -> [ [ id: gff.baseName ], gff ] }
+        .set { gff_file }
+    // NOTE(review): 'genome' is not consumed below; kept so params.genome is still resolved with checkIfExists
+    Channel.fromPath( params.genome, checkIfExists: true )
+        .set { genome }
+
+    // The file's extension is forwarded as the process's 'extension' input (used for the validator's -f option)
+    EMBL_APIVALIDATOR ( gff_file.map { meta, gff -> [ meta, gff, gff.getExtension() ] } )
+}
\ No newline at end of file