Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Prokka compliance mode #732

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#707](https://github.com/nf-core/mag/pull/707) - Make Bin QC a subworkflow (added by @dialvarezs)
- [#707](https://github.com/nf-core/mag/pull/707) - Added CheckM2 as an alternative bin completeness and QC tool (added by @dialvarezs)
- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs)
- [#732](https://github.com/nf-core/mag/pull/732) - Added support for Prokka's compliance mode with `--prokka_with_compliance --prokka_compliance_centre <xyz>` (reported by @audy and @Thomieh73, added by @jfy133)

### `Changed`

Expand Down
23 changes: 13 additions & 10 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ process {
"--keep_percent ${params.longreads_keep_percent}",
"--trim",
"--length_weight ${params.longreads_length_weight}",
params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '',
params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : ''
].join(' ').trim()
publishDir = [
path: { "${params.outdir}/QC_longreads/Filtlong" },
Expand All @@ -196,9 +196,9 @@ process {
}

withName: NANOQ {
ext.args = [
ext.args = [
"--min-len ${params.longreads_min_length}",
params.longreads_min_quality ? "--min-qual ${params.longreads_min_quality}": '',
params.longreads_min_quality ? "--min-qual ${params.longreads_min_quality}" : '',
"-vv"
].join(' ').trim()
publishDir = [
Expand All @@ -221,11 +221,13 @@ process {
publishDir = [
[
path: { "${params.outdir}/QC_longreads/NanoLyse" },
mode: params.publish_dir_mode, pattern: "*.log"
mode: params.publish_dir_mode,
pattern: "*.log"
],
[
path: { "${params.outdir}/QC_longreads/NanoLyse" },
mode: params.publish_dir_mode, pattern: "*_nanolyse.fastq.gz",
mode: params.publish_dir_mode,
pattern: "*_nanolyse.fastq.gz",
enabled: params.save_lambdaremoved_reads
]
]
Expand All @@ -234,8 +236,8 @@ process {

withName: CHOPPER {
ext.args2 = [
params.longreads_min_quality ? "--quality ${params.longreads_min_quality}": '',
params.longreads_min_length ? "--minlength ${params.longreads_min_length}": ''
params.longreads_min_quality ? "--quality ${params.longreads_min_quality}" : '',
params.longreads_min_length ? "--minlength ${params.longreads_min_length}" : ''
].join(' ').trim()
publishDir = [
[
Expand All @@ -250,7 +252,7 @@ process {
enabled: params.save_lambdaremoved_reads || params.save_filtered_longreads
]
]
ext.prefix = { "${meta.id}_run${meta.run}_chopper" }
ext.prefix = { "${meta.id}_run${meta.run}_chopper" }
}

withName: NANOPLOT_RAW {
Expand Down Expand Up @@ -434,7 +436,8 @@ process {
withName: CHECKM2_DATABASEDOWNLOAD {
publishDir = [
path: { "${params.outdir}/GenomeBinning/QC/CheckM2/checkm2_downloads" },
mode: params.publish_dir_mode, overwrite: false,
mode: params.publish_dir_mode,
overwrite: false,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_checkm2_data
]
Expand Down Expand Up @@ -509,7 +512,7 @@ process {
}

withName: PROKKA {
ext.args = "--metagenome"
ext.args = { params.prokka_with_compliance ? "--metagenome --compliant --centre ${params.prokka_compliance_centre}" : "--metagenome" }
publishDir = [path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}

Expand Down
4 changes: 4 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,8 @@ params {

// Skip CONCOCT due to timeout issues
skip_concoct = true

// Set Prokka compliance mode to allow metaSPAdes bins to be annotated
prokka_with_compliance = true
prokka_compliance_centre = "nfcore"
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ params {
min_length_unbinned_contigs = 1000000
max_unbinned_contigs = 100
skip_prokka = false
prokka_with_compliance = false
prokka_compliance_centre = null

// assembly options
coassemble_group = false
Expand Down
19 changes: 14 additions & 5 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -503,23 +503,23 @@
},
"gtdbtk_min_completeness": {
"type": "number",
"default": 50.0,
"default": 50,
"description": "Min. bin completeness (in %) required to apply GTDB-tk classification.",
"help_text": "Completeness assessed with BUSCO analysis (100% - %Missing). Must be greater than 0 (min. 0.01) to avoid GTDB-tk errors. If too low, GTDB-tk classification results can be impaired due to not enough marker genes!",
"minimum": 0.01,
"maximum": 100
},
"gtdbtk_max_contamination": {
"type": "number",
"default": 10.0,
"default": 10,
"description": "Max. bin contamination (in %) allowed to apply GTDB-tk classification.",
"help_text": "Contamination approximated based on BUSCO analysis (%Complete and duplicated). If too high, GTDB-tk classification results can be impaired due to contamination!",
"minimum": 0,
"maximum": 100
},
"gtdbtk_min_perc_aa": {
"type": "number",
"default": 10.0,
"default": 10,
"description": "Min. fraction of AA (in %) in the MSA for bins to be kept.",
"minimum": 0,
"maximum": 100
Expand Down Expand Up @@ -597,6 +597,16 @@
"type": "boolean",
"description": "Skip Prodigal gene prediction"
},
"prokka_with_compliance": {
"type": "boolean",
"help_text": "Sometimes Prokka will complain that your contig names are too long and fail.\n\nThis particularly happens with metaSPAdes assemblies.\n\nYou can turn on this flag which will tell Prokka to truncate the contig names for you.\nHowever this also requires you to specify a sequencing centre name (specified with `--prokka_compliance_centre`).\n\n:::warning\nTruncating contig names may make it harder to associated contig annotations with their original contigs!\n:::\n",
"description": "Turn on Prokka complicance mode for truncating contig names for NCBI/ENA compatibility."
},
"prokka_compliance_centre": {
"type": "string",
"help_text": "Specify the sequencing centre name for making NCBI Genbank/ENA compatible annotation files (required when specifying `--prokka_with_compliance`).",
"description": "Specify sequencing centre name required for Prokka's compliance mode."
},
"skip_prokka": {
"type": "boolean",
"description": "Skip Prokka genome annotation."
Expand Down Expand Up @@ -718,8 +728,7 @@
"exclude_unbins_from_postbinning": {
"type": "boolean",
"description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).",
"help": "If you're not interested in assemby results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks.",
"default": false
"help": "If you're not interested in assemby results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks."
}
}
},
Expand Down
7 changes: 6 additions & 1 deletion subworkflows/local/utils_nfcore_mag_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -309,13 +309,18 @@ def validateInputParameters(hybrid) {
error('[nf-core/mag] ERROR: Invalid parameter combination: parameter --save_cat_db specified, but not --cat_db_generate! Note also that the parameter --save_cat_db does not work in combination with --cat_db.')
}

// Chech MetaEuk db paramaters
// Check MetaEuk db parameters
if (params.metaeuk_mmseqs_db && params.metaeuk_db) {
error('[nf-core/mag] ERROR: Invalid parameter combination: both --metaeuk_mmseqs_db and --metaeuk_db are specified! Please specify either --metaeuk_mmseqs_db or --metaeuk_db.')
}
if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) {
error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!')
}

// Check Prokka parameters
if (params.prokka_with_compliance && !params.prokka_compliance_centre) {
error('[nf-core/mag] ERROR: Invalid parameter combination: running PROKKA with compliance mode requires a centre name specified with `--prokka_compliance_centre <XYZ>`!')
}
}

//
Expand Down
Loading