minor changes to respond to #19 (#29)

Merged 10 commits on Apr 15, 2021

5 changes: 5 additions & 0 deletions .nf-core-lint.yml
@@ -0,0 +1,5 @@
## NOTE - after nf-core/tools release 1.14 delete this line and
## uncomment the ones below. See https://github.com/nf-core/tools/pull/1019
nextflow_config: False
# nextflow_config:
# - params.input
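
For reference, a sketch of what this file is meant to look like once nf-core/tools 1.14 is released: the uncommented form of the lines above, which keeps the `nextflow_config` lint check but ignores it only for the removed `params.input`.

```yaml
# post-1.14 form (sketch, per the comment above)
nextflow_config:
  - params.input
```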
10 changes: 6 additions & 4 deletions docs/usage.md
@@ -9,15 +9,17 @@
General usage:

```bash
nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --ensembl_name homo_sapiens
nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --taxonomy 9606 --decoy
```

## Running the pipeline
This command will download the ENSEMBL human proteome and append the decoy database to it.

The typical command for running the pipeline is as follows:
## Adding non canonical proteins

The main purpose of the pgdb pipeline is to add non-canonical proteins to the database, including variants, ncRNAs, and altORFs:

```bash
nextflow run nf-core/pgdb --ensembl_name homo_sapiens --altorfs -profile docker
nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy -profile docker
```

This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
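
The individual sources can be combined in one run; an illustrative command (a sketch, with flags taken from the defaults in `nextflow.config`):

```bash
nextflow run nf-core/pgdb --taxonomy 9606 --ncrna --pseudogenes --altorfs --decoy -profile docker
```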
33 changes: 6 additions & 27 deletions main.nf
@@ -61,7 +61,7 @@ if (params.ensembl_name == "homo_sapiens"){
}

// Pipeline checks
if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" || params.cosmic_password=="")){
if ((params.cosmic || params.cosmic_celllines) && (!params.cosmic_user_name || !params.cosmic_password)){
exit 1, "User name and password have to be provided in order to download COSMIC data. Please first register in the COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)."
}
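
The rewritten guard relies on Groovy truth: both `null` (the new default in `nextflow.config`) and an empty string evaluate to false, so `!params.cosmic_user_name` covers either case. A minimal standalone sketch:

```groovy
// Groovy truth: null and the empty string are both falsy,
// so a single negation handles "unset" and "explicitly empty" alike
assert !null
assert !''
assert 'someuser'  // any non-empty string is truthy
```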

@@ -211,8 +211,6 @@ process merge_cdnas {
*/
process add_ncrna {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.ncrna
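
The same `publishDir` removal repeats across the processes below. As a point of Nextflow semantics rather than anything specific to this PR: without `publishDir`, a task's outputs stay under the `work/` directory and reach later steps only through output channels. A hypothetical DSL1 sketch of the effect:

```groovy
// hypothetical process with no publishDir: out.fa is not copied to
// params.outdir, it lives under work/ and flows on via demo_out
demo_in = Channel.fromPath('db.fa')

process demo_no_publish {
    input:
    file db from demo_in

    output:
    file 'out.fa' into demo_out

    script:
    """
    cat $db > out.fa
    """
}
```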

@@ -241,8 +239,6 @@ merged_databases = ensembl_protein_database.mix(optional_ncrna)
*/
process add_pseudogenes {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.pseudogenes

@@ -272,8 +268,6 @@ merged_databases = merged_databases.mix(optional_pseudogenes)
*/
process add_altorfs {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.altorfs

@@ -331,8 +325,6 @@ process cosmic_download {
*/
process cosmic_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cosmic

@@ -362,8 +354,6 @@ merged_databases = merged_databases.mix(cosmic_proteindbs)
*/
process cosmic_celllines_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cosmic_celllines

@@ -494,13 +484,10 @@ process gtf_to_fasta {
"""
}

//vcf_file = Channel.fromPath(params.vcf_file)
vcf_file = params.vcf_file ? Channel.fromPath(params.vcf_file, checkIfExists: true) : Channel.empty()

process vcf_proteinDB {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.vcf
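
A note on the channel pattern a few lines up (DSL1 semantics, not specific to this diff): when `--vcf_file` is not supplied, `Channel.empty()` feeds `vcf_proteinDB` nothing, so the process simply never executes, while `checkIfExists: true` makes a bad path fail at channel-creation time instead of mid-run. A hypothetical standalone sketch:

```groovy
// hypothetical demo of the optional-input idiom used above
params.maybe_file = null  // e.g. overridden with --maybe_file data.vcf

maybe_ch = params.maybe_file
    ? Channel.fromPath(params.maybe_file, checkIfExists: true)
    : Channel.empty()

process consume_maybe {
    input:
    file f from maybe_ch

    script:
    """
    echo "got $f"
    """
}
// with maybe_file unset, consume_maybe is skipped entirely: a process
// whose input channel emits nothing never runs
```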

@@ -712,8 +699,6 @@ process download_all_cbioportal {
*/
process cbioportal_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cbioportal

@@ -746,8 +731,6 @@ merged_databases = merged_databases.mix(cBioportal_proteindb)
*/
process merge_proteindbs {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

input:
file("proteindb*") from merged_databases.collect()

@@ -760,18 +743,11 @@ process merge_proteindbs {
"""
}
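
The wiring that feeds this process is the mix/collect idiom: each optional database is `mix()`ed into `merged_databases` as it is produced, and `collect()` gathers whatever actually ran into a single merging task. A standalone sketch with hypothetical channel names:

```groovy
// hypothetical demo of the mix/collect pattern used above
base_ch    = Channel.fromPath('base.fa')
optional_a = Channel.empty()           // stands in for a process whose `when:` was false
optional_b = Channel.fromPath('b.fa')

merged = base_ch.mix(optional_a, optional_b)

process merge_all {
    input:
    file('db*') from merged.collect()  // one task, however many databases arrived

    output:
    file 'merged.fa' into merged_out

    script:
    """
    cat db* > merged.fa
    """
}
```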

stop_codons = ''
if (params.add_stop_codons){
stop_codons = "--add_stop_codons"
}

/**
* clean the database: handle stop codons, strip unwanted amino acids such as '*', and remove proteins shorter than 6 AA
*/
process clean_protein_database {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.clean_database

@@ -783,6 +759,11 @@ process clean_protein_database {
file 'database_clean.fa' into clean_database_sh

script:
stop_codons = ''
if (params.add_stop_codons){
stop_codons = "--add_stop_codons"
}
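// building the flag here keeps it scoped to the task's script block
// rather than the pipeline-level global it replaces (removed above)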

"""
pypgatk_cli.py ensembl-check \\
-in "$file" \\
@@ -801,8 +782,6 @@ to_protein_decoy_ch = params.clean_database ? clean_database_sh : to_clean_ch
*/
process decoy {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.decoy

6 changes: 2 additions & 4 deletions nextflow.config
@@ -8,8 +8,6 @@
// Global default params, used in configs
params {

input = null

// process flag variables
ncrna = false
pseudogenes = false
@@ -32,8 +30,8 @@ params {
add_stop_codons = true

// data download variables
cosmic_user_name = ""
cosmic_password = ""
cosmic_user_name = null
cosmic_password = null

// config files
ensembl_downloader_config = "$projectDir/conf/ensembl_downloader_config.yaml"
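With the defaults switched from empty strings to `null`, nothing changes on the command line; credentials are passed the same way as before. An illustrative invocation with hypothetical values:

```bash
nextflow run nf-core/pgdb -profile docker --taxonomy 9606 \
    --cosmic --cosmic_user_name 'user@example.com' --cosmic_password 'mypassword'
```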
22 changes: 10 additions & 12 deletions nextflow_schema.json
@@ -155,11 +155,13 @@
"default": "",
"properties": {
"gnomad": {
"type": "boolean"
"type": "boolean",
"description": "Add gNOMAD variants to the database"
},
"gnomad_file_url": {
"type": "string",
"default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz"
"default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz",
"description": "gNOMAD url"
}
}
},
@@ -226,13 +228,6 @@
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"properties": {
"input": {
"type": "string",
"fa_icon": "fas fa-dna",
"description": "Input files.",
"help_text": "Use this to specify the location of your input files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`",
"hidden": true
},
"outdir": {
"type": "string",
"description": "The output directory where the results will be saved.",
@@ -252,7 +247,8 @@
"description": "The final protein database generated"
},
"push_s3": {
"type": "string"
"type": "string",
"description": "push data to s3"
}
}
},
@@ -325,11 +321,13 @@
"validate_params": {
"type": "boolean",
"default": true,
"hidden": true
"hidden": true,
"description": "validate params of the schema"
},
"show_hidden_params": {
"type": "string",
"hidden": true
"hidden": true,
"description": "hide params of the schema"
}
}
},