From 293f23350a34de024cdfad045dc873a0a9d4c3b4 Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Wed, 14 Apr 2021 18:33:14 +0100
Subject: [PATCH 1/9] remove input.

---
 docs/usage.md        | 11 ++++++-----
 nextflow.config      |  2 --
 nextflow_schema.json | 22 ++++++++++------------
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 625fa160..03ba6bdf 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -9,17 +9,18 @@
 General usage:
 
 ```bash
-nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --ensembl_name homo_sapiens
+nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --taxonomy 9606 --decoy
 ```
 
-## Running the pipeline
+This command will download the ENSEMBL human proteome and attach the decoy database to it.
 
-The typical command for running the pipeline is as follows:
+## Adding non-canonical proteins
+
+The main purpose of the pgdb pipeline is to add non-canonical proteins to the database, including variants, ncRNAs, and altORFs:
 
 ```bash
-nextflow run nf-core/pgdb --taxonomy 9606 --altorfs -profile docker
+nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy  -profile docker
 ```
-
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
 
 Note that the pipeline will create the following files in your working directory:
diff --git a/nextflow.config b/nextflow.config
index 530c6861..18477c28 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,8 +8,6 @@
 // Global default params, used in configs
 params {
 
-  input = null
-
   // process flag variables
   ncrna = false
   pseudogenes = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 688fd392..631bb438 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -155,11 +155,13 @@
             "default": "",
             "properties": {
                 "gnomad": {
-                    "type": "boolean"
+                    "type": "boolean",
+                    "description": "Add gnomAD variants to the database"
                 },
                 "gnomad_file_url": {
                     "type": "string",
-                    "default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz"
+                    "default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz",
+                    "description": "URL of the gnomAD VCF file"
                 }
             }
         },
@@ -226,13 +228,6 @@
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
             "properties": {
-                "input": {
-                    "type": "string",
-                    "fa_icon": "fas fa-dna",
-                    "description": "Input files.",
-                    "help_text": "Use this to specify the location of your input files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`",
-                    "hidden": true
-                },
                 "outdir": {
                     "type": "string",
                     "description": "The output directory where the results will be saved.",
@@ -252,7 +247,8 @@
                     "description": "The final protein database generated"
                 },
                 "push_s3": {
-                    "type": "string"
+                    "type": "string",
+                    "description": "push data to s3"
                 }
             }
         },
@@ -325,11 +321,13 @@
                 "validate_params": {
                     "type": "boolean",
                     "default": true,
-                    "hidden": true
+                    "hidden": true,
+                    "description": "Validate the pipeline parameters against the schema"
                 },
                 "show_hidden_params": {
                     "type": "string",
-                    "hidden": true
+                    "hidden": true,
+                    "description": "Show the hidden params of the schema"
                 }
             }
         },

From b034ece6b0107da6a9331a897b72018767d05407 Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Wed, 14 Apr 2021 20:24:02 +0100
Subject: [PATCH 2/9] ZCAT removed

---
 main.nf | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/main.nf b/main.nf
index aebcf9d2..856e43bf 100644
--- a/main.nf
+++ b/main.nf
@@ -74,10 +74,6 @@ if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" |
 	exit 1, "User name and password has to be provided. In order to be able to download COSMIC data. Please first register in COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)."
 }
 
-// Pipeline OS-specific commands
-ZCAT = (System.properties['os.name'] == 'Mac OS X' ? 'gzcat' : 'zcat')
-
-
 /**
  * Download data from ensembl for the particular species.
  */

From b3a4075bdc672cc22bd474c3d7644f1e8a63eec1 Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:25:01 +0100
Subject: [PATCH 3/9] small changes

---
 main.nf | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/main.nf b/main.nf
index 6718c094..9b4671e6 100644
--- a/main.nf
+++ b/main.nf
@@ -760,11 +760,6 @@ process merge_proteindbs {
     """
 }
 
-stop_codons = ''
-if (params.add_stop_codons){
-    stop_codons = "--add_stop_codons"
-}
-
 /**
  * clean the database for stop codons, and unwanted AA like: *, also remove proteins with less than 6 AA
  */
@@ -784,6 +779,11 @@ process clean_protein_database {
 
     script:
     """
+    stop_codons = ''
+    if (params.add_stop_codons){
+       stop_codons = "--add_stop_codons"
+    }
+
     pypgatk_cli.py ensembl-check \\
         -in "$file" \\
         --config_file "$e" \\

From f8ea73fecdb6b441edfd26c6c8879993617d935f Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:26:10 +0100
Subject: [PATCH 4/9] small changes

---
 main.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/main.nf b/main.nf
index 9b4671e6..0ffd4a8d 100644
--- a/main.nf
+++ b/main.nf
@@ -494,7 +494,6 @@ process gtf_to_fasta {
     """
 }
 
-//vcf_file = Channel.fromPath(params.vcf_file)
 vcf_file = params.vcf_file ? Channel.fromPath(params.vcf_file, checkIfExists: true) : Channel.empty()
 
 process vcf_proteinDB {

From 4f65b9f6c1bebef4e1472a9a55bee302043122ea Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:31:54 +0100
Subject: [PATCH 5/9] small changes

---
 main.nf         | 2 +-
 nextflow.config | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/main.nf b/main.nf
index 0ffd4a8d..355f5ce2 100644
--- a/main.nf
+++ b/main.nf
@@ -61,7 +61,7 @@ if (params.ensembl_name == "homo_sapiens"){
 }
 
 // Pipeline checks
-if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" || params.cosmic_password=="")){
+if ((params.cosmic || params.cosmic_celllines) && (!params.cosmic_user_name || !params.cosmic_password)){
     exit 1, "User name and password has to be provided. In order to be able to download COSMIC data. Please first register in COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)."
 }
 
diff --git a/nextflow.config b/nextflow.config
index 726f6f62..f21dafc6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -30,8 +30,8 @@ params {
   add_stop_codons = true
 
   // data download variables
-  cosmic_user_name = ""
-  cosmic_password = ""
+  cosmic_user_name = null
+  cosmic_password = null
 
   // config files
   ensembl_downloader_config = "$projectDir/conf/ensembl_downloader_config.yaml"

From 39a11a59280e50239e12481d86e856773792ec5d Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:34:48 +0100
Subject: [PATCH 6/9] input skip test

---
 .nf-core-lint.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .nf-core-lint.yml

diff --git a/.nf-core-lint.yml b/.nf-core-lint.yml
new file mode 100644
index 00000000..be600998
--- /dev/null
+++ b/.nf-core-lint.yml
@@ -0,0 +1,5 @@
+## NOTE - after nf-core/tools release 1.14, delete the `nextflow_config: False` line
+## and uncomment the ones below it. See https://github.com/nf-core/tools/pull/1019
+nextflow_config: False
+# nextflow_config:
+#  - params.input

From 9414d021f00468599dcf5141f01fbc2f458b6f2e Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:40:37 +0100
Subject: [PATCH 7/9] remove the intermediate files

---
 main.nf | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/main.nf b/main.nf
index 355f5ce2..ae9ab731 100644
--- a/main.nf
+++ b/main.nf
@@ -211,8 +211,6 @@ process merge_cdnas {
  */
 process add_ncrna {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.ncrna
 
@@ -241,8 +239,6 @@ merged_databases = ensembl_protein_database.mix(optional_ncrna)
  */
 process add_pseudogenes {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.pseudogenes
 
@@ -272,8 +268,6 @@ merged_databases = merged_databases.mix(optional_pseudogenes)
  */
 process add_altorfs {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.altorfs
 
@@ -331,8 +325,6 @@ process cosmic_download {
 */
 process cosmic_proteindb {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.cosmic
 
@@ -362,8 +354,6 @@ merged_databases = merged_databases.mix(cosmic_proteindbs)
 */
 process cosmic_celllines_proteindb {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.cosmic_celllines
 
@@ -498,8 +488,6 @@ vcf_file = params.vcf_file ? Channel.fromPath(params.vcf_file, checkIfExists: tr
 
 process vcf_proteinDB {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.vcf
 
@@ -711,8 +699,6 @@ process download_all_cbioportal {
  */
 process cbioportal_proteindb {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.cbioportal
 
@@ -745,8 +731,6 @@ merged_databases = merged_databases.mix(cBioportal_proteindb)
  */
 process merge_proteindbs {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     input:
     file("proteindb*") from merged_databases.collect()
 
@@ -764,8 +748,6 @@ process merge_proteindbs {
  */
 process clean_protein_database {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.clean_database
 
@@ -800,8 +782,6 @@ to_protein_decoy_ch = params.clean_database ? clean_database_sh : to_clean_ch
  */
 process decoy {
 
-    publishDir "${params.outdir}", mode: 'copy', overwrite: true
-
     when:
     params.decoy
 

From b2e80c98b9f1f33e01912fbeb1940dc1c92580fc Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:54:45 +0100
Subject: [PATCH 8/9] remove the intermediate files

---
 docs/usage.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/usage.md b/docs/usage.md
index 03ba6bdf..a4a160a8 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -21,6 +21,7 @@ Te main purpose of the pgdb pipeline to add non-canonical proteins to the databa
 ```bash
 nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy  -profile docker
 ```
+
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
 
 Note that the pipeline will create the following files in your working directory:

From 137f9e1c6da8e2f27b66d450df20a93291651c6a Mon Sep 17 00:00:00 2001
From: ypriverol <ypriverol@gmail.com>
Date: Thu, 15 Apr 2021 08:59:35 +0100
Subject: [PATCH 9/9] remove the intermediate files

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index ae9ab731..ccb4d3dd 100644
--- a/main.nf
+++ b/main.nf
@@ -759,12 +759,12 @@ process clean_protein_database {
     file 'database_clean.fa' into clean_database_sh
 
     script:
-    """
     stop_codons = ''
     if (params.add_stop_codons){
        stop_codons = "--add_stop_codons"
     }
 
+    """
     pypgatk_cli.py ensembl-check \\
         -in "$file" \\
         --config_file "$e" \\