From 293f23350a34de024cdfad045dc873a0a9d4c3b4 Mon Sep 17 00:00:00 2001 From: ypriverol Date: Wed, 14 Apr 2021 18:33:14 +0100 Subject: [PATCH 1/9] remove input. --- docs/usage.md | 11 ++++++----- nextflow.config | 2 -- nextflow_schema.json | 22 ++++++++++------------ 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 625fa160..03ba6bdf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -9,17 +9,18 @@ General usage: ```bash -nextflow run nf-core/pgdb -profile --ensembl_name homo_sapiens +nextflow run nf-core/pgdb -profile --taxonomy 9606 --decoy ``` -## Running the pipeline +This command will download the ENSEMBL human proteome and attach the decoy database to it. -The typical command for running the pipeline is as follows: +## Adding non-canonical proteins + +The main purpose of the pgdb pipeline is to add non-canonical proteins to the database, including variants, ncRNAs, and altORFs: ```bash -nextflow run nf-core/pgdb --taxonomy 9606 --altorfs -profile docker +nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy -profile docker ``` - This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
Note that the pipeline will create the following files in your working directory: diff --git a/nextflow.config b/nextflow.config index 530c6861..18477c28 100644 --- a/nextflow.config +++ b/nextflow.config @@ -8,8 +8,6 @@ // Global default params, used in configs params { - input = null - // process flag variables ncrna = false pseudogenes = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 688fd392..631bb438 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -155,11 +155,13 @@ "default": "", "properties": { "gnomad": { - "type": "boolean" + "type": "boolean", + "description": "Add gnomAD variants to the database" }, "gnomad_file_url": { "type": "string", - "default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz" + "default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz", + "description": "URL of the gnomAD VCF file" } } }, @@ -226,13 +228,6 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "properties": { - "input": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Input files.", - "help_text": "Use this to specify the location of your input files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. 
When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`", - "hidden": true - }, "outdir": { "type": "string", "description": "The output directory where the results will be saved.", @@ -252,7 +247,8 @@ "description": "The final protein database generated" }, "push_s3": { - "type": "string" + "type": "string", + "description": "push data to s3" } } }, @@ -325,11 +321,13 @@ "validate_params": { "type": "boolean", "default": true, - "hidden": true + "hidden": true, + "description": "validate params of the schema" }, "show_hidden_params": { "type": "string", - "hidden": true + "hidden": true, + "description": "show hidden params of the schema" } } }, From b034ece6b0107da6a9331a897b72018767d05407 Mon Sep 17 00:00:00 2001 From: ypriverol Date: Wed, 14 Apr 2021 20:24:02 +0100 Subject: [PATCH 2/9] ZCAT removed --- main.nf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/main.nf b/main.nf index aebcf9d2..856e43bf 100644 --- a/main.nf +++ b/main.nf @@ -74,10 +74,6 @@ if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" | exit 1, "User name and password has to be provided. In order to be able to download COSMIC data. Please first register in COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)." } -// Pipeline OS-specific commands -ZCAT = (System.properties['os.name'] == 'Mac OS X' ? 'gzcat' : 'zcat') - - /** * Download data from ensembl for the particular species. 
*/ From b3a4075bdc672cc22bd474c3d7644f1e8a63eec1 Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:25:01 +0100 Subject: [PATCH 3/9] small changes --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 6718c094..9b4671e6 100644 --- a/main.nf +++ b/main.nf @@ -760,11 +760,6 @@ process merge_proteindbs { """ } -stop_codons = '' -if (params.add_stop_codons){ - stop_codons = "--add_stop_codons" -} - /** * clean the database for stop codons, and unwanted AA like: *, also remove proteins with less than 6 AA */ @@ -784,6 +779,11 @@ process clean_protein_database { script: """ + stop_codons = '' + if (params.add_stop_codons){ + stop_codons = "--add_stop_codons" + } + pypgatk_cli.py ensembl-check \\ -in "$file" \\ --config_file "$e" \\ From f8ea73fecdb6b441edfd26c6c8879993617d935f Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:26:10 +0100 Subject: [PATCH 4/9] small changes --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index 9b4671e6..0ffd4a8d 100644 --- a/main.nf +++ b/main.nf @@ -494,7 +494,6 @@ process gtf_to_fasta { """ } -//vcf_file = Channel.fromPath(params.vcf_file) vcf_file = params.vcf_file ? 
Channel.fromPath(params.vcf_file, checkIfExists: true) : Channel.empty() process vcf_proteinDB { From 4f65b9f6c1bebef4e1472a9a55bee302043122ea Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:31:54 +0100 Subject: [PATCH 5/9] small changes --- main.nf | 2 +- nextflow.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 0ffd4a8d..355f5ce2 100644 --- a/main.nf +++ b/main.nf @@ -61,7 +61,7 @@ if (params.ensembl_name == "homo_sapiens"){ } // Pipeline checks -if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" || params.cosmic_password=="")){ +if ((params.cosmic || params.cosmic_celllines) && (!params.cosmic_user_name || !params.cosmic_password)){ exit 1, "User name and password has to be provided. In order to be able to download COSMIC data. Please first register in COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)." } diff --git a/nextflow.config b/nextflow.config index 726f6f62..f21dafc6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,8 +30,8 @@ params { add_stop_codons = true // data download variables - cosmic_user_name = "" - cosmic_password = "" + cosmic_user_name = null + cosmic_password = null // config files ensembl_downloader_config = "$projectDir/conf/ensembl_downloader_config.yaml" From 39a11a59280e50239e12481d86e856773792ec5d Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:34:48 +0100 Subject: [PATCH 6/9] input skip test --- .nf-core-lint.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .nf-core-lint.yml diff --git a/.nf-core-lint.yml b/.nf-core-lint.yml new file mode 100644 index 00000000..be600998 --- /dev/null +++ b/.nf-core-lint.yml @@ -0,0 +1,5 @@ +## NOTE - after nf-core/tools release 1.14 delete this line and +## uncomment the ones below. 
See https://github.com/nf-core/tools/pull/1019 +nextflow_config: False +# nextflow_config: +# - params.input From 9414d021f00468599dcf5141f01fbc2f458b6f2e Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:40:37 +0100 Subject: [PATCH 7/9] remove the intermedia files --- main.nf | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/main.nf b/main.nf index 355f5ce2..ae9ab731 100644 --- a/main.nf +++ b/main.nf @@ -211,8 +211,6 @@ process merge_cdnas { */ process add_ncrna { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.ncrna @@ -241,8 +239,6 @@ merged_databases = ensembl_protein_database.mix(optional_ncrna) */ process add_pseudogenes { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.pseudogenes @@ -272,8 +268,6 @@ merged_databases = merged_databases.mix(optional_pseudogenes) */ process add_altorfs { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.altorfs @@ -331,8 +325,6 @@ process cosmic_download { */ process cosmic_proteindb { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.cosmic @@ -362,8 +354,6 @@ merged_databases = merged_databases.mix(cosmic_proteindbs) */ process cosmic_celllines_proteindb { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.cosmic_celllines @@ -498,8 +488,6 @@ vcf_file = params.vcf_file ? 
Channel.fromPath(params.vcf_file, checkIfExists: tr process vcf_proteinDB { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.vcf @@ -711,8 +699,6 @@ process download_all_cbioportal { */ process cbioportal_proteindb { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.cbioportal @@ -745,8 +731,6 @@ merged_databases = merged_databases.mix(cBioportal_proteindb) */ process merge_proteindbs { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - input: file("proteindb*") from merged_databases.collect() @@ -764,8 +748,6 @@ process merge_proteindbs { */ process clean_protein_database { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.clean_database @@ -800,8 +782,6 @@ to_protein_decoy_ch = params.clean_database ? clean_database_sh : to_clean_ch */ process decoy { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - when: params.decoy From b2e80c98b9f1f33e01912fbeb1940dc1c92580fc Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:54:45 +0100 Subject: [PATCH 8/9] remove the intermedia files --- docs/usage.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/usage.md b/docs/usage.md index 03ba6bdf..a4a160a8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -21,6 +21,7 @@ Te main purpose of the pgdb pipeline to add non-canonical proteins to the databa ```bash nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy -profile docker ``` + This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
Note that the pipeline will create the following files in your working directory: From 137f9e1c6da8e2f27b66d450df20a93291651c6a Mon Sep 17 00:00:00 2001 From: ypriverol Date: Thu, 15 Apr 2021 08:59:35 +0100 Subject: [PATCH 9/9] remove the intermedia files --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index ae9ab731..ccb4d3dd 100644 --- a/main.nf +++ b/main.nf @@ -759,12 +759,12 @@ process clean_protein_database { file 'database_clean.fa' into clean_database_sh script: - """ stop_codons = '' if (params.add_stop_codons){ stop_codons = "--add_stop_codons" } + """ pypgatk_cli.py ensembl-check \\ -in "$file" \\ --config_file "$e" \\