minor changes to respond to #19 (#29)

Merged 10 commits on Apr 15, 2021

5 changes: 5 additions & 0 deletions .nf-core-lint.yml
@@ -0,0 +1,5 @@
## NOTE - after nf-core/tools release 1.14 delete this line and
## uncomment the ones below. See https://github.com/nf-core/tools/pull/1019
nextflow_config: False
# nextflow_config:
# - params.input
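
For reference, a sketch of what this file is meant to look like once nf-core/tools 1.14 is released: the uncommented form of the lines above, which keeps the `nextflow_config` lint check but ignores it only for the removed `params.input`.

```yaml
# post-1.14 form (sketch, per the comment above)
nextflow_config:
  - params.input
```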
10 changes: 6 additions & 4 deletions docs/usage.md
@@ -9,15 +9,17 @@
General usage:

```bash
nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --ensembl_name homo_sapiens
nextflow run nf-core/pgdb -profile <docker/singularity/podman/conda/institute> --taxonomy 9606 --decoy
```

## Running the pipeline
This command will download the ENSEMBL human proteome and append the decoy database to it.

The typical command for running the pipeline is as follows:
## Adding non canonical proteins

The main purpose of the pgdb pipeline is to add non-canonical proteins to the database, including variants, ncRNAs, and altORFs:

```bash
nextflow run nf-core/pgdb --ensembl_name homo_sapiens --altorfs -profile docker
nextflow run nf-core/pgdb --taxonomy 9606 --altorfs --decoy -profile docker
```

This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
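
The individual sources can be combined in one run; an illustrative command (a sketch, with flags taken from the defaults in `nextflow.config`):

```bash
nextflow run nf-core/pgdb --taxonomy 9606 --ncrna --pseudogenes --altorfs --decoy -profile docker
```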
33 changes: 6 additions & 27 deletions main.nf
@@ -61,7 +61,7 @@ if (params.ensembl_name == "homo_sapiens"){
}

// Pipeline checks
if ((params.cosmic || params.cosmic_celllines) && (params.cosmic_user_name=="" || params.cosmic_password=="")){
if ((params.cosmic || params.cosmic_celllines) && (!params.cosmic_user_name || !params.cosmic_password)){
exit 1, "User name and password have to be provided in order to download COSMIC data. Please first register in the COSMIC database (https://cancer.sanger.ac.uk/cosmic/register)."
}
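
The rewritten guard relies on Groovy truth: both `null` (the new default in `nextflow.config`) and an empty string evaluate to false, so `!params.cosmic_user_name` covers either case. A minimal standalone sketch:

```groovy
// Groovy truth: null and the empty string are both falsy,
// so a single negation handles "unset" and "explicitly empty" alike
assert !null
assert !''
assert 'someuser'  // any non-empty string is truthy
```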

@@ -211,8 +211,6 @@ process merge_cdnas {
*/
process add_ncrna {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.ncrna
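
The same `publishDir` removal repeats across the processes below. As a point of Nextflow semantics rather than anything specific to this PR: without `publishDir`, a task's outputs stay under the `work/` directory and reach later steps only through output channels. A hypothetical DSL1 sketch of the effect:

```groovy
// hypothetical process with no publishDir: out.fa is not copied to
// params.outdir, it lives under work/ and flows on via demo_out
demo_in = Channel.fromPath('db.fa')

process demo_no_publish {
    input:
    file db from demo_in

    output:
    file 'out.fa' into demo_out

    script:
    """
    cat $db > out.fa
    """
}
```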

@@ -241,8 +239,6 @@ merged_databases = ensembl_protein_database.mix(optional_ncrna)
*/
process add_pseudogenes {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.pseudogenes

@@ -272,8 +268,6 @@ merged_databases = merged_databases.mix(optional_pseudogenes)
*/
process add_altorfs {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.altorfs

@@ -331,8 +325,6 @@ process cosmic_download {
*/
process cosmic_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cosmic

@@ -362,8 +354,6 @@ merged_databases = merged_databases.mix(cosmic_proteindbs)
*/
process cosmic_celllines_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cosmic_celllines

@@ -494,13 +484,10 @@ process gtf_to_fasta {
"""
}

//vcf_file = Channel.fromPath(params.vcf_file)
vcf_file = params.vcf_file ? Channel.fromPath(params.vcf_file, checkIfExists: true) : Channel.empty()

process vcf_proteinDB {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.vcf
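
A note on the channel pattern a few lines up (DSL1 semantics, not specific to this diff): when `--vcf_file` is not supplied, `Channel.empty()` feeds `vcf_proteinDB` nothing, so the process simply never executes, while `checkIfExists: true` makes a bad path fail at channel-creation time instead of mid-run. A hypothetical standalone sketch:

```groovy
// hypothetical demo of the optional-input idiom used above
params.maybe_file = null  // e.g. overridden with --maybe_file data.vcf

maybe_ch = params.maybe_file
    ? Channel.fromPath(params.maybe_file, checkIfExists: true)
    : Channel.empty()

process consume_maybe {
    input:
    file f from maybe_ch

    script:
    """
    echo "got $f"
    """
}
// with maybe_file unset, consume_maybe is skipped entirely: a process
// whose input channel emits nothing never runs
```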

@@ -712,8 +699,6 @@ process download_all_cbioportal {
*/
process cbioportal_proteindb {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.cbioportal

@@ -746,8 +731,6 @@ merged_databases = merged_databases.mix(cBioportal_proteindb)
*/
process merge_proteindbs {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

input:
file("proteindb*") from merged_databases.collect()

@@ -760,18 +743,11 @@ process merge_proteindbs {
"""
}
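
The wiring that feeds this process is the mix/collect idiom: each optional database is `mix()`ed into `merged_databases` as it is produced, and `collect()` gathers whatever actually ran into a single merging task. A standalone sketch with hypothetical channel names:

```groovy
// hypothetical demo of the mix/collect pattern used above
base_ch    = Channel.fromPath('base.fa')
optional_a = Channel.empty()           // stands in for a process whose `when:` was false
optional_b = Channel.fromPath('b.fa')

merged = base_ch.mix(optional_a, optional_b)

process merge_all {
    input:
    file('db*') from merged.collect()  // one task, however many databases arrived

    output:
    file 'merged.fa' into merged_out

    script:
    """
    cat db* > merged.fa
    """
}
```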

stop_codons = ''
if (params.add_stop_codons){
stop_codons = "--add_stop_codons"
}

/**
* clean the database: handle stop codons, strip unwanted amino acids such as '*', and remove proteins shorter than 6 AA
*/
process clean_protein_database {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.clean_database

@@ -783,6 +759,11 @@ process clean_protein_database {
file 'database_clean.fa' into clean_database_sh

script:
stop_codons = ''
if (params.add_stop_codons){
stop_codons = "--add_stop_codons"
}
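// building the flag here keeps it scoped to the task's script block
// rather than the pipeline-level global it replaces (removed above)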

"""
pypgatk_cli.py ensembl-check \\
-in "$file" \\
@@ -801,8 +782,6 @@ to_protein_decoy_ch = params.clean_database ? clean_database_sh : to_clean_ch
*/
process decoy {

publishDir "${params.outdir}", mode: 'copy', overwrite: true

when:
params.decoy

6 changes: 2 additions & 4 deletions nextflow.config
@@ -8,8 +8,6 @@
// Global default params, used in configs
params {

input = null

// process flag variables
ncrna = false
pseudogenes = false
@@ -32,8 +30,8 @@ params {
add_stop_codons = true

// data download variables
cosmic_user_name = ""
cosmic_password = ""
cosmic_user_name = null
cosmic_password = null

// config files
ensembl_downloader_config = "$projectDir/conf/ensembl_downloader_config.yaml"
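With the defaults switched from empty strings to `null`, nothing changes on the command line; credentials are passed the same way as before. An illustrative invocation with hypothetical values:

```bash
nextflow run nf-core/pgdb -profile docker --taxonomy 9606 \
    --cosmic --cosmic_user_name 'user@example.com' --cosmic_password 'mypassword'
```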
22 changes: 10 additions & 12 deletions nextflow_schema.json
@@ -155,11 +155,13 @@
"default": "",
"properties": {
"gnomad": {
"type": "boolean"
"type": "boolean",
"description": "Add gNOMAD variants to the database"
},
"gnomad_file_url": {
"type": "string",
"default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz"
"default": "gs://gnomad-public/release/2.1.1/vcf/exomes/gnomad.exomes.r2.1.1.sites.vcf.bgz",
"description": "gNOMAD url"
}
}
},
@@ -226,13 +228,6 @@
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"properties": {
"input": {
"type": "string",
"fa_icon": "fas fa-dna",
"description": "Input files.",
"help_text": "Use this to specify the location of your input files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`",
"hidden": true
},
"outdir": {
"type": "string",
"description": "The output directory where the results will be saved.",
@@ -252,7 +247,8 @@
"description": "The final protein database generated"
},
"push_s3": {
"type": "string"
"type": "string",
"description": "push data to s3"
}
}
},
@@ -325,11 +321,13 @@
"validate_params": {
"type": "boolean",
"default": true,
"hidden": true
"hidden": true,
"description": "validate params of the schema"
},
"show_hidden_params": {
"type": "string",
"hidden": true
"hidden": true,
"description": "hide params of the schema"
}
}
},