batch add params

h3abionet · Mar 10, 2024 · c7068b4 · c7068b4
1 parent e8b2a98
commit c7068b4
Show file tree

Hide file tree

Showing 2 changed files with 187 additions and 49 deletions.
diff --git a/nextflow.config b/nextflow.config
@@ -10,7 +10,7 @@
 params {
 
     // TODO nf-core: Specify your pipeline's command line flags
-    // Input options
+    // Input options (sample sheet)
     input                      = null
     // References
     genome                     = null
@@ -36,90 +36,88 @@ params {
     // skip_multiQC = false  // set to run this step by default; this can fail with large numbers of samples
 
     // Trimming
-    // NYI; this bypasses all trimming and QC, assumes primers are removed and sequence(s) ready for DADA2
-    // skip_trimming = false
+    // NYI: setting this to false bypasses all trimming; it assumes primers are already removed
+    // and the sequence(s) are ready for denoising
+    // trim = true
+    // trim-mode = "dada2" // default: use DADA2 methods for trimming, can be 'dada2', 'cutadapt'
 
     // when true (default), this sets cutadapt's trimming (which uses linked adapters) to require *both* 
     // primers be present.  With some kits like StrainID this can be an issue (can have some truncated reads 
     // at the 5' or 3' end) and so can be relaxed by setting to false.
-    // pacbio_strict_match = false
-
-    // fwdprimer = false
-    // revprimer = false
-    // trimFor = 0
-    // trimRev = 0
-    // truncFor = 0
-    // truncRev = 0
-    // maxEEFor = 2
-    // maxEERev = 2
-    // truncQ = 2 //default
-    // maxN = 0 //default
-    // maxLen = "Inf" // default, this can be coersed in R using as.numeric
-    // minLen = 50 // default
+    pacbio_strict_match = true
+
+    // setting these assumes use of cutadapt for trimming
+    fwd_adapter = false 
+    rev_adapter = false
+    trim_for = 0
+    trim_rev = 0
+    trunc_for = 0
+    trunc_rev = 0
+    maxEE_for = 0
+    maxEE_rev = 0
+    truncQ = 2 //default
+    maxN = 0 //default
+    max_read_len = null // default, this can be coerced in R using as.numeric
+    min_read_len = 50 // default
     // I think we can make these bool 'false' as above with R coercion (either through as.logical or using optparse in an Rscript)
-    // rmPhiX = "F"  // TODO: test using false instead of string
+    rmPhiX = false  // TODO: test using false instead of string
 
     // Error model
-    // qualityBinning = false  // false, set to true if using binned qualities (NovaSeq)
-    // errorModel = 'illumina' // NYI, thinking about best way to implement this
+    qualityBinning = false  // false, set to true if using binned qualities (NovaSeq)
+    errorModel = 'illumina' // NYI, thinking about best way to implement this
 
     // Merging
     // paired_type = "overlapping"  // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
-    // minOverlap = 20 // default=20
-    // maxMismatch = 0 // default
-    // trimOverhang = "F" // KL: I don't think we have overhangs for WISH project03
-    // justConcatenate = "F"  // TODO: test using false instead of string
+    min_overlap = 20 // default=20
+    max_mismatch = 0 // default
+    trim_overhang = "F" // KL: I don't think we have overhangs for WISH project03
+    just_concatenate = "F"  // TODO: test using false instead of string
     // CF: this is for rescuing unmerged ITS and should
     // be off unless really needed; even then it's questionable, but it is sometimes requested
     // rescueUnmerged = false 
-    // dadaParams = false // !!!Deprecated!!!
-    // dadaOpt = [] 
-    // maxMergedLen = 0 // Only run if set > 1
-    // minMergedLen = 0 // Only run if set > 1
+    dadaOpt = []
+    max_asv_len = 0 // Only run if set > 1
+    min_asv_len = 0 // Only run if set > 1
     // Chimera detection
-    // skipChimeraDetection = false
+    chimera_detection = true
     // removeBimeraDenovoOptions = false
 
     // Taxonomic assignment
-    // taxassignment = 'rdp' // default: RDP classifier implementation in dada2
-    // reference = false
-    // species = false
-    // minBoot = 50 // default for dada2
-    // taxLevels = false
-    // taxBatch = 0  // batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything
+    tax_assignment_method = 'rdp' // default: RDP classifier implementation in dada2
+    reference = false
+    species = false
+    min_boot = 50 // default for dada2
+    tax_levels = false
+    tax_batch = 0  // batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything
 
     // alignment
-    // skipAlignment = false
-    // aligner = 'DECIPHER' // default
+    skip_alignment = false
+    aligner = 'DECIPHER' // default
     // infernalCM  = false
 
     // Phylogenetic analysis
-    // runTree = 'phangorn' // default, current alternative is 'fasttree'
+    run_tree = 'phangorn' // default, current alternative is 'fasttree'
 
-    // NYI, for dada sample inference pooling (requires all samples)
-    // pool = "pseudo" // TODO: test using false instead of string
+    // for dada sample inference pooling (requires all samples)
+    pool = "pseudo" // TODO: test using false instead of string
 
     // MultiQC
     // interactiveMultiQC = false
 
     // additional outputs
-    // toBIOM = true  // generate BIOM v1 output
-    // toQIIME2 = false  // generate QZA artifacts for QIIME2
-
-    // Quick hack to clean up sample names, probably unsafe (bobby tables);
-    // This is now deprecated in favor of using a sample sheet (CSV)
-    // sampleRegex = false
+    to_BIOM = true  // generate BIOM v1 output
+    to_QIIME2 = false  // generate QZA artifacts for QIIME2
 
     // Renaming
-    // idType = "md5"
+    id_type = "md5"
 
     // Pre-chimera sequence tables. This pulls in one or more sequence tables
     // from independent sequencing runs, merges them, and runs
     // downstream analysis. The only supported sequence table format
     // is the original version from DADA2 (ASV names are the
     // sequence, with counts per sample). As these are run through
     // chimera detection, these should be pre-chimera removal data.
-    // seqTables = false
+    // seq_tables = false
 
     // Boilerplate options
     outdir                     = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -285,5 +285,145 @@
         {
             "$ref": "#/definitions/generic_options"
         }
-    ]
+    ],
+    "properties": {
+        "pacbio_strict_match": {
+            "type": "boolean",
+            "default": true
+        },
+        "fwd_adapter": {
+            "type": "boolean"
+        },
+        "rev_adapter": {
+            "type": "boolean"
+        },
+        "trim_for": {
+            "type": "integer",
+            "default": 0
+        },
+        "trim_rev": {
+            "type": "integer",
+            "default": 0
+        },
+        "trunc_for": {
+            "type": "integer",
+            "default": 0
+        },
+        "trunc_rev": {
+            "type": "integer",
+            "default": 0
+        },
+        "maxEE_for": {
+            "type": "integer",
+            "default": 0
+        },
+        "maxEE_rev": {
+            "type": "integer",
+            "default": 0
+        },
+        "truncQ": {
+            "type": "integer",
+            "default": 2
+        },
+        "maxN": {
+            "type": "integer",
+            "default": 0
+        },
+        "max_read_len": {
+            "type": "string"
+        },
+        "min_read_len": {
+            "type": "integer",
+            "default": 50
+        },
+        "rmPhiX": {
+            "type": "boolean"
+        },
+        "qualityBinning": {
+            "type": "boolean"
+        },
+        "errorModel": {
+            "type": "string",
+            "default": "illumina"
+        },
+        "min_overlap": {
+            "type": "integer",
+            "default": 20
+        },
+        "max_mismatch": {
+            "type": "integer",
+            "default": 0
+        },
+        "trim_overhang": {
+            "type": "string",
+            "default": "F"
+        },
+        "just_concatenate": {
+            "type": "string",
+            "default": "F"
+        },
+        "dadaOpt": {
+            "type": "string",
+            "default": "[]"
+        },
+        "max_asv_len": {
+            "type": "integer",
+            "default": 0
+        },
+        "min_asv_len": {
+            "type": "integer",
+            "default": 0
+        },
+        "chimera_detection": {
+            "type": "boolean",
+            "default": true
+        },
+        "tax_assignment_method": {
+            "type": "string",
+            "default": "rdp"
+        },
+        "reference": {
+            "type": "boolean"
+        },
+        "species": {
+            "type": "boolean"
+        },
+        "min_boot": {
+            "type": "integer",
+            "default": 50
+        },
+        "tax_levels": {
+            "type": "boolean"
+        },
+        "tax_batch": {
+            "type": "integer",
+            "default": 0
+        },
+        "skip_alignment": {
+            "type": "boolean"
+        },
+        "aligner": {
+            "type": "string",
+            "default": "DECIPHER"
+        },
+        "run_tree": {
+            "type": "string",
+            "default": "phangorn"
+        },
+        "pool": {
+            "type": "string",
+            "default": "pseudo"
+        },
+        "to_BIOM": {
+            "type": "boolean",
+            "default": true
+        },
+        "to_QIIME2": {
+            "type": "boolean"
+        },
+        "id_type": {
+            "type": "string",
+            "default": "md5"
+        }
+    }
 }