Skip to content

Commit

Permalink
batch add params
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Mar 10, 2024
1 parent e8b2a98 commit c7068b4
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 49 deletions.
94 changes: 46 additions & 48 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
params {

// TODO nf-core: Specify your pipeline's command line flags
// Input options
// Input options (sample sheet)
input = null
// References
genome = null
Expand All @@ -36,90 +36,88 @@ params {
// skip_multiQC = false // set to run this step by default, this can fail with large sample #'s

// Trimming
// NYI; this bypasses all trimming and QC, assumes primers are removed and sequence(s) ready for DADA2
// skip_trimming = false
// NYI: Setting to false bypasses all trimming, assumes primers are removed
// and sequence(s) are ready for denoising
// trim = true
// trim-mode = "dada2" // default: use DADA2 methods for trimming, can be 'dada2', 'cutadapt'

// when true (default), this sets cutadapt's trimming (which uses linked adapters) to require *both*
// primers be present. With some kits like StrainID this can be an issue (can have some truncated reads
// at the 5' or 3' end) and so can be relaxed by setting to false.
// pacbio_strict_match = false

// fwdprimer = false
// revprimer = false
// trimFor = 0
// trimRev = 0
// truncFor = 0
// truncRev = 0
// maxEEFor = 2
// maxEERev = 2
// truncQ = 2 //default
// maxN = 0 //default
// maxLen = "Inf" // default, this can be coerced in R using as.numeric
// minLen = 50 // default
pacbio_strict_match = true

// setting these assumes use of cutadapt for trimming
fwd_adapter = false
rev_adapter = false
trim_for = 0
trim_rev = 0
trunc_for = 0
trunc_rev = 0
maxEE_for = 0
maxEE_rev = 0
truncQ = 2 //default
maxN = 0 //default
max_read_len = null // default, this can be coerced in R using as.numeric
min_read_len = 50 // default
// I think we can make these bool 'false' as above with R coercion (either through as.logical or using optparse in a Rscript)
// rmPhiX = "F" // TODO: test using false instead of string
rmPhiX = false // TODO: test using false instead of string

// Error model
// qualityBinning = false // false, set to true if using binned qualities (NovaSeq)
// errorModel = 'illumina' // NYI, thinking about best way to implement this
qualityBinning = false // false, set to true if using binned qualities (NovaSeq)
errorModel = 'illumina' // NYI, thinking about best way to implement this

// Merging
// paired_type = "overlapping" // allowed: 'overlapping' (paired reads overlap), 'separate' (paired reads are non-overlapping), or 'mix' (variable length)
// minOverlap = 20 // default=20
// maxMismatch = 0 // default
// trimOverhang = "F" // KL: I don't think we have overhangs for WISH project03
// justConcatenate = "F" // TODO: test using false instead of string
min_overlap = 20 // default=20
max_mismatch = 0 // default
trim_overhang = "F" // KL: I don't think we have overhangs for WISH project03
just_concatenate = "F" // TODO: test using false instead of string
// CF: this is for rescuing unmerged ITS, should
// be off unless really needed, and even then it's questionable. But it is requested sometimes
// rescueUnmerged = false
// dadaParams = false // !!!Deprecated!!!
// dadaOpt = []
// maxMergedLen = 0 // Only run if set > 1
// minMergedLen = 0 // Only run if set > 1
dadaOpt = []
max_asv_len = 0 // Only run if set > 1
min_asv_len = 0 // Only run if set > 1
// Chimera detection
// skipChimeraDetection = false
chimera_detection = true
// removeBimeraDenovoOptions = false

// Taxonomic assignment
// taxassignment = 'rdp' // default: RDP classifier implementation in dada2
// reference = false
// species = false
// minBoot = 50 // default for dada2
// taxLevels = false
// taxBatch = 0 // batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything
tax_assignment_method = 'rdp' // default: RDP classifier implementation in dada2
reference = false
species = false
min_boot = 50 // default for dada2
tax_levels = false
tax_batch = 0 // batch size of ASVs to run through assignTaxonomy/assignSpecies, 0 = run everything

// alignment
// skipAlignment = false
// aligner = 'DECIPHER' // default
skip_alignment = false
aligner = 'DECIPHER' // default
// infernalCM = false

// Phylogenetic analysis
// runTree = 'phangorn' // default, current alternative is 'fasttree'
run_tree = 'phangorn' // default, current alternative is 'fasttree'

// NYI, for dada sample inference pooling (requires all samples)
// pool = "pseudo" // TODO: test using false instead of string
// for dada sample inference pooling (requires all samples)
pool = "pseudo" // TODO: test using false instead of string

// MultiQC
// interactiveMultiQC = false

// additional outputs
// toBIOM = true // generate BIOM v1 output
// toQIIME2 = false // generate QZA artifacts for QIIME2

// Quick hack to clean up sample names, probably unsafe (bobby tables);
// This is now deprecated in favor of using a sample sheet (CSV)
// sampleRegex = false
to_BIOM = true // generate BIOM v1 output
to_QIIME2 = false // generate QZA artifacts for QIIME2

// Renaming
// idType = "md5"
id_type = "md5"

// Pre-chimera sequence tables. This pulls in one or more sequence tables
// from independent sequencing runs, merges them, and runs
// downstream analysis. The only supported sequence table format
// is the original version from DADA2 (ASV names are the
// sequence, with counts per sample). As these are run through
// chimera detection, these should be pre-chimera removal data.
// seqTables = false
// seq_tables = false

// Boilerplate options
outdir = null
Expand Down
142 changes: 141 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -285,5 +285,145 @@
{
"$ref": "#/definitions/generic_options"
}
]
],
"properties": {
"pacbio_strict_match": {
"type": "boolean",
"default": true
},
"fwd_adapter": {
"type": "boolean"
},
"rev_adapter": {
"type": "boolean"
},
"trim_for": {
"type": "integer",
"default": 0
},
"trim_rev": {
"type": "integer",
"default": 0
},
"trunc_for": {
"type": "integer",
"default": 0
},
"trunc_rev": {
"type": "integer",
"default": 0
},
"maxEE_for": {
"type": "integer",
"default": 0
},
"maxEE_rev": {
"type": "integer",
"default": 0
},
"truncQ": {
"type": "integer",
"default": 2
},
"maxN": {
"type": "integer",
"default": 0
},
"max_read_len": {
"type": "string"
},
"min_read_len": {
"type": "integer",
"default": 50
},
"rmPhiX": {
"type": "boolean"
},
"qualityBinning": {
"type": "boolean"
},
"errorModel": {
"type": "string",
"default": "illumina"
},
"min_overlap": {
"type": "integer",
"default": 20
},
"max_mismatch": {
"type": "integer",
"default": 0
},
"trim_overhang": {
"type": "string",
"default": "F"
},
"just_concatenate": {
"type": "string",
"default": "F"
},
"dadaOpt": {
"type": "string",
"default": "[]"
},
"max_asv_len": {
"type": "integer",
"default": 0
},
"min_asv_len": {
"type": "integer",
"default": 0
},
"chimera_detection": {
"type": "boolean",
"default": true
},
"tax_assignment_method": {
"type": "string",
"default": "rdp"
},
"reference": {
"type": "boolean"
},
"species": {
"type": "boolean"
},
"min_boot": {
"type": "integer",
"default": 50
},
"tax_levels": {
"type": "boolean"
},
"tax_batch": {
"type": "integer",
"default": 0
},
"skip_alignment": {
"type": "boolean"
},
"aligner": {
"type": "string",
"default": "DECIPHER"
},
"run_tree": {
"type": "string",
"default": "phangorn"
},
"pool": {
"type": "string",
"default": "pseudo"
},
"to_BIOM": {
"type": "boolean",
"default": true
},
"to_QIIME2": {
"type": "boolean"
},
"id_type": {
"type": "string",
"default": "md5"
}
}
}

0 comments on commit c7068b4

Please sign in to comment.