nextflow.config

/*
 * -------------------------------------------------
 *  UCT dada2 Nextflow config file
 * -------------------------------------------------
 * Default config options for all environments.
 * Cluster-specific config options should be saved
 * in the conf folder and imported under a profile
 * name here.
 */

// Global default params, used in configs
// Some help with time stamps
import java.text.SimpleDateFormat

// Pipeline-wide default parameters. Each entry can be overridden on the
// command line (--<name> <value>) or by a config file loaded later.
// NOTE: the max_memory / max_time / max_cpus limits read by check_max() are
// not defined in this block — presumably they come from an included config
// (TODO confirm).
params {
    // Configurable variables
    clusterOptions = ''
    project = false
    precheck = false
    email = false
    plaintext_email = false
    amplicon = '16S'

    // Show help message
    help = false

    name = false
    base = "uct-cbio"
    version = 0.5 // pipeline version
    // Pipeline Options
    reads = "data/*{1,2}.fastq.gz"
    // Run date (yyyy-MM-dd); must be assigned before outdir, which embeds it
    timestamp = new SimpleDateFormat("yyyy-MM-dd").format(new java.util.Date())
    // Default output directory is date-stamped: ./<yyyy-MM-dd>-dada2
    outdir = "./" + timestamp + "-dada2"
    ticket = 0 // for Redmine, not currently used (KL)

    // Trimming
    fwdprimer = false
    revprimer = false
    qualityBinning = false  // false by default; set to true if using binned qualities (NovaSeq)
    trimFor = 0
    trimRev = 0
    truncFor = 0
    truncRev = 0
    maxEEFor = 2
    maxEERev = 2
    truncQ = 2 // default
    maxN = 0 // default
    maxLen = "Inf" // default; kept as a string, this can be coerced in R using as.numeric
    minLen = 50 // default

    // I think we can make these bool 'false' as above with R coercion (either through as.logical or using optparse in an Rscript)
    rmPhiX = "F"  // TODO: test using false instead of string

    // Merging
    minOverlap = 20 // default=12
    maxMismatch = 0 // default
    trimOverhang = "F" // KL: I don't think we have overhangs for WISH project03
    justConcatenate = "F"  // TODO: test using false instead of string
    rescueUnmerged = false // CF: this is for rescuing unmerged ITS, should be off unless really needed
    dadaParams = false // if set, these are additional arguments passed to the dada() function in the PacBio workflow !!!Deprecated!!!
    dadaOpt = []
    maxMergedLen = 0 // Only run if set > 1
    minMergedLen = 0 // Only run if set > 1
    // Renaming (no options defined in this section yet)

    // Chimera detection
    skipChimeraDetection = false
    removeBimeraDenovoOptions = false

    // minFoldParentOverAbundance = 1.5

    // Taxonomic assignment
    taxassignment = 'rdp' // default: RDP classifier implementation in dada2
    reference = false
    species = false
    minBoot = 50 // default for dada2
    taxLevels = false

    // Phylogenetic analysis
    runtree = 'phangorn' // default, current alternative is 'fasttree'

    // Alignment
    aligner = 'DECIPHER' // default
    infernalCM  = false

    // NYI (not yet implemented), for dada sample inference pooling (requires all samples)
    pool = "pseudo" // TODO: test using false instead of string

    // MultiQC
    interactiveMultiQC = false

    // Additional outputs
    toBIOM = true  // generate BIOM v1 output
    toQIIME2 = false  // generate QZA artifacts for QIIME2

    // Quick hack to clean up sample names, probably unsafe (bobby tables);
    // this would work better with a mapping table
    sampleRegex = false
    idType = "simple"
}

// Execution profiles, selected at launch with `-profile <name>`.
//
// Cluster profiles load the shared defaults (conf/base.config) first and the
// site-specific file second: settings assigned by a later includeConfig
// override those assigned by an earlier one, so this order lets each site
// file take precedence over the shared defaults.
profiles {

  ilifu {
    includeConfig 'conf/base.config'
    includeConfig 'conf/ilifu.config'
  }
  icts_hpc {
    // base.config used to be included AFTER icts_hpc.config here, unlike every
    // other cluster profile, which let the shared defaults overwrite the
    // site-specific settings. Order aligned with the other profiles.
    includeConfig 'conf/base.config'
    includeConfig 'conf/icts_hpc.config'
  }
  training {
    includeConfig 'conf/base.config'
    includeConfig 'conf/training.config'
  }
  standard {
    // Loads only its own file (no base.config).
    // NOTE(review): presumably standard.config is self-contained — confirm.
    includeConfig 'conf/standard.config'
  }
  uiuc_biocluster {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_biocluster.config'
  }
  uiuc_singularity {
    includeConfig 'conf/base.config'
    includeConfig 'conf/uiuc_singularity.config'
  }
  aws_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/aws_batch.config'
  }
  azure_batch {
    includeConfig 'conf/base.config'
    includeConfig 'conf/azure_batch.config'
  }
  none {
    // Don't load any config (for use with custom home configs)
  }

}


// Run every task script under bash with -e, -u and -o pipefail, so that a
// failing command anywhere in a pipe makes the whole task fail.
process {
  shell = ['/bin/bash', '-euo', 'pipefail']
}

// Execution reports, all written under <outdir>/pipeline_info.
//
// The file names are fixed and the default outdir is only date-stamped, so a
// second run (or a -resume) on the same day targets files that already exist.
// Recent Nextflow releases abort in that situation unless `overwrite` is
// enabled, hence it is switched on for every report below.
timeline {
  enabled = true
  overwrite = true
  file = "${params.outdir}/pipeline_info/dada2_timeline.html"
}
report {
  enabled = true
  overwrite = true
  file = "${params.outdir}/pipeline_info/dada2_report.html"
}
trace {
  enabled = true
  overwrite = true
  file = "${params.outdir}/pipeline_info/dada2_trace.txt"
}
dag {
  enabled = true
  overwrite = true
  file = "${params.outdir}/pipeline_info/dada2_DAG.svg"
}

// Pipeline metadata: entry script, short description and project home page.
manifest.mainScript = 'main.nf'
manifest.description = 'Nextflow dada2 analysis pipeline for UCT CBIO'
manifest.homePage = 'https://github.com/kviljoen/16S-rDNA-dada2-pipeline'

/**
 * Cap a requested resource at the configured pipeline-wide maximum.
 *
 * @param obj   the requested amount; it is compared against
 *              params.max_memory, params.max_time or params.max_cpus
 *              depending on `type`
 * @param type  one of 'memory', 'time' or 'cpus'
 * @return      the configured maximum when `obj` exceeds it, otherwise `obj`.
 *              If the configured maximum cannot be parsed or compared, an
 *              error line is printed and `obj` is returned unchanged. An
 *              unrecognised `type` also returns `obj` unchanged.
 */
def check_max(obj, type) {
  if (type == 'memory') {
    try {
      def limit = params.max_memory as nextflow.util.MemoryUnit
      // compareTo() only guarantees a positive value for "greater than",
      // so test > 0 rather than relying on it being exactly 1.
      return obj.compareTo(limit) > 0 ? limit : obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
      return obj
    }
  }

  if (type == 'time') {
    try {
      def limit = params.max_time as nextflow.util.Duration
      return obj.compareTo(limit) > 0 ? limit : obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
      return obj
    }
  }

  if (type == 'cpus') {
    try {
      return Math.min( obj, params.max_cpus as int )
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
      return obj
    }
  }

  // Unrecognised resource type: return the request untouched instead of
  // falling off the end of the function and implicitly returning null.
  return obj
}