nextflow.config

/*
 * -------------------------------------------------
 *  nf-core/eager Nextflow config file
 * -------------------------------------------------
 * Default config options for all environments.
 */
// Global default params, used in configs
params {

  // Workflow flags
  genome = false
  single_end = false
  outdir = './results'
  publish_dir_mode = 'copy'

  // aws
  awsqueue = ''
  awsregion = 'eu-west-1'
  awscli = ''

  //Pipeline options

  //Input reads
  input = null
  udg_type = 'none'
  single_stranded = false
  single_end = false
  colour_chemistry = 4
  bam = false
  
  // Optional input information
  snpcapture_bed = ''
  run_convertinputbam = false

  //Input reference
  fasta = ''
  bwa_index = ''
  bt2_index = ''
  fasta_index = ''
  seq_dict = ''
  large_ref = false
  save_reference = false
  saveTrimmed = true
  saveAlignedIntermediates = false

  //Skipping parts of the pipeline for impatient users
  skip_fastqc = false
  skip_adapterremoval = false 
  skip_preseq = false
  skip_deduplication = false
  skip_damage_calculation = false
  skip_qualimap = false

  //More defaults
  complexity_filter_poly_g = false
  complexity_filter_poly_g_min = 10

  //Read clipping and merging parameters
  clip_forward_adaptor = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
  clip_reverse_adaptor = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA'
  clip_readlength = 30
  clip_min_read_quality = 20
  min_adap_overlap = 1
  skip_collapse = false
  skip_trim = false
  preserve5p = false
  mergedonly = false

  //Mapping algorithm
  mapper = 'bwaaln'
  bwaalnn = 0.04
  bwaalnk = 2
  bwaalnl = 1024 // From Schubert et al. 2012 (10.1186/1471-2164-13-178)
  circularextension = 500
  circulartarget = 'MT'
  circularfilter = false
  bt2_alignmode = 'local' // from Cahill 2018 (10.1093/molbev/msy018) and, Poullet and Orlando (10.3389/fevo.2020.00105)
  bt2_sensitivity = 'sensitive' // from Poullet and Orlando (10.3389/fevo.2020.00105)
  bt2n = 0 // Do not set Cahill 2018 recommendation of 1 here, so not to 'hide' overriding bowtie2 presets
  bt2l = 0
  bt2_trim5 = 0
  bt2_trim3 = 0

  //Mapped read removal from input FASTQ
  hostremoval_input_fastq = false
  hostremoval_mode = 'remove'

  //BAM Filtering steps (default = discard unmapped reads)
  run_bam_filtering = false
  bam_mapping_quality_threshold = 0
  bam_filter_minreadlength = 0
  bam_unmapped_type = 'discard'

  //DeDuplication settings
  dedupper = 'markduplicates'
  dedup_all_merged = false

  //Preseq settings
  preseq_step_size = 1000

  //DamageProfiler settings
  damageprofiler_length = 100
  damageprofiler_threshold = 15
  damageprofiler_yaxis = 0.30

  //PMDTools settings
  run_pmdtools = false
  pmdtools_range = 10
  pmdtools_threshold = 3
  pmdtools_reference_mask = ''
  pmdtools_max_reads = 10000

  // mapDamage
  run_mapdamage_rescaling = false
  params.rescale_length_5p = 12
  params.rescale_length_3p = 12

  //Bedtools settings
  run_bedtools_coverage = false 
  anno_file = ''

  //bamUtils trimbam settings
  run_trim_bam = false 
  bamutils_clip_half_udg_left = 1 
  bamutils_clip_half_udg_right = 1 
  bamutils_clip_none_udg_left = 1 
  bamutils_clip_none_udg_right = 1 
  bamutils_softclip = false

  //Genotyping options
  run_genotyping = false
  genotyping_tool = ''
  genotyping_source = 'raw'
  // gatk options
  gatk_call_conf = 30
  gatk_ploidy = 2
  gatk_downsample = 250
  gatk_dbsnp = ''
  gatk_hc_out_mode = 'EMIT_VARIANTS_ONLY'
  gatk_hc_emitrefconf = 'GVCF'
  gatk_ug_genotype_model = 'SNP'
  gatk_ug_out_mode = 'EMIT_VARIANTS_ONLY'
  gatk_ug_keep_realign_bam = false
  gatk_ug_defaultbasequalities = ''
  // freebayes options
  freebayes_C = 1
  freebayes_g = 0
  freebayes_p = 2
  // Sequencetools pileupCaller
  pileupcaller_snpfile = ''
  pileupcaller_bedfile = ''
  pileupcaller_method = 'randomHaploid'
  pileupcaller_transitions_mode = 'AllSites'
  // ANGSD Genotype Likelihoods
  angsd_glmodel = 'samtools'
  angsd_glformat = 'binary'
  angsd_createfasta = false
  angsd_fastamethod = 'random'

  //Consensus sequence generation
  run_vcf2genome = false
  vcf2genome_outfile = ''
  vcf2genome_header = ''
  vcf2genome_minc = 5
  vcf2genome_minq = 30
  vcf2genome_minfreq = 0.8

  //MultiVCFAnalyzer Options
  run_multivcfanalyzer = false
  write_allele_frequencies = false
  min_genotype_quality = 30
  min_base_coverage = 5
  min_allele_freq_hom = 0.9
  min_allele_freq_het = 0.9
  additional_vcf_files = ''
  reference_gff_annotations = 'NA'
  reference_gff_exclude = 'NA'
  snp_eff_results = 'NA'

  //mtnucratio
  run_mtnucratio = false
  mtnucratio_header = 'MT'

  //Sex.DetERRmine settings
  run_sexdeterrmine = false
  sexdeterrmine_bedfile = ''

  //Nuclear contamination based on chromosome X heterozygosity.
  run_nuclear_contamination = false
  contamination_chrom_name = 'X' // Default to using hs37d5 name

  // taxonomic classifier
  run_metagenomic_screening  = false
  
  metagenomic_complexity_filter = false
  metagenomic_complexity_entropy = 0.3

  metagenomic_tool = ''
  database  = ''
  metagenomic_min_support_reads = 1
  percent_identity = 85
  malt_mode = 'BlastN'
  malt_alignment_mode = 'SemiGlobal'
  malt_top_percent = 1
  malt_min_support_mode = 'percent'
  malt_min_support_percent = 0.01
  malt_max_queries = 100
  malt_memory_mode = 'load'
  malt_sam_output = false

  // maltextract - only including number 
  // parameters if default documented or duplicate of MALT
  run_maltextract = false
  maltextract_taxon_list = ''
  maltextract_ncbifiles = ''
  maltextract_filter = 'def_anc'
  maltextract_toppercent = 0.01
  maltextract_destackingoff = false
  maltextract_downsamplingoff = false
  maltextract_duplicateremovaloff = false
  maltextract_matches = false
  maltextract_megansummary = false
  maltextract_percentidentity = 85.0
  maltextract_topalignment =  false

  // Boilerplate options
  name = false
  multiqc_config = false
  email = false
  email_on_fail = false
  max_multiqc_email_size = 25.MB
  plaintext_email = false
  monochrome_logs = false
  help = false
  igenomes_base = 's3://ngi-igenomes/igenomes/'
  tracedir = "${params.outdir}/pipeline_info"
  igenomes_ignore = false
  custom_config_version = 'master'
  custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
  hostnames = false
  config_profile_description = false
  config_profile_contact = false
  config_profile_url = false

  // Defaults only, expecting to be overwritten
  max_memory = 128.GB
  max_cpus = 16
  max_time = 240.h

}

// Container slug. Stable releases should specify release tag!
// Developmental code should specify :dev
process.container = 'nfcore/eager:2.3.1'

// Load base.config by default for all pipelines
includeConfig 'conf/base.config'

// Load nf-core custom profiles from different Institutions
try {
  includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
  System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}

// Load nf-core/eager custom profiles from different institutions
try {
  includeConfig "${params.custom_config_base}/pipeline/eager.config"
} catch (Exception e) {
  System.err.println("WARNING: Could not load nf-core/config/eager profiles: ${params.custom_config_base}/pipeline/eager.config")
}

profiles {
  conda { process.conda = "$projectDir/environment.yml" }
  debug { process.beforeScript = 'echo $HOSTNAME' }
  docker {
    docker.enabled = true
    // Avoid this error:
    //   WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
    // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351
    // once this is established and works well, nextflow might implement this behavior as new default.
    docker.runOptions = '-u \$(id -u):\$(id -g)'
  }
  singularity {
    singularity.enabled = true
    singularity.autoMounts = true
  }
  podman {
    podman.enabled = true
  }
  test { includeConfig 'conf/test.config' }
  test_full { includeConfig 'conf/test_full.config' }
  test_bam { includeConfig 'conf/test_bam.config'}
  test_fna { includeConfig 'conf/test_fna.config'}
  test_humanbam { includeConfig 'conf/test_humanbam.config' }
  test_pretrim { includeConfig 'conf/test_pretrim.config' }
  test_kraken { includeConfig 'conf/test_kraken.config' }
  test_tsv { includeConfig 'conf/test_tsv.config'}
  test_tsv_bam { includeConfig 'conf/test_tsv_bam.config'}
  test_tsv_fna { includeConfig 'conf/test_tsv_fna.config'}
  test_tsv_humanbam { includeConfig 'conf/test_tsv_humanbam.config' }
  test_tsv_pretrim { includeConfig 'conf/test_tsv_pretrim.config' }
  test_tsv_kraken { includeConfig 'conf/test_tsv_kraken.config' }
  test_tsv_complex { includeConfig 'conf/test_tsv_complex.config' }
  test_stresstest_human { includeConfig 'conf/test_stresstest_human.config' }
  benchmarking_human { includeConfig 'conf/benchmarking_human.config' }
  benchmarking_vikingfish { includeConfig 'conf/benchmarking_vikingfish.config' }
}
// Load igenomes.config if required
if (!params.igenomes_ignore) {
  includeConfig 'conf/igenomes.config'
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
env {
  PYTHONNOUSERSITE = 1
  R_PROFILE_USER = "/.Rprofile"
  R_ENVIRON_USER = "/.Renviron"
}

// Capture exit codes from upstream processes when piping
process.shell = ['/bin/bash', '-euo', 'pipefail']

timeline {
  enabled = true
  file = "${params.tracedir}/execution_timeline.html"
}
report {
  enabled = true
  file = "${params.tracedir}/execution_report.html"
}
trace {
  enabled = true
  file = "${params.tracedir}/execution_trace.txt"
}
dag {
  enabled = true
  file = "${params.tracedir}/pipeline_dag.svg"
}

manifest {
  name = 'nf-core/eager'
  author = 'The nf-core/eager community'
  homePage = 'https://github.com/nf-core/eager'
  description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline'
  mainScript = 'main.nf'
  nextflowVersion = '!>=20.04.0'
  version = '2.3.1'
}

// Function to ensure that resource requirements don't go beyond
// a maximum limit
def check_max(obj, type) {
  if (type == 'memory') {
    try {
      if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
        return params.max_memory as nextflow.util.MemoryUnit
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'time') {
    try {
      if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
        return params.max_time as nextflow.util.Duration
      else
        return obj
    } catch (all) {
      println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
      return obj
    }
  } else if (type == 'cpus') {
    try {
      return Math.min( obj, params.max_cpus as int )
    } catch (all) {
      println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
      return obj
    }
  }
}