###########################################
#Welcome to zUMIs
#below, please fill the mandatory inputs
#We expect full paths for all files.
###########################################

#Define a project name; it will be used to name the output files.
project: Tunic

#Sequencing File Inputs:
#For each input file, make one list object & define its path and base ranges.
#Base definition vocabulary: BC(n) UMI(n) cDNA(n).
#The base definition needs to account for all ranges. You can give several comma-separated ranges for BC & UMI sequences, e.g. BC(1-6,20-26)
#You can specify between 1 and 4 input files.
sequence_files:
  file1:
    name: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/zummis_inputs/Tunic_R1.fastq.gz
    base_definition:
      - UMI(1-12)
      - cDNA(13-100)
  #NOTE(review): the second read declares exactly the same base layout as the first one - confirm that both reads really start with a 12-base UMI.
  file2:
    name: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/zummis_inputs/Tunic_R2.fastq.gz
    base_definition:
      - UMI(1-12)
      - cDNA(13-100)
  #The two index reads each contribute an 8-base barcode segment.
  file3:
    name: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/zummis_inputs/Tunic_I1.fastq.gz
    base_definition:
      - BC(1-8)
  file4:
    name: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/zummis_inputs/Tunic_I2.fastq.gz
    base_definition:
      - BC(1-8)

#Reference genome setup
reference:
  STAR_index: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/star_reference_dir #path to STAR genome index
  GTF_file: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified/zummis_inputs/GRCh38ERCC.ensembl91.gtf #path to gene annotation file in GTF format
  additional_files: ##additional_files #Optional parameter. It is possible to give additional reference sequences here, eg ERCC.fa
  additional_STAR_params: --limitOutSJcollapsed 8000000 --limitSjdbInsertNsj 8000000 --outFilterScoreMinOverLread 0.40 --outFilterMatchNminOverLread 0.40 --limitIObufferSize 300000000 #Optional parameter. You may add custom mapping parameters to STAR here; write every option as "--name value", separated by a space.

#Output directory
#NOTE(review): this directory is also the parent of the input-file folder and the STAR index folder referenced above - confirm that writing results next to them is intended.
out_dir: /sbgenomics/workspaces/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/tasks/bc1ef90e-8d07-497e-9ef5-ef3b889c11bf/zumis-2-5-5-fs-modified #specify the full path to the output directory

###########################################
#below, you may optionally change default parameters
###########################################

#Compute resources: number of processors to use and the memory ceiling.
num_threads: 48
mem_limit: 100 #Memory limit in gigabytes; null means unlimited RAM usage.

#Barcode & UMI filtering options
#Each filter is described by two settings: a count of bases and a Phred quality cutoff.
#NOTE(review): the counts 8 and 12 below equal the lengths of the BC(1-8) and UMI(1-12) ranges declared above - confirm that such a permissive threshold is intended and that the count was not mistaken for the sequence length.
filter_cutoffs:
  BC_filter:
    num_bases: 8 #number of bases under the base quality cutoff that should be filtered out.
    phred: 20 #Phred score base-cutoff for quality control.
  UMI_filter:
    num_bases: 12 #number of bases under the base quality cutoff that should be filtered out.
    phred: 20 #Phred score base-cutoff for quality control.

#Options for barcode handling
#You can give either the number of top barcodes to use or an annotation file of cell barcodes.
#If you leave both barcode_num and barcode_file empty, zUMIs will perform automatic cell barcode selection for you!
#NOTE(review): the whitelist path below is under /sbgenomics/Projects/ while every other path in this file is under /sbgenomics/workspaces/ - confirm it is reachable from the task.
barcodes:
  barcode_num: null
  barcode_file: /sbgenomics/Projects/2a8bf1a5-ab5c-454c-a3a9-c188ebc82179/inputs/barcode_whitelist.txt
  automatic: no  #Give yes/no to this option: should the cell barcodes be detected automatically? If a barcode file is given in combination with automatic barcode detection, the list of given barcodes will be used as a whitelist.
  BarcodeBinning: 1   #Hamming distance binning of close cell barcode sequences.
  nReadsperCell: 100   #Keep only the cell barcodes with at least n reads.
  demultiplex: yes  #Produce per-cell demultiplexed bam files.

#Options related to counting of reads towards expression profiles
counting_opts:
  introns: yes   #Can be set to no for exon-only counting.
  downsampling: 0   #Number of reads to downsample to. This value can be a fixed number of reads (e.g. 10000) or a desired range (e.g. 10000-20000). Barcodes with fewer than <d> reads will not be reported. 0 means adaptive downsampling. Default: 0.
  strand: 0   #Is the library stranded? 0 = unstranded, 1 = positively stranded, 2 = negatively stranded
  Ham_Dist: 0   #Hamming distance collapsing of UMI sequences.
  write_ham: no   #If Hamming distance collapsing of UMI sequences is performed, write out mapping tables & UB-corrected bam files.
  velocyto: no   #Would you like velocyto to do counting of intron-exon spanning reads?
  primaryHit: yes  #Do you want to count the primary hits of multimapping reads towards gene expression levels?
  twoPass: no  #Perform basic STAR twoPass mapping.

#Produce stats files and plots? (yes/no)
make_stats: yes

#Stage from which zUMIs should start. Possible values: Filtering, Mapping, Counting, Summarising. Default: Filtering.
which_Stage: Filtering

#Define the paths of the programs zUMIs depends on.
#The first two are given as bare command names, the last two as absolute paths.
#NOTE(review): bare names presumably rely on the PATH of the executing environment - confirm both tools are available there.
samtools_exec: samtools #samtools executable
Rscript_exec: Rscript #Rscript executable
STAR_exec: /opt/STAR-2.5.4b/bin/Linux_x86_64/STAR #STAR executable
pigz_exec: /opt/pigz-2.4/pigz #pigz executable

#Below, fqfilter will add a read_layout flag defining SE or PE.
#NOTE(review): the flag is already present in this file with the value PE - presumably written by an earlier run or by the tool that generated this file; confirm it matches the inputs.
zUMIs_directory: /opt/zUMIs/
read_layout: PE