Skip to content

Commit

Permalink
add SAMPLE > CHEMISTRY > DEFAULT config logic and simplify STAR arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
agillen committed Aug 28, 2024
1 parent 2dfdaf0 commit c5b2539
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _get_config(sample, item):
return SAMPLES[sample][item]
except KeyError:
try:
return CHEMISTRY[item]
return CHEMISTRY[SAMPLES[sample]["chemistry"]][item]
except KeyError:
return DEFAULTS[item]

Expand Down
51 changes: 21 additions & 30 deletions chemistry.yaml
Original file line number Diff line number Diff line change
@@ -1,47 +1,38 @@
chromiumV3_illumina:
filter_R1_length: 58
bc_cut: ""
STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2\"]"
STAR_R2: "[WHITELIST_V3,\"--soloUMIlen 12\"]"
STAR_paired: "--alignEndsProtrude 58 ConcordantPair"
bc_whitelist: WHITELIST_V3
STAR_R1: --soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 12
chromiumV3_element:
filter_R1_length: False
bc_cut: ""
STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 48 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]"
STAR_R2: "[WHITELIST_V3,\"--soloUMIlen 12\"]"
STAR_paired: "--alignEndsProtrude 58 ConcordantPair"
bc_whitelist: WHITELIST_V3
STAR_R1: --soloUMIlen 12 --clip5pNbases 48 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17
STAR_R2: --soloUMIlen 12
chromiumV3_ultima:
filter_R1_length: 58
bc_cut: ""
STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39\"]"
bc_whitelist: WHITELIST_V3
STAR_R1: --soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39
chromiumV2_illumina:
filter_R1_length: 56
bc_cut: ""
STAR_R1: "[WHITELIST_V2,\"--soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2\"]"
STAR_R2: "[WHITELIST_V2,\"--soloUMIlen 10\"]"
STAR_paired: "--alignEndsProtrude 56 ConcordantPair"
bc_whitelist: WHITELIST_V2
STAR_R1: --soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 10
dropseq_illumina:
filter_R1_length: 50
bc_cut: ""
STAR_R1: "None --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2"
STAR_R2: "None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13"
STAR_paired: "--alignEndsProtrude 50 ConcordantPair"
STAR_R1: --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13
microwellseq_illumina:
filter_R1_length: 54
bc_cut: "CGACTCACTACAGGG...TCGGTGACACGATCG"
STAR_R1: "None --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2"
STAR_R2: "None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19"
STAR_paired: "--alignEndsProtrude 54 ConcordantPair"
bc_cut: CGACTCACTACAGGG...TCGGTGACACGATCG
STAR_R1: --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19
bd_illumina:
filter_R1_length: 53
bc_cut: "ACTGGCCTGCGA...GGTAGCGGTGACA"
STAR_R1: "None --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2"
STAR_R2: "None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28"
STAR_paired: "--alignEndsProtrude 53 ConcordantPair"
bc_cut: ACTGGCCTGCGA...GGTAGCGGTGACA
STAR_R1: --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28
indrop_illumina:
filter_R1_length: 32
bc_cut: ""
STAR_R1: "None --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2"
STAR_R2: "None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9"
STAR_paired: "--alignEndsProtrude 32 ConcordantPair"
STAR_R1: --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2
STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9

8 changes: 4 additions & 4 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,23 @@ POLYA_SITES:
# "SAF"

DEFAULTS:
# default config options, overridden by SAMPLES
# default config options, overridden by SAMPLES and CHEMISTRY definitions
chemistry: chromiumV3_illumina
alignments:
- R2
- paired
extra_args: ""
STAR_paired: --alignEndsProtrude 58 ConcordantPair

SAMPLES:
# per-sample labels, FASTQ basenames, and config options
# required:
# basename
# optional:
# chemistry
# platform
# alignments
# cutadapt_args
# star_args
# star_args (STAR_R1, STAR_R2, STAR_paired)
# extra star_args (STAR_R1_extra_args, STAR_R2_extra_args, STAR_paired_extra_args)
test:
basename: sample
chemistry: chromiumV2_illumina
Expand Down
50 changes: 23 additions & 27 deletions rules/cutadapt_star.snake
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,34 @@ def _get_fq_paths(wildcards):

""" Process complex barcodes into simple, if needed"""
def _get_bc_cut(wildcards):
args = CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["bc_cut"]
return args
try:
return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["bc_cut"]
except KeyError:
return ""

""" Extract per-capture chemistry from gex libs (base function)"""
def _get_chem_version(sample, alignment):
try:
whitelist = eval(_get_config(sample, "bc_whitelist"))
except KeyError:
whitelist = "None"
try:
extra_args = _get_config(sample, alignment+"_extra_args")
except KeyError:
extra_args = ""
return [whitelist, _get_config(sample, alignment), extra_args]

""" Extract per-capture chemistry from gex libs (R2)"""
def _get_chem_version_R2(wildcards):
try:
return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R2"])
except SyntaxError:
return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R2"]
return _get_chem_version(wildcards.sample, "STAR_R2")

""" Extract per-capture chemistry from gex libs (R1/paired)"""
def _get_chem_version_R1(wildcards):
try:
return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R1"])
except SyntaxError:
return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R1"]
return _get_chem_version(wildcards.sample, "STAR_R1")

""" Extract per-capture extra arguments for gex paired alignments """
def _get_chem_version_paired(wildcards):
try:
return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_paired"])
except SyntaxError:
return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_paired"]

""" Extract per-capture extra arguments for gex libs """
def _get_extra_args(wildcards):
return _get_config(wildcards.sample, "extra_args")
return _get_chem_version(wildcards.sample, "STAR_R1") + [_get_config(wildcards.sample, "STAR_paired")]

""" This rule trims R1-only libraries """
rule cutadapt_R1:
Expand Down Expand Up @@ -134,7 +135,6 @@ rule starsolo_R1:
"{results}/{sample}/{sample}_R1_Aligned.sortedByCoord.out.bam"
params:
chemistry = _get_chem_version_R1,
extra_args = _get_extra_args,
out_dir = "{results}/{sample}/{sample}_R1_",
job_name = "star_R1"
log: "{results}/logs/{sample}_star_R1.out"
Expand All @@ -157,7 +157,7 @@ rule starsolo_R1:
--soloFeatures Gene \
--alignMatesGapMax 100000 \
--alignIntronMax 100000 \
--soloCellFilter None {params.extra_args} \
--soloCellFilter None \
--outFileNamePrefix {params.out_dir} \
--outSAMtype BAM SortedByCoordinate \
--limitBAMsortRAM 48000000000 \
Expand All @@ -175,7 +175,6 @@ rule starsolo_R2:
"{results}/{sample}/{sample}_R2_Aligned.sortedByCoord.out.bam"
params:
chemistry = _get_chem_version_R2,
extra_args = _get_extra_args,
out_dir = "{results}/{sample}/{sample}_R2_",
job_name = "star_R2"
log: "{results}/logs/{sample}_star_R2.out"
Expand All @@ -197,7 +196,7 @@ rule starsolo_R2:
--clipAdapterType CellRanger4 \
--genomeDir {STAR_INDEX} \
--soloFeatures Gene \
--soloCellFilter None {params.extra_args} \
--soloCellFilter None \
--outFileNamePrefix {params.out_dir} \
--outSAMtype BAM SortedByCoordinate \
--limitBAMsortRAM 48000000000 \
Expand All @@ -215,9 +214,7 @@ rule starsolo_paired:
output:
"{results}/{sample}/{sample}_paired_Aligned.sortedByCoord.out.bam"
params:
chemistry = _get_chem_version_R1,
star_args_paired = _get_chem_version_paired,
extra_args = _get_extra_args,
chemistry = _get_chem_version_paired,
out_dir = "{results}/{sample}/{sample}_paired_",
job_name = "star_paired"
log: "{results}/logs/{sample}_star_paired.out"
Expand All @@ -234,14 +231,13 @@ rule starsolo_paired:
--readFilesCommand gunzip -c \
--runThreadN 12 \
--soloCBwhitelist {params.chemistry} \
{params.star_args_paired} \
--soloBarcodeMate 1 \
--soloStrand Reverse \
--genomeDir {STAR_INDEX} \
--soloFeatures Gene \
--alignMatesGapMax 100000 \
--alignIntronMax 100000 \
--soloCellFilter None {params.extra_args} \
--soloCellFilter None \
--outFileNamePrefix {params.out_dir} \
--outSAMtype BAM SortedByCoordinate \
--limitBAMsortRAM 48000000000 \
Expand Down

0 comments on commit c5b2539

Please sign in to comment.