diff --git a/Snakefile b/Snakefile index 0847f66..71f989c 100644 --- a/Snakefile +++ b/Snakefile @@ -24,7 +24,7 @@ def _get_config(sample, item): return SAMPLES[sample][item] except KeyError: try: - return CHEMISTRY[item] + return CHEMISTRY[SAMPLES[sample]["chemistry"]][item] except KeyError: return DEFAULTS[item] diff --git a/chemistry.yaml b/chemistry.yaml index 5aa8b39..1e22f9a 100644 --- a/chemistry.yaml +++ b/chemistry.yaml @@ -1,47 +1,38 @@ chromiumV3_illumina: filter_R1_length: 58 - bc_cut: "" - STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2\"]" - STAR_R2: "[WHITELIST_V3,\"--soloUMIlen 12\"]" - STAR_paired: "--alignEndsProtrude 58 ConcordantPair" + bc_whitelist: WHITELIST_V3 + STAR_R1: --soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 12 chromiumV3_element: filter_R1_length: False - bc_cut: "" - STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 48 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]" - STAR_R2: "[WHITELIST_V3,\"--soloUMIlen 12\"]" - STAR_paired: "--alignEndsProtrude 58 ConcordantPair" + bc_whitelist: WHITELIST_V3 + STAR_R1: --soloUMIlen 12 --clip5pNbases 48 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 + STAR_R2: --soloUMIlen 12 chromiumV3_ultima: filter_R1_length: 58 - bc_cut: "" - STAR_R1: "[WHITELIST_V3,\"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39\"]" + bc_whitelist: WHITELIST_V3 + STAR_R1: --soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39 chromiumV2_illumina: filter_R1_length: 56 - bc_cut: "" - STAR_R1: "[WHITELIST_V2,\"--soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2\"]" - STAR_R2: "[WHITELIST_V2,\"--soloUMIlen 10\"]" - STAR_paired: "--alignEndsProtrude 56 ConcordantPair" + bc_whitelist: WHITELIST_V2 + STAR_R1: --soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 10 dropseq_illumina: filter_R1_length: 50 - bc_cut: "" - STAR_R1: "None --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2" - STAR_R2: "None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13" - STAR_paired: "--alignEndsProtrude 50 ConcordantPair" + STAR_R1: --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13 microwellseq_illumina: filter_R1_length: 54 - bc_cut: "CGACTCACTACAGGG...TCGGTGACACGATCG" - STAR_R1: "None --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2" - STAR_R2: "None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19" - STAR_paired: "--alignEndsProtrude 54 ConcordantPair" + bc_cut: CGACTCACTACAGGG...TCGGTGACACGATCG + STAR_R1: --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19 bd_illumina: filter_R1_length: 53 - bc_cut: "ACTGGCCTGCGA...GGTAGCGGTGACA" - STAR_R1: "None --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2" - STAR_R2: "None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28" - STAR_paired: "--alignEndsProtrude 53 ConcordantPair" + bc_cut: ACTGGCCTGCGA...GGTAGCGGTGACA + STAR_R1: --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28 indrop_illumina: filter_R1_length: 32 - bc_cut: "" - STAR_R1: "None --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2" - STAR_R2: "None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9" - STAR_paired: "--alignEndsProtrude 32 ConcordantPair" + STAR_R1: --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 --outFilterMultimapNmax 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.2 + STAR_R2: --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9 \ No newline at end of file diff --git a/config.yaml b/config.yaml index 32ad94f..6771791 100644 --- a/config.yaml +++ b/config.yaml @@ -34,12 +34,12 @@ POLYA_SITES: # "SAF" DEFAULTS: -# default config options, overridden by SAMPLES +# default config options, overridden by SAMPLES and CHEMISTRY definitions chemistry: chromiumV3_illumina alignments: - R2 - paired - extra_args: "" + STAR_paired: --alignEndsProtrude 58 ConcordantPair SAMPLES: # per-sample labels, FASTQ basenames, and config options @@ -47,10 +47,10 @@ SAMPLES: # basename # optional: # chemisty -# platform # alignments # cutadapt_args -# star_args +# star_args (STAR_R1, STAR_R2, STAR_paired) +# extra star_args (STAR_R1_extra_args, STAR_R2_extra_args, STAR_paired_extra_args) test: basename: sample chemistry: chromiumV2_illumina diff --git a/rules/cutadapt_star.snake b/rules/cutadapt_star.snake index 7351bc1..887584c 100644 --- a/rules/cutadapt_star.snake +++ b/rules/cutadapt_star.snake @@ -15,33 +15,34 @@ def _get_fq_paths(wildcards): """ Process complex barcodes into simple, if needed""" def _get_bc_cut(wildcards): - args = CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["bc_cut"] - return args + try: + return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["bc_cut"] + except KeyError: + return "" + +""" Extract per-capture chemistry from gex libs (base function)""" +def _get_chem_version(sample, alignment): + try: + whitelist = eval(_get_config(sample, "bc_whitelist")) + except KeyError: + whitelist = "None" + try: + extra_args = _get_config(sample, alignment+"_extra_args") + except KeyError: + extra_args = "" + return [whitelist, _get_config(sample, alignment), extra_args] """ Extract per-capture chemistry from gex libs (R2)""" def _get_chem_version_R2(wildcards): - try: - return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R2"]) - except SyntaxError: - return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R2"] + return _get_chem_version(wildcards.sample, "STAR_R2") """ Extract per-capture chemistry from gex libs (R1/paired)""" def _get_chem_version_R1(wildcards): - try: - return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R1"]) - except SyntaxError: - return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_R1"] + return _get_chem_version(wildcards.sample, "STAR_R1") """ Extract per-capture extra arguments for gex paired alignments """ def _get_chem_version_paired(wildcards): - try: - return eval(CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_paired"]) - except SyntaxError: - return CHEMISTRY[_get_config(wildcards.sample, "chemistry")]["STAR_paired"] - -""" Extract per-capture extra arguments for gex libs """ -def _get_extra_args(wildcards): - return _get_config(wildcards.sample, "extra_args") + return _get_chem_version(wildcards.sample, "STAR_R1") + [_get_config(wildcards.sample, "STAR_paired")] """ This rule trims R1-only libraries """ rule cutadapt_R1: @@ -134,7 +135,6 @@ rule starsolo_R1: "{results}/{sample}/{sample}_R1_Aligned.sortedByCoord.out.bam" params: chemistry = _get_chem_version_R1, - extra_args = _get_extra_args, out_dir = "{results}/{sample}/{sample}_R1_", job_name = "star_R1" log: "{results}/logs/{sample}_star_R1.out" @@ -157,7 +157,7 @@ rule starsolo_R1: --soloFeatures Gene \ --alignMatesGapMax 100000 \ --alignIntronMax 100000 \ - --soloCellFilter None {params.extra_args} \ + --soloCellFilter None \ --outFileNamePrefix {params.out_dir} \ --outSAMtype BAM SortedByCoordinate \ --limitBAMsortRAM 48000000000 \ @@ -175,7 +175,6 @@ rule starsolo_R2: "{results}/{sample}/{sample}_R2_Aligned.sortedByCoord.out.bam" params: chemistry = _get_chem_version_R2, - extra_args = _get_extra_args, out_dir = "{results}/{sample}/{sample}_R2_", job_name = "star_R2" log: "{results}/logs/{sample}_star_R2.out" @@ -197,7 +196,7 @@ rule starsolo_R2: --clipAdapterType CellRanger4 \ --genomeDir {STAR_INDEX} \ --soloFeatures Gene \ - --soloCellFilter None {params.extra_args} \ + --soloCellFilter None \ --outFileNamePrefix {params.out_dir} \ --outSAMtype BAM SortedByCoordinate \ --limitBAMsortRAM 48000000000 \ @@ -215,9 +214,7 @@ rule starsolo_paired: output: "{results}/{sample}/{sample}_paired_Aligned.sortedByCoord.out.bam" params: - chemistry = _get_chem_version_R1, - star_args_paired = _get_chem_version_paired, - extra_args = _get_extra_args, + chemistry = _get_chem_version_paired, out_dir = "{results}/{sample}/{sample}_paired_", job_name = "star_paired" log: "{results}/logs/{sample}_star_paired.out" @@ -234,14 +231,13 @@ rule starsolo_paired: --readFilesCommand gunzip -c \ --runThreadN 12 \ --soloCBwhitelist {params.chemistry} \ - {params.star_args_paired} \ --soloBarcodeMate 1 \ --soloStrand Reverse \ --genomeDir {STAR_INDEX} \ --soloFeatures Gene \ --alignMatesGapMax 100000 \ --alignIntronMax 100000 \ - --soloCellFilter None {params.extra_args} \ + --soloCellFilter None \ --outFileNamePrefix {params.out_dir} \ --outSAMtype BAM SortedByCoordinate \ --limitBAMsortRAM 48000000000 \