Skip to content

Commit

Permalink
Merge pull request #97 from BIMSBbioinfo/snakemake-resources
Browse files: browse the repository at this point in the history
add snakemake resources + memory requirements per rule
  • Loading branch information
borauyar authored Jun 7, 2021
2 parents e8da601 + 18fd78c commit 3ac5261
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
1 change: 1 addition & 0 deletions etc/settings.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ execution:
submit-to-cluster: no
jobs: 6
nice: 19
mem_mb: 4000
cluster:
missing-file-timeout: 120
memory: 8G
Expand Down
1 change: 1 addition & 0 deletions pigx-rnaseq.in
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ command = [
"--configfile={}".format(args.configfile),
"--directory={}".format(config['locations']['output-dir']),
"--jobs={}".format(config['execution']['jobs']),
"--resources", "mem_mb={}".format(config['execution']['mem_mb']),
]

if config['execution']['submit-to-cluster']:
Expand Down
37 changes: 33 additions & 4 deletions snakefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,6 @@ def trim_reads_input(args):
sample = args[0]
return [os.path.join(READS_DIR, f) for f in lookup('name', sample, ['reads', 'reads2']) if f]


# fastp both trims/filters reads and outputs QC reports in html/json format
rule trim_qc_reads_pe:
input: trim_reads_input
Expand All @@ -297,6 +296,8 @@ def trim_reads_input(args):
input: GENOME_FASTA
output:
star_index_file = os.path.join(OUTPUT_DIR, 'star_index', "SAindex")
resources:
mem_mb = 32000
params:
star_index_dir = os.path.join(OUTPUT_DIR, 'star_index')
log: os.path.join(LOG_DIR, 'star_index.log')
Expand All @@ -306,6 +307,8 @@ def trim_reads_input(args):
input: GENOME_FASTA
output:
[os.path.join(OUTPUT_DIR, "hisat2_index", f"{GENOME_BUILD}_index.{n}.ht2l") for n in [1, 2, 3, 4, 5, 6, 7, 8]]
resources:
mem_mb = 21000
params:
index_directory = os.path.join(OUTPUT_DIR, "hisat2_index"),
log: os.path.join(LOG_DIR, 'hisat2_index.log')
Expand Down Expand Up @@ -336,6 +339,8 @@ def hisat2_file_arguments(args):
reads = map_input
output:
os.path.join(MAPPED_READS_DIR, 'star', '{sample}_Aligned.sortedByCoord.out.bam')
resources:
mem_mb = 16000
params:
index_dir = rules.star_index.params.star_index_dir,
output_prefix=os.path.join(MAPPED_READS_DIR, 'star', '{sample}_')
Expand All @@ -348,6 +353,8 @@ def hisat2_file_arguments(args):
reads = map_input
output:
os.path.join(MAPPED_READS_DIR, 'hisat2', '{sample}_Aligned.sortedByCoord.out.bam')
resources:
mem_mb = 8000
params:
samfile = lambda wildcards: os.path.join(MAPPED_READS_DIR, 'hisat2', "_".join([wildcards.sample, 'Aligned.out.sam'])),
index_dir = rules.hisat2_index.params.index_directory,
Expand All @@ -361,10 +368,12 @@ def hisat2_file_arguments(args):
{SAMTOOLS_EXEC} view -bh {params.samfile} | {SAMTOOLS_EXEC} sort -o {output} >> {log[1]} 2>&1
rm {params.samfile}
"""

# Build the index for one sample's coordinate-sorted BAM: runs the executable
# configured as SAMTOOLS_EXEC with the `index` subcommand on the BAM written
# under MAPPED_READS_DIR/MAPPER and writes the `.bam.bai` file alongside it.
rule index_bam:
input: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam')
output: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam.bai')
# Declares 100 units of the `mem_mb` resource for each job of this rule
# (presumably megabytes, going by the resource name -- confirm).
resources:
mem_mb = 100
# One log file per sample; the shell command appends both stdout and stderr to it.
log: os.path.join(LOG_DIR, 'samtools_index_{sample}.log')
shell: "{SAMTOOLS_EXEC} index {input} {output} >> {log} 2>&1"

Expand All @@ -373,6 +382,8 @@ def hisat2_file_arguments(args):
CDNA_FASTA
output:
salmon_index_file = os.path.join(OUTPUT_DIR, 'salmon_index', "sa.bin")
resources:
mem_mb = 5000
params:
salmon_index_dir = os.path.join(OUTPUT_DIR, 'salmon_index')
log: os.path.join(LOG_DIR, "salmon", 'salmon_index.log')
Expand All @@ -388,6 +399,8 @@ def hisat2_file_arguments(args):
output:
os.path.join(SALMON_DIR, "{sample}", "quant.sf"),
os.path.join(SALMON_DIR, "{sample}", "quant.genes.sf")
resources:
mem_mb = 6000
params:
index_dir = rules.salmon_index.params.salmon_index_dir,
outfolder = os.path.join(SALMON_DIR, "{sample}")
Expand All @@ -409,6 +422,8 @@ def hisat2_file_arguments(args):
os.path.join(COUNTS_DIR, "raw_counts", "salmon","counts_from_SALMON.genes.tsv"),
os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"),
os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv")
resources:
mem_mb = 1000
log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log')
shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1"

Expand All @@ -425,8 +440,8 @@ def hisat2_file_arguments(args):
os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.forward.{sample}.log'),
os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.reverse.{sample}.log'),
os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.{sample}.log')
params:
outdir = os.path.join(BIGWIG_DIR, MAPPER)
resources:
mem_mb = 4000
shell:
"""
{BAMCOVERAGE_EXEC} -b {input.bam} -o {output[0]} --filterRNAstrand forward >> {log[0]} 2>&1
Expand All @@ -439,6 +454,8 @@ def hisat2_file_arguments(args):
salmon_output=expand(os.path.join(SALMON_DIR, "{sample}", "quant.sf"), sample = SAMPLES),
mapping_output=expand(os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam'), sample=SAMPLES)
output: os.path.join(MULTIQC_DIR, 'multiqc_report.html')
resources:
mem_mb = 200
log: os.path.join(LOG_DIR, f'multiqc.{MAPPER}.log')
shell: "{MULTIQC_EXEC} -o {MULTIQC_DIR} {OUTPUT_DIR} >> {log} 2>&1"

Expand All @@ -448,6 +465,8 @@ def hisat2_file_arguments(args):
bai = os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}_Aligned.sortedByCoord.out.bam.bai")
output:
os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv")
resources:
mem_mb = 5000
log: os.path.join(LOG_DIR, MAPPER, "{sample}.count_reads.log")
params:
single_end = isSingleEnd,
Expand All @@ -467,6 +486,8 @@ def hisat2_file_arguments(args):
expand(os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv"), sample = SAMPLES)
output:
os.path.join(COUNTS_DIR, "raw_counts", MAPPER, "counts.tsv")
resources:
mem_mb = 200
log: os.path.join(LOG_DIR, MAPPER, "collate_read_counts.log")
params:
mapped_dir = os.path.join(MAPPED_READS_DIR, MAPPER),
Expand All @@ -484,6 +505,8 @@ def hisat2_file_arguments(args):
output:
size_factors = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_size_factors.txt"),
norm_counts = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_normalized_counts.tsv")
resources:
mem_mb = 1000
log:
os.path.join(LOG_DIR, MAPPER, "norm_counts_deseq.log")
params:
Expand All @@ -507,6 +530,8 @@ def hisat2_file_arguments(args):
log: os.path.join(LOG_DIR, MAPPER, "{analysis}.report.log")
output:
os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html')
resources:
mem_mb = 4000
shell:
"{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1"

Expand All @@ -525,6 +550,8 @@ def hisat2_file_arguments(args):
log: os.path.join(LOG_DIR, "salmon", "{analysis}.report.salmon.transcripts.log")
output:
os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html')
resources:
mem_mb = 4000
shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1"

rule report3:
Expand All @@ -542,4 +569,6 @@ def hisat2_file_arguments(args):
log: os.path.join(LOG_DIR, "salmon", "{analysis}.report.salmon.genes.log")
output:
os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html')
resources:
mem_mb = 4000
shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1"

0 comments on commit 3ac5261

Please sign in to comment.