diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index a31e186..6fb5b89 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -59,6 +59,7 @@ execution: submit-to-cluster: no jobs: 6 nice: 19 + mem_mb: 4000 cluster: missing-file-timeout: 120 memory: 8G diff --git a/pigx-rnaseq.in b/pigx-rnaseq.in index 3ead2fc..7cea4a4 100644 --- a/pigx-rnaseq.in +++ b/pigx-rnaseq.in @@ -323,6 +323,7 @@ command = [ "--configfile={}".format(args.configfile), "--directory={}".format(config['locations']['output-dir']), "--jobs={}".format(config['execution']['jobs']), + "--resources", "mem_mb={}".format(config['execution']['mem_mb']), ] if config['execution']['submit-to-cluster']: diff --git a/snakefile.py b/snakefile.py index 2fab462..0579e33 100755 --- a/snakefile.py +++ b/snakefile.py @@ -271,7 +271,6 @@ def trim_reads_input(args): sample = args[0] return [os.path.join(READS_DIR, f) for f in lookup('name', sample, ['reads', 'reads2']) if f] - # fastp both trims/filters reads and outputs QC reports in html/json format rule trim_qc_reads_pe: input: trim_reads_input @@ -297,6 +296,8 @@ def trim_reads_input(args): input: GENOME_FASTA output: star_index_file = os.path.join(OUTPUT_DIR, 'star_index', "SAindex") + resources: + mem_mb = 32000 params: star_index_dir = os.path.join(OUTPUT_DIR, 'star_index') log: os.path.join(LOG_DIR, 'star_index.log') @@ -306,6 +307,8 @@ def trim_reads_input(args): input: GENOME_FASTA output: [os.path.join(OUTPUT_DIR, "hisat2_index", f"{GENOME_BUILD}_index.{n}.ht2l") for n in [1, 2, 3, 4, 5, 6, 7, 8]] + resources: + mem_mb = 21000 params: index_directory = os.path.join(OUTPUT_DIR, "hisat2_index"), log: os.path.join(LOG_DIR, 'hisat2_index.log') @@ -336,6 +339,8 @@ def hisat2_file_arguments(args): reads = map_input output: os.path.join(MAPPED_READS_DIR, 'star', '{sample}_Aligned.sortedByCoord.out.bam') + resources: + mem_mb = 16000 params: index_dir = rules.star_index.params.star_index_dir, output_prefix=os.path.join(MAPPED_READS_DIR, 'star', '{sample}_') @@ -348,6 +353,8 @@ def hisat2_file_arguments(args): reads = map_input output: os.path.join(MAPPED_READS_DIR, 'hisat2', '{sample}_Aligned.sortedByCoord.out.bam') + resources: + mem_mb = 8000 params: samfile = lambda wildcards: os.path.join(MAPPED_READS_DIR, 'hisat2', "_".join([wildcards.sample, 'Aligned.out.sam'])), index_dir = rules.hisat2_index.params.index_directory, @@ -361,10 +368,12 @@ def hisat2_file_arguments(args): {SAMTOOLS_EXEC} view -bh {params.samfile} | {SAMTOOLS_EXEC} sort -o {output} >> {log[1]} 2>&1 rm {params.samfile} """ - + rule index_bam: input: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam') output: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam.bai') + resources: + mem_mb = 100 log: os.path.join(LOG_DIR, 'samtools_index_{sample}.log') shell: "{SAMTOOLS_EXEC} index {input} {output} >> {log} 2>&1" @@ -373,6 +382,8 @@ def hisat2_file_arguments(args): CDNA_FASTA output: salmon_index_file = os.path.join(OUTPUT_DIR, 'salmon_index', "sa.bin") + resources: + mem_mb = 5000 params: salmon_index_dir = os.path.join(OUTPUT_DIR, 'salmon_index') log: os.path.join(LOG_DIR, "salmon", 'salmon_index.log') @@ -388,6 +399,8 @@ def hisat2_file_arguments(args): output: os.path.join(SALMON_DIR, "{sample}", "quant.sf"), os.path.join(SALMON_DIR, "{sample}", "quant.genes.sf") + resources: + mem_mb = 6000 params: index_dir = rules.salmon_index.params.salmon_index_dir, outfolder = os.path.join(SALMON_DIR, "{sample}") @@ -409,6 +422,8 @@ def hisat2_file_arguments(args): os.path.join(COUNTS_DIR, "raw_counts", "salmon","counts_from_SALMON.genes.tsv"), os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"), os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv") + resources: + mem_mb = 1000 log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log') shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1" @@ -425,8 +440,8 @@ def hisat2_file_arguments(args): os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.forward.{sample}.log'), os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.reverse.{sample}.log'), os.path.join(LOG_DIR, MAPPER, 'genomeCoverage.{sample}.log') - params: - outdir = os.path.join(BIGWIG_DIR, MAPPER) + resources: + mem_mb = 4000 shell: """ {BAMCOVERAGE_EXEC} -b {input.bam} -o {output[0]} --filterRNAstrand forward >> {log[0]} 2>&1 @@ -439,6 +454,8 @@ def hisat2_file_arguments(args): salmon_output=expand(os.path.join(SALMON_DIR, "{sample}", "quant.sf"), sample = SAMPLES), mapping_output=expand(os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam'), sample=SAMPLES) output: os.path.join(MULTIQC_DIR, 'multiqc_report.html') + resources: + mem_mb = 200 log: os.path.join(LOG_DIR, f'multiqc.{MAPPER}.log') shell: "{MULTIQC_EXEC} -o {MULTIQC_DIR} {OUTPUT_DIR} >> {log} 2>&1" @@ -448,6 +465,8 @@ def hisat2_file_arguments(args): bai = os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}_Aligned.sortedByCoord.out.bam.bai") output: os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv") + resources: + mem_mb = 5000 log: os.path.join(LOG_DIR, MAPPER, "{sample}.count_reads.log") params: single_end = isSingleEnd, @@ -467,6 +486,8 @@ def hisat2_file_arguments(args): expand(os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv"), sample = SAMPLES) output: os.path.join(COUNTS_DIR, "raw_counts", MAPPER, "counts.tsv") + resources: + mem_mb = 200 log: os.path.join(LOG_DIR, MAPPER, "collate_read_counts.log") params: mapped_dir = os.path.join(MAPPED_READS_DIR, MAPPER), @@ -484,6 +505,8 @@ def hisat2_file_arguments(args): output: size_factors = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_size_factors.txt"), norm_counts = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_normalized_counts.tsv") + resources: + mem_mb = 1000 log: os.path.join(LOG_DIR, MAPPER, "norm_counts_deseq.log") params: @@ -507,6 +530,8 @@ def hisat2_file_arguments(args): log: os.path.join(LOG_DIR, MAPPER, "{analysis}.report.log") output: os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html') + resources: + mem_mb = 4000 shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1" @@ -525,6 +550,8 @@ def hisat2_file_arguments(args): log: os.path.join(LOG_DIR, "salmon", "{analysis}.report.salmon.transcripts.log") output: os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html') + resources: + mem_mb = 4000 shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1" rule report3: @@ -542,4 +569,6 @@ def hisat2_file_arguments(args): log: os.path.join(LOG_DIR, "salmon", "{analysis}.report.salmon.genes.log") output: os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html') + resources: + mem_mb = 4000 shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' >> {log} 2>&1"