From b492db6d740f90593fac061d3301d65fe19dce61 Mon Sep 17 00:00:00 2001 From: Samantha Zarate Date: Fri, 26 Jul 2019 11:41:48 -0700 Subject: [PATCH] Updating to monitor memory usage while running svviz, as this (#73) * Updating to monitor memory usage while running svviz, as this tool is known to hit OOM issues. Updating gnu parallel in Docker container to leverage the memfree argument. * Update README.md Adding information about svviz OOM failures in the FAQ section of README * Update Readme.md Adding text warning of OOM errors when svviz is enabled. * Update Dockerfile to build properly * Updating to v0.1.11 * small readme fiix --- Dockerfile | 34 ++++++++++++-------------- README.md | 10 ++++++++ dx_app_code/parliament2/Readme.md | 10 +++++++- dx_app_code/parliament2/dxapp.json | 4 +-- dx_app_code/parliament2/parliament2.py | 4 +-- parliament2.sh | 3 +-- test/set_up_tests.sh | 8 +++--- 7 files changed, 43 insertions(+), 30 deletions(-) diff --git a/Dockerfile b/Dockerfile index 84c2fec1..252baf36 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,18 +4,13 @@ FROM ubuntu:14.04 # File Author / Maintainer MAINTAINER Samantha Zarate -# System packages -RUN apt-get update && apt-get install -y curl wget +# System packages +RUN apt-get update && apt-get install -y curl wget parallel # Install miniconda to /miniconda RUN curl -LO http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh && bash Miniconda-latest-Linux-x86_64.sh -p /miniconda -b && rm Miniconda-latest-Linux-x86_64.sh ENV PATH=/miniconda/bin:${PATH} -# RUN conda update -y conda - -RUN /bin/bash -c "echo 'deb http://dnanexus-apt-prod.s3.amazonaws.com/ubuntu trusty/amd64/' > /etc/apt/sources.list.d/dnanexus.list" -RUN /bin/bash -c "echo 'deb http://dnanexus-apt-prod.s3.amazonaws.com/ubuntu trusty/all/' >> /etc/apt/sources.list.d/dnanexus.list" -RUN wget https://wiki.dnanexus.com/images/files/ubuntu-signing-key.gpg -RUN apt-key add ubuntu-signing-key.gpg +RUN conda update -y conda RUN apt-get update -y && apt-get upgrade -y && apt-get install -y --force-yes \ autoconf \ @@ -23,7 +18,6 @@ RUN apt-get update -y && apt-get upgrade -y && apt-get install -y --force-yes \ bsdtar \ build-essential \ cmake \ - dx-toolkit \ g++ \ gcc \ gettext \ @@ -59,7 +53,7 @@ RUN apt-get update -y && apt-get upgrade -y && apt-get install -y --force-yes \ wkhtmltopdf \ xvfb \ zlib1g-dev -RUN apt-get update + RUN apt-get update RUN conda config --add channels conda-forge RUN conda config --add channels bioconda @@ -69,8 +63,8 @@ RUN conda install -c bioconda sambamba -y RUN conda install -c bioconda bcftools -y RUN conda install -c bcbio bx-python -y RUN conda install -c defaults networkx -y -RUN conda install -c bioconda samblaster -y RUN conda install gcc_linux-64 -y +RUN conda install -c bioconda samblaster -y RUN conda install -c bioconda manta RUN conda update -y pyopenssl @@ -78,7 +72,13 @@ WORKDIR / ADD resources.tar.gz / RUN cp -a /resources/* / && rm -rf /resources/ -RUN conda install -c defaults -y numpy +ENV LD_LIBRARY_PATH=/usr/lib/root/lib +ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:${LD_LIBRARY_PATH} +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/dnanexus/root/lib +ENV LD_LIBRARY_PATH=/usr/local/lib64/:${LD_LIBRARY_PATH} +ENV LD_LIBRARY_PATH=/miniconda/lib:/${LD_LIBRARY_PATH} + +RUN conda install -c conda-forge -y numpy RUN pip install https://github.com/bioinform/breakseq2/archive/2.2.tar.gz RUN pip install pycparser RUN pip install asn1crypto @@ -86,7 +86,7 @@ RUN pip install idna RUN pip install ipaddress RUN pip install dxpy - + WORKDIR /root RUN mkdir -p /home/dnanexus/in /home/dnanexus/out @@ -96,19 +96,15 @@ COPY parliament2.sh . COPY svtyper_env.yml . RUN conda create -y --name svviz_env svviz -# We have to use a slightly different method for -# svtyper as it installs software directly from git +# We have to use a slightly different method for +# svtyper as it installs software directly from git RUN conda env create --name svtyper_env --file svtyper_env.yml -RUN /bin/bash -c "source /etc/profile.d/dnanexus.environment.sh" - ENV PATH=${PATH}:/home/dnanexus/ ENV PATH=${PATH}:/opt/conda/bin/ ENV PATH=${PATH}:/usr/bin/ ENV PYTHONPATH=${PYTHONPATH}:/opt/conda/bin/ ENV ROOTSYS=/home/dnanexus/root -ENV LD_LIBRARY_PATH=/usr/lib/root/lib -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/dnanexus/root/lib ENV DYLD_LIBRARY_PATH=/usr/lib/root/lib ENV HTSLIB_LIBRARY_DIR=/usr/local/lib ENV HTSLIB_INCLUDE_DIR=/usr/local/include diff --git a/README.md b/README.md index bbaa982f..f7795139 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,16 @@ breakseq2 -2.2- has requirement pysam==0.7.7, but you'll have pysam 0.15.1 which This is a known error message caused by how we currently manage the conflicting pysam versions required for BreakSeq and SVTyper. This issue should be resolved in v0.1.10. +#### Out of memory error + +> The tool fails or runs for a long time and I see an error message in the log: +``` +Out of memory: Kill process XXXX (svviz) score 112 or sacrifice child +``` +> What's going on? + +The svviz tool can at times consume a large amount of memory, causing the application to be killed. If you see this message in the log, you should kill the job if it is still running and retry on a machine with more memory or without enabling svviz. This issue should be resolved in v0.1.11. + ### Tool versions #### Structural variant callers diff --git a/dx_app_code/parliament2/Readme.md b/dx_app_code/parliament2/Readme.md index b49ba5be..3a35baca 100644 --- a/dx_app_code/parliament2/Readme.md +++ b/dx_app_code/parliament2/Readme.md @@ -50,6 +50,14 @@ Because the field of structural variation is relatively new and complex, we view Breakseq2 may only be able to work when using the 1000 Genomes reference genome (hs37d5). For other reference genomes, you may not get Breakseq2 results. +The svviz tool can at times consume a large amount of memory, causing the application to be killed. If you see +a message like: +``` +Out of memory: Kill process XXXX (svviz) score 112 or sacrifice child +``` +in the log, you should kill the job if it is still running and retry on a machine with more memory or without enabling svviz. + + Parliament2 is open-source and can be found here: . The Docker container can be found here: . ## Supporting information @@ -62,4 +70,4 @@ For additional information, please see the following papers: - Breakseq2: Abyzov A, Li S, Kim DR, Mohiyuddin M, Stütz AM, Parrish NF, Mu XJ, Clark W, Chen K, Hurles M, Korbel JO, Lam HYK, Lee C, Gerstein MB. 2015. "Analysis of deletion breakpoints from 1,092 humans reveals details of mutation mechanisms". Nature Communications 6. doi:10.1038/ncomms8256 - Delly2: Tobias Rausch, Thomas Zichner, Andreas Schlattl, Adrian M. Stuetz, Vladimir Benes, Jan O. Korbel. 2012. Delly: structural variant discovery by integrated paired-end and split-read analysis. Bioinformatics 28:i333-i339. doi: 10.1093/bioinformatics/bts378 - CNVnator: Abyzov A, Urban AE, Snyder M, Gerstein M. 2011. "CNVnator: an approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing". Genome Research (6):974. doi: 10.1101/gr.114876.110 -- Parliament: Adam C English, William J Salerno, Oliver A Hampton, Claudia Gonzaga-Jauregui, Shruthi Ambreth, Deborah I Ritter, Christine R Beck, Caleb F Davis, Mahmoud Dahdouli, Singer Ma, Andrew Carroll, Narayanan Veeraraghavan, Jeremy Bruestle, Becky Drees, Alex Hastie, Ernest T Lam, Simon White, Pamela Mishra, Min Wang, Yi Han, Feng Zhang, Pawel Stankiewicz, David A Wheeler, Jeffrey G Reid, Donna M Muzny, Jeffrey Rogers, Aniko Sabo, Kim C Worley, James R Lupski, Eric Boerwinkle and Richard A Gibbs. Assessing structural variation in a personal genome—towards a human reference diploid genome. BMC Genomics 2015, 16:286 doi:10.1186/s12864-015-1479-3. \ No newline at end of file +- Parliament: Adam C English, William J Salerno, Oliver A Hampton, Claudia Gonzaga-Jauregui, Shruthi Ambreth, Deborah I Ritter, Christine R Beck, Caleb F Davis, Mahmoud Dahdouli, Singer Ma, Andrew Carroll, Narayanan Veeraraghavan, Jeremy Bruestle, Becky Drees, Alex Hastie, Ernest T Lam, Simon White, Pamela Mishra, Min Wang, Yi Han, Feng Zhang, Pawel Stankiewicz, David A Wheeler, Jeffrey G Reid, Donna M Muzny, Jeffrey Rogers, Aniko Sabo, Kim C Worley, James R Lupski, Eric Boerwinkle and Richard A Gibbs. Assessing structural variation in a personal genome—towards a human reference diploid genome. BMC Genomics 2015, 16:286 doi:10.1186/s12864-015-1479-3. diff --git a/dx_app_code/parliament2/dxapp.json b/dx_app_code/parliament2/dxapp.json index 151ed506..c0a59bc8 100644 --- a/dx_app_code/parliament2/dxapp.json +++ b/dx_app_code/parliament2/dxapp.json @@ -7,7 +7,7 @@ "WGS" ], "dxapi": "1.0.0", - "version": "0.1.10", + "version": "0.1.11", "inputSpec": [ { "name": "illumina_bam", @@ -271,7 +271,7 @@ "doi:10.1186/s12864-015-1479-3" ], "upstreamAuthor": "Baylor College of Medicine", - "whatsNew": "* 0.1.10: Resolved bug breaking SVTyper; added Singularity functionality; modify run on DNAnexus. * 0.1.9: Add more logging functionality; resolved bug breaking SVVIZ; improved documentation; migrate to all regions; more sophisticated BAM storage management; other bug fixes.\n* 0.1.8: Add optional upload of log files; update SURVIVOR; add quality scores; improved error messaging; bug fixes.\n* 0.1.7: Update CNVnator, SVTyper; bug fixes.\n* 0.1.6: Bug fixes.\n* 0.1.5: Dockerized Parliament2.\n* 0.1.0: Adding Parliament2." + "whatsNew": "* 0.1.11: Resolved OOM bug breaking svviz.\n* 0.1.10: Resolved bug breaking SVTyper; added Singularity functionality; modify run on DNAnexus.\n* 0.1.9: Add more logging functionality; resolved bug breaking SVVIZ; improved documentation; migrate to all regions; more sophisticated BAM storage management; other bug fixes.\n* 0.1.8: Add optional upload of log files; update SURVIVOR; add quality scores; improved error messaging; bug fixes.\n* 0.1.7: Update CNVnator, SVTyper; bug fixes.\n* 0.1.6: Bug fixes.\n* 0.1.5: Dockerized Parliament2.\n* 0.1.0: Adding Parliament2." }, "regionalOptions": { "aws:us-east-1": { diff --git a/dx_app_code/parliament2/parliament2.py b/dx_app_code/parliament2/parliament2.py index ab298872..0ca4872b 100644 --- a/dx_app_code/parliament2/parliament2.py +++ b/dx_app_code/parliament2/parliament2.py @@ -19,7 +19,7 @@ def main(**job_inputs): prefix = job_inputs['prefix'] subprocess.check_call(['mkdir', '-p', '/home/dnanexus/in', '/home/dnanexus/out']) - docker_pull = ['docker', 'pull', 'dnanexus/parliament2:0.1.10'] + docker_pull = ['docker', 'pull', 'dnanexus/parliament2:0.1.11'] subprocess.check_call(docker_pull) print "Downloading input files" @@ -32,7 +32,7 @@ def main(**job_inputs): ref_name = "/home/dnanexus/in/{0}".format(ref_genome.name) dxpy.download_dxfile(ref_genome.id, ref_name) - docker_call = ['docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/', 'dnanexus/parliament2:0.1.10', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)] + docker_call = ['docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/', 'dnanexus/parliament2:0.1.11', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)] if 'illumina_bai' in job_inputs: input_bai = dxpy.DXFile(job_inputs['illumina_bai']) diff --git a/parliament2.sh b/parliament2.sh index 9916678e..b9233255 100644 --- a/parliament2.sh +++ b/parliament2.sh @@ -586,8 +586,7 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then threads="$(nproc)" threads=$((threads / 2)) # removing the memfree option as it doesn't seem to exist in Ubuntu 14.04 - #parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log - parallel --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log + parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log cd svviz_outputs && tar -czf /home/dnanexus/out/"${prefix}".svviz_outputs.tar.gz . fi diff --git a/test/set_up_tests.sh b/test/set_up_tests.sh index 39a5e924..0136e283 100644 --- a/test/set_up_tests.sh +++ b/test/set_up_tests.sh @@ -4,8 +4,8 @@ mkdir -p /home/dnanexus/in mkdir -p /home/dnanexus/out # Download small input BAM and index -wget https://dl.dnanex.us/F/D/6FX1Jz6bp92b9KFv574Fg306kjkGFK1ybKpG35yp/SRR504516_small.bam -O /home/dnanexus/in/small_input.bam -wget https://dl.dnanex.us/F/D/K695ZppQz1v8Pq6yJKKf0426VYyjkk8g32vgv3kZ/SRR504516_small.bam.bai -O /home/dnanexus/in/small_input.bai +wget https://dl.dnanex.us/F/D/6FX1Jz6bp92b9KFv574Fg306kjkGFK1ybKpG35yp/SRR504516_small.bam -O /home/dnanexus/in/small_input.bam -q +wget https://dl.dnanex.us/F/D/K695ZppQz1v8Pq6yJKKf0426VYyjkk8g32vgv3kZ/SRR504516_small.bam.bai -O /home/dnanexus/in/small_input.bai -q # Download reference FASTA and index -wget https://dl.dnanex.us/F/D/bb37ZbXqjFjZY6Fv0G4GzPGjx5QkYJB8XV6kB5K8/hs37d5.fa.gz -O /home/dnanexus/in/ref.fa.gz -wget https://dl.dnanex.us/F/D/p2qJ1z7gb1ZfFJX3XgPP37325kYKVk06zZp1JPbY/hs37d5.fa.fai -O /home/dnanexus/in/ref.fa.fai +wget https://dl.dnanex.us/F/D/bb37ZbXqjFjZY6Fv0G4GzPGjx5QkYJB8XV6kB5K8/hs37d5.fa.gz -O /home/dnanexus/in/ref.fa.gz -q +wget https://dl.dnanex.us/F/D/p2qJ1z7gb1ZfFJX3XgPP37325kYKVk06zZp1JPbY/hs37d5.fa.fai -O /home/dnanexus/in/ref.fa.fai -q