-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from seanome/olgabot/multisearch
Add multisearch with probability of overlap calculation
- Loading branch information
Showing
18 changed files
with
357 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,44 @@ | ||
FROM nfcore/base:2.1 | ||
# Miniconda3 uses a faster solver than the one available for nf-core/base:2.1 | ||
FROM continuumio/miniconda3 | ||
LABEL \ | ||
author="Olga Botvinnik" \ | ||
description="sourmash branchwater image for nf-core pipelines" \ | ||
description="sourmash branchwater image with latest commit for nf-core pipelines" \ | ||
maintainer="olga.botvinnik@gmail.com" | ||
|
||
# Install the conda environment | ||
COPY environment.yml / | ||
RUN conda env create -f /environment.yml && conda clean -a | ||
|
||
ARG SOURMASH_BRANCHWATER_VERSION=0.9.3 | ||
ADD https://api.github.com/repos/sourmash-bio/sourmash_plugin_branchwater/git/refs/heads/main version.json | ||
RUN git clone --depth 1 https://github.com/sourmash-bio/sourmash_plugin_branchwater/ | ||
RUN cd sourmash_plugin_branchwater \ | ||
&& conda env create -y -n sourmash-branchwater -f environment.yml | ||
|
||
# COPY environment.yml / | ||
# RUN conda env create -f /environment.yml && conda clean -a | ||
|
||
# ARG SOURMASH_BRANCHWATER_VERSION=0.9.3 | ||
|
||
# Add conda installation dir to PATH (instead of doing 'conda activate') | ||
ENV PATH /opt/conda/envs/nf-core-sourmash-branchwater-${SOURMASH_BRANCHWATER_VERSION}/bin:$PATH | ||
ENV PATH /opt/conda/envs/sourmash-branchwater/bin:$PATH | ||
|
||
|
||
RUN which -a conda | ||
RUN which -a pip | ||
RUN which -a maturin | ||
|
||
# Need to install clang libraries | ||
RUN apt update && apt-get install -y clang | ||
|
||
RUN which -a clang | ||
|
||
RUN cd sourmash_plugin_branchwater \ | ||
&& which -a pip \ | ||
&& which -a clang \ | ||
&& which -a maturin \ | ||
&& export LIBCLANG_PATH=/usr/lib/x86_64-linux-gnu/ \ | ||
&& export CONDA_PREFIX=/opt/conda \ | ||
&& pip install -e . \ | ||
&& maturin develop | ||
|
||
|
||
# Dump the details of the installed packages to a file for posterity | ||
RUN conda env export --name nf-core-snpeff-${SOURMASH_BRANCHWATER_VERSION} > nf-core-snpeff-${SOURMASH_BRANCHWATER_VERSION}.yml | ||
RUN conda env export --name sourmash-branchwater > nf-core-sourmash-branchwater.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Image providing sourmash_plugin_branchwater built from the
# olgabot/multisearch-evalue branch (multisearch with probability-of-overlap
# calculation), for use in nf-core style pipelines.
#
# Miniconda3 uses a faster solver than the one available for nf-core/base:2.1
# NOTE(review): the base image is untagged, so rebuilds float with upstream.
# Pin a specific continuumio/miniconda3 tag or digest once one has been validated.
FROM continuumio/miniconda3
LABEL \
    author="Olga Botvinnik" \
    description="sourmash branchwater image with specialized probability of overlap calculation for multisearch for nf-core pipelines" \
    maintainer="olga.botvinnik@gmail.com"

# Need to install clang libraries (LIBCLANG_PATH below points into the system
# library directory). Update and install run in one layer so a cached, stale
# package index is never reused, and the index is removed in that same layer.
# Placed before the branch-tracking ADD so this layer stays cached when the
# branch head moves.
RUN apt-get update \
    && apt-get install -y clang \
    && rm -rf /var/lib/apt/lists/* \
    && which -a clang

# Presumably a cache-buster: the ref metadata changes when the branch head moves,
# so the layers below are rebuilt and the clone picks up the newest commit.
ADD https://api.github.com/repos/sourmash-bio/sourmash_plugin_branchwater/git/refs/heads/olgabot/multisearch-evalue version.json
RUN git clone --depth 1 --branch olgabot/multisearch-evalue https://github.com/sourmash-bio/sourmash_plugin_branchwater/

# Install the conda environment from the plugin repository's own environment.yml
RUN cd sourmash_plugin_branchwater \
    && conda env create -y -n sourmash-branchwater-multisearch-prob-overlap -f environment.yml \
    && conda clean --all --yes

# Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH=/opt/conda/envs/sourmash-branchwater-multisearch-prob-overlap/bin:$PATH

# Fail the build early if any of the required tools is not on PATH
RUN which -a conda pip maturin

# Build and install the plugin from the cloned checkout
RUN cd sourmash_plugin_branchwater \
    && export LIBCLANG_PATH=/usr/lib/x86_64-linux-gnu/ \
    && export CONDA_PREFIX=/opt/conda \
    && pip install --no-cache-dir -e . \
    && maturin develop

# Dump the details of the installed packages to a file for posterity.
# The file name is fixed: no SOURMASH_BRANCHWATER_VERSION build argument is
# declared in this Dockerfile, so referencing it would expand to an empty string.
RUN conda env export --name sourmash-branchwater-multisearch-prob-overlap > nf-core-sourmash-branchwater-multisearch-prob-overlap.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Name of the Docker image (repository) that is built and pushed.
NAME=olgabot/sourmash_branchwater_multisearch_prob_overlap

# These targets are commands, not files. Declaring them phony keeps `make` from
# skipping a recipe when a file called "build", "login" or "push" exists.
.PHONY: all build login push

# Default target: build the image, authenticate, then publish it.
all: build login push

# TODO: Switch to podman for building and quay.io for hosting images at some point
build:
	docker build -t ${NAME} .

login:
	docker login

push:
	docker push ${NAME}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Conda environment definition: sourmash plus the branchwater plugin,
# both pinned to exact versions.
name: nf-core-sourmash-branchwater-0.9.3

# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
  - defaults

# Each package is requested explicitly from the conda-forge channel.
dependencies:
  - conda-forge::sourmash_plugin_branchwater=0.9.3
  - conda-forge::sourmash-minimal=4.8.8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/*
 * Run `sourmash scripts multisearch` (sourmash_plugin_branchwater) for one
 * query/against pair of signature files.
 *
 * Input : tuple of (meta, query_meta, query_sig, against_meta, against_sig).
 *         meta supplies `alphabet` and `ksize`; query_meta and against_meta
 *         supply `id` for the default output prefix.
 * Output: csv      - tuple (meta, "*.csv") with the multisearch matches
 *         versions - versions.yml recording the tool versions
 */
process SOURMASH_MULTISEARCH {
    tag "${meta.alphabet}_k${meta.ksize}"
    label "process_medium"

    conda "${moduleDir}/environment.yml"
    container "docker.io/olgabot/sourmash_branchwater_multisearch_prob_overlap"

    input:
    tuple val(meta), val(query_meta), path(query_sig), val(against_meta), path(against_sig)

    output:
    tuple val(meta), path("*.csv"), emit: csv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // $ sourmash scripts multisearch --help
    //
    // == This is sourmash version 4.8.11. ==
    // == Please cite Irber et. al (2024), doi:10.21105/joss.06830. ==
    //
    // usage: multisearch [-h] [-q] [-d] -o OUTPUT [-t THRESHOLD] [-k KSIZE] [-s SCALED]
    //                    [-m {DNA,protein,dayhoff,hp}] [-c CORES] [-a] query_paths against_paths
    //
    // massively parallel in-memory sketch search
    //
    // positional arguments:
    //   query_paths           input file of sketches
    //   against_paths         input file of sketches
    //
    // options:
    //   -h, --help            show this help message and exit
    //   -q, --quiet           suppress non-error output
    //   -d, --debug           provide debugging output
    //   -o OUTPUT, --output OUTPUT
    //                         CSV output file for matches
    //   -t THRESHOLD, --threshold THRESHOLD
    //                         containment threshold for reporting matches (default: 0.01)
    //   -k KSIZE, --ksize KSIZE
    //                         k-mer size at which to select sketches
    //   -s SCALED, --scaled SCALED
    //                         scaled factor at which to do comparisons
    //   -m {DNA,protein,dayhoff,hp}, --moltype {DNA,protein,dayhoff,hp}
    //                         molecule type (DNA, protein, dayhoff, or hp; default DNA)
    //   -c CORES, --cores CORES
    //                         number of cores to use (default is all available)
    //   -a, --ani             estimate ANI from containment
    //
    // Example run:
    //   sourmash scripts multisearch query.sig.gz database.zip -o results.csv

    // Required defaults for the tool to run. Setting `ext.args` for this process
    // in the pipeline configuration replaces them entirely, so an override must
    // supply --ksize/--moltype itself.
    def args = task.ext.args ?: "--ksize ${meta.ksize} --moltype ${meta.alphabet} --threshold 0 --scaled 1"
    // One output file per query/against pair, labelled with alphabet and k-mer size
    def prefix = task.ext.prefix ?: "${query_meta.id}--in--${against_meta.id}.${meta.alphabet}.${meta.ksize}"
    def BRANCHWATER_VERSION = '0.9.3' // Version not available using command line
    """
    sourmash scripts multisearch \\
        --debug \\
        -c $task.cpus \\
        $args \\
        --output '${prefix}.multisearch.csv' \\
        ${query_sig} \\
        ${against_sig}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sourmash: \$(echo \$(sourmash --version 2>&1) | sed 's/^sourmash //' )
        sourmash_plugin_branchwater: $BRANCHWATER_VERSION
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Module metadata.
# NOTE(review): name, description and the input/output entries below describe
# `sourmash sketch`; confirm they match the module this file ships with.
name: sourmash_sketch
description: Create a signature (a group of FracMinHash sketches) of a sequence using sourmash
keywords:
  - hash sketch
  - sourmash
  - genomics
  - metagenomics
  - taxonomic classification
  - taxonomic profiling
  - kmer
tools:
  - sourmash:
      description: Compute and compare FracMinHash signatures for DNA and protein data sets.
      homepage: https://sourmash.readthedocs.io/
      documentation: https://sourmash.readthedocs.io/
      tool_dev_url: https://github.com/sourmash-bio/sourmash
      doi: "10.21105/joss.00027"
      licence: ["BSD-3-clause"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - sequence:
      type: file
      description: FASTA or FASTQ file containing (genomic, transcriptomic, or proteomic) sequence data
      pattern: "*.{fna,fa,fasta,fastq,fq,faa}.gz"
  - ksize:
      type: int
      description: |
        The k-mer size to use for the MinHash sketches.
        e.g.: 31
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - signatures:
      type: file
      description: FracMinHash signature of the given sequence
      pattern: "*.{sig}"
# One handle per list entry
authors:
  - "@Midnighter"
  - "@olgabot"
maintainers:
  - "@olgabot"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.