Skip to content

Commit

Permalink
Support blast and diamond blastp for ortholog identification (#643)
Browse files Browse the repository at this point in the history
* add ignore zero anchor parameter for skip no matches in batch search

* ignore blank line in bed

* set label/chr size in layout file

* add genelabel rotation (customize x and y for rotation with angle 0)

* add customize x y for genelabelrotation 0; add --outputprefix for customized output

* Reverting some changes when I messed up the "Resolving Conflicts" step

* add blast diamond blastp

---------

Co-authored-by: Haibao Tang <tanghaibao@gmail.com>
  • Loading branch information
Tong-Chen and tanghaibao authored Apr 12, 2024
1 parent 1092b21 commit ad09808
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 1 deletion.
128 changes: 128 additions & 0 deletions jcvi/apps/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ def run_formatdb(infile=None, outfile=None, dbtype="nucl"):
sh(cmd)


@depends
def run_diamond_makedb(infile=None, outfile=None):
cmd = "diamond makedb"
cmd += " --in {0} --db {1} -p 5".format(infile, infile)
sh(cmd)


@depends
def run_blat(
infile=None,
Expand Down Expand Up @@ -586,5 +593,126 @@ def last(args, dbtype=None):
return lastfile


def blast_main(args, dbtype=None):
"""
%prog database.fasta query.fasta
Run blastp/blastn by calling BLAST+ blastp/blastn depends on dbtype.
"""
p = OptionParser(blast_main.__doc__)
p.add_option(
"--dbtype",
default="nucl",
choices=("nucl", "prot"),
help="Molecule type of subject database",
)
p.add_option("--path", help="Specify BLAST path for blastn or blastp")

p.set_cpus()
p.set_outdir()
p.set_params()

opts, args = p.parse_args(args)

if len(args) != 2:
sys.exit(not p.print_help())

subject, query = args
path = opts.path
cpus = opts.cpus
if not dbtype:
dbtype = opts.dbtype

getpath = lambda x: op.join(path, x) if path else x
cmd = "blastn" if dbtype == "nucl" else "blastp"
lastdb_bin = getpath("makeblastdb")
lastal_bin = getpath(cmd)
for bin in (lastdb_bin, lastal_bin):
if not which(bin):
logging.fatal("`%s` not found on PATH. Have you installed BLAST?", bin)
sys.exit(1)

db_suffix = '.nin' if dbtype == "nucl" else ".pin"

run_formatdb(
infile=subject,
outfile=subject + db_suffix,
dbtype=dbtype)

blastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir)
# Make several attempts to run LASTAL
try:
sh(
cmd + f" -num_threads {cpus} -query {query} -db {subject} -out {blastfile}" +
" -outfmt 6 -max_target_seqs 1000 -evalue 1e-5",
check=False,
redirect_error=STDOUT,
)
except CalledProcessError as e: # multi-threading disabled
message = f"{cmd} failed with message:"
message += "\n{0}".format(e.output.decode())
logging.error(message)
logging.fatal("Failed to run `blast`. Aborted.")
cleanup(blastfile)
sys.exit(1)
return blastfile


def diamond_blastp_main(args, dbtype="prot"):
"""
%prog database.fasta query.fasta
Run diamond blastp for protein alignment.
"""
p = OptionParser(diamond_blastp_main.__doc__)

p.add_option("--path", help="Specify diamond path for diamond blastp")

p.set_cpus()
p.set_outdir()
p.set_params()

opts, args = p.parse_args(args)

if len(args) != 2:
sys.exit(not p.print_help())

subject, query = args
path = opts.path
cpus = opts.cpus
if not dbtype:
dbtype = opts.dbtype

getpath = lambda x: op.join(path, x) if path else x
cmd = "diamond blastp"
diamond_bin = getpath("diamond")
for bin in (diamond_bin,):
if not which(bin):
logging.fatal("`%s` not found on PATH. Have you installed Diamond?", bin)
sys.exit(1)

run_diamond_makedb(
infile=subject,
outfile=subject + '.dmnd',)

blastfile = get_outfile(subject, query, suffix="last", outdir=opts.outdir)
# Make several attempts to run LASTAL
try:
sh(
cmd + f" --threads {cpus} --query {query} --db {subject} --out {blastfile}" +
" --ultra-sensitive --max-target-seqs 1000 --evalue 1e-5 --outfmt 6",
check=False,
redirect_error=STDOUT,
)
except CalledProcessError as e: # multi-threading disabled
message = f"{cmd} failed with message:"
message += "\n{0}".format(e.output.decode())
logging.error(message)
logging.fatal("Failed to run `diamond blastp`. Aborted.")
cleanup(blastfile)
sys.exit(1)
return blastfile


if __name__ == "__main__":
main()
21 changes: 20 additions & 1 deletion jcvi/compara/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ def ortholog(args):
match (RBH).
"""
from jcvi.apps.align import last as last_main
from jcvi.apps.align import diamond_blastp_main, blast_main
from jcvi.compara.blastfilter import main as blastfilter_main
from jcvi.compara.quota import main as quota_main
from jcvi.compara.synteny import scan, mcscan, liftover
Expand All @@ -621,6 +622,7 @@ def ortholog(args):
choices=("nucl", "prot"),
help="Molecule type of subject database",
)

p.add_option(
"--full",
default=False,
Expand Down Expand Up @@ -673,6 +675,13 @@ def ortholog(args):
help="Ignore this pair of ortholog identification instead of throwing an error when performing many pairs of cataloging."
)

p.add_option(
"--align_soft",
default="last",
choices=("last", "blast", "diamond_blastp"),
help="Sequence alignment software. Default <last> for both <nucl> and <prot>. Users could also use <blast> for both <nucl> and <prot>, or <diamond_blastp> for <prot>.",
)

opts, args = p.parse_args(args)

if len(args) != 2:
Expand All @@ -690,14 +699,24 @@ def ortholog(args):
dist = "--dist={0}".format(opts.dist)
minsize_flag = "--min_size={}".format(opts.n)
cpus_flag = "--cpus={}".format(opts.cpus)
align_soft = opts.align_soft

aprefix = op.basename(a)
bprefix = op.basename(b)
pprefix = ".".join((aprefix, bprefix))
qprefix = ".".join((bprefix, aprefix))
last = pprefix + ".last"
if need_update((afasta, bfasta), last, warn=True):
last_main([bfasta, afasta, cpus_flag], dbtype)
if align_soft == "blast":
blast_main(
[bfasta, afasta, cpus_flag], dbtype
)
elif dbtype == "prot" and align_soft == "diamond_blastp":
diamond_blastp_main(
[bfasta, afasta, cpus_flag], dbtype
)
else:
last_main([bfasta, afasta, cpus_flag], dbtype)

self_remove = opts.self_remove
if a == b:
Expand Down

0 comments on commit ad09808

Please sign in to comment.