Skip to content

Commit

Permalink
For #280: fixed the previous error.
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Mar 4, 2020
1 parent 7e03df6 commit eecc06a
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 11 deletions.
8 changes: 4 additions & 4 deletions Mikado/serializers/blast_serializer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

valid_letters = 'ACDEFGHIKLMNPQRSTVWYBXZJUO'
letters = np.array(list(valid_letters))
letters = np.array(list(valid_letters), dtype=np.str_)


__author__ = 'Luca Venturini'
Expand Down Expand Up @@ -35,7 +35,7 @@ def prepare_hsp(hsp, counter, qmultiplier=1, tmultiplier=1):

hsp_dict = dict()
# We must start from 1, otherwise MySQL crashes as its indices start from 1 not 0
match, identical_positions, positives = _prepare_aln_strings(hsp, qmultiplier=qmultiplier, tmultiplier=tmultiplier)
match, identical_positions, positives = _prepare_aln_strings(hsp, qmultiplier=qmultiplier)
hsp_dict["counter"] = counter + 1
hsp_dict["query_hsp_start"] = hsp.query_start
hsp_dict["query_hsp_end"] = hsp.query_end
Expand All @@ -56,7 +56,7 @@ def _np_grouper(data):
return np.array(np.split(data, np.where(np.diff(data) != 1)[0] + 1))


def _prepare_aln_strings(hsp, qmultiplier=1, tmultiplier=1):
def _prepare_aln_strings(hsp, qmultiplier=1):

"""This private method calculates the identical positions, the positives, and a re-factored match line
starting from the HSP.
Expand All @@ -66,7 +66,7 @@ def _prepare_aln_strings(hsp, qmultiplier=1, tmultiplier=1):
lett_array = np.array([
list(str(hsp.query.seq)),
list(hsp.aln_annotation["similarity"]),
list(str(hsp.hit.seq))])
list(str(hsp.hit.seq))], dtype=np.str_)

match = lett_array[1]
match[np.where(~((lett_array[1] == "+") | (np.isin(lett_array[1], letters))))] = " "
Expand Down
9 changes: 3 additions & 6 deletions Mikado/serializers/blast_serializer/xml_serialiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ def xml_pickler(json_conf, filename, default_header,
qmult, tmult = None, None
for query_counter, record in enumerate(opened, start=1):
if qmult is None:
qmult, tmult = XmlSerializer._get_multipliers(record)

qmult, tmult = XmlSerializer.get_multipliers(record)
hits, hsps, cache = objectify_record(
session, record, [], [], cache, max_target_seqs=max_target_seqs,
qmult=qmult, tmult=tmult)
Expand Down Expand Up @@ -547,15 +546,13 @@ def get_multipliers(record):
Private quick method to determine the multipliers for a BLAST alignment
according to the application present in the record.
:param record:
:type record: Bio.Blast.Record.Blast
:type record: Bio.SearchIO.Record
:return:
"""

q_mult, h_mult = 1, 1

# application = record.application.upper()
application = record.application.upper()

application = record.program.upper()
if application in ("BLASTN", "TBLASTX", "BLASTP"):
q_mult = 1
h_mult = 1
Expand Down
49 changes: 48 additions & 1 deletion Mikado/tests/test_system_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -1677,6 +1677,53 @@ def setUp(self):
def setUpClass(cls):
cls.fai = pysam.FastaFile(pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))

def test_subprocess_single(self):
    """Run ``mikado serialise`` in-process with one worker and check the resulting database.

    The test copies the gzipped UniProt targets to a scratch directory, dumps
    ``self.json_conf`` to a YAML configuration file, invokes the ``mikado``
    console-script entry point with ``sys.argv`` set to a ``serialise`` command
    line, and then asserts the row counts of the ``hit``, ``hsp``, ``junctions``
    and ``orf`` tables in the SQLite database that was produced.
    """

    xml = pkg_resources.resource_filename("Mikado.tests", "chunk-001-proteins.xml.gz")
    transcripts = pkg_resources.resource_filename("Mikado.tests", "mikado_prepared.fasta")
    junctions = pkg_resources.resource_filename("Mikado.tests", "junctions.bed")
    orfs = pkg_resources.resource_filename("Mikado.tests", "transcripts.fasta.prodigal.gff3")
    uniprot = pkg_resources.resource_filename("Mikado.tests", "uniprot_sprot_plants.fasta.gz")
    mobjects = 300  # Let's test properly the serialisation for BLAST

    # Using the context manager (rather than a manual cleanup() call) guarantees the
    # scratch directory is removed even when the set-up code below raises.
    with tempfile.TemporaryDirectory() as workdir:
        json_file = os.path.join(workdir, "mikado.yaml")
        db = os.path.join(workdir, "mikado.db")
        log = os.path.join(workdir, "serialise.log")
        uni_out = os.path.join(workdir, "uniprot_sprot_plants.fasta")
        with gzip.open(uniprot, "rb") as uni, open(uni_out, "wb") as uni_out_handle:
            uni_out_handle.write(uni.read())

        with open(json_file, "wt") as json_handle:
            sub_configure.print_config(yaml.dump(self.json_conf, default_flow_style=False),
                                       json_handle)

        # Set up the command arguments
        for procs in (1,):
            with self.subTest(proc=procs):
                sys.argv = [str(_) for _ in ["mikado", "serialise", "--json-conf", json_file,
                                             "--transcripts", transcripts, "--blast_targets", uni_out,
                                             "--orfs", orfs, "--junctions", junctions, "--xml", xml,
                                             "-p", procs, "-mo", mobjects, db, "--log", log,
                                             "--seed", "1078"]]
                pkg_resources.load_entry_point("Mikado", "console_scripts", "mikado")()
                # Read the log through a context manager so the handle is not leaked.
                with open(log) as log_handle:
                    logged = [line.rstrip() for line in log_handle]

                self.assertTrue(os.path.exists(db))
                conn = sqlite3.connect(db)
                try:
                    cursor = conn.cursor()

                    def scalar(query):
                        """Return the first column of the first row produced by *query*."""
                        return cursor.execute(query).fetchall()[0][0]

                    self.assertEqual(scalar("select count(*) from hit"), 562, logged)
                    self.assertEqual(scalar("select count(*) from hsp"), 669)
                    self.assertEqual(scalar("select count(distinct(query_id)) from hsp"), 71)
                    self.assertEqual(scalar("select count(distinct(query_id)) from hit"), 71)
                    self.assertEqual(scalar("select count(distinct(target_id)) from hsp"), 32)
                    self.assertEqual(scalar("select count(distinct(target_id)) from hit"), 32)
                    self.assertEqual(scalar("select count(*) from junctions"), 372)
                    self.assertEqual(scalar("select count(distinct(chrom_id)) from junctions"), 2)
                    self.assertEqual(scalar("select count(*) from orf"), 169,
                                     "\n".join(logged))
                    self.assertEqual(scalar("select count(distinct(query_id)) from orf"), 81)
                finally:
                    # Release the database before deleting its file; an open
                    # connection would otherwise be leaked by the test.
                    conn.close()
                os.remove(db)

def test_subprocess_multi(self):

xml = pkg_resources.resource_filename("Mikado.tests", "chunk-001-proteins.xml.gz")
Expand All @@ -1698,7 +1745,7 @@ def test_subprocess_multi(self):
sub_configure.print_config(yaml.dump(self.json_conf, default_flow_style=False),
json_handle)
# Set up the command arguments
for procs in (1, 3):
for procs in (3,):
with self.subTest(proc=procs):
sys.argv = [str(_) for _ in ["mikado", "serialise", "--json-conf", json_file,
"--transcripts", transcripts, "--blast_targets", uni_out,
Expand Down

0 comments on commit eecc06a

Please sign in to comment.