Skip to content

Commit

Permalink
more tests and checks for exceptions
Browse files Browse the repository at this point in the history
  • Loading branch information
metageni committed Jun 5, 2018
1 parent a308550 commit ccf75fd
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 36 deletions.
28 changes: 18 additions & 10 deletions focus_app/focus.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,24 @@ def count_kmers(query_file, kmer_size, threads, kmer_order):
os.system("rm {}".format(output_count))

if output_dump.exists():
counts = defaultdict(int)
with open(output_dump) as counts_file:
counts_reader = csv.reader(counts_file, delimiter=' ')
for kmer, count in counts_reader:
counts[kmer] = int(count)
# not empty file
if output_dump.stat().st_size:
counts = defaultdict(int)
with open(output_dump) as counts_file:
counts_reader = csv.reader(counts_file, delimiter=' ')
for kmer, count in counts_reader:
counts[kmer] = int(count)
# delete dump file
os.system("rm {}".format(output_dump))

return [counts[kmer_temp] for kmer_temp in kmer_order]

else:
os.system("rm {}".format(output_dump))
raise Exception('{} has no k-mers count. Probably not valid file'.format(query_file))

# delete dump file
os.system("rm {}".format(output_dump))

return [counts[kmer_temp] for kmer_temp in kmer_order]
else:
raise Exception('Something went wrong when trying to dump the k-mer couting.')


def write_results(results, output_directory, query_files, taxonomy_level):
Expand All @@ -167,7 +175,7 @@ def aggregate_level(results, position):
"""Aggregate abundance of metagenomes by taxonomic level.
Args:
results (dict): Path to database
results (dict): Path to results
position (int): Position of level in the results
Returns:
Expand Down
3 changes: 3 additions & 0 deletions tests/data/k6_small_sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Kingdom Phylum Class Order Family Genus Species Strain GAACGC GAACGA CACCCA
Bacteria Spirochaetes Spirochaetia Spirochaetales Spirochaetaceae Sphaerochaeta Sphaerochaeta_pleomorpha Sphaerochaeta_pleomorpha_Grapes_uid82365 990 1439 1320
Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus_suis Streptococcus_suis_98HAH33_uid58665 548 753 661
3 changes: 3 additions & 0 deletions tests/data/k6_small_sample_empty_record.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Kingdom Phylum Class Order Family Genus Species Strain GAACGC GAACGA CACCCA
Bacteria Spirochaetes Spirochaetia Spirochaetales Spirochaetaceae Sphaerochaeta Sphaerochaeta_pleomorpha Sphaerochaeta_pleomorpha_Grapes_uid82365 0 0 0
Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus_suis Streptococcus_suis_98HAH33_uid58665 0 0 1
4 changes: 0 additions & 4 deletions tests/data/k6_subset.txt

This file was deleted.

101 changes: 79 additions & 22 deletions tests/test_basic_functions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-

from focus_app.focus import normalise, load_database, is_wanted_file, count_kmers, which, load_database
import random

from focus_app.focus import normalise, load_database, is_wanted_file, count_kmers, which, run_nnls, aggregate_level

import pytest

from numpy import array



def test_normalise():
assert list(normalise(array([1, 1,]))) == [0.5, 0.5]
assert list(normalise(array([2, 2, 2, 2]))) == [0.25, 0.25, 0.25, 0.25]
Expand All @@ -17,20 +18,6 @@ def test_normalise():
normalise([0, 0, 0])


def test_load_database():
    """Check the organism labels and a sample of values loaded from the k6 subset."""
    database_path = "tests/data/k6_subset.txt"
    # the k-mer order is returned as well but is not inspected by this test
    database_matrix, organisms, _kmers_order = load_database(database_path)

    assert list(organisms) == [
        'Bacteria\tSpirochaetes\tSpirochaetia\tSpirochaetales\tSpirochaetaceae\tSphaerochaeta\tSphaerochaeta_pleomorpha\tSphaerochaeta_pleomorpha_Grapes_uid82365',
        'Bacteria\tFirmicutes\tBacilli\tLactobacillales\tStreptococcaceae\tStreptococcus\tStreptococcus_suis\tStreptococcus_suis_98HAH33_uid58665',
        'Bacteria\tProteobacteria\tGammaproteobacteria\tAlteromonadales\tAlteromonadaceae\tAlteromonas\tAlteromonas_macleodii\tAlteromonas_macleodii__Deep_ecotype__uid58251',
    ]

    # look into small subset of the matrix; the values are computed floats, so
    # compare with a tolerance instead of bit-for-bit equality
    assert list(database_matrix[0])[0] == pytest.approx(0.00027570089293671023)
    assert list(database_matrix[1])[0] == pytest.approx(0.00040074099488477375)
    assert list(database_matrix[2])[0] == pytest.approx(0.00036760119058228027)


def test_is_wanted_file():
assert is_wanted_file(["a.fasta", "b.fastq", "x.FASTq", "y.FASTA", "n.fna"]) == ['a.fasta', 'b.fastq', 'n.fna', 'x.FASTq', 'y.FASTA']
assert is_wanted_file(["f.png", "a.fasta", "b.fastq", "x.FASTq", "y.FASTA", "n.fna"]) == ['a.fasta', 'b.fastq', 'n.fna', 'x.FASTq', 'y.FASTA']
Expand All @@ -50,24 +37,94 @@ def test_count_kmers():
kmer_order = ["AAAAAAA", "AAAGAAT", "ATTTTTT"]
assert count_kmers(query_file, kmer_size, threads, kmer_order) == [17, 0, 0]

# testing for empty input
query_file = "tests/data/mock_sample_empty.fasta"
kmer_size = "6"
kmer_order = ["AAAAAA", "AAAAAT", "TTTTTT"]
assert count_kmers (query_file, kmer_size, threads, kmer_order) == [0, 0, 0]
with pytest.raises(Exception):
count_kmers(query_file, kmer_size, threads, kmer_order)


def test_load_database():
    """Check load_database on a small valid database and on one with an all-zero record."""
    # simple database
    database_matrix, organisms, kmer_order = load_database("tests/data/k6_small_sample.txt")
    assert kmer_order == ['GAACGC', 'GAACGA', 'CACCCA']
    assert organisms == [
        'Bacteria\tSpirochaetes\tSpirochaetia\tSpirochaetales\tSpirochaetaceae\tSphaerochaeta\tSphaerochaeta_pleomorpha\tSphaerochaeta_pleomorpha_Grapes_uid82365',
        'Bacteria\tFirmicutes\tBacilli\tLactobacillales\tStreptococcaceae\tStreptococcus\tStreptococcus_suis\tStreptococcus_suis_98HAH33_uid58665',
    ]

    expected_matrix = [
        [0.26407041877834087, 0.27930682976554538],
        [0.38383568951720459, 0.38379204892966362],
        [0.35209389170445454, 0.33690112130479105],
    ]
    # the matrix holds computed floats: check the row count exactly and the
    # values with a tolerance rather than bit-for-bit equality
    assert len(database_matrix) == len(expected_matrix)
    for row, expected_row in zip(database_matrix, expected_matrix):
        assert list(row) == pytest.approx(expected_row)

    # bad database with all keys 0 in row = raise error
    with pytest.raises(RuntimeWarning):
        load_database("tests/data/k6_small_sample_empty_record.txt")

def test_write_results():
    """Placeholder: no assertions for write_results have been written yet."""

def test_run_nnls():
    """Check run_nnls against recorded outputs for three seeded fake query profiles."""
    database_matrix, _organisms, _kmer_order = load_database("tests/data/k6_small_sample.txt")

    # (random seed, expected run_nnls output) for each fake metagenomic count
    cases = [
        (1128, [0.11743935706399153, 0.88256064293600844]),
        (2, [0.79079139795692821, 0.20920860204307179]),
        (500, [0.40650798820847034, 0.59349201179152955]),
    ]

    for seed, expected in cases:
        # seeding makes the fake query counts reproducible between runs
        random.seed(seed)
        fake_query_count = normalise([random.randint(10000, 200000) for _ in range(3)])

        # the solver output is floating point, so compare with a tolerance
        # instead of requiring bit-for-bit equality
        assert list(run_nnls(database_matrix, fake_query_count)) == pytest.approx(expected)


def test_aggregate_level():
    """Check aggregate_level at each of the 8 taxonomic positions of the small sample."""
    # per-organism profiles in the small sample database; every level below
    # kingdom has exactly one organism per label, so these repeat unchanged
    spirochaetes_profile = [0.26407041877834087, 0.38383568951720459, 0.35209389170445454]
    firmicutes_profile = [0.27930682976554538, 0.38379204892966362, 0.33690112130479105]

    true_answer = [
        {'Bacteria': [0.54337724854388625, 0.76762773844686816, 0.68899501300924559]},
        {'Spirochaetes': spirochaetes_profile, 'Firmicutes': firmicutes_profile},
        {'Spirochaetia': spirochaetes_profile, 'Bacilli': firmicutes_profile},
        {'Spirochaetales': spirochaetes_profile, 'Lactobacillales': firmicutes_profile},
        {'Spirochaetaceae': spirochaetes_profile, 'Streptococcaceae': firmicutes_profile},
        {'Sphaerochaeta': spirochaetes_profile, 'Streptococcus': firmicutes_profile},
        {'Sphaerochaeta_pleomorpha': spirochaetes_profile,
         'Streptococcus_suis': firmicutes_profile},
        {'Sphaerochaeta_pleomorpha_Grapes_uid82365': spirochaetes_profile,
         'Streptococcus_suis_98HAH33_uid58665': firmicutes_profile},
    ]

    database_matrix, organisms, _kmer_order = load_database("tests/data/k6_small_sample.txt")
    # one profile (matrix column) per organism label
    results = dict(zip(organisms, database_matrix.T))

    # runs aggregate function in all the levels
    for pos, expected_level in enumerate(true_answer):
        aggregate_results = aggregate_level(results, pos)

        # the labels must match exactly ...
        assert set(aggregate_results) == set(expected_level)
        # ... while the aggregated abundances are floats (sums at the kingdom
        # level), so they are compared with a tolerance
        for level, expected_profile in expected_level.items():
            assert list(aggregate_results[level]) == pytest.approx(expected_profile)

def test_run_nnls():

def test_write_results():
    """Placeholder: no assertions for write_results have been written yet."""

0 comments on commit ccf75fd

Please sign in to comment.