diff --git a/focus_app/focus.py b/focus_app/focus.py index 50fb28d..31f0b2e 100644 --- a/focus_app/focus.py +++ b/focus_app/focus.py @@ -132,16 +132,24 @@ def count_kmers(query_file, kmer_size, threads, kmer_order): os.system("rm {}".format(output_count)) if output_dump.exists(): - counts = defaultdict(int) - with open(output_dump) as counts_file: - counts_reader = csv.reader(counts_file, delimiter=' ') - for kmer, count in counts_reader: - counts[kmer] = int(count) + # not empty file + if output_dump.stat().st_size: + counts = defaultdict(int) + with open(output_dump) as counts_file: + counts_reader = csv.reader(counts_file, delimiter=' ') + for kmer, count in counts_reader: + counts[kmer] = int(count) + # delete dump file + os.system("rm {}".format(output_dump)) + + return [counts[kmer_temp] for kmer_temp in kmer_order] + + else: + os.system("rm {}".format(output_dump)) + raise Exception('{} has no k-mers count. Probably not valid file'.format(query_file)) - # delete dump file - os.system("rm {}".format(output_dump)) - - return [counts[kmer_temp] for kmer_temp in kmer_order] + else: + raise Exception('Something went wrong when trying to dump the k-mer couting.') def write_results(results, output_directory, query_files, taxonomy_level): @@ -167,7 +175,7 @@ def aggregate_level(results, position): """Aggregate abundance of metagenomes by taxonomic level. Args: - results (dict): Path to database + results (dict): Path to results position (int): Position of level in the results Returns: diff --git a/tests/data/k6_small_sample.txt b/tests/data/k6_small_sample.txt new file mode 100644 index 0000000..a04b931 --- /dev/null +++ b/tests/data/k6_small_sample.txt @@ -0,0 +1,3 @@ +Kingdom Phylum Class Order Family Genus Species Strain GAACGC GAACGA CACCCA +Bacteria Spirochaetes Spirochaetia Spirochaetales Spirochaetaceae Sphaerochaeta Sphaerochaeta_pleomorpha Sphaerochaeta_pleomorpha_Grapes_uid82365 990 1439 1320 +Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus_suis Streptococcus_suis_98HAH33_uid58665 548 753 661 \ No newline at end of file diff --git a/tests/data/k6_small_sample_empty_record.txt b/tests/data/k6_small_sample_empty_record.txt new file mode 100644 index 0000000..3eaa36f --- /dev/null +++ b/tests/data/k6_small_sample_empty_record.txt @@ -0,0 +1,3 @@ +Kingdom Phylum Class Order Family Genus Species Strain GAACGC GAACGA CACCCA +Bacteria Spirochaetes Spirochaetia Spirochaetales Spirochaetaceae Sphaerochaeta Sphaerochaeta_pleomorpha Sphaerochaeta_pleomorpha_Grapes_uid82365 0 0 0 +Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus_suis Streptococcus_suis_98HAH33_uid58665 0 0 1 \ No newline at end of file diff --git a/tests/data/k6_subset.txt b/tests/data/k6_subset.txt deleted file mode 100644 index c1f31da..0000000 --- a/tests/data/k6_subset.txt +++ /dev/null @@ -1,4 +0,0 @@ -Kingdom Phylum Class Order Family Genus Species Strain GAACGC GAACGA CACCCA CTTCTA CACCCC CTTCTC CACCCG CGTGTC CGTGTA TAAGGA GTCAAA GTCAAC CTGTCC CTGTCA TCAGAA GTATCA GGTGTA CCGGGC CCGGGA TATCCA ATTCCG ATTCCC ATTCCA GTTCTA CCCGCG CGCGGC CGCGGA CCCGCC CTCCGC TTACAA ACCTGG TCATGA ACCTGC ACCTGA GCCTGA TAAAGA GCATAA AAGGTC CAAATA CAAATC CAAATG AATACT GGCATA AATACG AATACA AATACC TCCCCA GAGCAC ACTGAT ACTGAG AGTAGA AGTAGG ACTGAA ATACAG TCGAAA GTGTCA GTTGTA CTTGCA CTTGCC CGAAAG CGAAAA CGAAAC AATCAT AAATGC CACGGC AAATGA ACAGTC CACGGG ACAGTA AATCAG AATCAA AAATGT AATCAC ATACGA ATACGC TCTAGA ATACGG TCGCCA ACCCAT ACCCAC GGTAAC ACCCAA AGATGG ACCCAG AGATGA AGATGC CACTAG CACTAA CACTAC CCCCAA CCCCAC CCCCAG CTCGAG GGGTCA CCCACA AAGGAA AAGGAC AAGGAG ATATGC GTTCCA ATATGA ATATGG AAGGAT CTATCA GGTCCA GCCCAA GCCCAC AGACCC AGACCA AGACCG CTTCGC ATGCAA AGGTCC AGGTCA AGGTCG TCCGGA CTTAGC ATTGTG CTTAGA ATGCAC ATTGTC ATTGTA CGCTAC ATTTTG CGCTAA ATTTTA CGCTAG ATTTTC TAGCAA TAAATA GCCTTA GATACC GATACA AAGACA AAGACC GATTTA AAGACG GATCAC GATCAA AAGACT CTTCCA GCGGTA CTTCCC ACTTGC ATGTGA ACTTGA ACTTGG CGGTGA CGGTGC ATGTGC TAGACA ATCTCG ATCTCA ATCTCC GGCCAA GGCCAC ACCGTG ACCGTA ACCGTC CCATAA CAAAGG CCATAC CAAAGA CAAAGC CCATAG GGCAGA CGTGAG CAACGC CTAACA CAACGA CTAACC CAACGG CGTGAA GAAACC CTCTAA CTCTAC CGATCC CGATCA CGATCG GGGGGA CAGACC CAGACA CAGACG GCTAAC GCTAAA AACCCT AACCCC AACCCA AACCCG ACGTAA AACTGG AACTGA AACTGC CCCAGA CCCAGC CCCAGG AACTGT ACGTAT CTCCAA CGCATA ATACTG CGCATC ATACTA ATACTC GCGCAA GCGCAC GCTCCC GCTCCA TATTGA CCTACG TGTACA CCTACA CCTACC AGTATC GACTAA AGTATA GACTAC AGTATG ACCTTG ACCTTC ACCTTA ACAGGG CTGGGA CTGGGC ACAGGC ACAGGA CAGGAC CAGGAA AGGACC AGGACA ACAGGT CTCCGA AGGACT GCAGCA GCAGCC CCTCAG CCTCAA CCTCAC ATGGGC ATGGGA ATGGGG GAAAGA ATCAAG GAAAGC ATCAAC ATCAAA CATGAG ATCAAT CATGAC CATGAA GCTTTA GTAGCA CAAGTG GCGGCC CAAGTC GCGGCA CAAGTA TAATCA CGGCCC AAAATT CGGCCG CTTGTA AGTTTA GGTCTA AAAATG AAAATC AAAATA TCGTCA GAACAA ATCCCG GAACAC ATCCCC ATCCCA AACGGA CACCAC CACCAA CACCAG AACATT AACGGG GAGTTA ACCAGA AACATG AACATA AACATC CAGTCG GTCAGA GCACGA CAGTCC GCACGC CAGTCA GCACCA GCAAGC GCAAGA GTATAC GTATAA CCGGAA CCGGAC CCGGAG CGTCTC CGTCTA CTCTTC CTCTTA TACGTA TGTTAA CGCGAA CGCGAC CGCGAG CTCCAC AGTCGG AGTCGA AGTCGC GCCTAC GCCTAA CCGACA CCGACC CCGACG CCCGAG CCCGAA CGCACA ATACAC CGCACC ATACAA ACTGCT CCCGAC CGCACG ACTGCA ACTGCC ATACAT ACTGCG CTCACC CTCACA CTTGAA TAGTGA GTGAGA TCGACA ACAATA ACAATC GTGGGA ACAATG TCAAAA CGAACG CGAACC CGAACA CTGGTA CTGGTC GAGGGA TTGCAA GTCGTA GAGCAA ATATCG TCGCAA CCGTTA CCGTTC AATGCT GGTACA GGTACC GCGGAC AATGCG AATGCC AATGCA CCCCCC CCCCCA CCCCCG ATGATG ATGATA ATGATC TGCAAA AGTAGC AAGGCC ATAACC AAGGCA ATAACA AAGGCG ATAACG GAGAAA GAGAAC ACTGAC AGGTAT ACCAAG ACCAAA ATCATC ACCAAC GGTCAC GATCGC ACCAAT AGGTAG AGGTAA AGGTAC AATTGG TCTTAA AATTGA ATCATG AATTGC TAGCCA CGCTCA CGCTCC AATTGT GATAAA GATAAC AAGAAT AAGAAC GATCCA AAGAAA GATCCC AAGAAG GCCCCA TATTAA CAAGGG CAAGGC CAAGGA GGTTCA AAGGCT ATTATA ATTATC ATTATG TCCCAA TAGAAA AGTCTA AGTCTC AGTCTG TAACTA ATCTAG ATCTAC ATCTAA GCCCCC GGTCAA CCATGC TCCAGA CCATGA CCATGG GGGGAA GGGGAC CTCTCC CTCTCA CGATAG CGATAA CGATAC GTGTTA CGTGCC GTGGTA GCTACA GCTACC CGTGCA CACGGA TGCCCA ACAGTG AGCGTA AGCGTC AGCGTG GGCCCC GGCCCA CACGTG CACGTA CACGTC ACGTGC ACGTGA ACGTGG AAATGG GGGACA GGGACC GCTTCC AGAGAG AGAGAA AGAGAC AGAGAT GTCTTA GCTCAA GCTCAC GCAGAC GCAGAA CTTGAC ATGCGG ATGCGC ATGCGA GGCTAC TGTAAA GGCTAA ACTGTA ACTGTC ACTGTG GACTGC GACTGA GTTTCA ATAATG CCTTTA CCTTTC ATAATC ATAATA ACAGAA ACAGAC ACAGAG CTGGAC CTGGAA ACAGAT AAACCG AAACCA CCAATC AAACCC CCAATA GAGATC AACGAG GAGATA AACGAA AACGAC AAACCT ATTTGC ATTTGG AACGAT CCCTAC CCCTAA ATCACA ATCACC ATCACG CATTAG CATTAA CATTAC CATGCA CATGCC CATGCG GTAGAA GTAGAC ACAACA ACAACC GATTGC ACAACG ACTAAG GGGTAC ACAACT GGGTAA AGTCCG GTCCGA TAATAA TCAACA ATTCGC ATTCGA TAACGA ATTCGG TCGTAA ATCCAA ATCCAC ATCCAG GAACCC GAACCA TACCTA ACCATC ACCATA ACCATG ATGTTG AGCTCG AGTGGC AGCTCC AGCTCA CAGTAA CAGTAC CAGTAG AAAGTA AAAGTC AAAGTG GATCTA CCGGCC ATTAGG CCGGCG ATTAGC AAGTGA AAGTGC AAGTGG CACAAG AAGTGT CACAAC TCTGAA CGCGCC CGCGCA GGTAAA CGCGCG AGCGGG AGCGGA ACCTCT ACCTCC ACCTCA ACCTCG CTCCCC TAGCTA CGGGAG TAAGCA CGGGAA CCCATA CGGGAC CCCATC GCCTCA GCCTCC CCGAAC CCTGCG CCGAAA CCGAAG CCTGCC CCTGCA CCATTC CCATTA TATAAA TCTACA CGCAAC CGCAAA ATACCG CGCAAG ATACCA ATACCC TAGTAA CTCAAG CTCAAA CTCAAC GTGTGA GTTGGA CCTTGA CCTTGC ATCTTG ATCTTA ATCTTC AGGATC AGGATA AGGATG GACATC GACATA CATAGC CATAGA GAATAC GAATAA AATGAT GAAATA GAAATC AATGAA AATGAC AATGAG CTTGGA CTTGGC AGCCCT GTCCTA TGCACA TTGAAA AGCCCG AGCCCA AGCCCC ATAAAT GATTCA ATAAAG ATAAAA ATAAAC ACGATG CTACAG GAGACC ACGATC GAGACA ACGATA GGTTAC GGTTAA ACCACT ACCACG ATGTGG ACCACC ACCACA TCCGCA AACGTC AACGTA AACGTG CGTGAC AGTGTG AGTGTA TCTTCA AGTGTC AACGTT AAAGGT GCTCGA GACTTA AAAGGG AAAGGA AAAGGC GAGTAC CAAGAA CAAGAC CAAGAG TACGGA AGGGCG AGGGCA AGGGCC TCGAGA CTGTGC TAGAGA GGCGAA GTGCTA GGCGAC ATCTGA CGAATC ATCTGC CGAATA ATCTGG ATCATA GATAGC AAGCAG GATAGA AAGCAA AAGCAC CATTTC CATTTA GTTATA CAAACA AAGCAT CAAACC CAAACG CTAGGC CTAGGA CTAAGA CTAAGC AAGTTG AAGTTA AAGTTC TACAGA GGGGCA TATATA GGACTA CACATC CACATA CACATG ATGAGA ATGAGC ATGAGG ACTCTG ACTCTA TGCCAA ACTCTC AAATTC AAATTA AAATTG CTTCGA AAATTT TTCGAA GGGAAC GGGAAA GGTGCA AGAGCG AGAGCC AGAGCA GCGTAC GCGTAA AGAGCT GCGACA GCGACC GCAGGA GCAGGC CCGTGA CCGTGC CAATTG TATTCA CAATTA CAATTC GGCTCA AATCCT CTTTAC AGCATG AGCATC AGCATA AGACCT GTTTAA AACTTG GAGGTA AGGCGG AGGCGA AGGCGC ACAGCC ACAGCA ACAGCG AGGAGC GTCGGA AGGAGA AGGAGG CTGGCA CTGGCC GCGCCC ACAGCT AACGCT AAACAG AAACAC GCTCTA AAACAA AACGCG AATTTA AATTTC AACGCC ATTGGA AATTTG ATTTAC AAACAT ATTTAG CCCTCG TGACCA GCGCCA CATTCG CATTCC CATTCA ACAAAT GATTAA GATTAC GTAGGA ACAAAC ACAAAA ACAAAG GCGGGA AACCTG TAATGA GGCGTA GCCCTA GATGTA AACCTC TGAAAA CCCTCC ATCCGC ATCCGA ATCCGG AGCTAT AGTTGG AGCTAA AGCTAC AGTTGC AGTTGA AGCTAG TAACAA CTGCAC CTAATA CAGTGC CTAATC CAGTGA CAGTGG CTGCAA TCAGGA CAACCA CGAGCG CGAGCA CGAGCC GTTCAA TGAGCA AAGTAC AAGTAA AAGTAG TACATA GGACCA GGACCC CACACG CACACC CACACA TCTGCA ATTGCC CCCGTC CCCGTA TGTCAA TAGATA AGTGAC AGTGAA AGTGAG CGGGCC TAAGAA CGGGCA AGTGAT CCTGAA CCTGAC CCTGAG ATTGGG CTCCAG GTTAGA GCCAAA TATACA ATTGGC ACTAGT GTAATA AACGCA CCACCG CCACCA CCACCC ACTAGG ACTAGC CCCTCA CGTTCA CGTTCC ATTTAA ACTGGA ACTGGC TAGTCA GTGACA CTTTGC CTACGC GTGCAC ACCTAG CGACGA ACCTAA CGACGC ACCTAC CCGCAG CCGCAA CCGCAC ACCTAT ATTACA CGAAGA GCGCTA TCGGAA GAAGAA ATTACG GAAGAC GGATCA GAATCA GAATCC ATGGTC CAGATG ATGGTA ATGGTG CAGATC ACGCCC CAGATA ACGCCT CTCGAC CTCGAA AATGGT AATGGC AATGGA AATGGG AATAGA AATAGC AGCCAG AAAAGT AGCCAC AGCCAA TGGTAA AAAAGG AGCCAT AAAAGC AAAAGA CGCCAG CGCCAA GAGTGC CGCCAC GAGTGA GCCAGA GCCAGC TACCGA TACTGA GCCCGC GCCCGA CTTCAC CTTCAA TCCGAA AATTCA AATTCC AATTCG AATTCT GTCATA AGCAGG AGCAGC AGCAGA CCAGGG CCAGGA CCAGGC ATAGAG ACGGTC CATCTA ATAGAC CATCTC ATAGAA ATTAGA CAAGCC CAAGCA CAAGCG ACATGA ACATGC ACATGG AGGGAT CGTAAG GAGTCA ACATGT CGTAAC CGTAAA AGGGAG AGGGAC AGGGAA ATCCTC CTATGA ATCCTA ATCCTG CTATGC AAGCCG AAGCCC AAGCCA CAAAAC CAAAAA AAGCCT CAAAAG CCCTTA CAGTTC CAGTTA GACCCA GACCCC CTCTGC CTCTGA CTACTC GCGAAC GCGAAA TAGGGA CTACTA GTGATA GCTGGA TGATAA GCTAGA ATGAAT GCTAGC AAGGGT ATGAAC ATGAAA GACGGA ATGAAG GACGGC CTTGTC GGTGAA GGTGAC GGAATA GCGTCA GCGTCC CGTCCC CGTCCA GAAGTC GAAGTA GCCGTA CCGTAC CCGTAA CCGTAG GCACTA ATGCCC ATGCCA GGCGCC ATGCCG GGCGCA ACTATG ACTATC TTTAAA ACTATA GGGCAA GGGCAC GTTTGA GTACGA CCGCTC CCGCTA GCGCGC GCGCGA GCATGC GCATGA GTCGAC GTCGAA CGTGGC GAGCTA GAGCTC AAACGA AAACGC GTAGTA AAACGG ATTGAA ATTGAC ATTGAG TGCGCA CACGCA AAACGT ATTTCG ATTTCA ATTTCC ATCGGG ATCGGC ATCGGA CATAGG ATCAGG ATCAGA ATCAGC CTTTTC CTTTTA GATTCC ACGGGG ACGGGA ACGGGC CGTAGA TCACGA TTCCAA ACCCTA ACCCTC ACCCTG AGTTAT CTGACC CTGACA AGTTAA AGTTAC CCCCTC AGTTAG CCCCTA ACACAG ACACAA ACACAC CTGCAG CATGGG CATGGA CATGGC AATGTC ACACAT AATGTA AATGTG GCACAC GCACAA CCACAG ACATTG ACATTA ACATTC CGAGAG ATGCTC ATTACC CGAGAC CGAGAA AAGTCT ATGCTA GCTATA AAGTCG AAGTCA AAGTCC GGACAC GGACAA GAGGCA AGCGCT GAGGCC ACTCGA ACTCGC ACTCGG AGCGCA AGCGCC AGGGTG AGGGTA AGCGCG AGGGTC TAAAAA ACGTTC AGCTGC ACGTTA AGCTGA ACGTTG AGCTGG AGTGCG AGTGCA AGTGCC CCAAAA AGAGTC AGAGTA AGAGTG GTTAAA CCTGGC GTTAAC CCTGGA GCACCC GTCTGA GGAAGA GCGTTA CAATGA CCCGCA CAATGC CAATGG ATGCTG CCACAC CCACAA TCCTCA CGTTAG CGTTAC CGTTAA TAACCA ACCGCT GTGAAA GTGAAC ACCGCG ACCGCA ACCGCC AGATCC GTTGCA GTGCCA CCGCCG AGGCTA CCTTCC AGGCTC CCGCCC CCTTCG AGGCTG TCCAAA ATTTGA GGATAA GGATAC GAAGCC GCAGTA GAAGCA CGCCCG GAGAGA CGCCCC CGCCCA AGTTTG ACGCAG ACGCAC ACGCAA CTCGCA ACGTAC CCCTAG ACGCAT AGTTTC GCCGGC GCCGGA AGATCT ACGTAG TACCCA CATGTA CATGTC GGCCTA AATCGG AATCGC AATCGA AATCGT AGCCGA CCTTCA AGCCGC AAAAAT AGCCGG GATGGA GATGGC AAAAAA AAAAAC GAGCGA AAAAAG CCAAGG CCAAGC CCAAGA AGAACT CCGCCA AGATAG ACCCGG ACCCGA AGATAC ACCCGC AGATAA AGAACG TACCAA AGAACA AGAACC ATGTCA CACTGG AGATAT TGACAA ACCAGG CACTGC ATGTCG CACTGA AATTAT CTGTTC CTGTTA AATTAC AATTAA AATTAG CGGGTC CGGGTA CCAAAC AGCAAA ATATAT AGCAAC AAAGCT AGCAAG AAAGCA ATATAG ATATAA AGCAAT ATATAC ATAGCA ATAGCC ATAGCG TGCTAA ACATAC CTTAAG ACATAA CTTAAA ACATAG CTTAAC CGTACA CGTACC CGAGTA CGAGTC AGGGGA AGGGGC TATGCA AGGGGG CGACTA CGACTC AAGTAT ATCGTG ATCGTC ATCGTA TGGGAA GATTGA CTAGCC CTAGCA CGGTAG ACTTAG ACTTAA CGGTAC ACTTAC CGGTAA GACCAC GACCAA ACTTAT TACACA GCGAGA GAGAGC GCGAGC CAGCCG CAGCCA CAGCCC GAACTC GAACTA CACCTA AACTAT CACCTC CACCTG AACACT TGATCA AACTAG AACTAC AACTAA AACACG AACACA AACACC CCAGCA CTATAC CAACAG CAACAA CCAGCC CAACAC CTATAA ATATCC GACAGC GACAGA GTATTA CCAGCG GGTGGA AGAGGC GTAAGA AGAGGA CTCGCC AGAGGG CGTCAA CGTCAC CGTCAG GTCGCA CCGTCG CCGTCA CCGTCC CGCGTC CGCGTA ATGCAG CCACTA CCACTC GGCTGA ATGCAT ATGACG ATGACA ATGACC GGAGTA GGGCCC GGGCCA ACGAGT AGGCCA AGGCCC AGGCCG ACGAGG ACGAGC ACGAGA AGGCCT AGACGG GGGTTA CTACGA AACGGT TGCGAA AACGGC ATCCAT ATTGCA ATTGCG CTCGTA CTCGTC CCCTGA AAGCTT CCCTGC GTGTAA GATATC AAGCTC GATATA AAGCTA AAGCTG CCAGTA CCAGTC GACACA CATCGA CATCGC TACTCA CATCGG CAGGGG CAGGGA CAGGGC AATCTG AATCTC AATCTA TCACAA GCCATA CGGACG CGGACA CGGACC ATTAAC ATTAAA ATTAAG TGAACA ATTAAT GGTATA CTGAAA CTGAAC CTGAAG CATTGC CATTGA CATTGG ATGTTA ACACCG CTGCCA CTGCCC ACACCC ACACCA ATGTTC CTAGTC CTAGTA ACACCT AGACTG AGACTC AGACTA CCTCGC CCTCGA CGAGGA CGAGGC ACTTTC ACTTTA ACTTTG GACCTA GACCTC CACCGG CACCGA CACCGC GGACGA CTATTA CACAGC CTTATC CACAGA CTTATA CACAGG ACTCAT CGTATA GACGTA CGTATC CTATTC GAGGAC GAGGAA AGCGAT ACTCAC ACTCAA ACTCAG AGCGAC TAAACA AGCGAA AGCGAG TCTAAA AGTTCC AGTTCA AGTTCG CTGTGA TAGGAA GCAAAA GCAAAC CGGGGC CGGGGA GTATGA AGTGGA TAATTA GTTACA TATCGA GTCTAA AGGACG TTCAAA TATAGA AGTGGG GGAAAA AACCTT GGGCTA CAGGAG ACTAGA GAATGA ACTACT GAATGC CCACGA CAATAC CCACGC CAATAA CAATAG CCACGG AACCTA GGCTTA ACTACC ACTACA ACTACG CGTTGA AGTCAG CGTTGC AGTCAC TCCTAA AGTCAA GTTTTA TCATCA AGTCAT TTATAA CTTTCC CTTTCA GAGCGC CGACCA CGACCC GCATAC CGACCG GTTGAA ACTGGG CCTTAC CCTTAA CCTTAG CAACTC CAACTA CAACTG AATAGG TCTCAA AATAGT GGATGA GGGGTA GAGTCC AAACTG AAACTA TCTCCA AAACTC ACGCGA AGTACG ACGCGC AGTACC ACGCGG AGTACA AAACTT CCCTTC AGTACT ACGCGT GCCGAA GCCGAC GTGGAA ACCGAG AAATCG CACGCG ACCGAC AAATCC ACCGAA AAATCA CTTTGA AAATCT ACCGAT ACCAGT GGAGCA GGAGCC AAAACT GATGAC GATGAA AAAACC AAAACA AAAACG CCAAAG CTAGAG TCGCGA GCCACA AGATCA GCCACC AGAAAT AGATCG ACCAGC GTGCAA ACCCGT AGAAAG AGAAAC ATGTCC AGAAAA CTACCC ATGTAC GACCGA ATGTAA ATGTAG CCTATA CTACCA CCGGCA GACGAC CCTATC AAAGTT AGCACT TTAGAA CGGCTC CGGCTA AGCACC AGCACA AGCACG AAAGAC CGAAGC AAAGAA AAAGAG ATATCA TGTGAA AAAGAT ACATCT AGACGC TACAAA AGACGA AGGTGG CCTCTC AGGTGC CCTCTA AGGTGA TCCACA ACATCG CTTACC ACATCA CTTACA ACATCC CCCGGC CCCGGA CCCGGG TATGAA TGGAAA AAGAGG AAGAGA AAGAGC AAGCGT GCTTAC GCTTAA AAGCGC AAGAGT AAGCGA AAGCGG CTGCTA CTGCTC ACTTCT CGGTCA CGGTCC CTAGAA CTAGAC ACTTCG GACCGC ACTTCC ACTTCA CACAAA GGTCGA CGTCGC CGTCGA GCGTGA CAGCAG CAGCAC CAGCAA GTACAC GCTGCC GCTGCA AACTCT TGGCCA AACTCA AACTCC AACTCG CTACAA GGATCC AATATT GTTCGA GGAGGA GGCACA GGCACC CAACCG ACGACC AATATG CAACCC AATATC GACAAA GACAAC ACGACA GCAATA CGATGC CGATGA GAATTA GAATTC GTAAAC CTACAC GTAAAA ACGCCG GGATTA AGCGGC ACGCCA AAAGCC CTATCC AACCGG GGCGGA AACCGC AACCGA AAAGCG AACCGT CTCCTA CTCCTC AACAAG CCCACG AACAAC CCCACC AACAAT GTGCGA CCGATA AGCCTG AGCCTA AGCCTC GTACCA ACGAAT AGGCAT CTCCCA ACGAAA CGCCTC AGGCAA CGCCTA AGGCAG ACGAAG CAGAGG GCATCC GCATCA CAGAGC CGGCCA CAGAGA CCTAGA CCTAGC CCTAGG ATCGCC ATCGCA ATCGCG AAGGTG ACGGCT AAGGTA ACGGCA CATCAC ACGGCC CATCAA CATCAG ACGGCG TCACCA CGGAAG AATATA CGGAAC CGGAAA ATGGCG ATGGCC ATGGCA AGATTA AGATTC GAAACA AGATTG AGAATG AGAATA AGAATC CTGAGC CTGAGA CATACC CATACA CATACG GTCCAC GTCCAA CGGCGC CGGCGA CGGTTA GGTTTA CGGTTC CGTACG GCTGTA AGGTTG AGGTTC AGGTTA ACTCCT GAGGGC ACATAT TTAAAA ACTCCG ACTCCA ACTCCC CTGTAA CTGTAC GTCACA GCAACC TAGGCA GCAACA TCATAA ATTCAG ATTCAA ATTCAC GTCTCA GCGATA GAAGGC GAAGGA CGATTC CGATTA GGAACC GGAACA CAATCG CAATCA CAATCC AGTCCA AGTCCC ACTAAA ACTAAC CAGCTG CAGCTA CAGCTC ACTAAT ACGGAC CCGAGG CCGAGA ACGGAA CCGAGC CGACAC CGACAA CGACAG GTACTA CCGCGC ACGGAG CCGCGA TACGCA CCGCGG AGGAAC AATAAC AATAAA AATAAG TACTAA AATAAT CCAACC CCAACA CCAACG CGCCGC AGTAAA CGCCGA AGTAAC GAGTAA AGTAAG CTCAGC CTCAGA GCTTCA AGTAAT GTGGCA CTCGGA CTCGGC GCCGCC GCCGCA CATATG CATATC CATATA TCAGCA ACACTC ACACTA ACACTG ACCGGA AAATAA ACCGGC AAATAC CTTTAA ACCGGG AAATAG CACGAG CAGGTA AACTTC CAGGTC CACGAC CACGAA AATCCC AATCCA AAATAT AATCCG GGGATA GGAGAC GGGTGA GGAGAA GAGCCA GAGCCC CGCAGA GATGCA CGCAGC GATGCC ACCCCT ACCCCA GGTAGA ACCCCC CGGATA ACCCCG CGGATC AGAAGA AGAAGC AGAAGG CACTCC CACTCA CACTCG CCCCGG CCCCGC CCCCGA AGCTTC AGCTTA AGCTTG AACTTA AAGGGG ATAAGG AAGGGC ATAAGC AAGGGA ATAAGA CCAGAC CCAGAA CCAGAG ATAGGG ATAGGA ATAGGC AGACAA GACGCA AGACAC AGACAG GACGCC TCAAGA CTATAG AGACAT CGTGGA CGCTGA CGCTGC TATGGA AACAAA CGTAGC TGGACA GCTTGA TGAGAA CCGGTC CCGGTA CCTGTC CCTGTA TATCAA GGTTGA GGAAAC TCCCGA GTAACA ATTCTC ATTCTA ATTCTG CAGCGA CAGCGC CAGCGG CGTTTA CGTTTC TGGCAA CCGATC GCTGAA GCTGAC GGCCGA CCATCG CCATCC CCATCA CTGATC CTGATA GGCAAC GGCAAA CTAAAG CTAAAC CTAAAA GACACC ACCGGT TAGCGA TACGAA ACGCTG ACGCTA ACGCTC TCGGCA AGGCAC AACCAA AACCAC ACGAAC AACCAG AACCAT AACAGA ACGTCG AACAGC ACGTCC AACAGG ACGTCA CCCAAG GACGAA CCCAAC CCCAAA ACGTCT GGGAGA AACAGT GGGCGA GTACAA ACGACT CAGAAA CAGAAC ACGACG CAGAAG CCTAAC CCTAAA CACTTC CCTAAG CACTTA AAGATT ATCGAT CTCATC GACTCC CTCATA GACTCA AAGATA AAGATC AAGATG ATCGAG ATCGAA ATCGAC GCATTA ACGGTA ATATTC ATATTA ATATTG ATAGTA ATAGTC ATAGTG GATCGA CAGGCA AGGAAG CATCCG AGGAAA CATCCA CAGGCG CATCCC ACGGTG AGGAAT ACGGAT CGGAGA CGGAGC CAGGCC ATGGAA ATGGAC ATGGAG CGCTTA CGCTTC GAAAAC ATAGAT GAAAAA TAAGTA ACAAGG ACAAGA ACAAGC ACACGT GACGTC CATAAG GCGGAA CATAAA GCCAAC CATAAC ACACGC ACAAGT ACACGA ACACGG GTCCCA CTGCGA CTGCGC CGGCAA CGGCAC CGGCAG CCTCCC CCTCCA CACGCC CCTCCG -Bacteria Spirochaetes Spirochaetia Spirochaetales Spirochaetaceae Sphaerochaeta Sphaerochaeta_pleomorpha Sphaerochaeta_pleomorpha_Grapes_uid82365 990 1439 1320 983 1384 2489 663 494 526 1784 2463 1415 1886 2114 2286 2019 1106 823 1561 2696 1585 3885 3026 778 396 399 434 747 853 1997 2272 1199 2424 1966 1754 2011 1492 2545 2339 2590 1936 2001 2081 1465 2494 3162 2515 830 1751 1363 790 953 2035 2603 2642 913 1479 3903 3910 1978 2866 1659 3107 2313 660 2666 1244 795 1581 2224 3214 1644 1267 1386 949 154 1616 1376 2072 781 1923 1862 2203 1883 2160 1765 319 404 819 2043 1038 1942 527 1365 1002 6701 2339 2443 2105 2310 2122 2401 4377 2070 1302 1446 642 1372 1887 1262 1558 3843 1533 1915 1253 446 539 1227 323 1300 2810 1992 777 3297 342 2026 201 4863 1755 1256 1045 2908 1962 2469 2552 1085 1223 713 2455 1766 3220 1389 3819 1808 834 1585 1431 1726 1152 1002 1063 1276 1943 1721 1979 866 1020 1617 1180 1896 4725 1955 5041 3450 2307 2232 636 1056 420 1937 397 1961 1137 3752 273 1297 1058 1510 441 2036 1679 1905 902 392 703 2614 2212 2196 1369 713 2389 1830 1920 2658 1664 2633 1531 1149 1923 854 2155 1019 1140 1538 922 365 1317 1218 2665 1035 771 1492 1827 1898 409 1366 1161 1543 3781 2758 1178 2844 2711 2139 2135 3281 2003 4691 1756 2072 2212 1172 1427 2375 1909 1796 2562 1212 1779 2653 2233 4144 3758 4011 2275 3544 1508 2994 1778 2594 1174 1302 1280 649 1548 1275 1484 1692 705 3273 232 1841 758 714 3567 4367 3901 1430 2960 1450 1234 2173 2038 1704 1135 1941 1429 1744 1844 376 2706 1724 1661 2034 979 1131 618 1374 510 1597 1262 2659 2624 648 1281 1865 688 997 728 396 2408 628 380 616 567 336 292 1038 951 1295 544 1183 438 1395 1196 644 1185 1600 565 1139 657 2410 1690 1035 289 2478 1834 1973 772 1307 734 3887 845 1099 2083 2256 2285 1059 2296 3780 1018 1367 1783 2548 2170 2096 3023 910 2456 3058 1417 884 1776 2018 1519 943 548 1049 2542 2951 988 1559 1245 3242 2895 1947 5304 818 2864 2254 4232 1592 1466 1030 4477 2051 1196 2438 3158 2915 3388 1745 972 677 2354 1796 2466 1539 2114 677 2751 2623 3072 1448 1058 718 2030 2708 1847 3787 2995 1254 6222 1282 3971 1321 781 4209 4310 5599 1915 3110 1578 2711 1854 2420 1960 625 1195 1375 541 321 1180 458 1088 2633 2139 2970 3064 1664 4214 1303 1588 1295 1746 2212 2377 587 630 1322 1147 1672 943 865 2591 1182 1033 712 695 1092 1114 169 532 315 529 583 470 2855 1708 1333 3325 1619 3137 1355 1653 621 1981 748 1592 3301 2117 1069 744 1016 1381 2027 631 1307 1431 898 1524 1270 2590 1913 1763 3822 2663 1945 3090 1471 1668 1657 3861 2109 2632 3642 1765 3207 2414 1130 1147 1683 1989 1116 3534 2837 1824 1873 1447 604 1160 1925 778 456 724 1856 2126 1747 793 1514 883 1963 2511 3532 1178 380 1276 1279 2120 573 728 1241 2494 1153 1723 815 1492 1138 2596 1127 2619 1915 2385 1297 2591 2154 2533 1921 806 981 1202 1267 1566 1555 1102 1995 2028 1596 445 936 482 650 509 1358 1269 1183 1098 1100 1028 1953 227 360 3072 76 1344 1435 1413 1396 1669 1166 1747 338 996 1365 2726 2056 976 2145 1645 1514 1376 1207 2305 1827 2516 4230 2253 1490 2075 1488 966 1436 2302 1163 2620 2619 952 2517 2355 1579 523 1550 5070 4788 2579 927 2744 1796 3193 3037 1742 1240 1904 2365 2451 1794 2780 3384 4299 3244 1971 1852 2848 2786 1841 625 1632 5423 784 1502 1522 1736 1805 2194 3512 1571 1673 1431 1408 796 1465 1841 1631 676 1043 733 1090 1665 1316 1037 826 1263 775 597 616 893 3374 979 623 3183 1332 452 3451 4893 3526 1003 3971 2175 1974 1124 931 2764 1415 2027 1636 1277 1493 518 778 1747 1277 2516 1638 3020 2400 2069 3165 1789 4516 1513 3037 938 1200 2918 2742 2850 1652 365 532 449 428 1574 660 1875 2062 1906 911 723 1117 890 700 2128 1729 2089 1141 609 3399 998 3017 1546 2836 2137 1106 1054 2662 5971 1515 1048 1512 1806 592 654 1284 894 802 3197 2146 1056 756 1364 2269 1307 3096 1114 3802 2053 1761 2206 1890 1832 562 1728 1493 1084 1424 591 1910 2168 1098 1392 874 2196 1670 2587 1465 407 1595 1096 3923 1770 761 4252 436 1088 3169 1020 2414 2081 1431 2322 481 1407 2491 549 1085 2608 1780 1956 816 2318 1349 2098 3437 3148 1060 2985 1518 650 782 1735 1909 5885 1460 959 1243 1180 1544 1191 494 1009 1921 1400 409 1392 1612 461 1530 408 1880 1520 5010 2674 2828 624 1513 698 2539 2284 1338 1432 1002 1436 1522 989 296 687 604 3806 4585 981 1164 2296 1182 863 1921 1034 600 1473 1311 1237 2867 1716 1785 1927 1716 236 2532 1736 2154 142 1558 1138 1508 1733 1142 362 337 1817 1386 1648 719 1937 1386 1046 1090 4793 582 322 503 1032 526 442 1328 1164 1491 620 2165 1769 2116 392 2192 4801 1035 2144 1466 2234 2564 2223 2203 2802 1820 1497 641 2809 803 1414 1447 2495 2441 2842 2211 1712 1632 1897 2411 878 2397 3065 4743 2278 4283 5169 807 1118 977 641 972 1961 1365 2531 1441 563 899 1848 3479 1471 2730 3408 1572 2765 1634 2966 1582 2384 2488 2973 2288 1937 1180 1017 1425 1671 2246 376 2145 3221 1573 1327 1126 1595 2412 721 850 404 806 1101 2093 1657 5323 2639 2634 1233 3579 2189 1586 2238 2521 3368 5151 2972 3675 1359 2356 848 1125 1287 2172 1272 602 1033 1684 1559 681 1337 2014 2859 340 2437 225 3013 2266 4509 925 2846 819 2512 2315 1159 2903 557 487 781 811 1794 1633 1051 1113 1158 1069 569 2159 2572 237 1743 565 1543 1639 653 1251 3714 918 2430 917 987 850 115 345 724 1714 517 1474 618 597 476 1934 1186 1053 2225 3838 2012 2313 475 631 1287 1858 3586 4766 2379 2026 2024 2707 2777 2045 1941 4715 1996 3428 1282 1535 815 920 855 3944 1081 1595 2396 983 1439 1874 404 783 1383 255 1273 1031 1156 530 1692 2325 2911 2357 1601 835 1607 1054 518 1298 1529 1942 772 1648 977 1886 3171 957 2333 1539 1132 1419 1189 1597 1497 786 2609 2188 406 1372 1030 461 733 881 656 1617 2338 343 1516 3536 1205 1530 499 1646 1119 1911 549 1495 1269 3587 910 1041 1007 644 2044 176 2932 1166 1008 3675 458 3633 996 3024 3391 2115 632 1315 2528 300 940 386 1607 1444 2281 1080 521 1330 1051 1326 3143 1215 725 1547 3684 1545 701 2250 2378 3025 2329 2193 1959 2481 1769 3120 405 1622 707 799 1711 748 448 1073 781 731 573 886 2206 444 1070 454 525 1957 1233 1199 799 2351 1383 3080 1829 1282 3235 815 4922 1278 2783 2347 7512 5341 1090 6220 4926 2103 2643 1730 1029 2036 998 1228 1831 745 1889 1358 2664 2281 2645 1672 1504 2464 3028 2412 1561 1239 1804 1946 3074 1126 1663 839 491 826 1265 1745 3685 1206 2386 2745 2903 4230 1872 1913 3018 1975 1952 2262 1266 676 1257 281 1108 870 1389 514 1028 1038 755 523 2876 1515 2639 1601 774 629 1975 1058 1443 1765 4074 2423 471 565 1492 270 437 1592 828 2673 911 2216 848 885 933 1394 660 1062 2168 1533 1457 867 869 1624 978 1417 1167 1087 357 1124 466 697 1318 1802 2224 1494 2568 3047 1606 1393 1470 3520 1852 1727 1039 1246 1586 1580 1001 2219 857 1750 1369 590 773 746 825 1195 835 243 294 3100 683 730 1716 1190 1070 2467 2357 1064 440 1217 519 1957 1419 1225 917 683 1070 1081 1050 977 1268 1919 1395 1379 2859 4578 1338 767 883 2751 1326 3152 817 2045 1634 2212 1117 2380 1587 1442 745 3533 1639 1112 2695 2663 3892 2284 2883 2109 1182 1214 1956 447 1063 637 572 1042 652 2539 447 1868 3293 1689 2199 3820 3318 2556 825 912 2568 1781 1229 1384 2095 270 310 1281 1530 978 914 1077 2338 2010 1473 2258 789 1951 800 1264 1534 1843 902 1077 1098 1115 1887 1678 1223 1900 1416 1076 563 1418 1620 1198 3594 1507 652 1493 1029 872 1254 1973 1201 591 2106 1734 985 1395 2026 4687 2858 1303 2010 1768 1227 428 1009 3231 239 910 3872 1609 865 7286 3410 903 1852 273 2030 826 2054 828 2787 429 2327 1908 866 1556 844 1533 1141 657 1692 1182 1839 707 627 1709 1663 3657 1492 706 4588 3445 541 1620 1028 1678 883 2558 1720 1878 882 622 1309 969 1939 1921 2728 1507 2610 1766 870 2527 1194 2462 1867 404 711 226 1506 305 1224 1853 3144 551 117 1607 905 1924 1578 2611 196 1372 3582 2322 3521 3695 2627 2694 1879 1526 1259 2766 2003 3603 5298 4674 2249 3531 301 238 1005 1519 1294 3679 951 1944 2029 1152 4224 3352 1574 5952 1311 1367 1310 1445 1226 1909 2109 1847 817 2077 1790 1032 556 750 847 1337 1137 551 3070 1546 6730 3207 3041 1332 3358 1238 618 2478 1037 1256 1465 1185 1161 1587 1243 1552 2158 1990 1159 1848 1139 1097 518 2617 5704 2287 2584 1930 1319 1271 602 1005 1416 1959 1868 1090 1957 1688 1630 984 488 251 1010 795 2235 1676 1630 1810 492 1019 654 2410 1196 3265 706 1975 2592 1206 1037 1603 1267 933 1863 590 1677 1595 2016 1120 1256 1769 976 2548 2292 3459 3080 2015 1150 3095 2452 3007 1230 1566 1367 748 2850 503 1528 1001 767 3847 1722 2315 861 1260 2282 2096 1690 922 1342 2953 559 2409 1094 2838 627 2826 2431 1054 1847 1542 1204 2548 1511 1730 805 4729 519 2674 1400 2049 2175 2392 1565 1128 2281 440 519 326 1691 1412 584 2045 870 2900 1742 1783 821 3590 2291 476 1946 1735 2433 1240 2758 1155 2066 3602 1470 2299 3421 2733 2512 2596 2841 1265 1762 2041 1933 1141 650 1374 375 926 1158 1115 1988 288 1569 2675 2435 1216 1301 1360 1473 1720 667 1102 1279 1705 1720 740 3061 1694 2744 1689 1937 2714 1496 1194 1495 2791 3584 2254 1515 2796 2760 2263 2365 2644 1130 1334 614 324 930 1066 1376 449 756 1724 1513 1854 966 872 2069 1342 647 281 850 554 800 305 3098 1467 2471 1511 488 2324 1751 1728 1364 522 1501 881 918 829 940 1520 1184 2087 1420 1513 966 1207 880 981 763 2322 1705 2219 859 575 1121 1671 2411 1614 2845 861 1684 2146 392 2549 1714 1924 620 794 2506 2709 3478 1301 2081 1290 1447 3380 1313 975 1159 2907 773 2652 2014 1861 1676 1281 1424 1493 698 2932 2489 3108 890 1109 532 1574 1115 2080 2152 798 2088 730 2687 1984 2525 1494 3938 1522 1790 3561 1924 2071 2298 2080 2711 586 858 2036 563 3190 676 1297 683 1368 1108 2393 3451 832 2012 2434 3285 1474 2292 2368 2521 3312 2241 3711 1992 1302 3118 1083 2726 1649 754 1519 798 1933 4746 1251 2263 1225 1116 2529 2267 2867 1554 2445 3070 4750 629 425 1139 1211 1323 855 1267 898 489 830 2466 1215 3351 1389 1149 2794 2928 2871 412 2235 550 3683 620 2311 1433 1318 2215 544 2090 1902 1093 1520 781 4548 2190 556 2774 420 766 1468 527 566 2955 1678 2440 1128 1562 766 2546 1632 2475 2812 3068 2285 1169 1847 3541 1367 3408 1569 1617 1130 1677 3052 4145 1117 5918 2542 1112 2204 1019 3851 1158 1231 778 1728 4532 1574 1964 788 1694 4615 1867 7796 635 3115 2242 1831 378 211 1227 1407 1759 1421 1345 410 1216 563 662 1220 1274 725 3126 927 1534 1661 1859 481 1196 -Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus_suis Streptococcus_suis_98HAH33_uid58665 548 753 661 1663 284 1377 302 517 348 1266 2329 1403 817 1570 1718 1398 686 52 88 1755 614 962 1805 884 85 100 229 227 526 1715 724 755 1414 1282 881 2006 854 1455 1962 2466 1906 1487 658 905 1366 1590 798 441 1088 674 1146 717 1114 1068 1105 861 948 1289 1199 745 1370 610 2587 1518 329 2420 874 245 852 1891 3955 1650 1351 904 381 344 757 948 626 412 759 894 1489 674 2055 1095 537 579 551 752 352 447 85 587 421 2505 1058 1450 726 1281 1296 1118 1451 1688 815 878 341 457 1505 631 565 1061 608 1467 647 97 904 887 873 489 1648 1479 493 3214 413 2943 279 3993 1471 1863 791 1221 1197 1709 1551 1659 710 272 965 1498 1928 523 724 1320 812 1773 1837 655 492 430 934 462 1059 1193 822 238 626 792 844 1200 1534 841 2180 1482 807 1384 447 522 741 989 446 845 900 1331 743 864 266 372 104 350 957 1142 649 578 1402 596 398 735 280 473 1575 1372 1301 864 738 381 1394 460 1574 435 1084 665 908 963 473 141 472 1105 2432 412 295 681 498 1051 686 964 787 860 1876 1439 906 628 866 673 937 1121 679 1588 533 941 1338 511 823 1095 798 573 1290 519 680 985 632 2071 2686 1217 1934 3285 811 2959 1008 1441 1184 839 1272 189 1747 446 1394 1684 123 4083 66 1190 1280 679 3110 4232 4423 1114 1764 349 618 611 930 869 934 1836 1223 1560 359 705 1487 1056 1141 1535 728 1111 438 642 349 1325 1108 1081 1543 172 951 246 111 122 298 439 1730 754 161 1219 305 210 152 811 627 590 632 599 351 697 483 290 126 302 293 564 415 1420 1187 264 206 830 793 943 414 649 394 2437 926 613 635 1505 1612 531 1245 3999 370 480 768 1279 1231 626 988 686 1689 875 966 564 502 1332 773 248 506 589 1144 880 106 344 160 2039 1939 551 1801 1025 711 1087 1598 1247 682 664 2155 739 880 1181 2470 2514 2036 1761 861 269 2206 976 1284 604 2093 978 2616 1469 1661 1430 622 504 1752 2707 1239 2570 1729 684 4364 238 3098 531 1344 991 1509 1846 988 1835 1545 2155 1279 1336 2378 904 514 1122 705 772 1023 1071 225 2057 714 1530 1313 478 667 276 640 588 922 1385 842 654 416 931 925 796 459 553 996 441 548 472 559 143 306 266 403 304 473 578 477 2432 631 297 893 826 1622 590 1151 892 1463 482 896 1691 1709 661 300 631 899 1870 853 795 881 518 759 962 1901 1524 1266 1156 1975 2180 1660 966 741 853 1606 1426 906 2643 1870 595 1596 245 637 1123 1193 859 1704 1715 2338 1242 375 398 958 1041 677 590 998 816 598 604 414 1450 543 1255 934 1527 722 629 258 1390 541 453 485 1818 2111 721 769 738 568 987 2150 955 1416 371 1200 799 1508 1193 1231 1533 470 595 833 983 938 544 866 1658 1538 1143 274 96 517 84 904 1285 750 1130 897 869 655 1506 187 201 1725 50 373 738 903 623 902 345 390 522 213 855 370 674 172 656 627 467 425 444 766 576 835 1264 1375 1087 1934 1167 550 946 751 753 1842 526 1106 829 1696 1165 641 1506 1886 1138 1855 1135 2115 375 1273 953 1199 701 1022 1061 1189 1524 2071 2149 2394 2233 1356 1023 2149 1599 795 459 570 4238 307 845 444 2335 1148 2046 3318 1481 906 641 465 329 625 1084 658 789 912 716 927 1239 872 931 574 619 694 540 412 594 2679 485 514 1653 445 795 898 2020 1504 422 2792 1486 1512 777 372 802 212 466 530 1040 650 638 565 1427 664 1544 838 1450 1540 1390 1713 1691 2121 790 1796 1208 765 1630 1210 1613 1243 377 644 942 511 1880 1229 1352 1129 503 477 648 808 558 574 1079 958 951 669 705 2371 613 2686 2294 2957 879 1671 423 527 1275 801 623 736 1250 219 471 1069 671 575 1185 830 573 329 894 1791 1249 2128 879 1500 1271 822 1111 980 1059 857 1837 694 512 710 278 952 1047 449 876 651 1310 824 1135 384 130 1288 792 1988 1045 731 2885 247 1991 2807 640 1915 2398 1603 1600 1251 257 2294 419 701 993 1200 2043 1095 954 609 1342 2497 1526 319 1295 1304 346 371 941 865 4526 366 803 716 186 1465 1708 1229 983 1275 2021 843 1431 770 764 278 714 600 537 1782 1230 1899 304 635 376 1730 1302 835 1518 1381 669 841 177 384 636 330 1379 1573 377 262 2505 1540 771 1035 639 98 1650 355 1073 1171 977 697 877 733 747 1889 820 1713 363 1048 571 602 1416 461 490 913 592 801 542 1532 1185 944 1231 854 1220 406 121 560 508 607 215 725 508 977 391 1005 1108 1140 271 806 3555 584 1323 582 1397 787 1476 1840 1512 1269 413 314 1678 420 416 569 2136 1691 1711 958 1647 1753 1310 2171 888 2113 1922 2119 1526 2135 3502 599 1008 333 464 707 1086 979 816 944 262 223 1080 2953 766 2000 1920 935 2287 979 1491 1199 1795 303 554 456 1205 840 1125 1162 950 1811 997 1384 1561 1065 1020 466 924 844 583 722 262 453 1007 632 537 1065 962 1289 725 963 802 589 867 1730 2103 3894 1412 2231 623 954 971 543 236 969 611 620 463 739 595 901 1052 1320 2790 873 1798 115 645 1218 2925 902 2103 529 1600 1089 816 1239 475 312 295 748 1153 1283 465 404 696 439 377 681 1229 92 453 285 941 1236 1109 946 1033 322 2208 466 444 512 54 131 186 886 104 732 350 732 253 1451 855 738 1021 2826 2193 1682 159 494 1119 858 2712 2452 326 557 750 748 1125 1348 1405 2089 1779 938 201 350 350 581 825 3139 379 438 497 1126 1156 1095 1015 705 316 764 295 593 678 302 373 635 1121 920 1231 505 1109 734 294 708 566 1233 852 899 332 898 1045 263 672 1480 917 788 855 1726 1172 455 1410 616 200 253 472 379 369 417 317 459 472 204 432 3821 687 1122 389 1653 720 1572 238 503 398 2748 552 881 580 1237 419 338 573 327 959 1963 325 1721 428 1067 1602 1220 525 1096 1212 278 528 546 1168 713 1565 583 242 679 620 374 1155 860 282 1146 831 681 276 583 1396 2784 2636 1501 814 1018 723 1598 127 704 199 425 1897 477 271 648 334 323 296 511 1420 73 153 174 396 667 613 691 300 956 1035 1038 1362 687 1429 442 5699 179 1389 1048 5142 3196 704 3828 1691 1558 2090 1283 752 1731 82 281 1186 348 2074 777 2294 1319 864 1307 485 1513 2254 664 441 568 478 2001 1205 1038 1048 1554 1196 220 251 1711 1974 554 1429 1748 1841 1820 1164 1525 1938 760 1321 1398 769 1293 550 206 878 1065 874 690 443 378 392 330 614 406 793 282 650 351 1407 626 865 948 1386 2568 560 812 469 695 734 387 702 679 816 2229 999 532 387 570 383 514 1532 611 930 759 612 1399 618 1292 806 419 1044 909 1049 598 688 1129 1518 734 1507 2257 1131 904 1081 1189 933 1365 851 538 1074 710 937 1248 392 664 1208 536 665 762 448 1021 515 163 167 1134 594 671 1172 219 642 1538 1240 615 38 322 530 635 238 227 429 643 519 310 668 472 711 1060 617 644 1418 1406 815 529 557 577 636 605 602 230 1043 1093 1431 1684 913 1142 528 851 706 823 661 426 712 478 1916 1547 1206 1125 765 290 576 331 966 1848 927 1492 625 661 2247 883 989 1182 2044 1152 1021 575 1561 620 405 1265 1138 765 667 1009 1042 535 981 288 368 537 243 1438 1260 1428 702 596 207 828 532 601 1130 554 1649 721 961 664 980 344 308 771 1128 589 1498 1034 515 1208 691 703 1249 916 559 1678 912 1133 667 768 1156 2198 1122 127 182 907 940 421 835 1099 679 488 3798 1058 485 2857 1777 604 860 884 1238 880 675 587 1416 445 2032 1207 381 784 684 771 843 491 1022 1115 624 799 844 1864 1732 2627 1226 824 1129 1836 275 1112 210 484 421 2081 677 814 797 638 1315 1250 1949 1027 1411 1470 1382 291 353 1768 1687 1336 1629 224 407 360 604 184 712 2042 724 286 94 532 439 1194 539 1657 379 743 2330 735 4005 2265 2759 1194 1309 1067 511 2529 1303 2515 1992 2879 1548 1613 523 169 719 499 720 3124 264 1722 832 342 2172 1783 871 4254 356 550 787 899 940 651 1250 221 549 1065 1958 599 1802 367 530 870 585 508 1767 647 3764 2013 1718 1071 2215 1329 349 1837 703 984 499 808 859 954 1102 769 897 1513 669 872 70 90 16 1854 2891 1429 1654 1443 831 577 585 512 1483 1111 807 1239 1226 1784 966 350 1179 572 687 545 1273 1490 1168 552 362 283 588 1494 693 1901 388 760 959 1457 419 1351 1147 624 1429 170 1409 559 900 514 649 762 780 1568 571 2584 1823 1096 618 1362 503 1200 1414 457 1019 606 2186 233 964 394 623 1512 1041 231 676 654 780 938 942 762 848 2055 300 1602 430 2100 349 993 1161 859 780 1002 1080 1003 713 973 398 1404 322 1019 1037 738 736 1253 934 295 791 601 478 154 780 765 279 1233 692 1409 612 1074 264 2766 1147 399 1473 666 1705 418 1040 671 646 1581 1297 1105 1760 2059 1293 1859 1157 883 955 828 612 482 680 1491 146 326 478 912 461 92 846 1276 879 862 910 499 767 2571 427 962 490 1266 690 923 921 756 1293 1605 1253 2221 867 574 848 1022 1332 618 793 579 1146 1323 2698 1497 1088 451 1167 541 782 1313 1074 846 548 297 357 947 314 428 919 706 494 93 583 253 416 63 953 1329 2954 1451 869 2701 1224 1542 708 215 1477 289 781 706 881 981 750 1527 1129 794 351 321 299 499 188 1158 805 1648 441 557 430 228 3061 211 1892 1408 87 2432 398 1143 1676 1028 559 875 1191 1940 3012 947 704 475 513 1499 1145 309 542 851 424 960 348 510 986 211 813 140 265 2941 1788 1674 581 587 404 57 184 167 1434 714 1534 1032 613 1337 833 1033 1145 1574 1049 1657 811 571 1005 910 1584 426 589 1161 290 2338 232 1076 524 557 327 1334 2580 468 1405 1664 1837 196 164 1075 895 3178 1429 1211 347 937 1359 1317 1350 634 280 455 661 994 2231 217 1581 1063 228 652 827 1633 503 1425 979 1534 991 745 2200 803 136 763 905 502 408 630 645 513 2592 1058 838 1449 1811 1578 284 1403 393 1390 441 958 955 802 1223 376 659 1367 403 890 735 2436 891 520 2049 363 940 1122 380 497 2551 409 1410 446 724 633 2163 553 2053 596 1165 704 685 783 1886 1537 2240 1135 1272 787 295 969 1642 703 2443 1560 518 601 757 1709 1023 502 462 456 1966 1032 1139 453 805 1917 1279 5008 806 1507 1674 1241 405 154 882 739 1585 1113 791 364 1320 608 470 689 532 307 617 282 572 327 854 314 351 -Bacteria Proteobacteria Gammaproteobacteria Alteromonadales Alteromonadaceae Alteromonas Alteromonas_macleodii Alteromonas_macleodii__Deep_ecotype__uid58251 2204 1622 1889 3140 935 1810 1377 976 1591 1225 2612 1686 697 1621 2202 2277 2399 985 456 2077 998 1481 1884 1529 1145 1818 764 1276 620 2826 1163 874 2562 2225 2372 4301 2722 1922 3902 2111 2982 2986 2252 3588 3215 4184 936 1367 2133 1438 1890 1462 2647 1926 2612 1369 2258 2359 2977 2554 3844 2144 2507 4310 1779 3565 1013 1140 2018 2369 3337 2672 2153 1666 2916 555 1805 3605 2024 1930 2985 2330 1682 1893 2597 2306 1283 2617 1945 1438 1136 1013 280 1137 1593 2192 806 1017 2069 1292 1833 1957 1198 2662 650 2206 1931 999 1595 1031 3438 2702 452 1617 1461 129 2634 2260 1723 1959 2000 1924 2777 4398 4273 4441 2328 4093 3494 4724 2333 2441 1779 2676 1955 3308 2828 1154 1456 1928 2274 3663 1621 3025 1510 2250 2155 2765 1848 1731 1893 1298 1417 685 2370 1384 1897 2213 1903 2538 2692 1794 3205 4710 1971 1950 1170 3070 2329 2465 2006 2054 2580 3014 1849 1759 480 1153 253 667 1054 1831 1477 3149 4577 2382 1523 2598 2044 2290 2355 2421 2719 1311 1838 728 1903 1888 940 2025 2318 2043 1363 1540 4459 2356 706 1103 4438 1266 715 1616 1391 1794 1384 1282 1217 1649 3038 3123 2400 1456 996 1575 2400 1056 607 1764 375 718 2078 353 594 3975 2018 1025 1652 1050 2323 1220 1534 2670 3088 4164 3288 4138 1187 3865 1543 2380 5602 1892 2337 1625 1482 2810 2679 2476 1002 4635 445 2133 3066 1250 5005 3435 6087 2623 3229 672 1773 843 1238 1206 2595 3496 2223 3115 2066 1812 1713 2232 2094 2464 1223 1508 1457 527 1951 1611 2615 3896 2846 701 1885 849 311 403 786 2010 2675 1264 810 3334 2754 1686 1510 814 1192 1559 1827 1894 2567 1468 843 1163 794 1221 1717 1932 1795 2436 2728 827 1305 2127 2202 1848 2029 2010 1159 4015 2279 1183 2001 3288 1499 1096 2846 4706 1546 1257 2159 1898 1117 675 1546 1547 3353 3342 2347 2702 1270 3464 1785 691 474 3375 3520 3165 470 933 541 3096 2258 1396 3902 2070 2318 2990 3131 2779 3699 3391 3097 1305 1864 2583 3118 3966 2772 2424 1265 1021 3343 1897 3726 1842 2558 1971 3389 2054 3358 3067 2416 803 2285 5059 3320 2878 2651 756 5958 566 5158 1614 4278 1905 2611 1101 2023 3054 1436 2938 2135 1274 3877 1249 915 1473 2010 1397 1956 2345 932 2088 2278 1090 1899 1095 1329 379 753 977 2602 4393 2498 2321 1464 2592 2170 2222 1564 560 2325 1997 3106 2209 2707 693 1260 606 1594 1308 1658 1667 1638 4017 607 287 1902 1215 2393 1077 1569 1250 3039 2033 1679 3172 1227 1759 3116 1792 1833 3277 2735 1533 1437 1581 1130 1173 3457 3114 3451 2130 1930 2880 2198 1410 1396 769 1805 1371 3445 4295 1784 3142 3617 590 1999 1726 3291 2586 3490 3738 2967 2836 1119 1557 1836 2589 1702 2527 4366 2952 1735 2164 1627 2499 1518 3174 2024 1956 2691 1667 1159 1938 2966 391 374 4280 3705 2827 1693 3288 1279 2108 1629 1378 1319 1093 1695 2511 2038 2035 2324 2802 1684 2431 681 2278 3289 2149 1644 4155 2803 4001 686 758 1683 1311 3146 3249 3392 3029 2066 2234 2000 2451 2213 2401 5082 715 1992 1274 1422 478 1470 1045 689 1098 515 3776 1313 1698 354 1324 1435 592 1239 1851 1904 1869 1562 2164 1940 3096 3534 2137 2258 3898 2861 2698 2867 2165 2336 1873 2373 2061 1331 1263 2102 2472 2228 1448 3151 578 1063 907 1551 1361 2450 1656 2729 3441 2830 3628 2138 4196 2659 2065 1211 2968 2013 451 2020 5607 1674 2210 1453 3819 1752 4765 5843 3820 2468 1662 600 754 1095 2823 2360 3660 2338 2073 1872 2538 2371 863 2082 2720 2346 1316 1439 1531 4928 1242 1799 3701 1406 2024 2504 1713 4345 1370 3321 1972 1921 709 1823 1969 1072 1105 1951 1544 3291 1976 1931 1718 959 2068 2724 1600 1602 3018 4092 1881 5236 3488 3137 3440 1971 3378 3330 3165 3346 1192 508 1230 2783 3046 3412 2605 1600 1405 706 545 1839 1472 1200 1312 2205 1156 1332 1253 3774 1068 3468 4093 4338 2109 2079 1140 1001 1927 2147 2204 1578 2501 2022 3229 1982 2218 1619 1219 2625 1350 1404 1188 3838 2945 1931 1753 999 3825 2346 2259 2257 1234 4438 3084 1363 1899 2563 2681 1881 3191 2835 733 452 877 526 2450 1498 1817 2139 4438 3630 3508 2279 4967 3269 3756 3792 3753 1510 3873 3663 3060 3321 817 2044 3648 1780 1540 2346 2947 2348 1944 683 3197 4668 3751 935 2293 3301 2874 1548 2155 1345 6369 389 1183 624 597 2650 1730 3153 1891 2031 2254 1922 3682 2198 2451 2379 1210 2427 1936 3084 1191 2604 1601 2034 1162 3198 2818 3039 4258 2842 1558 534 246 1132 1810 1338 2584 3525 935 1201 1244 2745 1791 3303 1734 936 2940 1733 2378 2764 1404 1262 1963 715 2266 4392 1638 3900 364 3001 3503 1831 2662 1238 902 1402 1013 2503 994 4426 1267 2821 1568 1685 4125 2091 742 1520 1905 2468 1744 2039 1656 2148 1319 2030 2321 2938 4183 796 4910 2336 2792 810 1481 550 1483 1932 2240 2975 672 1825 2451 2510 1148 1343 3174 4456 2249 2430 2689 4224 2490 4623 2351 3727 3419 3990 3264 7936 5017 2879 4141 1102 2702 1252 2206 3128 2615 2390 1815 1220 3505 5875 701 3357 1671 2371 2202 1732 2529 3608 2300 656 530 1181 1725 1159 2307 1777 1191 2615 1953 2692 3536 3628 1592 1698 1873 1004 1868 1138 551 1860 3492 639 460 1498 731 1659 719 900 2078 3147 3081 4466 4064 6206 3385 4163 1600 1778 2510 1096 645 1918 1352 996 2216 3891 827 1556 2570 1620 4285 2187 2616 1227 2171 2509 4664 723 3879 1742 1715 3867 1873 2225 2267 727 546 1129 1745 2980 2112 1195 1928 1307 2352 1898 3168 719 2539 3157 1738 2044 3363 1190 2629 1288 3442 1222 1060 2602 1426 2473 985 1926 598 1855 1698 1672 414 3337 5364 1737 3357 4738 2135 2395 1525 1825 3333 2682 4227 1999 1087 2252 615 1538 1961 1737 3130 4179 5138 618 1045 789 1746 1652 1707 2265 1549 1032 1849 2291 1413 1879 3225 1981 649 2247 936 1698 2229 1356 994 1650 1424 2842 2246 1510 2433 2112 1390 2822 1667 2817 2645 1772 1025 1672 3296 870 1953 2136 2154 2200 2489 2561 868 539 1112 1383 2612 860 1350 1894 991 4482 3461 1460 1788 3695 971 7611 1698 2694 3156 2658 2559 2238 2099 2404 1997 3580 1107 1395 1151 4820 1163 1767 631 1089 1543 1812 4325 3053 1438 3429 3256 3171 1642 2300 965 2684 2779 4617 3334 3135 3784 2165 2300 2418 2704 631 2173 2108 1661 1892 1280 1457 1296 2010 2059 1805 3216 1964 924 2881 2817 3376 1423 912 1340 2240 3326 2199 1819 2901 1261 1681 802 2403 2243 661 742 397 1752 2594 1412 1421 1347 1413 2631 2084 1946 1767 3272 2451 6230 1302 1438 2588 7467 6224 1770 8084 1919 2808 2019 1756 2507 2037 996 1293 2096 2193 3742 1750 3424 2204 1475 1894 1990 2635 2283 1008 2419 2051 2099 2673 1951 2306 2852 4109 2538 828 1994 2840 5484 789 3220 4842 4205 5457 1793 2315 4095 1605 2608 2768 3866 3345 1403 1182 2039 4163 1675 2896 1458 1571 1664 853 916 1448 2089 762 1418 1033 2490 1477 1978 1700 1890 2011 1396 1872 2151 2101 3325 1826 2608 4285 1216 2098 2680 1796 1816 1230 2131 1485 2652 1564 1401 1431 2186 2076 1097 2104 2468 929 1266 1940 2538 2377 2441 2982 2689 1509 2629 4595 1749 2668 1847 1659 1783 1317 2939 2606 1225 1640 1675 943 2023 1104 2714 1709 2015 1402 1351 1874 537 1292 2073 2800 1719 1149 2113 1000 2358 2359 1639 793 233 1347 1332 1927 1147 1456 996 1743 1361 731 1542 2581 1320 3043 2364 3068 1907 2939 3002 1079 1300 1603 2546 1700 2185 1472 2843 1906 3218 4124 2174 1291 1218 2475 2986 2110 1942 971 878 1714 1702 1462 1718 2162 2881 409 584 300 3586 5033 2945 3045 1769 1863 3835 2337 3372 3658 3295 2757 2205 2093 2728 1617 1522 2935 1838 488 1121 2091 1246 1122 1061 1277 788 658 1226 3128 3479 3436 1221 608 1546 1923 2698 803 2574 2223 3067 1433 1828 1584 1853 1618 1677 1781 2030 497 1261 3166 1807 2198 1579 2495 4000 3964 2323 3315 958 2640 1859 1505 1357 5878 3737 897 721 1484 1373 1361 1872 3845 1961 674 5450 1651 1691 3133 3440 1819 725 1126 1831 1616 1869 1253 3794 1917 5043 3018 1362 2138 3114 1820 1784 1619 2851 1859 2589 1516 974 1612 4586 4204 1890 1361 1958 3837 2346 1740 1158 1754 959 3985 1600 1805 2211 1301 1309 1824 1774 2382 1816 2550 1477 1332 346 3337 2555 948 2772 1853 1960 2624 1941 1622 2049 3818 1781 1037 947 2080 1579 1881 1323 2942 1360 1809 1692 2287 3728 3211 2631 2102 2122 1002 527 3847 2312 4281 4733 5176 5390 2956 1074 1184 2245 1193 2390 3787 679 2696 2495 1762 3744 3110 720 5920 1419 1571 997 2246 1894 1253 2016 1639 1665 1439 3709 2725 3103 1032 1954 2231 2324 2137 3374 2931 5475 3036 3362 2319 3309 1739 2024 3426 1859 1786 760 2167 1332 2685 1271 2260 3033 3009 1926 1001 961 383 280 2810 3013 1824 2248 2843 4421 3313 4861 6469 1981 4109 3731 2093 1963 2805 1607 380 1756 1146 2265 1228 1727 3709 3188 1293 1850 1585 2044 3017 2586 4920 1450 2470 3196 1981 1087 2543 876 1894 2402 184 2293 1641 422 1833 1546 1800 1563 2681 1733 4004 2730 1980 2128 4590 2416 3150 2411 896 3395 1749 5062 2218 1202 3124 3621 5391 1082 1664 937 2966 1985 6722 2605 574 499 3931 1445 3589 1364 3732 1422 2222 2401 1993 1345 1852 2035 2523 753 2817 1305 2935 2397 1921 2729 1193 1015 3230 1913 1864 1549 950 1474 348 3011 2089 2318 3342 2102 3558 2554 2628 1513 4802 2609 2530 3077 1135 2547 720 1633 3659 2061 3782 1611 1208 2611 1739 1746 2391 1091 1989 1163 1988 1510 1288 751 955 2123 2198 2721 3460 1253 566 2532 1957 1391 2379 680 967 1368 6573 467 869 741 2139 1623 1194 2096 2179 3389 2379 2552 3510 2616 914 3946 2913 1174 1180 2143 677 1383 1809 2681 1021 843 458 3047 1991 897 2530 1281 1944 525 685 864 1280 1201 1502 2670 1592 1466 1791 486 1573 2205 521 999 4075 6159 4101 2111 4090 1764 2426 2017 2666 4094 1693 2475 2058 2575 1939 1119 4722 2796 2327 378 1246 2446 2162 488 2144 1426 3839 999 1684 1763 777 5661 1985 4511 5304 979 4223 1054 2618 3024 1013 1370 1777 890 1636 3946 940 1302 536 1812 1151 1648 885 2003 2472 2686 2192 1180 1637 2053 636 1187 982 388 3852 3787 2429 634 1560 1156 446 908 742 3104 3676 3656 3064 1750 2241 2648 4009 1220 2025 1203 2196 1321 1382 937 2263 2359 1790 1475 1747 1469 2367 573 1914 954 3085 3272 1646 5220 1933 1159 3483 2166 593 1923 1120 1685 4785 2210 1674 674 2072 1450 1427 1675 2751 3864 2132 4137 2518 4329 479 4233 2392 1316 2045 971 2692 1009 2973 2453 4194 2938 3010 4300 1449 992 3318 1990 3051 3140 1849 2380 1773 3835 2919 1724 2301 2873 2025 1491 3509 691 2601 2034 1603 2247 1501 2180 1677 714 2380 1890 2078 1585 3630 1700 1893 2763 1860 2631 2757 1392 2811 2307 1276 2063 455 1211 1224 3588 1349 2893 1598 2577 2240 3916 2922 2978 2817 4077 1752 1504 2189 564 2459 1693 796 2192 1763 2251 999 2370 1690 907 586 471 1467 2511 1306 1034 4400 3980 4271 1759 6720 2815 2162 2209 3323 1497 413 1960 1561 3496 2750 2086 2059 2152 1261 1259 705 1555 3159 3256 1670 1916 435 583 2079 338 \ No newline at end of file diff --git a/tests/test_basic_functions.py b/tests/test_basic_functions.py index bedfdca..4229218 100644 --- a/tests/test_basic_functions.py +++ b/tests/test_basic_functions.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- -from focus_app.focus import normalise, load_database, is_wanted_file, count_kmers, which, load_database +import random + +from focus_app.focus import normalise, load_database, is_wanted_file, count_kmers, which, run_nnls, aggregate_level import pytest from numpy import array - def test_normalise(): assert list(normalise(array([1, 1,]))) == [0.5, 0.5] assert list(normalise(array([2, 2, 2, 2]))) == [0.25, 0.25, 0.25, 0.25] @@ -17,20 +18,6 @@ def test_normalise(): normalise([0, 0, 0]) -def test_load_database(): - database_path = "tests/data/k6_subset.txt" - database_matrix, organisms, kmers_order = load_database(database_path) - - assert list(organisms) == ['Bacteria\tSpirochaetes\tSpirochaetia\tSpirochaetales\tSpirochaetaceae\tSphaerochaeta\tSphaerochaeta_pleomorpha\tSphaerochaeta_pleomorpha_Grapes_uid82365', - 'Bacteria\tFirmicutes\tBacilli\tLactobacillales\tStreptococcaceae\tStreptococcus\tStreptococcus_suis\tStreptococcus_suis_98HAH33_uid58665', - 'Bacteria\tProteobacteria\tGammaproteobacteria\tAlteromonadales\tAlteromonadaceae\tAlteromonas\tAlteromonas_macleodii\tAlteromonas_macleodii__Deep_ecotype__uid58251' - ] - # look into small subset of the matrix - assert list(database_matrix[0])[0] == 0.00027570089293671023 - assert list(database_matrix[1])[0] == 0.00040074099488477375 - assert list(database_matrix[2])[0] == 0.00036760119058228027 - - def test_is_wanted_file(): assert is_wanted_file(["a.fasta", "b.fastq", "x.FASTq", "y.FASTA", "n.fna"]) == ['a.fasta', 'b.fastq', 'n.fna', 'x.FASTq', 'y.FASTA'] assert is_wanted_file(["f.png", "a.fasta", "b.fastq", "x.FASTq", "y.FASTA", "n.fna"]) == ['a.fasta', 'b.fastq', 'n.fna', 'x.FASTq', 'y.FASTA'] @@ -50,24 +37,94 @@ def test_count_kmers(): kmer_order = ["AAAAAAA", "AAAGAAT", "ATTTTTT"] assert count_kmers(query_file, kmer_size, threads, kmer_order) == [17, 0, 0] + # testing for empty input query_file = "tests/data/mock_sample_empty.fasta" kmer_size = "6" kmer_order = ["AAAAAA", "AAAAAT", "TTTTTT"] - assert count_kmers (query_file, kmer_size, threads, kmer_order) == [0, 0, 0] + with pytest.raises(Exception): + count_kmers(query_file, kmer_size, threads, kmer_order) def test_load_database(): - pass + # simple database + database_matrix, organisms, kmer_order = load_database("tests/data/k6_small_sample.txt") + assert kmer_order == ['GAACGC', 'GAACGA', 'CACCCA'] + assert organisms == ['Bacteria\tSpirochaetes\tSpirochaetia\tSpirochaetales\tSpirochaetaceae\tSphaerochaeta\tSphaerochaeta_pleomorpha\tSphaerochaeta_pleomorpha_Grapes_uid82365', + 'Bacteria\tFirmicutes\tBacilli\tLactobacillales\tStreptococcaceae\tStreptococcus\tStreptococcus_suis\tStreptococcus_suis_98HAH33_uid58665'] + assert [list(xx) for xx in database_matrix] == [[0.26407041877834087, 0.27930682976554538], + [0.38383568951720459,0.38379204892966362], + [0.35209389170445454, 0.33690112130479105]] # bad database with all keys 0 in row = raise error + with pytest.raises (RuntimeWarning): + load_database("tests/data/k6_small_sample_empty_record.txt") -def test_write_results(): - pass + +def test_run_nnls(): + database_matrix, organisms, kmer_order = load_database("tests/data/k6_small_sample.txt") + + # fake metagenomic count + # example 1 + random.seed(1128) + fake_query_count = normalise([random.randint(10000, 200000) for _ in range (3)]) + assert list(run_nnls(database_matrix, fake_query_count)) == [0.11743935706399153, 0.88256064293600844] + + # example 2 + random.seed(2) + fake_query_count = normalise([random.randint(10000, 200000) for _ in range (3)]) + assert list(run_nnls(database_matrix, fake_query_count)) == [0.79079139795692821, 0.20920860204307179] + + # example 3 + random.seed(500) + fake_query_count = normalise([random.randint(10000, 200000) for _ in range (3)]) + assert list(run_nnls(database_matrix, fake_query_count)) == [0.40650798820847034, 0.59349201179152955] def test_aggregate_level(): - pass + true_answer = [ + {'Bacteria': [0.54337724854388625, 0.76762773844686816, + 0.68899501300924559]}, + {'Spirochaetes': [0.26407041877834087, 0.38383568951720459, + 0.35209389170445454], 'Firmicutes': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + {'Spirochaetia': [0.26407041877834087, 0.38383568951720459, + 0.35209389170445454], 'Bacilli': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + {'Spirochaetales': [0.26407041877834087, 0.38383568951720459, + 0.35209389170445454], 'Lactobacillales': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + {'Spirochaetaceae': [0.26407041877834087, 0.38383568951720459, + 0.35209389170445454], 'Streptococcaceae': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + {'Sphaerochaeta': [0.26407041877834087, 0.38383568951720459, + 0.35209389170445454], 'Streptococcus': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + {'Sphaerochaeta_pleomorpha': [0.26407041877834087, + 0.38383568951720459, 0.35209389170445454], + 'Streptococcus_suis': [0.27930682976554538, 0.38379204892966362, + 0.33690112130479105]}, + {'Sphaerochaeta_pleomorpha_Grapes_uid82365': [0.26407041877834087, + 0.38383568951720459, 0.35209389170445454], + 'Streptococcus_suis_98HAH33_uid58665': [0.27930682976554538, + 0.38379204892966362, 0.33690112130479105]}, + ] + database_matrix, organisms, kmer_order = load_database ("tests/data/k6_small_sample.txt") + results = {organisms[pos]: profile for pos, profile in enumerate(database_matrix.T)} + + producted_result = [] + + # runs aggregate function in all the levels + for pos in range(8): + aggregate_results = aggregate_level(results, pos) + + # convert results to list rather the numpy.array + aggregate_results_list = {level:list(aggregate_results[level]) for level in aggregate_results} + + producted_result.append(aggregate_results_list) + + assert producted_result == true_answer -def test_run_nnls(): + +def test_write_results(): pass