Skip to content

Commit

Permalink
fixed inparalog handling
Browse files Browse the repository at this point in the history
  • Loading branch information
JLSteenwyk committed Dec 20, 2023
1 parent 7022276 commit 6294768
Show file tree
Hide file tree
Showing 29 changed files with 1,194 additions and 6 deletions.
17 changes: 12 additions & 5 deletions orthosnap/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,6 @@ def write_output_fasta_and_account_for_assigned_tips_single_copy_case(
write_summary_file_with_inparalog_handling(
inparalog_handling, fasta,
output_path, subgroup_counter,
assigned_tips
)
subgroup_counter += 1

Expand All @@ -387,7 +386,6 @@ def write_summary_file_with_inparalog_handling(
fasta: str,
output_path: str,
subgroup_count: int,
assigned_tips: list
):
res_arr = []

Expand All @@ -406,10 +404,19 @@ def write_summary_file_with_inparalog_handling(
f"{output_path}/{fasta_path_stripped}.orthosnap.{subgroup_count}.fa"
)

if res_arr:
for i in res_arr:
try:
if res_arr[0][1] in open(output_fasta_file_name).read():
if string_exact_match(f">{i[1]}", output_fasta_file_name):
with open(f"{output_path}{inparalog_report_output_name}", "a") as file:
file.writelines('\t'.join(i) + '\n' for i in res_arr)
file.writelines('\t'.join(i) + '\n')
except FileNotFoundError:
1


def string_exact_match(string: str, filename: str) -> bool:
    """Return True if *string* occurs as a whole token on any line of *filename*.

    The query is matched literally: regex metacharacters in it (for example
    the ``|`` that separates species and gene in a FASTA header such as
    ``>species0|gene0``) are escaped rather than interpreted. A hit only
    counts when the match is not directly preceded or followed by a word
    character, so ``gene1`` does not match inside ``gene10``.

    Args:
        string: Literal text to look for.
        filename: Path of the text file to scan line by line.

    Returns:
        True as soon as one line contains the token, otherwise False.

    Raises:
        FileNotFoundError: If *filename* does not exist.
    """
    # Lookarounds are used instead of ``\b`` because ``\b`` needs a word
    # character on one side; a query starting with ``>`` at the beginning of
    # a line would therefore never match with ``\b``.
    pattern = re.compile(r"(?<!\w)" + re.escape(string) + r"(?!\w)")
    with open(filename, 'r') as f:
        # Trailing whitespace is stripped before searching, as before.
        return any(pattern.search(line.rstrip()) for line in f)
1 change: 0 additions & 1 deletion orthosnap/orthosnap.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def execute(
fasta,
output_path,
subgroup_counter,
assigned_tips,
)

write_output_stats(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fake_orthologous_group_of_genes.faa.orthosnap.0 species0|gene0-duplicate_copy_1 species0|gene0-duplicate_copy_2;species0|gene0-duplicate_copy_0
fake_orthologous_group_of_genes.faa.orthosnap.1 species4|gene2-duplicate_copy_1 species4|gene2-duplicate_copy_0
fake_orthologous_group_of_genes.faa.orthosnap.1 species2|gene2-duplicate_copy_1 species2|gene2-duplicate_copy_0
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>species0|gene0-duplicate_copy_1
MFGAVAAGSEESPQAPRCISTRSSSFRVYLSAWNFGMSPERVTTEPLHSPDWDNDWLRQL
AGDIVAGSLSATIIAPITTVIDRSVVERLSSNRSILHTLRTHAICSILKPRKFYFSRPFF
IAWSLYAATYATANATDTSLEHLSKVTEKSTTASLVPTFSFLPTYVVNVCLGILKDIRFS
QIYGHPEGRLKQPPPIPRLAYMAFLFRDSITISSSFTLAPQVASLVPDWITADPHTKRTV
TQLALPALVQYVNTPFHMIALDVIARPQVATIAERSVTIRRGDLAEILNSPAYDYGQDVE
KKKNLDDTSPEDEDPFGNEEFAEVKYRTMGWWKTGILMVAENVSIGILSLPSAFATLGFV
PALIILIGISGISWYTAYILCQFKLRYPQVHSMGDAGEIIMGRFGRELLGIGQLLFLIFV
MASHVLTFTVLMNTITEHGTCTIVFGVIALIVSCVGALPRTMDKVYWMSIASFLSIVAAT
MATMIAVGVEYKGHIPLAVTTHLSFNEEFLAVSNLFFAYVGHASFFGFISEMDKPREFTK
SISVLQVIDTSLYIASAVVIYRYVGADVQSPALGSAGPLGKKIAYGLAIPTVLIAGIVNG
HVASKYVYVRVFRGTNHMHERTLLSIGSWVAIGLISWVVAWVIAESIPVFNNLLSLITAL
FGCWFAYGFPAIFWFTLNKGQWFASSRKIFLTLSNTFILAMAITLCGLGLYVSGDAISKD
SGSGVWTCANNAVTTTTTT
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
>species4|gene2-duplicate_copy_1
MAVSRDLEAPAVVNDPTADDAMVEKKEYADGTPANDPFGNEECGEVRYRVMSWWQCGTLM
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGE
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPDHVHVDATTEVSFQDAFLAVTNIIFA
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
LMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWV
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNATTTT
>species2|gene2-duplicate_copy_1
MMLWLKRRNMLMGRRQMTRLEMKNAERSNIVSCRGVMVAENISLGILSLSSAVATLGIVP
AVILLLGLSAISWYTGYIMGQFKLRFPQIHSMGDAGELLMGRFGRELFGIGQLLFLIFLM
ASHILTFSVVFNTITNHGTCTIVFGVVGLVVSFIGALPRTMGKVYWMSMASCISIVTATV
VTMIAIGVQAPDHVHVNVTTKVSFQDAFLAVTNIIFAYIAHVAFFGFISEMHDPRDFPKS
LTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGPLMKKVAYGLAIPTVVIAGVVFGH
VACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALGVWVVAWVIAESIPVFNELLSLISSLF
GSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFFILAFACALCGMGLYVSGKSIHDSS
SKASWTCKNNAT
>species1|gene2
MAVSRDLEAPAVVNDPTAYDATVEKKEYADGTPANDPFGNEECGEVKYRVMSWWQCGTLM
VAENISLGILSLPSAVATLGIVPAVILLLGLSAISWYTGYIMGQFKLRFPQVHSMGDAGE
LLMGRFGRELFGIGQLLFLIFLMASHILTFTVVFNTITNHGTCTIVFGVVGLVVSFIGAL
PRTMGKVYWMSMASCISIVTATVVTMIAIGVQAPEHVHVDATTEVSFQDAFLAVTNIIFA
YIAHVAFFGFISEMHDPRDFPKSLTMLQVVDTSLYIVTAMVIYRYAGPDVASPALSSAGP
VMKKVAYGLAIPTVVIAGVVFGHVACKYIYVRIFRGSAHMHQNSFLAIGSWVAIALSVWV
VAWVIAESIPVFNELLSLISSLFGSWFSYGLPAIFWLVMNKGRWFSTRSKICLTIVNFLI
LAFACALCGMGLYVSGKSIHDSSSKASWTCKNNAT
>species3|gene7
MAPTTRDLEALTVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAMQAPDHVQVDITTHPSFSTAFLSVTNI
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
>species0|gene1
MAPTTRDLEALAVHHDSDIMADDLAEKKVSANESPPENDPFGNEECGEVKYRVMKWWHCG
ILMIAENISLGILSLPSAVATLGIVPSIFLILGLSGISWYTGYVIGQFKLRYPQVHSMGD
AGEILFGRIGREILFFGQLLFCIFLMSSHILTFTVLFNTITGHGTCTIVFGVVGLVVSFI
GALPRTMGKVYWMSLASCTSITVATIVTMVAIAVQAPDHVQVDITTHPSFSTAFLSVTNI
VFAFIAHVAFFGFASEMEDPRDFPKSLAMLQVTDTTMYIVTAMVIYRYAGPDVASPALSS
AGPLMSKVAYGLAIPTVIIAGVVFGHVASKYIYVRVWRGSPQMHTNSLAAVGSWVAIALG
VWVIAWIIAESIPVFNDLLSLISSLFGSWFSYGLPAMFWLVMNRGQYTASPRKIFLTIVN
LVIFGIACAICGLGLYVSGKAIHDSSSSASWTCANNAST
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
>species2|gene5
MSPDTSDLDLETRPAVSLNRGEEYKEQPETPDEEPFGDEDDAEVRYRTLEWWFVSPVMLA
GGTSLGILTLPSAVATLGIVPGVILIVGIAILTVYTGYVMGQFKQRYPHVHSIADGGEVL
FGWVGREILGAGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSVVFGVVGLLISLILSLPR
TFKRMSWLSVISFASIVGAVLVTMIALGVQRPPNVRVEVTRPTSLYRAFLAVTDIVFAYA
AHPAFFGFISEMKTPTDWPKTLCFVEIINTTLYTVTGVVIYRFAGQHVASPALGSTSPLM
AKVAYGTAIPTIVIAGVINGHIACKYIYVRVFRGTEHMHRRSLFAIGTWVVISVVLWTVA
WVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLFINKGLWFSSPRKTFLTIVNFTLLG
MGACLCGLGLYASGRAISEESAGRIFSCASTA
>species4|gene1
MSPDTSDLDLETRPAVSLNRGEGYKEQPETPDEEPFGNEEGAEVRYRTLEWWFVSPGSAE
GRQSRSDVACEQEMRDSHAGWGHIARHPNASLGCGDTGNRPVGLPGVILIVGIAILTVYT
GCVMGQFKQRYPHVHSIADGGEVLFGWIGREVLGAGLLLCLVFVMGGHILTFTVMMNTLT
DHGTCSVVFGVVGLLISLILSLPRTFKRMSWLSVISFASIVAAVLVTMIALGVQRPPNVK
VEVTRPTSLYRAFLAVTDIVFAYAAHPAFFGYISEMKTPTDWPKTLCFVEVINTTLYTVT
GVVIYRFAGQHVASPALGSSSPLMAKVAYGIAIPTIVIAGVINGHIACKYIYVRLFRGTE
RMHQRSLFSIGTWVAISVVLWTIAWVIAEAVPEFNNLLSLITSLFCSWFSYGLCGAFWLF
INQGLWFSSPRKTFLTIVNFTLLGMGACLCGLGLYASGRAISEESAGRSFSCASTA
>species1|gene0
MSPDTSDLDLGTRPAVSLNRGEGYKEQPETPDEEPFGDEEGAEVRYRTLEWWFVSPGSAE
GRQSRSDVACEQEMRDRGVILIVGIAILTVYTGCVMGQFKQRYPHVHSIADGGEVLFGWI
GREVLGTGLLLCLVFVMGGHILTFTVMMNTLTDHGTCSIVFGVVGLLISLILSLPRTFKR
MSWLSVISFASIVAAVLVTMIALGVQRPPNVKVEVTRPTSLYRAFLAVTDIVFAYAAHPA
FFGYISEMKTPTDWPKTLCFVEVINTTLYTVTGVVIYRFAGQHVASPALGSSSPLMAKVA
YGIAIPTIVIAGVINGHIACKYIYVRLFRGTEHMHQRSLFAIGTWVAISVVLWTIAWVIA
EAVPEFNNLLSLVLVFVCVSF
>species3|gene3
MSPPSAINNPGDPLAEQEKPVGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
SLGILSLPSAVASLGLVAAVILILGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVMIGLVLWTLAWII
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
CLCALGLYVSGKSIHDHPRSSSFSCANNA
>species0|gene8
MSPPSAINNPGDPLAEQEKPAGARNTTGTEDPFSHDGVGGVKYRTLAWWQCAMIMVAETI
SLGILSLPSAVASLGLVAAVILIIGLGALATYTGYTLGQFKLRYPHVHSMGDAGEVLMGR
IGREVLGTAQLLFLIFIMGSHLLTFTVMMNTLTDHGTCSIVFGVIGLAVSFAFTLPRTLK
KVSWFSISSFISIIAAVLITMIAIAIQKPGGGRVDAIVDNSFYKAFLAVTNIVFAYAGHV
AFFGFISEMRTPTDYPKTLYMLQGIDTSMYTISAVVIYRYGGRDVASPALGSTSPLMSKI
AYGIAIPTIVIAGVINGHVACKYIYVRLFRGTDRMHQRGLVSIGTWVIIGLVLWTLAWII
AEAIPVFNDLLSLITALFASWFTYGLSGIFWLFLNWGRYSSSRRKILLTGLNLLVVVVGG
CLCALGLYVSGKSIHDHPRSSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
>species3|gene4
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
EVLCGRYGRMFTEFAQLVFFMFASGXHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
IVVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
>species0|gene7
MSTLDVKDIENGPARRVEEEGGMWENDMEKTPSVERDPFGNEAVGEVHYKTLDWWQSGML
MIAETVSLGVLSLPATVAEVGLIPAIILIVGMGIIATYSGYVIGQFRARYPFIHSMADAG
EVLCGRYGRMFTEFAQLVFFMFASGSHLVTFTVMMNTLTNHGTCSVVFGVVGLVLSFACS
LPRTMKNVSWLAVTSFLSIFTAVLITMIGVAVEHPNPPPMQLTRSTSFVKGFSAVTNIAF
AYCGHPAFFGFIAEMKEPKDFPKSLCMLQGFEIVFYTVASAVIYRYAGQNVTSPALGSAG
IIVRKVAYGIAIPTIVIAGVVLGHVAIKNVYVRLFRGTDVMHKRSALGIGAWIGLAAGYW
IIAWVIAEAIPVFSDLVSLVSALFASWFSFGLPGVFWLYMYWGNYFTSVRKTLLTLANLA
LFGIGATICVCGLWVSGLSISSDSSGSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
>species2|gene4
MEAINANPPPYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPRIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTVTEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPGRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHRRSKRSVGIWIGLGLTC
WVVAWIIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
>species4|gene0
MEAIKANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPDRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEISLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGQWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
>species1|gene4
MEAINANPPAYRTEKVEETKYTSDYEEEGQLKTGQVADAFGNEESAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGLVPAVILIVGLGLLATYTGYNIGLFRERYPKIQNLGDA
GEILMGPIGREIFGLGQFLFFIFVMGSHILTFRVMMNTITEHGTCSIVFSVVGMVISMVL
SIPRTMKGLTWISFASFLSIFGAVMITMISVGVQDHPHRIIEATVDTTLYSGFQAVSNIV
FAYCAHVAFFGLIAEMENPRDFKKSLFMLQSFEICLYLTAAVVIYYFVGKDVASPALISA
GPVMKKVAFGIAIPTIVGAGVVNGHVGLKYIYFRLCHKSDLIHSRSKRSVGIWIGLGLTC
WVVAWVIAEAIPVFSDLNGLISALFASWFSYGLSGIYWLHLNYGHWFASPRKILLTILNI
SIALFGLALCVLGLYASGTAIHNDTSSSSFSCANTDA
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
>species3|gene0
MQDYLDYLYPLIPIVHRPSFQQSLQQDRDREDSGFLGLVTAIAAVVIATMPSRFHFYRSA
TPPLRFTSRRDMVRHCYDKILRLRDSTYFDHINFQKFAISYLLYAAFRQLGDHNWSRMLD
VEATQIARLLNLHRISEYDGLNCIETQLRKKGFWLIFYGFVHNQLQNVLGERLSYLDPIL
LHSINPEDLMPLEVDDEMIFENEVLMPPSHTPCLVTGFILHSRVFWAAIRSTCPESPAEP
CPCVRARDAAVQVAYIQDRLHSLRFLLEDIPPLLRPWQPPDSQAIAHEGGSTGVTEMTQS
HFASMRANLHVTHLWLQSLLVDQLEAAQAHKSEPSLVSTNHVQPMVDAKALWLQREGLCR
QLFCILYSLPQINLEANGLHLAYKVRDIAAGLLVCPFHAAGPEAERATEYLRQSTDILSR
LDSSEGMVTMHLQTWIDTDRIKSS
>species0|gene5
MEAVHDSPPPYATEGIDEKKEDISQVEQNLKPGLEESDAFGNEEFAEIKYKTLKWWQCGL
LMICESVSLGVLSLPAAVATLGFVPAVILIVGLGILATYTGYNIGLFRERYPHIQNLADA
GEILMGPFGRELFGLGQFLFCIFVMGSHLLTFRVMMNTITDHGTCSIVFSVVGMIISMVL
SIPRTMKGMTWISFASFLSIFSAVMITMIGVGVEKHPGRIIEATVDTTLYTAFTAVSNIV
FAYCAHVAFFGLIAEMEKPKDFKKSLFMLQAFEISLYVTAACVIYYYVGKDVQSPALSSA
GPLLKKVAYGIAIPTIVGAGVVNGHIGLKYIYFRTCSKSGLIHSRSRRSVAVWIALGLAC
WLVAWIIAEAIPVFSDLNSLISALFASWFSYGLSGIYWLHLNYGQWFASPRKIALTVLNA
AIAVFGLVLCVLGLYASGTAIHNDANSNKIGWPIECWHNREPFRVRHSVRFLLPQALKHT
GKYRAIRTNCWQPAICNHPESFPFGHRLRPNVAMHTRAELATQACDICRKRKVKCNVTSS
STDVPSRCGRCARLDLPCTFLSPSRTRGPKKRSRTGSPAQEQPDWGTGGSRASGAVNYPT
DDVCDRRMFSCIMQDYLDYLYPLIPIVHRPSFQQSLQEDRDREDSGFLGLVTAIAAVVIA
TMPSRFHFYRSATPPLRFTSRRDMVRHCYDKILRLRDSTYFDHINFQKFAISYLLYAAFR
QLGDHNWSRMLDVEATQIARLLNLHRISEYDGLNCIETQLRKKGFWLIFYGFVHNQLQNV
LGERLSYLDPILLHSINPEDLMPLEVDDEMIFENEVLMPPSHTPCLVTGFILHSRVFWAA
IRSTCPESPAEPCPCVRARDAAVQVAYIQDRLHSLRFLLEDIPPLLRPWQPPDSQAIAHE
GGSTGVTEMTQSHFASMRANLHVTHLWLQSLLVDQLEAAQAHKSEPSLVSTNHVQPMVDA
KALWLQREGLCRQLFCILYSLPQINLEANGLHLAYKVRDIAAGLLVCPFHPAGPEAERAT
EYLRQSTDILSRLDSSEGMVTMHLQTWIDTDRIKSS
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
>species4|gene4
MMLEGIPPPPEPIEAKQNDQEKALDDGADLKPIDNTPYIDPFGDEQNAEVKYKTLKWWQC
GMFMIAESVSLGVLSLPATLAALGLVPAIILIVGLGILALYTGYTIGQFRQCYPHIHNLA
DAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVGMLICM
VLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGSNISITAETNLYHAFTGVTN
IVFAYCAHVAFFGLIAEMEDPKEFPKSLCMLQFFEIALYVTAAIVIYYYVGNDVVSPALG
SAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTGRMHKRDLVSVGSWIAIGL
SCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINWGRWFSSPRKICLTII
NLLVVGIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAN
>species2|gene6
MMLDGVPQPPEPIEAKQNDQEKALDDGADLKPIDNTPYIDPFGDEQNAEVKYKTLKWWTC
AGNVECVSYGMGKRSSRLMIAESVSLGVLSLPATLASLGLVPAIILIVGLGILALYTGYT
IGQFRQCYPHIHNLADAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHG
TCSIVFSIVGMLICMVLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGSNISV
TAETNLYHAFTGVTNIVFAYCAHVAFFGLIAEMEDPKEFPKALCMLQFFEIALYVTAAIV
IYYYVGNDVVSPALGSAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTNRMH
KRDLVSVGSWIAIGLSCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINW
GRWFSSPRKICLTIVNLLIVCIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAS
>species1|gene1
MMLEGVPPPSEPIEAKQKDQEKALDDGADLKPIDNTPYVDPFGDEQNAEVKYKTLKWWQC
GMFMIAESVSLGVLSLPATLAALGLVPAIILIVGLGILALYTGYTIGQFRQCYPHIHNLA
DAGEILMGRFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVGMLICM
VLSLPRTIKNLTYISFASFLSIFSAVMITMIGVAVQFKGGANISITTETNLYHAFTGVTN
IVFAYCAHVAFFGLIAEMEDPKEFPKSLCMLQFFEIALYVTAAIVIYYYVGNDVVSPALG
SAGPLLKKVAYGIAIPTIVGAGVVNGHVGLKYIYVRIFRKTGRMHKRDLVSVGSWIAIGL
SCWIIAWIIAEGIPSFTNIVSLISSLFASWFSYGLPGVYWLHINWGRWFSSPRKICLTII
NLLIVCIGATMCGLGLYVSGKAIHDDSSNTSFTCANTAN
>species3|gene8
MRLDGVAPPPDAVEPKSQREKDEDVEDLKAINNAPEVDAFGDEANAEVKYKTLKWWQCGM
FMIAESVSLGVLSLPATMTALGLVPSLILIIGLGILALYTGYVIGQFRERHPYIHNLADA
GEILMGSFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVAFVICLVL
SLPRTIKNLTYISTASFLSIFSAVMITMIGVGVQYKGGQNISITTETNLYTAFSGVTQIM
FAYCAHVAFFGLIAEMEEPKDFPKALCLLQGFEISLYVTAAIVIYYYVGNGVDSPALGSA
GPVLKKVAYGMAIPTIIGAGVVNGHVGLKYIYVRIFRKSGRMHKNDWVSVGSWIGIGVTC
WVIAWIIGEGIPSFSNLVSLISSLFASWFSFGLPGAYWLHMNYGQWWSSPRKCALTIINM
LIFAIGGAMCGLGLYASGKAIHDDSSRSSFSCANNA
>species0|gene4
MRLLNKVALVTGSSSGIGRAIALRYAREGAKVACADITPTARSPVPNELDITTHDAISQE
GGQAFFLQTDVGDASQMENAVLKTAQQFGRLDIMVNNAGVSLESRTPARIHETTNELYDT
TMRINTRSVFLGSKYAITQMLKQDPHPSGDRGWIINLSSILGIVAATENPSYCASKGAVS
NLTRQVALDYARDRIHANAICPGYTRTAIYEETTEYMHAAADLIRRHPFNGPGLPDDIAR
VAVVLASEDASWMTGAVVPVDGGYTARFSFASIGTLVAVTTAALLRPSICNQTALSFPVT
MRLDGVAPPPDAVEPKSQREKDEDVEDLKAIDNAPEVDAFGDEANAEVKYKTLKWWQCGM
FMIAESVSLGVLSLPATMTALGLVPSLILIIGLGILALYTGYVIGQFRERHPYIHNLADA
GEILMGSFGRELFGLGQILFSIFIMGSHIVTFTVMMNTITDHGTCSIVFSIVAFVICLVL
SLPRTIKNLTYISTASFLSIFSAVMITMIGVGVQYKGGQNISITTETNLYTAFSGVTQIM
FAYCAHVAFFGLIAEMEEPKDFPKALCLLQGFEISLYVTAAIVIYYYVGNGVDSPALGSA
GPVLKKVAYGMAIPTIIGAGVVNGHVGLKYIYVRIFRKSGRMHKNDWVSVGSWIGIGVTC
WVIAWIIGEGIPSFSNLVSLISSLFASWFSFGLPGAYWLHMNYGQWWSSPRKCALTIINM
LIFAIGGAMCGLGLYASGKAIHDDSSRSSFSCANNA
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
>species4|gene6
MNAESQTQPQKPEDMDQKKEESMPPVRQDAFGDEEFAEVKYKVLKWWQGGLLMVAETISL
GILSLPAAVGTVGLAPGLAILISMGILASYNGYVIGQIKLRIPHISSMSDAGEVLLGPFG
RELLNAAQILLLIFIMASHILTFTVAFNVITGHATCSIVFGIVGAVISCLLSLPRTLEKV
SWLSLVSFVSIFVAVMVTMVSIGIIKPTSTWAVAKNTDLVTGFGGVTNMVFAYASHNSFF
TFIAELRDPREFPKALALLQSIDISLYIIAAVVIYYFAGDGVASPALGSAGPLISKIAYG
IALPTIIIAGVINGHIAAKAIYLRMFSGTDRIHKRDWVAVGSWIGIMAVLWTISWIIAEA
IPVFNDLIGLIAALFLSWFTFGLPGVFWLYMNKGMWFLSRRKIFLTVVNVSSVCIGLVVC
ALGLYASGVSIHQNPAGSVFSCGARS
>species2|gene1
MKAESQTQAQKPEDMDQKKEEPMPPVRQDAFGDEEFAEVKYKVLKWWQGGLLMVAETISL
GILSLPAAVGTVGLAPGLAILISMGILASYNGYVIGQIKLRFPHITSMSDAGEVLLGPFG
RELLNAAQILLLIFIMASHILTFTVAFNVMTGHATCSIVFGVVGAVISCLLSLPRTLEKV
SWLSLVSFVSIFAAVMVTMVSIGIIKPTSTWAVAKHTDLVTAFGGVTNMVFAYASHNSFF
TFIAELRDPREFPKALALLQSIDISLYVVAAVVIYYFAGDGVTSPALGSAGPLISKAAYG
IALPTIVIAGVINGHIAAKAIYLRMFSGTDRIHKRDWIAVGSWIGIMAVLWTISWIIAEA
IPVFNDLIGLIAALFLSWFTFGLPGVFWLYMNKGIWFLSRRKLFLTVVNVASVCIGLVVV
SIYILGSPTVLGLIDQCALGLYASGVSINHNPAGSVFSCGARS
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
>species4|gene3
MIQIVNDPPVFDPENPQEKGIASRDASLAEGEKKYAATPAYRQDAFGDESNAEVKYKVMK
WWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVILLVSLGIVATYTGYVLGQFKLKYPWV
HNMGLAGEVVFGSWGREILGAAQMLFLVFIMASHILTFVIAMNTLTDHGTCSIVFGVGGM
IISFILSLPRTLAKMSWLSLVSFISIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSA
VTNIVFAFSGHAAYFGLMAELKDPRDFPKALMLLQSVDVCLYIIAAIVIYVYGGDAISSP
ALGSADPIVSKVAYGIALPTIIIAGVINGHVAIKYVYLRIFADKKERIHKRDWVAVSSWV
AIALSLWTVAWIIAEAIPVFSNLLSLITALFASWFTYGLSGIFWLYLNWGKYLSSPRKMF
LTIVNLFCLVFGAVLCGLGLYVSGKAIHDNPSSVSFSCANNA
>species2|gene3
MIQSVNDPPLSNPENLQEKGIASRDASLAEDEKKYAATLAYRQDAFGDESNAEVKYKVMK
WWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVILLVSLGIIATYTGYVLGQFKLQYPWV
HNMGLAGEVVFGSWGREILGAAQMLLLVFIMASHILTFVIAMNTLTDHGTCSIVFGVAGL
IVSFILSLPRTLAKMSWLSLVSFISIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSA
VTNIVFAFSGHAAYFGLMAELKDPRDFPKALMLLQSVDVCLYIIAAIVIYVYGGDEIASP
ALGSADPLISKVAYGIALPTIIIAGVINGHVAIKYVYLRIFANKKERIHKRDWVAVSSWV
AIALSLWTVAWIIAEAIPVFSNLLSLITALFASWFTYGLSGIFWLYLNRGQYLSSPRKMF
LTIVNLFCLVFGAVLCGLGLYVSGKAIHDNPSSMSFSCANNA
>species1|gene3
MTQIVNVPPVSDLENPQEKGTSHDASLAEDEKKYDATPAYRQDAFGDESNAEVKYKVMKW
CIISAVIICMIGVIIKHPGGKVMATVDTDLVHGFSAVTNIVFAFSGHAAYFGLMAELKDP
RDFPKALMLLQSVDVCLYIIAAIVIYVYGGDAISSPALGSADPIVSKVAYGIALPTIIIA
GVINGHVAIKYVYIRIFAGKKERIHKRDWVAVSSWVAIALSLWTVAWIIAEAIPVFSNLL
SLITALFASWFTYGLSGIFWLYLNWGKYLSSPRKMFLTVVNLFCLVFGAVLCGLGLYVSG
KAIHDNPSSASFSCANNA
>species3|gene5
MRSSEIYPAPPTAADQELWEEKEVSTKQSSLEIGENKDFALHQTQDAFGNEEFAEVKYKV
LKWWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVIILVCLGALATYTGYVIGQFKWRYP
HICSMADAGEVLAGRFGRELLGFAQIIFLVFIMASHLLTFTIAMNDLTNHGTCSIVFGVV
GLAISFVCTLPRTLEKMSWLSLISFISILSSVFITMIGVGISHPGKVIEATVKTDLIHGF
TAVANIVFAFSGHAAFFSLAAELKNPADYPKALMLLQSVDITLYLVAAIVIYCYGGSTVT
SPALGSASTVVSKVAYGIALPTIIIAGVINGHVSAKSVYVRIFRGTDHMHKRSWIAVGSW
TAIVLALWVLAWIIAEAIPVFNKLLSLVTALFASWFTFGLSAIFWFYMNHGQWFSSPKKV
ALSAVNLLALGVGCCLCGLGLYVSGKAIHDDPHHASFTCMSTV
>species0|gene6
MRSSEIYPANPTAADQELWEEKEVSTKQSSLEIGENKDFALHQTQDAFGNEEFAEVKYKV
LKWWQCGLLMVAETVSLGVLSLPAAVAGLGLVPSVIILVCLGALATYTGYVIGQFKWRYP
HICSMADAGEVLAGRFGRELLGFAQIIFLVFIMASHLLTFTIAMNDLTNHGTCSIVFGVV
GLAISFVCTLPRTLEKMSWLSLISFISILSSVFITMIGVGISHPGKVIEATVKTDLIHGF
TAVANIVFAFSGHAAFFSLAAELKNPADYPKALMLLQSVDITLYLVAAIVIYCYGGSTVT
SPALGSASTVVSKVAYGIALPTIIIAGVINGHVSAKSVYVRIFRGTDHMHKRSWIAVGSW
TAIVLALWVLAWIIAEAIPVFNKLLSLVTALFASWFTFGLSAIFWFYMNHGQWFSSPKKV
ALSAVNLLALGVGCCLCGLGLYVSGKAIHDDPHHASFTCMSTV
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
>species4|gene5
MGSMHEAGSRPAADADMDTDRVHPEAVSNSERDFEKQDSKPEYRDAFGDEEYAEVKYKTL
SWWQCGFLMVAETVSLGILSLPAVVAALGLVPAIILLVALGLMSTYTGYTIGQFKWAYPH
IHSMGDAGEVIMGRFGRELFGTGQLLLVVFIMASHILTFTVAMNSITDHGTCSIVFGVVG
LVISFVLCLPRTLAKVSFLSVASFISVFSAVLIVMIAVGVQRPWHGSVNATVDTSLYKAF
LAVCNIVFSFCRLFLHFISFMDRTDRTAGHVAFFGFMAELRNPRDYPKSLFLLQGIDTCL
YIVAAVVIYCYAGDDVTSPALGSASTIVKKVAYGIALPTIIIGGVVNGHVACKYIYVRMW
RHSDRMHKRDLVATGSWVLIGLATWIVAWIIAEAIPVFNNLLSLVASLFASWFTYGFSAL
FWLYLNKGRFFSTPMKTALTILNVVIMGIACCICGLGLYVSGKALHDDPSSASFSCANNA
>species2|gene0
MGSMHEAGSRPAAGADMDTDRVHPEAVSDNERDFEKQDSKPEYQDAFGDEEYAEVKYKTL
SWCVFSAVLIVMIAVGVQRPWHGGLNATVDTNLYKAFLAVCNIVFSFCRLFLHFSSFMDR
TNKTAGHVAFFGFMAELRNPRDYPKSLFLLQGIDTCLYIIAAVVIYCYAGDDVTSPALGS
ASTIVKKVAYGIALPTIIIGGVVNGHVACKYIYVRMWRHSDRMHKRDLVATGSWVLIGLA
TWIVAWIIAEAIPVFNNLLSLVREVLVVICTMIWERVLIAPSVTAFRQPGTPDQPTEHLF
PLALFVATRDPRSIG
>species1|gene5
MGSMLEAGSRPAADAEMDTDRVHPEAVSDGERDFEKQDSKPEYQDAFGDEEYAEVKYKTL
SWCHGCRDSVTGYSLPASGGGRLGPCPFKWAYPHIHSMGDAGEVIMGRFGRELFGTGQLL
LVVFIMASHILTFTVAMNSITDHGTCSIVFGVVGLVISFVLCLPRTLAKVSFLSVASFIS
VFSAVLIVMIAVGVQRPWHGSVNATVDTSLYKAFLAVCNIVFSFSGHVAFFGFMAELKNP
RDYPKSLFLLQGIDTCLYIVAAVVIYCYAGDDVTSPALGSASIVVKKVAYGIALPTIIIG
GVVNGHVACKYIYVRMWRHSDRMHKRDLVATGSWVLIGLATWIVAWIIAEAIPVFNNLLS
LVASLFASWFTYGFSALFWLYLNKGRFFSTPMKTALTILNVVIMGIACCICGLGLYVSGK
ALHDDPSSASFSCANNA
>species3|gene6
MAHPTGDKVDSHLNVQTGQFFQDGREEPYLHDAEEKQDEKKGSPIYNDTFGDEEYAEVKY
KVLSWWQCGFLMVAETVSLGILSLPAVVATLGLAPAIVLIVGLGLLATYTGYVIGQFRWR
YPHVQNLADAGEILFGSIGREIFGIGQLLLVIFIMASHLLTFSVAMNTITEHGTCSIVFG
VVGLVICFLLGLPRTSANVSYLSVASFISVFSAVMIVMIAVGVERPYKGTLSATVDTSLY
EAFLAVCNIVFSFSGHVAFFGFMSELKDHREYPKALCLLQGLDTILYLVTSVVIYIYAGP
NVTSPALGSASELVGKVAYGIALPTIIIGGVVNGHVACKYVYVRIFRHGDRMHSRDLLAT
GSWVGIALGLWIIAWIIAEAIPVFNDLLSLIASLFASWSTFGFSGMFWLYLNKDRLFSSP
RKIALTIFNVIIIGIAACICGLGLYVSGRSLHDDANGSSFSCASNA
>species0|gene2
MAHPTGDKVDSHLNVQTGQFFQDGREEPYLHDAEEKQDEKKGSPIYNDTFGDEEYAEVKY
KVLSWWQCGFLMVAETVSLGILSLPAVVATLGLAPAIVLIVGLGLLATYTGYVIGQFRWR
YPHVQNLADAGEILFGSIGREIFGIGQLLLVIFIMASHLLTFSVAMNTITEHGTCSIVFG
VVGLVICFLLGLPRTSANVSYLSVASFISVFSAVMIVMIAVGVERPYKGTLSATVDTSLY
EAFLAVCNIVFSFSGHVAFFGFMSELKDHREYPKALCLLQGLDTILYLVTSVVIYIYAGP
NVTSPALGSASELVGKVAYGIALPTIIIGGVVNGHVACKYVYVRIFRHGDRMHSRDLLAT
GSWVGIALGLWIIAWIIAEAIPVFNDLLSLIASLFASWSTFGFSGMFWLYLNKDRLFSSP
RKIALTFFNVIIIGIAACICGLGLYVSGRSLHDDANGSSFSCASNA
Loading

0 comments on commit 6294768

Please sign in to comment.