Skip to content

Commit

Permalink
Add samples from FormatSpecimens.jl repo
Browse files Browse the repository at this point in the history
  • Loading branch information
Keats committed Sep 10, 2019
1 parent ddbc60a commit 4621ab0
Show file tree
Hide file tree
Showing 110 changed files with 5,826 additions and 45 deletions.
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ criterion = "0.3"
bio = "0.28"
seq_io = "0.3"

# for testing with the FormatSpecimens.jl repo
reqwest = "0.9"
toml = "0.4"
# for testing with the FormatSpecimens.jl repo samples
toml = "0.5"
serde = "1.0"
serde_derive = "1.0"

Expand Down
70 changes: 28 additions & 42 deletions tests/format_specimens.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::fs;
use std::io::Read;

use needletail::formats::{FastaParser, FastqParser, RecParser};
use needletail::{ParseError, ParseErrorType};
use reqwest::get;
use serde_derive::Deserialize;
use toml;

Expand Down Expand Up @@ -33,33 +33,6 @@ fn test_fasta_file(reader: &mut dyn Read, filename: &str) -> Result<(), ParseErr
Ok(())
}

/// Downloads the FASTA specimen index from the FormatSpecimens.jl repository
/// and runs `test_fasta_file` over every entry listed under `valid`.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested.
#[test]
#[ignore]
fn test_specimen_fasta() {
    let base_path = "https://raw.githubusercontent.com/BioJulia/FormatSpecimens.jl/master/FASTA";

    // Fetch and decode the TOML index that lists the available specimens.
    let idx_path = format!("{}/index.toml", base_path);
    let raw_index = get(&idx_path)
        .expect("Could not retrieve index")
        .text()
        .expect("Could not decode index");
    let index: TestIndex = toml::from_str(&raw_index).expect("Could not deserialize index");

    let comments_tag = String::from("comments");
    for test in index.valid {
        // what kind of sicko puts comments in FASTAs?
        let tags = test.tags.unwrap_or_else(Vec::new);
        if tags.contains(&comments_tag) {
            continue;
        }

        // Stream the specimen straight from the HTTP response into the parser check.
        let specimen_url = format!("{}/{}", base_path, test.filename);
        let mut response = get(&specimen_url).expect("Could not retrieve test data");
        assert_eq!(test_fasta_file(&mut response, &test.filename), Ok(()));
    }
}

fn test_fastq_file(reader: &mut dyn Read, filename: &str) -> Result<(), ParseError> {
let mut data: Vec<u8> = Vec::new();
let _ = reader.read_to_end(&mut data)?;
Expand All @@ -85,15 +58,28 @@ fn test_fastq_file(reader: &mut dyn Read, filename: &str) -> Result<(), ParseErr
}

#[test]
#[ignore]
fn test_specimen_fastq() {
let base_path = "https://raw.githubusercontent.com/BioJulia/FormatSpecimens.jl/master/FASTQ/";
let idx_path = format!("{}/index.toml", base_path);
let raw_index = get(&idx_path)
.expect("Could not retrieve index")
.text()
.expect("Could not decode index");
/// Runs `test_fasta_file` over every valid FASTA specimen stored under
/// `tests/specimen/FASTA`.
///
/// The list of specimens is read from that directory's `index.toml`; entries
/// tagged `comments` are skipped. The test panics, naming the offending path,
/// if the index or a specimen cannot be read, and fails with the specimen's
/// file name if `test_fasta_file` returns an error.
fn test_specimen_fasta() {
    let index_path = "tests/specimen/FASTA/index.toml";
    let raw_index = fs::read_to_string(index_path)
        .unwrap_or_else(|err| panic!("Could not read {}: {}", index_path, err));
    let index: TestIndex = toml::from_str(&raw_index).expect("Could not deserialize index");

    for test in index.valid {
        // what kind of sicko puts comments in FASTAs?
        if test
            .tags
            .unwrap_or_default()
            .iter()
            .any(|tag| tag == "comments")
        {
            continue;
        }

        let specimen_path = format!("tests/specimen/FASTA/{}", test.filename);
        let mut test_content = fs::File::open(&specimen_path)
            .unwrap_or_else(|err| panic!("Could not open {}: {}", specimen_path, err));
        // Name the specimen on failure, matching the message used by the FASTQ test.
        assert_eq!(
            test_fasta_file(&mut test_content, &test.filename),
            Ok(()),
            "File {} is bad?",
            test.filename
        );
    }
}

#[test]
fn test_specimen_fastq() {
let raw_index = fs::read_to_string("tests/specimen/FASTQ/index.toml").unwrap();
let index: TestIndex = toml::from_str(&raw_index).expect("Could not deserialize index");

for test in index.valid {
Expand All @@ -102,10 +88,10 @@ fn test_specimen_fastq() {
// (sequences are one-line, but quality scores are line-wrapped)
continue;
}
let test_path = format!("{}/{}", base_path, test.filename);
let mut test_reader = get(&test_path).expect("Could not retrieve test data");
let mut test_content =
fs::File::open(&format!("tests/specimen/FASTQ/{}", test.filename)).unwrap();
assert_eq!(
test_fastq_file(&mut test_reader, &test.filename),
test_fastq_file(&mut test_content, &test.filename),
Ok(()),
"File {} is bad?",
test.filename
Expand All @@ -117,10 +103,10 @@ fn test_specimen_fastq() {
// we don't care if the sequence ID doesn't match the quality id?
continue;
}
let test_path = format!("{}/{}", base_path, test.filename);
let mut test_reader = get(&test_path).expect("Could not retrieve test data");
let mut test_content =
fs::File::open(&format!("tests/specimen/FASTQ/{}", test.filename)).unwrap();
assert!(
test_fastq_file(&mut test_reader, &test.filename).is_err(),
test_fastq_file(&mut test_content, &test.filename).is_err(),
format!("File {} is good?", test.filename)
);
}
Expand Down
10 changes: 10 additions & 0 deletions tests/specimen/FASTA/AAC12660.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>AAC12660 TAK1 binding protein [Homo sapiens].
MAAQRRSLLQSEQQPSWTDDLPLCHLSGVGSASNRSYSADGKGTESHPPEDSWLKFRSEN
NCFLYGVFNGYDGNRVTNFVAQRLSAELLLGQLNAEHAEADVRRVLLQAFDVVERSFLES
IDDALAEKASLQSQLPEGVPQHQLPPQYQKILERLKTLEREISGGAMAVVAVLLNNKLYV
ANVGTNRALLCKSTVDGLQVTQLNVDHTTENEDELFRLSQLGLDAGKIKQVGIICGQEST
RRIGDYKVKYGYTDIDLLSAAKSKPIIAEPEIHGAQPLDGVTGFLVLMSEGLYKALEAAH
GPGQANQEIAAMIDTEFAKQTSLDAVAQAVVDRVKRIHSDTFASGGERARFCPRHEDMTL
LVRNFGYPLGEMSQPTPSPAPAAGGRVYPVSVPYSSAQSTSKTSVTLSLVMPSQGQMVNG
AHSASTLDEATPTLTNQSPTLTLQSTNTHTQSSSSSSDGGLFRSRPAHSLPPGEDGRVEP
YVDFAEFYRLWSVDHGEQSVVTAP
8 changes: 8 additions & 0 deletions tests/specimen/FASTA/EFTU_BACSU.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>sp|P33166|EFTU_BACSU Elongation factor Tu;
MAKEKFDRSKSHANIGTIGHVDHGKTTLTAAITTVLHKKSGKGTAMAYDQIDGAPEERER
GITISTAHVEYETETRHYAHVDCPGHADYVKNMITGAAQMDGAILVVSAADGPMPQTREH
ILLSKNVGVPYIVVFLNKCDMVDDEELLELVEMEVRDLLSEYDFPGDDVPVVKGSALKAL
EGDAEWEAKIFELMDAVDEYIPTPERDTEKPFMMPVEDVFSITGRGTVATGRVERGQVKV
GDEVEIIGLQEENKKTTVTGVEMFRKLLDYAEAGDNIGALLRGVSREEIQRGQVLAKPGT
ITPHSKFKAEVYVLSKEEGGRHTPFFSNYRPQFYFRTTDVTGIIHLPEGVEMVMPGDNTE
MNVELISTIAIEEGTRFSIREGGRTVGSGVVSTITE
52 changes: 52 additions & 0 deletions tests/specimen/FASTA/HUMBETGLOA.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
>HUMBETGLOA Human haplotype C4 beta-globin gene, complete cds.
ACCTCCTATTTGACACCACTGATTACCCCATTGATAGTCACACTTTGGGTTGTAAGTGAC
TTTTTATTTATTTGTATTTTTGACTGCATTAAGAGGTCTCTAGTTTTTTACCTCTTGTTT
CCCAAAACCTAATAAGTAACTAATGCACAGAGCACATTGATTTGTATTTATTCTATTTTT
AGACATAATTTATTAGCATGCATGAGCAAATTAAGAAAAACAACAACAAATGAATGCATA
TATATGTATATGTATGTGTGTACATATACACATATATATATATATATATTTTTTCTTTTC
TTACCAGAAGGTTTTAATCCAAATAAGGAGAAGATATGCTTAGAACTGAGGTAGAGTTTT
CATCCATTCTGTCCTGTAAGTATTTTGCATATTCTGGAGACGCAGGAAGAGATCCATCTA
CATATCCCAAAGCTGAATTATGGTAGACAAAACTCTTCCACTTTTAGTGCATCAACTTCT
TATTTGTGTAATAAGAAAATTGGGAAAACGATCTTCAATATGCTTACCAAGCTGTGATTC
CAAATATTACGTAAATACACTTGCAAAGGAGGATGTTTTTAGTAGCAATTTGTACTGATG
GTATGGGGCCAAGAGATATATCTTAGAGGGAGGGCTGAGGGTTTGAAGTCCAACTCCTAA
GCCAGTGCCAGAAGAGCCAAGGACAGGTACGGCTGTCATCACTTAGACCTCACCCTGTGG
AGCCACACCCTAGGGTTGGCCAATCTACTCCCAGGAGCAGGGAGGGCAGGAGCCAGGGCT
GGGCATAAAAGTCAGGGCAGAGCCATCTATTGCTTACATTTGCTTCTGACACAACTGTGT
TCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGT
TACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGTT
GGTATCAAGGTTACAAGACAGGTTTAAGGAGACCAATAGAAACTGGGCATGTGGAGACAG
AGAAGACTCTTGGGTTTCTGATAGGCACTGACTCTCTCTGCCTATTGGTCTATTTTCCCA
CCCTTAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATC
TGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGC
TCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACAC
TGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGGTGAGTCTAT
GGGACCCTTGATGTTTTCTTTCCCCTTCTTTTCTATGGTTAAGTTCATGTCATAGGAAGG
GGATAAGTAACAGGGTACAGTTTAGAATGGGAAACAGACGAATGATTGCATCAGTGTGGA
AGTCTCAGGATCGTTTTAGTTTCTTTTATTTGCTGTTCATAACAATTGTTTTCTTTTGTT
TAATTCTTGCTTTCTTTTTTTTTCTTCTCCGCAATTTTTACTATTATACTTAATGCCTTA
ACATTGTGTATAACAAAAGGAAATATCTCTGAGATACATTAAGTAACTTAAAAAAAAACT
TTACACAGTCTGCCTAGTACATTACTATTTGGAATATATGTGTGCTTATTTGCATATTCA
TAATCTCCCTACTTTATTTTCTTTTATTTTTAATTGATACATAATCATTATACATATTTA
TGGGTTAAAGTGTAATGTTTTAATATGTGTACACATATTGACCAAATCAGGGTAATTTTG
CATTTGTAATTTTAAAAAATGCTTTCTTCTTTTAATATACTTTTTTGTTTATCTTATTTC
TAATACTTTCCCTAATCTCTTTCTTTCAGGGCAATAATGATACAATGTATCATGCCTCTT
TGCACCATTCTAAAGAATAACAGTGATAATTTCTGGGTTAAGGCAATAGCAATATCTCTG
CATATAAATATTTCTGCATATAAATTGTAACTGATGTAAGAGGTTTCATATTGCTAATAG
CAGCTACAATCCAGCTACCATTCTGCTTTTATTTTATGGTTGGGATAAGGCTGGATTATT
CTGAGTCCAAGCTAGGCCCTTTTGCTAATCATGTTCATACCTCTTATCTTCCTCCCACAG
CTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCA
CCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAG
TATCACTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAG
TCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATA
AAAAACATTTATTTTCATTGCAATGATGTATTTAAATTATTTCTGAATATTTTACTAAAA
AGGGAATGTGGGAGGTCAGTGCATTTAAAACATAAAGAAATGAAGAGCTAGTTCAAACCT
TGGGAAAATACACTATATCTTAAACTCCATGAAAGAAGGTGAGGCTGCAAACAGCTAATG
CACATTGGCAACAGCCCTGATGCATATGCCTTATTCATCCCTCAGAAAAGGATTCAAGTA
GAGGCTTGATTTGGAGGTTAAAGTTTTGCTATGCTGTATTTTACATTACTTATTGTTTTA
GCTGTCCTCATGAATGTCTTTTCACTACCCATTTGCTTATCCTGCATCTCTCAGCCTTGA
CTCCACTCAGTTCTCTTGCTTAGAGATACCACCTTTCCCCTGAAGTGTTCCTTCCATGTT
TTACGGCGAGATGGTTTCTCCTCGCCTGGCCACTCAGCCTTAGTTGTCTCTGTTGTCTTA
TAGAGGTCTACTTGAAGAAGGAAAAACAGGGGTCATGGTTTGACTGTCCTGTGAGCCCTT
CTTCCCTGCCTCCCCCACTCACAGTGACCCGGAATCTGCAGTGCTAGTCTCCCGGAACTA
TC
2 changes: 2 additions & 0 deletions tests/specimen/FASTA/P02456.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>sp|P02456|CO1A1_RABIT Collagen alpha-1(I) chain (Fragment) OS=Oryctolagus cuniculus GN=COL1A1 PE=1 SV=1
SYGYBZKSAGVSVPGPMGPSGPRGLPGPPGAPGPZGFZGPPGZPGZPGSSGPM
9 changes: 9 additions & 0 deletions tests/specimen/FASTA/alnfile.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
>BS1-fragment 7fab light chain variable region
TISCTGSSSNIGAGNHVKWYQQLPG
>BS2-fragment 2fb4 light chain variable region
VTISCTGTSSNIGSITVNWYQQLPG
>BS3-fragment 2fb4 heavy chain variable region
LRLSCSSSGFIFSSYAMYWVRQAPG
>BS4-fragment 7fab heavy chain variable region
LSLTCTVSGTSFDDYYSTWVRQPPG

10 changes: 10 additions & 0 deletions tests/specimen/FASTA/amino.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>CYS1_DICDI fragment
SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGE

>ALEU_HORVU
MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
IATCASYPVVAA
3 changes: 3 additions & 0 deletions tests/specimen/FASTA/aster.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
>gi|3298468|dbj|BAA31520.1| SAMIPF
GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVGVTNALVFEIV
MTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI
27 changes: 27 additions & 0 deletions tests/specimen/FASTA/badfasta.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
>CEESC12R
gcacgagtccatctccatatgccaccacaacantggtcctgtcgaaccaa
caaccagcttggctcaatgacaaaatgcttcgcgcgccanaatgccaaca
aatcccgtgccaccagagccaccggcgcgatatgcagatcataccgctgg
aagacgatctcgatcgagccgtgcatccgatgggagaggaactctgaatg
gcggactccatcaccggactagcggaagtcaacggtcggatagtccacct
cacacagatgtgagctatgttcagcttcactcatccgatggaactggtag
tagtaaggaaagaantngggagcggagaacaccaccgaataaa
>CEESC13F
cttgcttgaaaaatttatataaatatttaagagaagaaaaataaataatc
gcatctaatgacgtctgtccttgtatccctggtttccattgactggtgca
ctttcctgtctttgaggacatggacaatattcggcatcagttcctggctc
tccctcctctcctggtgctccagcagaaccgttctctccattatctccct
tgtctccacgtggtccacgctctcctggtgctcctggaataccttgagct
ccctcgtgccgaattcctgcagcccgggggatccactagttctagagcgg
ccgccaccgcggtgggagctccagcttttgttncctttagtgagggttaa

tttcgagcttggcgtaatcatggtcatagctgtttcctg
>CEESC13R
gcacgagggagctcaaggtattccaggagcaccaggagagcgtggaccac
gtggagacaagggagataatggagagaacggttctgctggagcaccagga

gaggagggagagccaggaactgatgccgaatattgtccatgtcctcaaag
acaggaaagtgcaccagtcaatggaaaccagggatacaagaacagacgtc
attagatgcgattatttatttttcttctcttaaatatttatataaatttt
tcaagcaag

8 changes: 8 additions & 0 deletions tests/specimen/FASTA/bug2901.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>seq1
GGTACCAGCAGGTGGGCCGCCTACTGCGCACGCGCGGGTTTGCGGGCAGCCGC
>seq2
GGTACCAGCTGGTGGGCCGCCTACTGCGCACGCGCGGGTTTGCGGGCAGCCGC
>seq3
GGTACCAGCAGGTGGTCCGCCTA------------------------------
>seq4
--------------------------CGCACGCGCGTGTTTGCGGGCAGCCGC
18 changes: 18 additions & 0 deletions tests/specimen/FASTA/centaurea.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
>gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence
CCTGTCACTTAACTTTTTGTTCATAAGGTATATATGGGGGGGTGTGTGTCTTCGGACACACTTTCGATTG
GGCCATGGTGCCCCCCTGCCAGGCAAAGGGATGCCTGGGCATGGCCACCCGGGAAAATGCGCTTCACTTC
TTGCATGCCAGTCGTTTTTAAAAACGGCTGCGGCTCGTGTATCGTATTTTCCCCGGGATCGGTGACGCTT
AGATGACGACGGCCCTTGACGGATATAGATGAAGGAATTGACGGATATGCAGATCGGTTGCCTATTGACC
CTTGTCGNTACTATGTAGGCTACATCCCGGTCATGGTGGGTTCGATTGTGGGTTTGTTACTTTCGCATCG
TTCGGTGCATTTTGGGGGCTGTTTTGGCTTTTCGTTGCTTGCTTGACCTTTTTTGGTTGGGTGGGTGGCT
TCTAGTTGGGCGGCTTTCGGGAATGTCGACCGGGGAGGTGCATGAGTGGTGATTGGTATGTTACGTGTGG
ATGGCTGTTTGCTCGCGCACCAACGTTCACGCTTCATACCTCTTCAGTTTTGCGTCAAGACTTGATTTGT
CTTGAGCAATGTCGTGGGTTTCCTGTGTTGCATACCTAATTGACGGTAACGTGTGGGCTTTACGGTTATC
TATTTCGTCATGTGTCGTTCATGCGGCGTGGGATGAACAATGAAACGGCCTTTGTGATCCACAGTGGTGT
TGCGTTTGCAACAACGATGTGGCCTATGAAGCATCGTTTGGTCTCTTGTATACGGAACCTGATGTGGGCA
TGGGTCTTCATAAGATCTTCATTATGCCCTCGTTAAGCGTTCTCGCTTCTCCAAAACGATTGCCTGCCAT
GTTACGGCTTTGTTCGTTCATGGTGGGCTACGTCGAAGAGGAATGCTACCTGGTTGATCCTGCCAGTAGT
CATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTACGAACAAATTCAGACTGTGAAACTGCGAA
TGGCTCATTAAATCAGTTAGAG


58 changes: 58 additions & 0 deletions tests/specimen/FASTA/cysprot.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
>CYS1_DICDI
MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII


>ALEU_HORVU
MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
IATCASYPVVAA


>CATH_HUMAN
MWATLPLLCAGAWLLGVPVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHNN
GNHTFKMALNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPPSVDWRKKGNFVSPVKNQGACGS
CWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNYGCQGGLPSQAFEYILYNKGIMGEDTYPYQGK
DGYCKFQPGKAIGFVKDVANITIYDEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVN
HAVLAVGYGEKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV


>CATH_RAT
MWTALPLLCAGAWLLSAGATAELTVNAIEKFHFTSWMKQHQKTYSSREYSHRLQVFANNWRKIQAHNQRN
HTFKMGLNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPSSMDWRKKGNVVSPVKNQGACGSCW
TFSTTGALESAVAIASGKMMTLAEQQLVDCAQNFNNHGCQGGLPSQAFEYILYNKGIMGEDSYPYIGKNG
QCKFNPEKAVAFVKNVVNITLNDEAAMVEAVALYNPVSFAFEVTEDFMMYKSGVYSSNSCHKTPDKVNHA
VLAVGYGEQNGLLYWIVKNSWGSNWGNNGYFLIERGKNMCGLAACASYPIPQV


>CATL_HUMAN
MNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLYGMNEEGWRRAVWEKNMKMIELHNQEYREGK
HSFTMAMNAFGDMTSEEFRQVMNGFQNRKPRKGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWA
FSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEES
CKYNPKYSVANDTGFVDIPKQEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLV
VGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTV


>CATL_RAT
MTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEEEWRRAVWEKNMRMIQLHNGEYSNGK
HGFTMEMNAFGDMTNEEFRQIVNGYRHQKHKKGRLFQEPLMLQIPKTVDWREKGCVTPVKNQGQCGSCWA
FSASGCLEGQMFLKTGKLISLSEQNLVDCSHDQGNQGCNGGLMDFAFQYIKENGGLDSEESYPYEAKDGS
CKYRAEYAVANDTGFVDIPQQEKALMKAVATVGPISVAMDASHPSLQFYSSGIYYEPNCSSKDLDHGVLV
VGYGYEGTDSNKDKYWLVKNSWGKEWGMDGYIKIAKDRNNHCGLATAASYPIVN


>PAPA_CARPA
MAMIPSISKLLFVAICLFVYMGLSFGDFSIVGYSQNDLTSTERLIQLFESWMLKHNKIYKNIDEKIYRFE
IFKDNLKYIDETNKKNNSYWLGLNVFADMSNDEFKEKYTGSIAGNYTTTELSYEEVLNDGDVNIPEYVDW
RQKGAVTPVKNQGSCGSCWAFSAVVTIEGIIKIRTGNLNEYSEQELLDCDRRSYGCNGGYPWSALQLVAQ
YGIHYRNTYPYEGVQRYCRSREKGPYAAKTDGVRQVQPYNEGALLYSIANQPVSVVLEAAGKDFQLYRGG
IFVGPCGNKVDHAVAAVGYGPNYILIKNSWGTGWGENGYIRIKRGTGNSYGVCGLYTSSFYPVKN



6 changes: 6 additions & 0 deletions tests/specimen/FASTA/cysprot1.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>CYS1_DICDI
MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII
24 changes: 24 additions & 0 deletions tests/specimen/FASTA/cysprot1a.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
>CYS1_DICDI
MKVILLFVLAVFTVFVSSRGIPPEEQSQFLEFQDKFNKKYSHEEYLERFEIFKSNLGKIEELNLIAINHK
ADTKFGVNKFADLSSDEFKNYYLNNKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCG
SCWSFSTTGNVEGQHFISQNKLVSLSEQNLVDCDHECMEYEGEEACDEGCNGGLQPNAYNYIIKNGGIQT
ESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGPLAIAADAVEWQFYIGGVFDIPCN
PNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII


>ALEU_HORVU
MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTLESAVLGALGRTRHALRFARFAVRYGK
SYESAAEVRRRFRIFSESLEEVRSTNRKGLPYRLGINRFSDMSWEEFQATRLGAAQTCSATLAGNHLMRD
AAALPETKDWREDGIVSPVKNQAHCGSCWTFSTTGALEAAYTQATGKNISLSEQQLVDCAGGFNNFGCNG
GLPSQAFEYIKYNGGIDTEESYPYKGVNGVCHYKAENAAVQVLDSVNITLNAEDELKNAVGLVRPVSVAF
QVIDGFRQYKSGVYTSDHCGTTPDDVNHAVLAVGYGVENGVPYWLIKNSWGADWGDNGYFKMEMGKNMCA
IATCASYPVVAA


>CATH_HUMAN
MWATLPLLCAGAWLLGVPVCGAAELSVNSLEKFHFKSWMSKHRKTYSTEEYHHRLQTFASNWRKINAHNN
GNHTFKMALNQFSDMSFAEIKHKYLWSEPQNCSATKSNYLRGTGPYPPSVDWRKKGNFVSPVKNQGACGS
CWTFSTTGALESAIAIATGKMLSLAEQQLVDCAQDFNNYGCQGGLPSQAFEYILYNKGIMGEDTYPYQGK
DGYCKFQPGKAIGFVKDVANITIYDEEAMVEAVALYNPVSFAFEVTQDFMMYRTGIYSSTSCHKTPDKVN
HAVLAVGYGEKNGIPYWIVKNSWGPQWGMNGYFLIERGKNMCGLAACASYPIPLV

Loading

0 comments on commit 4621ab0

Please sign in to comment.