-
Notifications
You must be signed in to change notification settings - Fork 2
/
parser.rs
64 lines (59 loc) · 1.38 KB
/
parser.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use std::str;
use nom::{IResult,not_line_ending,line_ending,eof};
/// One position of a read: the base symbol paired with its quality score.
#[derive(Debug)]
pub struct Base {
    /// Which nucleobase was read at this position.
    pub key: Nucleobase,
    /// Numeric quality for this position. `many_reads` fills it from the
    /// matching quality character, measured as its offset from `'!'`.
    pub score: f32,
}
/// A single parsed record: an identifier plus its scored bases.
///
/// The identifier is borrowed from the input buffer for lifetime `'a`, so the
/// buffer must outlive the `Sequence`.
#[derive(Debug)]
pub struct Sequence<'a> {
    /// Text of the header line that follows the leading `@`.
    pub id: &'a str,
    /// The bases of the read, in input order, each carrying its own score.
    pub bases: Vec<Base>,
}
/// The symbol found at one position of a sequence line.
///
/// The type is a plain fieldless enum, so it is `Copy` and `Hash` in addition
/// to being comparable; this lets callers duplicate values freely and use
/// them as keys in maps and sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Nucleobase {
    /// The `A` symbol.
    A,
    /// The `C` symbol.
    C,
    /// The `G` symbol.
    G,
    /// The `T` symbol.
    T,
    /// The `U` symbol.
    U,
    /// Any other symbol; `many_reads` maps every character that is not one
    /// of `A`, `C`, `G`, `T` or `U` to this variant.
    N,
}
/// Parses the quality section of a FASTQ-style record.
///
/// The section is made of two lines:
///
/// 1. a separator line starting with `+`. Any text after the `+` on that
///    line is skipped, because FASTQ allows the record identifier to be
///    repeated there (`+SEQ_ID`);
/// 2. the quality line itself, which must be valid UTF-8 and is terminated
///    either by a line ending or by the end of the input.
///
/// On success the quality line is returned without its terminator. Input
/// that does not start with `+`, or whose quality line is not valid UTF-8,
/// is rejected by the underlying combinators.
pub fn quality_scores(input: &[u8]) -> IResult<&[u8], &str> {
    chain!(input,
        tag!("+") ~
        // Skip an optional repeated identifier. This relies on
        // `not_line_ending` succeeding with an empty match, so the bare
        // "+" separator keeps parsing exactly as before.
        not_line_ending ~
        line_ending ~
        scores: map_res!(not_line_ending, str::from_utf8) ~
        alt!(eof | line_ending),
        || { scores }
    )
}
pub fn many_reads(input:&[u8]) -> IResult<&[u8], Vec<Sequence>> {
many1!(input,
chain!(
tag!("@") ~
id: map_res!(not_line_ending, str::from_utf8) ~ line_ending ~
bases: map_res!(not_line_ending, str::from_utf8) ~ alt!(line_ending | eof) ~
scores: opt!(quality_scores),
||{
Sequence {
id: id,
bases:
bases.chars().zip(
scores.unwrap().chars()
).map(|x| Base{key:
match x.0 {
'A' => Nucleobase::A,
'C' => Nucleobase::C,
'G' => Nucleobase::G,
'T' => Nucleobase::T,
'U' => Nucleobase::U,
_ => Nucleobase::N,
}
, score: ((x.1 as u8) - ('!' as u8)) as f32})
.collect::<Vec<Base>>()
}
}
)
)
}