Skip to content

Commit 0568c6a

Browse files
committed
Clean up lexer a little
1 parent c100b04 commit 0568c6a

File tree

3 files changed

+92
-79
lines changed

3 files changed

+92
-79
lines changed

pikelet/src/surface.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::ops::{Range, RangeFrom};
66

77
pub mod projections;
88

9-
mod lex;
9+
mod lexer;
1010

1111
#[allow(clippy::all, unused_parens)]
1212
mod grammar {
@@ -41,12 +41,12 @@ pub enum Term<S> {
4141
Error(Range<usize>),
4242
}
4343

44-
type ParseError<'input> = lalrpop_util::ParseError<usize, lex::Token<'input>, lex::LexicalError>;
44+
type ParseError<'input> = lalrpop_util::ParseError<usize, lexer::Token<'input>, lexer::LexerError>;
4545

4646
impl<'input> Term<&'input str> {
4747
#[allow(clippy::should_implement_trait)]
4848
pub fn from_str(input: &'input str) -> Result<Term<&'input str>, ParseError<'input>> {
49-
let tokens = lex::Tokens::new(input);
49+
let tokens = lexer::Tokens::new(input);
5050
grammar::TermParser::new().parse(tokens)
5151
}
5252
}

pikelet/src/surface/grammar.lalrpop

+26-25
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,36 @@
11
use std::ops::Range;
22

33
use crate::surface::{Term, Literal};
4-
use crate::surface::lex::{LexicalError, Token};
4+
use crate::surface::lexer::{LexerError, Token};
55

66
grammar<'input>;
77

88
extern {
9-
type Location = usize;
10-
type Error = LexicalError;
11-
enum Token<'input> {
12-
"->" => Token::Arrow,
13-
"=>" => Token::DArrow,
14-
":" => Token::Colon,
15-
"," => Token::Comma,
16-
"." => Token::Dot,
17-
"fun" => Token::Fun,
18-
"record" => Token::RecordTerm,
19-
"Record" => Token::RecordType,
20-
"{" => Token::LBrace,
21-
"}" => Token::RBrace,
22-
"[" => Token::LBrack,
23-
"]" => Token::RBrack,
24-
"(" => Token::LParen,
25-
")" => Token::RParen,
26-
"=" => Token::Equal,
27-
CharLiteral => Token::CharLiteral(<&'input str>),
28-
StrLiteral => Token::StrLiteral(<&'input str>),
29-
NumLiteral => Token::NumLiteral(<&'input str>),
30-
Id => Token::Name(<&'input str>),
31-
Shift => Token::Shift(<&'input str>),
32-
}
9+
type Location = usize;
10+
type Error = LexerError;
11+
12+
enum Token<'input> {
13+
"->" => Token::Arrow,
14+
"=>" => Token::DArrow,
15+
":" => Token::Colon,
16+
"," => Token::Comma,
17+
"." => Token::Dot,
18+
"fun" => Token::FunTerm,
19+
"record" => Token::RecordTerm,
20+
"Record" => Token::RecordType,
21+
"{" => Token::LBrace,
22+
"}" => Token::RBrace,
23+
"[" => Token::LBrack,
24+
"]" => Token::RBrack,
25+
"(" => Token::LParen,
26+
")" => Token::RParen,
27+
"=" => Token::Equal,
28+
CharLiteral => Token::CharLiteral(<&'input str>),
29+
StrLiteral => Token::StrLiteral(<&'input str>),
30+
NumLiteral => Token::NumLiteral(<&'input str>),
31+
Id => Token::Name(<&'input str>),
32+
Shift => Token::Shift(<&'input str>),
33+
}
3334
}
3435

3536
pub Term: Term<&'input str> = {

pikelet/src/surface/lex.rs pikelet/src/surface/lexer.rs

+63-51
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
use logos::Logos;
22
use std::fmt;
3+
use std::ops::Range;
34

4-
/// The complete set of `LexToken`s some of which never escape the lexer.
5-
/// See Token for a list of which Tokens do and do not escape.
5+
/// The complete set of `LexerToken`s, some of which never escape the lexer.
6+
/// See `Token` for a list of which Tokens do and do not escape.
67
#[derive(Logos)]
7-
enum LexToken {
8+
enum LexerToken {
89
#[end]
9-
EOF,
10+
Eof,
1011
#[token = ":"]
1112
Colon,
1213
#[token = ","]
1314
Comma,
1415
#[token = "fun"]
15-
Fun,
16+
FunTerm,
1617
#[token = "=>"]
1718
DArrow,
1819
#[token = "->"]
@@ -59,19 +60,21 @@ enum LexToken {
5960
Error,
6061
}
6162

62-
/// The subset of `LexToken`s for the parser.
63-
/// The tokens in `LexToken`s which are excluded from this enum are:
64-
/// * Whitespace -- skipped.
65-
/// * EOF -- turned into None, rather than a token.
66-
/// * Error -- turned into Some(Err(...)).
63+
/// The subset of `LexerToken`s for the parser.
64+
///
65+
/// The tokens in `LexerToken`s which are excluded from this enum are:
66+
///
67+
/// - `Whitespace`: skipped.
68+
/// - `Eof`: turned into `None`, rather than a token.
69+
/// - `Error`: turned into `Some(Err(...))`.
6770
///
6871
/// `Comment`, while a valid token, has been reserved but is not currently
6972
/// emitted by the lexer.
7073
#[derive(Debug, Clone)]
7174
pub enum Token<'a> {
7275
Colon,
7376
Comma,
74-
Fun,
77+
FunTerm,
7578
DArrow,
7679
Arrow,
7780
LParen,
@@ -98,7 +101,7 @@ impl<'a> fmt::Display for Token<'a> {
98101
match self {
99102
Token::Colon => write!(f, ":"),
100103
Token::Comma => write!(f, ","),
101-
Token::Fun => write!(f, "Fun"),
104+
Token::FunTerm => write!(f, "fun"),
102105
Token::DArrow => write!(f, "=>"),
103106
Token::Arrow => write!(f, "->"),
104107
Token::LParen => write!(f, "("),
@@ -121,72 +124,81 @@ impl<'a> fmt::Display for Token<'a> {
121124
}
122125
}
123126

124-
#[derive(Debug)]
125-
pub struct LexicalError(std::ops::Range<usize>, &'static str);
127+
#[derive(Debug, Clone)]
128+
pub enum LexerError {
129+
InvalidToken(Range<usize>),
130+
}
126131

127-
impl fmt::Display for LexicalError {
132+
impl fmt::Display for LexerError {
128133
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
129-
write!(f, "Lexical error: {:?} {}", self.0, self.1)
134+
match self {
135+
LexerError::InvalidToken(range) => write!(f, "Invalid token: {:?}", range),
136+
}
130137
}
131138
}
132139

133-
pub struct Tokens<'a>(logos::Lexer<LexToken, &'a str>);
140+
pub struct Tokens<'a> {
141+
lexer: logos::Lexer<LexerToken, &'a str>,
142+
}
143+
134144
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
135145

136146
impl<'a> Tokens<'a> {
137147
pub fn new(source: &'a str) -> Tokens<'a> {
138-
Tokens(LexToken::lexer(source))
148+
Tokens {
149+
lexer: LexerToken::lexer(source),
150+
}
139151
}
140152
}
141153

142154
impl<'a> Iterator for Tokens<'a> {
143-
type Item = Spanned<Token<'a>, usize, LexicalError>;
155+
type Item = Spanned<Token<'a>, usize, LexerError>;
144156

145157
fn next(&mut self) -> Option<Self::Item> {
146-
let lex = &mut self.0;
158+
let lexer = &mut self.lexer;
147159

148160
const fn tok<'a>(
149-
r: std::ops::Range<usize>,
161+
r: Range<usize>,
150162
t: Token<'a>,
151-
) -> Option<Spanned<Token<'a>, usize, LexicalError>> {
163+
) -> Option<Spanned<Token<'a>, usize, LexerError>> {
152164
Some(Ok((r.start, t, r.end)))
153165
}
154166

155-
let range = lex.range();
167+
let range = lexer.range();
156168

157169
let token = loop {
158-
match &lex.token {
159-
// There doesn't seem to be any harm in advancing after EOF.
170+
match &lexer.token {
171+
// There doesn't seem to be any harm in advancing after `Eof`.
160172
// But we might as well return.
161-
LexToken::EOF => return None,
162-
LexToken::Error => break Some(Err(LexicalError(range, "Lexical error"))),
163-
LexToken::Whitespace | LexToken::Comment => {
164-
lex.advance();
173+
LexerToken::Eof => return None,
174+
LexerToken::Error => break Some(Err(LexerError::InvalidToken(range))),
175+
LexerToken::Whitespace | LexerToken::Comment => {
176+
lexer.advance();
165177
continue;
166178
}
167-
LexToken::Colon => break tok(range, Token::Colon),
168-
LexToken::Comma => break tok(range, Token::Comma),
169-
LexToken::Fun => break tok(range, Token::Fun),
170-
LexToken::DArrow => break tok(range, Token::DArrow),
171-
LexToken::Arrow => break tok(range, Token::Arrow),
172-
LexToken::LParen => break tok(range, Token::LParen),
173-
LexToken::RParen => break tok(range, Token::RParen),
174-
LexToken::LBrack => break tok(range, Token::LBrack),
175-
LexToken::RBrack => break tok(range, Token::RBrack),
176-
LexToken::LBrace => break tok(range, Token::LBrace),
177-
LexToken::RBrace => break tok(range, Token::RBrace),
178-
LexToken::Dot => break tok(range, Token::Dot),
179-
LexToken::Equal => break tok(range, Token::Equal),
180-
LexToken::RecordTerm => break tok(range, Token::RecordTerm),
181-
LexToken::RecordType => break tok(range, Token::RecordType),
182-
LexToken::Name => break tok(range, Token::Name(lex.slice())),
183-
LexToken::Shift => break tok(range, Token::Shift(lex.slice())),
184-
LexToken::NumLiteral => break tok(range, Token::NumLiteral(lex.slice())),
185-
LexToken::CharLiteral => break tok(range, Token::CharLiteral(lex.slice())),
186-
LexToken::StrLiteral => break tok(range, Token::StrLiteral(lex.slice())),
179+
LexerToken::Colon => break tok(range, Token::Colon),
180+
LexerToken::Comma => break tok(range, Token::Comma),
181+
LexerToken::FunTerm => break tok(range, Token::FunTerm),
182+
LexerToken::DArrow => break tok(range, Token::DArrow),
183+
LexerToken::Arrow => break tok(range, Token::Arrow),
184+
LexerToken::LParen => break tok(range, Token::LParen),
185+
LexerToken::RParen => break tok(range, Token::RParen),
186+
LexerToken::LBrack => break tok(range, Token::LBrack),
187+
LexerToken::RBrack => break tok(range, Token::RBrack),
188+
LexerToken::LBrace => break tok(range, Token::LBrace),
189+
LexerToken::RBrace => break tok(range, Token::RBrace),
190+
LexerToken::Dot => break tok(range, Token::Dot),
191+
LexerToken::Equal => break tok(range, Token::Equal),
192+
LexerToken::RecordTerm => break tok(range, Token::RecordTerm),
193+
LexerToken::RecordType => break tok(range, Token::RecordType),
194+
LexerToken::Name => break tok(range, Token::Name(lexer.slice())),
195+
LexerToken::Shift => break tok(range, Token::Shift(lexer.slice())),
196+
LexerToken::NumLiteral => break tok(range, Token::NumLiteral(lexer.slice())),
197+
LexerToken::CharLiteral => break tok(range, Token::CharLiteral(lexer.slice())),
198+
LexerToken::StrLiteral => break tok(range, Token::StrLiteral(lexer.slice())),
187199
}
188200
};
189-
lex.advance();
201+
lexer.advance();
190202
token
191203
}
192204
}
@@ -195,7 +207,7 @@ impl<'a> Iterator for Tokens<'a> {
195207
fn behavior_after_error() {
196208
let starts_with_invalid = "@.";
197209
// [Err(...), Ok(Token::Dot)]
198-
let from_lex: Vec<Spanned<Token<'static>, usize, LexicalError>> =
210+
let from_lex: Vec<Spanned<Token<'static>, usize, LexerError>> =
199211
Tokens::new(starts_with_invalid).collect();
200212
let result: Vec<bool> = from_lex.iter().map(Result::is_ok).collect();
201213
assert_eq!(result, vec![false, true]);

0 commit comments

Comments
 (0)