Merge #2911

2911: Implement collecting errors while tokenizing r=matklad a=Veetaha

Now we are collecting errors from `rustc_lexer` and returning them in `ParsedToken { token, error }` and `ParsedTokens { tokens, errors }` structures **([UPD]: this is now simplified, see the update below)**.

The main changes are introduced in `ra_syntax/parsing/lexer.rs`. It now exposes the following functions and types:

```rust
pub fn tokenize(text: &str) -> ParsedTokens;
pub fn tokenize_append(text: &str, parsed_tokens_to_append_to: &mut ParsedTokens);
pub fn first_token(text: &str) -> Option<ParsedToken>; // allows any number of tokens in text
pub fn single_token(text: &str) -> Option<ParsedToken>; // allows only a single token in text

pub struct ParsedToken  { pub token: Token,       pub error: Option<SyntaxError> }
pub struct ParsedTokens { pub tokens: Vec<Token>, pub errors: Vec<SyntaxError>   }

pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }
```
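
A minimal sketch of how a caller was meant to consume this first iteration (illustrative only — the input string is my own example, and this shape is replaced by the simplified API in the update below):

```rust
use ra_syntax::{tokenize, ParsedTokens};

// Lexing never fails: even invalid input yields tokens, with problems
// reported out-of-band as `SyntaxError`s.
let ParsedTokens { tokens, errors } = tokenize("let s = \"unterminated");
assert!(!tokens.is_empty());
assert!(!errors.is_empty()); // the unterminated string literal is reported here
```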
In the first commit I implemented this with iterators, but then decided that, since this crate exists specifically for `rust-analyzer` and all of its call sites are visible to us, it would be better to simplify the API to plain vectors.

This is currently WIP, because I want to add tests for the error messages generated by the lexer.
I'd like to hear your thoughts on how to define these tests in the `ra_syntax/test-data` dir.

Related issues: #223 

**[UPD]**

After the PR review, the API was simplified:
```rust
pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>);
// Neither lex function checks for unescape errors
pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)>;
pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind>;

// This will be removed in the next PR in favour of simplifying `SyntaxError` to `(String, TextRange)`
pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }

// This is private, but may be made public if demand for it arises in the future (principle of least privilege)
fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)>;
```
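
A minimal usage sketch of the simplified API (the input strings and expected kinds are my own illustration, not taken from the PR):

```rust
use ra_syntax::{lex_single_valid_syntax_kind, tokenize, SyntaxKind};

// Errors are now returned in a separate vector rather than being paired
// with each token.
let (tokens, errors) = tokenize("let s = \"unterminated");
assert!(!tokens.is_empty());
assert!(!errors.is_empty()); // e.g. the unterminated string literal

// `lex_single_valid_syntax_kind` answers: "is this exactly one
// well-formed token, and of what kind?"
assert_eq!(lex_single_valid_syntax_kind("foo"), Some(SyntaxKind::IDENT));
assert_eq!(lex_single_valid_syntax_kind("foo bar"), None); // more than one token
```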

Co-authored-by: Veetaha <gerzoh1@gmail.com>
bors[bot] and Veetaha authored Feb 3, 2020
2 parents b090ee5 + a3e5663 commit 918547d
Showing 160 changed files with 798 additions and 172 deletions.
**crates/ra_ide/src/references/rename.rs** (12 changes: 6 additions & 6 deletions)

```diff
@@ -2,7 +2,9 @@
 use hir::ModuleSource;
 use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt};
-use ra_syntax::{algo::find_node_at_offset, ast, tokenize, AstNode, SyntaxKind, SyntaxNode};
+use ra_syntax::{
+    algo::find_node_at_offset, ast, lex_single_valid_syntax_kind, AstNode, SyntaxKind, SyntaxNode,
+};
 use ra_text_edit::TextEdit;
 
 use crate::{
@@ -17,11 +19,9 @@ pub(crate) fn rename(
     position: FilePosition,
     new_name: &str,
 ) -> Option<RangeInfo<SourceChange>> {
-    let tokens = tokenize(new_name);
-    if tokens.len() != 1
-        || (tokens[0].kind != SyntaxKind::IDENT && tokens[0].kind != SyntaxKind::UNDERSCORE)
-    {
-        return None;
+    match lex_single_valid_syntax_kind(new_name)? {
+        SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (),
+        _ => return None,
     }
 
     let parse = db.parse(position.file_id);
```
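
The new check in `rename` reduces to a small predicate; a sketch for clarity (the helper name `is_valid_rename_target` is mine, not from the PR):

```rust
use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

/// A rename target must lex to exactly one valid token, and that token
/// must be an identifier or `_`.
fn is_valid_rename_target(new_name: &str) -> bool {
    match lex_single_valid_syntax_kind(new_name) {
        Some(SyntaxKind::IDENT) | Some(SyntaxKind::UNDERSCORE) => true,
        _ => false,
    }
}
```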
**crates/ra_mbe/src/subtree_source.rs** (7 changes: 4 additions & 3 deletions)

```diff
@@ -1,7 +1,7 @@
 //! FIXME: write short doc here
 use ra_parser::{Token, TokenSource};
-use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*, T};
+use ra_syntax::{lex_single_valid_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
 use std::cell::{Cell, Ref, RefCell};
 use tt::buffer::{Cursor, TokenBuffer};
@@ -129,8 +129,9 @@ fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
 }
 
 fn convert_literal(l: &tt::Literal) -> TtToken {
-    let kind =
-        classify_literal(&l.text).map(|tkn| tkn.kind).unwrap_or_else(|| match l.text.as_ref() {
+    let kind = lex_single_valid_syntax_kind(&l.text)
+        .filter(|kind| kind.is_literal())
+        .unwrap_or_else(|| match l.text.as_ref() {
             "true" => T![true],
             "false" => T![false],
             _ => panic!("Fail to convert given literal {:#?}", &l),
```
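
One subtlety here: `true` and `false` lex as keyword kinds rather than literal kinds, which is why the fallback `match` arms are still needed. A hedged illustration (kind names are from `ra_syntax`'s generated `SyntaxKind`):

```rust
use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

// Numbers lex to a literal kind and pass the `is_literal()` filter...
assert_eq!(lex_single_valid_syntax_kind("92"), Some(SyntaxKind::INT_NUMBER));

// ...but `true` lexes to a keyword kind, so it falls through to the
// explicit match arms in `convert_literal`.
assert_eq!(lex_single_valid_syntax_kind("true"), Some(SyntaxKind::TRUE_KW));
assert!(!SyntaxKind::TRUE_KW.is_literal());
```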
**crates/ra_syntax/src/algo.rs** (2 changes: 1 addition & 1 deletion)

```diff
@@ -81,7 +81,7 @@ impl TreeDiff {
 /// Specifically, returns a map whose keys are descendants of `from` and values
 /// are descendants of `to`, such that `replace_descendants(from, map) == to`.
 ///
-/// A trivial solution is a singletom map `{ from: to }`, but this function
+/// A trivial solution is a singleton map `{ from: to }`, but this function
 /// tries to find a more fine-grained diff.
 pub fn diff(from: &SyntaxNode, to: &SyntaxNode) -> TreeDiff {
     let mut buf = FxHashMap::default();
```
**crates/ra_syntax/src/lib.rs** (4 changes: 3 additions & 1 deletion)

```diff
@@ -41,7 +41,9 @@ use crate::syntax_node::GreenNode;
 pub use crate::{
     algo::InsertPosition,
     ast::{AstNode, AstToken},
-    parsing::{classify_literal, tokenize, Token},
+    parsing::{
+        lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
+    },
     ptr::{AstPtr, SyntaxNodePtr},
     syntax_error::{Location, SyntaxError, SyntaxErrorKind},
     syntax_node::{
```
**crates/ra_syntax/src/parsing.rs** (18 changes: 13 additions & 5 deletions)

```diff
@@ -7,15 +7,23 @@ mod text_tree_sink;
 mod reparsing;
 
 use crate::{syntax_node::GreenNode, SyntaxError};
+use text_token_source::TextTokenSource;
+use text_tree_sink::TextTreeSink;
 
-pub use self::lexer::{classify_literal, tokenize, Token};
+pub use lexer::*;
 
 pub(crate) use self::reparsing::incremental_reparse;
 
 pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
-    let tokens = tokenize(&text);
-    let mut token_source = text_token_source::TextTokenSource::new(text, &tokens);
-    let mut tree_sink = text_tree_sink::TextTreeSink::new(text, &tokens);
+    let (tokens, lexer_errors) = tokenize(&text);
+
+    let mut token_source = TextTokenSource::new(text, &tokens);
+    let mut tree_sink = TextTreeSink::new(text, &tokens);
+
     ra_parser::parse(&mut token_source, &mut tree_sink);
-    tree_sink.finish()
+
+    let (tree, mut parser_errors) = tree_sink.finish();
+    parser_errors.extend(lexer_errors);
+
+    (tree, parser_errors)
 }
```
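
The effect is that lexer errors now surface through the same channel as parser errors. A minimal sketch via the public entry point (assuming `SourceFile::parse`, which wraps `parse_text`; the input string is my own example):

```rust
use ra_syntax::SourceFile;

let parse = SourceFile::parse("fn main() { \"unterminated }");
// The error list now contains lexer errors (the unterminated string)
// merged with any parser errors.
assert!(!parse.errors().is_empty());
```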