diff --git a/Cargo.lock b/Cargo.lock index b564d50..18e699a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,7 +56,7 @@ dependencies = [ [[package]] name = "forth-lexer" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "nom", diff --git a/Cargo.toml b/Cargo.toml index 7c6f9c0..9eff8e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,6 @@ anyhow = "1.0.71" thiserror = "1.0.40" [dependencies.forth-lexer] -version = "0.1.0" +version = "0.1.1" path = "lib/forth-lexer" features = ["ropey"] diff --git a/lib/forth-lexer/src/parser.rs b/lib/forth-lexer/src/parser.rs index dbca66c..94399c8 100644 --- a/lib/forth-lexer/src/parser.rs +++ b/lib/forth-lexer/src/parser.rs @@ -10,6 +10,7 @@ pub struct Lexer<'a> { position: usize, read_position: usize, ch: char, + raw: &'a str, input: Peekable>, } @@ -20,6 +21,7 @@ impl<'a> Lexer<'a> { read_position: 0, ch: '0', input: input.chars().peekable(), + raw: input, }; lex.read_char(); @@ -32,34 +34,29 @@ impl<'a> Lexer<'a> { self.ch = '\0'; } - pub fn here(&self) -> Data - where - T: Default, - { + pub fn here(&self) -> Data<'a> { Data { start: self.position, end: self.position, - value: T::default(), + value: "", } } - pub fn next_token(&mut self) -> Result { + pub fn next_token(&mut self) -> Result, LexError> { self.skip_whitespace(); let tok = match self.ch { ':' => { - let mut dat = self.here::(); - dat.value = ':'; + let mut dat = self.here(); + dat.value = &self.raw[self.position..self.read_position]; Token::Colon(dat) } ';' => { - let mut dat = self.here::(); - dat.value = ';'; + let mut dat = self.here(); + dat.value = &self.raw[self.position..self.read_position]; dat.end = dat.start + 1; Token::Semicolon(dat) } - //TODO: comments - //TODO: strings '%' => { if self.peek_char().is_digit(2) { let ident = self.read_number(); @@ -88,21 +85,21 @@ impl<'a> Lexer<'a> { } } '\'' => { + let begin = self.position; if !self.peek_char().is_whitespace() { self.read_char(); if self.peek_char() == '\'' { - let num = self.ch; self.read_char(); - let number = Data:: { + let number = Data { start: self.position - 2, end: self.position + 1, - value: format!("'{}'", num), + value: &self.raw[begin..(self.position + 1)], }; Token::Number(number) } else { let mut ident = self.read_ident(); ident.start -= 1; - ident.value = format!("{}{}", "'", ident.value); + ident.value = &self.raw[begin..self.position]; Token::Word(ident) } } else { @@ -142,8 +139,8 @@ impl<'a> Lexer<'a> { } } '\0' => { - let mut dat = self.here::(); - dat.value = '\0'; + let mut dat = self.here(); + dat.value = "\0"; Token::Eof(dat) } _ => { @@ -181,7 +178,7 @@ impl<'a> Lexer<'a> { } } - fn read_comment_to(&mut self, to: char) -> Data { + fn read_comment_to(&mut self, to: char) -> Data<'a> { let start = self.position; let mut value = String::new(); while self.ch != to { @@ -193,28 +190,28 @@ impl<'a> Lexer<'a> { self.read_char(); } - Data:: { + Data { start, end: self.position, - value, + value: &self.raw[start..self.position], } } - fn read_ident(&mut self) -> Data { + fn read_ident(&mut self) -> Data<'a> { let start = self.position; let mut value = String::new(); while !self.ch.is_whitespace() && self.ch != '\0' { value.push(self.ch); self.read_char(); } - Data:: { + Data { start, end: self.position, - value, + value: &self.raw[start..self.position], } } - fn read_number(&mut self) -> Data { + fn read_number(&mut self) -> Data<'a> { let start = self.position; let mut value = String::new(); //TODO: parse legal forth numbers @@ -228,14 +225,14 @@ impl<'a> Lexer<'a> { value.push(self.ch); self.read_char(); } - Data:: { + Data { start, end: self.position, - value, + value: &self.raw[start..self.position], } } - pub fn parse(&mut self) -> Vec { + pub fn parse(&mut self) -> Vec> { let mut tokens = vec![]; while let Ok(tok) = self.next_token() { match tok { @@ -243,7 +240,7 @@ impl<'a> Lexer<'a> { break; } _ => { - tokens.push(tok); + tokens.push(tok.clone()); } } } @@ -261,13 +258,13 @@ mod tests { let mut lexer = Lexer::new(": add1 ( n -- n )\n 1 + \\ adds one\n;"); let tokens = lexer.parse(); let expected = vec![ - Colon(Data::new(0, 0, ':')), - Word(Data::new(2, 6, "add1".into())), - Comment(Data::new(7, 17, "( n -- n )".into())), - Number(Data::new(20, 21, "1".into())), - Word(Data::new(22, 23, "+".into())), - Comment(Data::new(24, 34, "\\ adds one".into())), - Semicolon(Data::new(35, 36, ';')), + Colon(Data::new(0, 0, ":")), + Word(Data::new(2, 6, "add1")), + Comment(Data::new(7, 17, "( n -- n )")), + Number(Data::new(20, 21, "1")), + Word(Data::new(22, 23, "+")), + Comment(Data::new(24, 34, "\\ adds one")), + Semicolon(Data::new(35, 36, ";")), ]; assert_eq!(tokens, expected) } @@ -277,11 +274,11 @@ mod tests { let mut lexer = Lexer::new(": add1 1 + ;"); let tokens = lexer.parse(); let expected = vec![ - Colon(Data::new(0, 0, ':')), - Word(Data::new(2, 6, "add1".into())), - Number(Data::new(7, 8, "1".into())), - Word(Data::new(9, 10, "+".into())), - Semicolon(Data::new(11, 12, ';')), + Colon(Data::new(0, 0, ":")), + Word(Data::new(2, 6, "add1")), + Number(Data::new(7, 8, "1")), + Word(Data::new(9, 10, "+")), + Semicolon(Data::new(11, 12, ";")), ]; assert_eq!(tokens, expected) } @@ -291,11 +288,11 @@ mod tests { let mut lexer = Lexer::new("word \\ this is a comment\nword2 ( and this ) word3"); let tokens = lexer.parse(); let expected = vec![ - Word(Data::new(0, 4, "word".into())), - Comment(Data::new(5, 24, "\\ this is a comment".into())), - Word(Data::new(25, 30, "word2".into())), - Comment(Data::new(31, 43, "( and this )".into())), - Word(Data::new(44, 49, "word3".into())), + Word(Data::new(0, 4, "word")), + Comment(Data::new(5, 24, "\\ this is a comment")), + Word(Data::new(25, 30, "word2")), + Comment(Data::new(31, 43, "( and this )")), + Word(Data::new(44, 49, "word3")), ]; assert_eq!(tokens, expected) } @@ -305,9 +302,9 @@ mod tests { let mut lexer = Lexer::new("some\nwords here\0"); let tokens = lexer.parse(); let expected = vec![ - Word(Data::new(0, 4, "some".into())), - Word(Data::new(5, 10, "words".into())), - Word(Data::new(11, 15, "here".into())), + Word(Data::new(0, 4, "some")), + Word(Data::new(5, 10, "words")), + Word(Data::new(11, 15, "here")), ]; assert_eq!(tokens, expected) } @@ -316,7 +313,7 @@ mod tests { fn test_parse_number_literal() { let mut lexer = Lexer::new("12"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 2, "12".into()))]; + let expected = vec![Number(Data::new(0, 2, "12"))]; assert_eq!(tokens, expected) } @@ -324,7 +321,7 @@ mod tests { fn test_parse_number_oct() { let mut lexer = Lexer::new("&12"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 3, "&12".into()))]; + let expected = vec![Number(Data::new(0, 3, "&12"))]; assert_eq!(tokens, expected) } @@ -332,7 +329,7 @@ mod tests { fn test_parse_number_bin() { let mut lexer = Lexer::new("%0100101"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 8, "%0100101".into()))]; + let expected = vec![Number(Data::new(0, 8, "%0100101"))]; assert_eq!(tokens, expected); } @@ -343,7 +340,7 @@ mod tests { // %12345 is not a binary number let mut lexer = Lexer::new("%12345"); let tokens = lexer.parse(); - let expected = vec![Word(Data::new(0, 6, "%12345".into()))]; + let expected = vec![Word(Data::new(0, 6, "%12345"))]; assert_eq!(tokens, expected); } @@ -351,7 +348,7 @@ mod tests { fn test_parse_number_hex() { let mut lexer = Lexer::new("$FfAaDd"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 7, "$FfAaDd".into()))]; + let expected = vec![Number(Data::new(0, 7, "$FfAaDd"))]; assert_eq!(tokens, expected) } @@ -359,7 +356,7 @@ mod tests { fn test_parse_number_0xhex() { let mut lexer = Lexer::new("0xFE"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 4, "0xFE".into()))]; + let expected = vec![Number(Data::new(0, 4, "0xFE"))]; assert_eq!(tokens, expected) } @@ -367,7 +364,7 @@ mod tests { fn test_parse_number_char() { let mut lexer = Lexer::new("'c'"); let tokens = lexer.parse(); - let expected = vec![Number(Data::new(0, 3, "'c'".into()))]; + let expected = vec![Number(Data::new(0, 3, "'c'"))]; assert_eq!(tokens, expected) } @@ -375,7 +372,7 @@ mod tests { fn test_parse_number_word() { let mut lexer = Lexer::new("word"); let tokens = lexer.parse(); - let expected = vec![Word(Data::new(0, 4, "word".into()))]; + let expected = vec![Word(Data::new(0, 4, "word"))]; assert_eq!(tokens, expected) } @@ -389,7 +386,7 @@ mod tests { let word2 = if let Some(Token::Word(word)) = tokens.get(1) { word.to_owned() } else { - Data::::default() + Data::default() }; let x = rope.slice(&word2); assert_eq!("word2", word2.value); diff --git a/lib/forth-lexer/src/token.rs b/lib/forth-lexer/src/token.rs index ad9eaec..3207a52 100644 --- a/lib/forth-lexer/src/token.rs +++ b/lib/forth-lexer/src/token.rs @@ -1,19 +1,19 @@ use std::{fmt::Display, ops::RangeBounds}; #[derive(Debug, PartialEq, Default, Copy, Clone)] -pub struct Data { +pub struct Data<'a> { pub start: usize, pub end: usize, - pub value: T, + pub value: &'a str, } -impl Data { - pub fn new(start: usize, end: usize, value: T) -> Data { - Data:: { start, end, value } +impl<'a> Data<'a> { + pub fn new(start: usize, end: usize, value: &'a str) -> Data { + Data { start, end, value } } } -impl RangeBounds for &Data { +impl<'a> RangeBounds for &Data<'a> { fn start_bound(&self) -> std::ops::Bound<&usize> { std::ops::Bound::Included(&self.start) } @@ -24,18 +24,33 @@ impl RangeBounds for &Data { } #[derive(Debug, PartialEq, Clone)] -pub enum Token { - Illegal(Data), - Eof(Data), - Colon(Data), - Semicolon(Data), - Word(Data), - Number(Data), - Comment(Data), - StackComment(Data), +pub enum Token<'a> { + Illegal(Data<'a>), + Eof(Data<'a>), + Colon(Data<'a>), + Semicolon(Data<'a>), + Word(Data<'a>), + Number(Data<'a>), + Comment(Data<'a>), + StackComment(Data<'a>), } -impl Display for Token { +impl<'a> Token<'a> { + pub fn get_data(&self) -> &Data<'a> { + match self { + Token::Illegal(dat) => dat, + Token::Eof(dat) => dat, + Token::Colon(dat) => dat, + Token::Semicolon(dat) => dat, + Token::Word(dat) => dat, + Token::Number(dat) => dat, + Token::Comment(dat) => dat, + Token::StackComment(dat) => dat, + } + } +} + +impl<'a> Display for Token<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Token::Illegal(_) => write!(f, ""), @@ -50,23 +65,19 @@ impl Display for Token { } } -impl From> for Token { - fn from(ch: Data) -> Self { - match ch.value { - ';' => Self::Semicolon(ch), - ':' => Self::Colon(ch), - '\0' => Self::Eof(ch), - _ => Self::Illegal(ch), - } - } -} - -impl From> for Token { - fn from(value: Data) -> Self { - if value.value.chars().all(|b| b.is_ascii_digit()) { - Self::Number(value) - } else { - Self::Word(value) +impl<'a> From> for Token<'a> { + fn from(value: Data<'a>) -> Self { + match value.value { + ";" => Self::Semicolon(value), + ":" => Self::Colon(value), + "\0" => Self::Eof(value), + _ => { + if value.value.chars().all(|b| b.is_ascii_digit()) { + Self::Number(value) + } else { + Self::Word(value) + } + } } } } diff --git a/src/error.rs b/src/error.rs index f125c85..c365e81 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,5 +1,6 @@ #[derive(thiserror::Error, Debug)] pub enum Error { + #[allow(dead_code)] #[error("Generic {0}")] Generic(String), #[error("SendError {0}")] diff --git a/src/main.rs b/src/main.rs index 0c20f3a..0355a9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,9 @@ mod utils; mod words; use crate::prelude::*; + +use crate::utils::data_to_position::ToPosition; +use crate::utils::find_variant_sublists_from_to::FindVariantSublistsFromTo; use crate::utils::ropey_get_ix::GetIx; use crate::utils::ropey_word_at_char::WordAtChar; use crate::words::{Word, Words}; @@ -11,19 +14,20 @@ use crate::words::{Word, Words}; use std::collections::HashMap; use std::ffi::OsStr; use std::fs; +use std::mem::discriminant; use std::path::Path; use forth_lexer::parser::Lexer; +use forth_lexer::token::{Data, Token}; use lsp_types::request::{Completion, HoverRequest}; use lsp_types::{ request::GotoDefinition, GotoDefinitionResponse, InitializeParams, ServerCapabilities, }; use lsp_types::{ - CompletionItem, CompletionResponse, Hover, Location, OneOf, Position, Range, - TextDocumentSyncKind, Url, + CompletionItem, CompletionResponse, Hover, Location, OneOf, Range, TextDocumentSyncKind, Url, }; -use lsp_server::{Connection, ExtractError, Message, Notification, Request, RequestId, Response}; +use lsp_server::{Connection, Message, Notification, Request, RequestId, Response}; use ropey::Rope; fn main() -> Result<()> { @@ -103,6 +107,7 @@ fn main_loop(connection: Connection, params: serde_json::Value) -> Result<()> { }; let result = if word.len_chars() > 0 { let mut ret = vec![]; + //TODO: let candidates = data.words.iter().filter(|x| { x.token .to_lowercase() @@ -243,56 +248,21 @@ fn main_loop(connection: Connection, params: serde_json::Value) -> Result<()> { let progn = rope.to_string(); let mut lexer = Lexer::new(progn.as_str()); let tokens = lexer.parse(); - let bind1 = tokens.clone(); - let mut start_line = 0u32; - let mut start_char = 0u32; - let mut end_line = 0u32; - let mut end_char = 0u32; - let mut found_defn = false; - for (x, y) in tokens.into_iter().zip(bind1.iter().skip(1)) { - if let forth_lexer::token::Token::Colon(x_dat) = x { - if let forth_lexer::token::Token::Word(y_dat) = y { - if y_dat.value.eq_ignore_ascii_case(word.as_str()) { - eprintln!("Found word defn {:?}", y_dat); - start_line = rope.char_to_line(x_dat.start) as u32; - start_char = (x_dat.start - - rope.line_to_char(start_line as usize)) - as u32; - found_defn = true; - } else { - found_defn = false; - } - } - } - if let forth_lexer::token::Token::Semicolon(y_dat) = y { - if found_defn { - eprintln!("found end {:?}", y_dat); - end_line = rope.char_to_line(y_dat.end) as u32; - end_char = (y_dat.end - - rope.line_to_char(end_line as usize)) - as u32; - break; - } - } - } - eprintln!("GOT HERE"); - if (start_line, start_char) != (end_line, end_char) { - eprintln!( - "{} {} {} {}", - start_line, start_char, end_line, end_char - ); + + for result in tokens.find_variant_sublists_from_to( + discriminant(&Token::Colon(Data::default())), + discriminant(&Token::Semicolon(Data::default())), + ) { + eprintln!("{:?}", result); + let tok = Token::Illegal(Data::new(0, 0, "")); + let begin = result.first().unwrap_or(&tok).get_data(); + let end = result.last().unwrap_or(&tok).get_data(); if let Ok(uri) = Url::from_file_path(file) { ret.push(Location { uri, range: Range { - start: Position { - line: start_line, - character: start_char, - }, - end: Position { - line: end_line, - character: end_char, - }, + start: begin.to_position_start(rope), + end: end.to_position_end(rope), }, }); } else { diff --git a/src/prelude.rs b/src/prelude.rs index d584f18..8c3ed98 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,4 +1,5 @@ pub use crate::error::Error; pub type Result = core::result::Result; -pub struct W(pub T); +// Usual wrapper, but evidently not needed this time +// pub struct W(pub T); diff --git a/src/utils/data_to_position.rs b/src/utils/data_to_position.rs new file mode 100644 index 0000000..f47217d --- /dev/null +++ b/src/utils/data_to_position.rs @@ -0,0 +1,29 @@ +use forth_lexer::token::Data; +use lsp_types::Position; +pub trait ToPosition { + fn to_position_start(&self, rope: &ropey::Rope) -> Position; + fn to_position_end(&self, rope: &ropey::Rope) -> Position; +} + +impl<'a> ToPosition for Data<'a> { + fn to_position_start(&self, rope: &ropey::Rope) -> Position { + let (start_line, start_char) = to_line_char(self.start, rope); + Position { + line: start_line, + character: start_char, + } + } + fn to_position_end(&self, rope: &ropey::Rope) -> Position { + let (start_line, start_char) = to_line_char(self.end, rope); + Position { + line: start_line, + character: start_char, + } + } +} + +fn to_line_char(chix: usize, rope: &ropey::Rope) -> (u32, u32) { + let start_line = rope.char_to_line(chix) as u32; + let start_char = (chix - rope.line_to_char(start_line as usize)) as u32; + (start_line, start_char) +} diff --git a/src/utils/find_variant_sublists.rs b/src/utils/find_variant_sublists.rs new file mode 100644 index 0000000..dc509c6 --- /dev/null +++ b/src/utils/find_variant_sublists.rs @@ -0,0 +1,53 @@ +use std::mem::{discriminant, Discriminant}; + +pub trait FindVariantSublists { + fn find_variant_sublists(&self, find: Vec>) -> Vec<&[T]>; +} + +impl FindVariantSublists for Vec { + fn find_variant_sublists(&self, find: Vec>) -> Vec<&[T]> { + let mut ret = vec![]; + for i in 0..self.len() { + let disc = (0..find.len()) + .filter(|j| i + j < self.len()) + .map(|j| discriminant(&self[i + j]) == find[j]) + .all(|x| x); + if disc { + ret.push(&self[i..(i + find.len())]) + } + } + ret + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq, PartialOrd)] + enum Test { + One(&'static str), + Two(&'static str), + Three(&'static str), + Four(&'static str), + } + + #[test] + fn test() { + use Test::*; + let enums = vec![ + One("one"), + Two("two"), + Three("three"), + Four("four"), + One("1"), + Two("2"), + ]; + let results = enums.find_variant_sublists(vec![ + discriminant(&Test::One("")), + discriminant(&Test::Two("")), + ]); + let expected: Vec<&[Test]> = vec![&enums[0..2], &enums[4..6]]; + assert_eq!(expected, results); + } +} diff --git a/src/utils/find_variant_sublists_from_to.rs b/src/utils/find_variant_sublists_from_to.rs new file mode 100644 index 0000000..4ede01a --- /dev/null +++ b/src/utils/find_variant_sublists_from_to.rs @@ -0,0 +1,64 @@ +use std::mem::{discriminant, Discriminant}; + +pub trait FindVariantSublistsFromTo { + fn find_variant_sublists_from_to( + &self, + from: Discriminant, + to: Discriminant, + ) -> Vec<&[T]>; +} + +impl FindVariantSublistsFromTo for Vec { + fn find_variant_sublists_from_to( + &self, + from: Discriminant, + to: Discriminant, + ) -> Vec<&[T]> { + let mut ret = vec![]; + for i in 0..self.len() { + if discriminant(&self[i]) == from { + for j in (i + 1)..self.len() { + if discriminant(&self[j]) == to { + ret.push(&self[i..(j + 1)]); + break; + } + } + } + } + ret + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq, PartialOrd)] + enum Test { + One(&'static str), + Two(&'static str), + Three(&'static str), + Four(&'static str), + } + + #[test] + fn test() { + use Test::*; + let enums = vec![ + One("one"), + Two("two"), + Three("three"), + Four("four"), + One("1"), + Two("2"), + Three("3"), + Four("4"), + ]; + let results = enums.find_variant_sublists_from_to( + discriminant(&Test::Two("")), + discriminant(&Test::Four("")), + ); + let expected: Vec<&[Test]> = vec![&enums[1..4], &enums[5..8]]; + assert_eq!(expected, results); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 16a8ad2..d5315cb 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,4 +1,8 @@ +#[allow(unused_imports)] use crate::prelude::*; +pub mod data_to_position; +pub mod find_variant_sublists; +pub mod find_variant_sublists_from_to; pub mod ropey_get_ix; pub mod ropey_word_at_char;