diff --git a/.gitmodules b/.gitmodules index 7c33d36..3b09cc5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "tremor-www-docs"] - path = tremor-www-docs - url = https://github.com/wayfair-tremor/tremor-www-docs.git + path = tremor-www-docs + url = https://github.com/wayfair-tremor/tremor-www-docs.git +[submodule "tremor-runtime"] + path = tremor-runtime + url = https://github.com/wayfair-tremor/tremor-runtime.git diff --git a/Cargo.lock b/Cargo.lock index be50a7e..c695538 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -315,11 +315,21 @@ dependencies = [ [[package]] name = "codespan" -version = "0.8.0" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c7ddb4fbbadc95403d087ced01b50729650c0fba279e3711c6d4f94c844c449" +dependencies = [ + "codespan-reporting", +] + +[[package]] +name = "codespan-reporting" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52899426b69706219a1aaee2e95868fd01a0bd8006bb163f069578a0af5b5bb2" +checksum = "d5680df8512a0e825b9edc41b619ec88b367644d394b4d862a04b4d6387c65da" dependencies = [ - "unicode-segmentation", + "termcolor", + "unicode-width", ] [[package]] @@ -828,7 +838,7 @@ dependencies = [ "docopt", "ena", "itertools", - "lalrpop-util 0.18.1", + "lalrpop-util", "petgraph", "regex", "regex-syntax", @@ -840,12 +850,6 @@ dependencies = [ "unicode-xid 0.2.0", ] -[[package]] -name = "lalrpop-util" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c277d18683b36349ab5cd030158b54856fca6bb2d5dc5263b06288f486958b7c" - [[package]] name = "lalrpop-util" version = "0.18.1" @@ -910,15 +914,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "libmath" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfd3416934a853ae80d5c3b006f632dfcbaf320300c5167e88a469e9ac214502" -dependencies = [ - "rand 0.3.23", -] - [[package]] name = "log" version = "0.4.8" @@ -1217,29 +1212,6 @@ dependencies = [ "proc-macro2 1.0.12", ] -[[package]] -name = "rand" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" -dependencies = [ - "libc", - "rand 0.4.6", -] - -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - [[package]] name = "rand" version = "0.6.5" @@ -1495,6 +1467,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = "0.9.0" @@ -1832,7 +1813,7 @@ dependencies = [ [[package]] name = "tremor-language-server" -version = "0.7.4" +version = "0.8.0" dependencies = [ "bincode", "clap", @@ -1843,15 +1824,16 @@ dependencies = [ "tokio", "tower-lsp", "tremor-script", + "walkdir", ] [[package]] name = "tremor-script" -version = "0.7.7" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a03beb98bc0e9b6fdf6df63485a2a8d767ffae0040795af60c16e38936399d86" +checksum = "e5c73b3d93be6b6d6e6a9510927a7dc28f28b5632f04a04e5620d19825e00a4c" dependencies = [ - "base64 0.11.0", + "base64 0.12.1", "chrono", "cidr-utils", "clap", @@ -1867,15 +1849,14 @@ dependencies = [ "hostname", "jumphash", "lalrpop", - "lalrpop-util 0.17.2", - "libmath", + "lalrpop-util", "matches", + "percent-encoding", "rand 0.7.3", "regex", "rental", "serde", "serde_derive", - "serde_json", "simd-json", "sketches-ddsketch", "termcolor", @@ -1910,12 +1891,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "unicode-segmentation" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" - [[package]] name = "unicode-width" version = "0.1.7" @@ -1976,6 +1951,17 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index d62d23c..3cec968 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tremor-language-server" -version = "0.7.4" +version = "0.8.0" description = "Tremor Language Server (Trill)" authors = ["The Tremor Team"] edition = "2018" @@ -9,8 +9,10 @@ license = "Apache-2.0" [build-dependencies] bincode = "1.2.1" regex = "1.3" -#tremor-script = { "path" = "../tremor-runtime/tremor-script" } # for local use -tremor-script = "0.7" +walkdir = "2.3" + +# tremor deps +tremor-script = "0.8.1" [dependencies] @@ -24,5 +26,4 @@ tokio = { version = "0.2", features = ["io-std", "macros", "sync"] } tower-lsp = "0.11" # tremor deps -#tremor-script = { "path" = "../tremor-runtime/tremor-script" } # for local use -tremor-script = "0.7" +tremor-script = "0.8.1" diff --git a/build.rs b/build.rs index d5cf69d..4b5124b 100644 --- a/build.rs +++ b/build.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use bincode; use regex::Regex; use std::borrow::Borrow; // used instead of halfbrown::Hashmap because bincode can't deserialize that @@ -23,13 +22,20 @@ use std::fs::{self, File}; use std::io::{BufReader, BufWriter, Read}; use std::path::{Path, PathBuf}; use std::process::{self, Command}; +use walkdir::WalkDir; +use tremor_script::ast::FnDoc; +// TODO get rid of this once we can switch to FnDoc for aggregate functions too use tremor_script::docs::{FunctionDoc, FunctionSignatureDoc}; +use tremor_script::path::ModulePath; +use tremor_script::{registry, Script}; const LANGUAGES: &[&str] = &["tremor-script", "tremor-query"]; const BASE_DOCS_DIR: &str = "tremor-www-docs/docs"; +const TREMOR_STDLIB_DIR: &str = "tremor-runtime/tremor-script/lib"; + /* fn get_test_function_doc(language_name: &str) -> (String, FunctionDoc) { let test_func = match language_name { @@ -59,6 +65,75 @@ fn get_test_function_doc(language_name: &str) -> (String, FunctionDoc) { } */ +fn parse_tremor_stdlib() -> HashMap { + let mut function_docs: HashMap = HashMap::new(); + + for entry in WalkDir::new(TREMOR_STDLIB_DIR) { + let entry = entry.unwrap(); + let path = entry.path(); + + if path.is_file() { + println!("Parsing tremor file: {}", path.display()); + + let module_file = File::open(Path::new(&path)).unwrap(); + let mut buffered_reader = BufReader::new(module_file); + + let mut module_text = String::new(); + buffered_reader.read_to_string(&mut module_text).unwrap(); + + let module_path = ModulePath::load(); + let registry = registry::registry(); + + match Script::parse( + &module_path, + &path.to_string_lossy(), + module_text, + ®istry, + ) { + Ok(script) => { + let docs = script.docs(); + + // module name here is "self" always so can't use it right now + // TODO fix this? + //if let Some(module_doc) = &docs.module { + // println!("Found module: {}", module_doc.name); + //} + + // filenames match module name here + let module_name = path.file_stem().unwrap().to_string_lossy(); + println!("Found module: {}", module_name); + + for fndoc in &docs.fns { + let function_doc = fndoc_to_function_doc(fndoc, &module_name); + println!("Found function: {}", function_doc.signature); + + function_docs + .insert(function_doc.signature.full_name.clone(), function_doc); + } + } + Err(e) => eprintln!("Error parsing file {}: {:?}", path.display(), e), + } + } + } + + function_docs +} + +fn fndoc_to_function_doc(fndoc: &FnDoc, module_name: &str) -> FunctionDoc { + let signature_doc = FunctionSignatureDoc { + full_name: format!("{}::{}", module_name, fndoc.name), + args: fndoc.args.iter().map(|s| s.to_string()).collect(), + result: String::new(), // TODO adopt comment convention to represent result type + }; + + FunctionDoc { + signature: signature_doc, + description: fndoc.doc.as_ref().unwrap_or(&String::new()).to_string(), + summary: None, // TODO add first line? + examples: None, // TODO parse out stuff in code blocks + } +} + fn parse_raw_function_docs(language_name: &str) -> HashMap { let mut function_docs: HashMap = HashMap::new(); @@ -76,7 +151,7 @@ fn parse_raw_function_docs(language_name: &str) -> HashMap //dbg!(path.ends_with("md")); //if path.is_file() && path.ends_with(".md") { if path.is_file() && path.to_str().unwrap().ends_with(".md") { - println!("Parsing markdown file: {:?}", path); + println!("Parsing markdown file: {}", path.display()); let module_doc_file = File::open(Path::new(&path)).unwrap(); //File::open(Path::new(&function_docs_path).join(module_doc_filename)).unwrap(); @@ -84,7 +159,9 @@ fn parse_raw_function_docs(language_name: &str) -> HashMap let mut buffered_reader = BufReader::new(module_doc_file); let mut module_doc_contents = String::new(); - buffered_reader.read_to_string(&mut module_doc_contents); + buffered_reader + .read_to_string(&mut module_doc_contents) + .unwrap(); // test // TODO remove @@ -116,7 +193,7 @@ fn to_function_doc(raw_doc: &str) -> FunctionDoc { println!("Found function: {}", &caps[0]); FunctionSignatureDoc { full_name: caps[1].trim().to_string(), - args: caps[2].split(",").map(|s| s.trim().to_string()).collect(), + args: caps[2].split(',').map(|s| s.trim().to_string()).collect(), result: caps[3].trim().to_string(), } } @@ -149,7 +226,13 @@ fn bindump_function_docs(language_name: &str, dest_dir: &str) { language_name, dest_path.to_str().unwrap() ); - bincode::serialize_into(&mut f, &parse_raw_function_docs(language_name)).unwrap(); + + let function_docs = match language_name { + "tremor-script" => parse_tremor_stdlib(), + _ => parse_raw_function_docs(language_name), + }; + + bincode::serialize_into(&mut f, &function_docs).unwrap(); } // lifted from https://github.com/fede1024/rust-rdkafka/blob/v0.23.0/rdkafka-sys/build.rs#L7 @@ -179,7 +262,7 @@ where ); let ret = Command::new(cmd).current_dir(dir).args(args).status(); match ret.map(|status| (status.success(), status.code())) { - Ok((true, _)) => return, + Ok((true, _)) => (), Ok((false, Some(c))) => panic!("Command failed with error code {}", c), Ok((false, None)) => panic!("Command got killed"), Err(e) => panic!("Command failed with error: {}", e), @@ -190,7 +273,9 @@ fn main() { // Tremor docs repo is needed right now for generating the function documentation // as well as module completion items. Once we store those items in a structured // way as part of the tremor-script codebase, this won't be needed. - if !Path::new("tremor-www-docs/LICENSE").exists() { + if !(Path::new("tremor-www-docs/LICENSE").exists() + && Path::new("tremor-runtime/LICENSE").exists()) + { eprintln!("Setting up docs submodule..."); run_command_or_fail(".", "git", &["submodule", "update", "--init"]); } diff --git a/src/backend.rs b/src/backend.rs index 16abe81..0b257a2 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -45,7 +45,7 @@ impl Backend { } async fn update(&self, uri: Url, text: &str) { - // TODO implement update as well. also remove unwrap + // TODO implement update as well. also remove unwraps self.state.lock().await.insert( uri, DocumentState { @@ -56,12 +56,12 @@ impl Backend { // LSP helper functions - fn get_diagnostics(&self, text: &str) -> Vec { + fn get_diagnostics(&self, uri: &Url, text: &str) -> Vec { file_dbg("get_diagnostics", text); let mut diagnostics = Vec::new(); - if let Some(errors) = self.language.parse_errors(text) { + if let Some(errors) = self.language.parse_errors(uri, text) { for e in &errors { let range = Range { start: lsp_utils::to_lsp_position(&e.start()), @@ -89,13 +89,13 @@ impl Backend { diagnostics } - fn get_completions(&self, text: &str, position: Position) -> Vec { + fn get_completions(&self, uri: &Url, text: &str, position: Position) -> Vec { let pre_position = Position { line: position.line, character: position.character - 1, }; - if let Some(tokens) = self.language.tokenize(text) { + if let Some(tokens) = self.language.tokenize(uri, text) { if let Some(token) = lsp_utils::get_token(tokens, pre_position) { file_dbg("get_completions_token", &token); // TODO eliminate the need for this by improving get_token() @@ -105,7 +105,7 @@ impl Backend { file_dbg("get_completions_module_name", module_name); return self .language - .functions(module_name) + .functions(uri, module_name) .iter() .map(|function_name| { let mut detail = None; @@ -113,7 +113,7 @@ impl Backend { let mut insert_text = None; if let Some(function_doc) = self .language - .function_doc(&format!("{}::{}", module_name, function_name)) + .function_doc(uri, &format!("{}::{}", module_name, function_name)) { file_dbg("get_completions_function_doc", &function_doc.description); detail = Some(function_doc.signature.to_string()); @@ -151,12 +151,17 @@ impl Backend { vec![] } - fn get_hover_content(&self, text: &str, position: Position) -> Option { + fn get_hover_content( + &self, + uri: &Url, + text: &str, + position: Position, + ) -> Option { // TODO merge the repeated tokenize operation with get_completions()? - if let Some(tokens) = self.language.tokenize(text) { + if let Some(tokens) = self.language.tokenize(uri, text) { if let Some(token) = lsp_utils::get_token(tokens, position) { file_dbg("get_hover_content_token", &token); - if let Some(function_doc) = self.language.function_doc(&token) { + if let Some(function_doc) = self.language.function_doc(uri, &token) { file_dbg("get_hover_content_function_doc", &function_doc.description); return Some(MarkupContent { kind: MarkupKind::Markdown, @@ -271,7 +276,8 @@ impl LanguageServer for Backend { // TODO cleanup if let Ok(text) = fs::read_to_string(path) { self.update(uri.clone(), &text).await; - client.publish_diagnostics(uri, self.get_diagnostics(&text), None); + let d = self.get_diagnostics(&uri, &text); + client.publish_diagnostics(uri, d, None); } } } @@ -282,7 +288,7 @@ impl LanguageServer for Backend { let uri = params.text_document.uri; let text = ¶ms.content_changes[0].text; self.update(uri.clone(), text).await; - client.publish_diagnostics(uri, self.get_diagnostics(text), None); + client.publish_diagnostics(uri.clone(), self.get_diagnostics(&uri, text), None); } async fn did_close(&self, client: &Client, params: DidCloseTextDocumentParams) { @@ -301,8 +307,10 @@ impl LanguageServer for Backend { let doc = state .get(¶ms.text_document_position.text_document.uri) .unwrap(); + let uri = params.text_document_position.text_document.uri; Ok(Some(CompletionResponse::Array(self.get_completions( + &uri, &doc.text, params.text_document_position.position, )))) @@ -313,12 +321,15 @@ impl LanguageServer for Backend { // TODO remove unwraps // TODO bake state lookup in self let state = self.state.lock().await; - let doc = state - .get(¶ms.text_document_position_params.text_document.uri) - .unwrap(); + let uri = params.text_document_position_params.text_document.uri; + let doc = state.get(&uri).unwrap(); let result = self - .get_hover_content(&doc.text, params.text_document_position_params.position) + .get_hover_content( + &uri, + &doc.text, + params.text_document_position_params.position, + ) .map(|hover_content| Hover { contents: HoverContents::Markup(hover_content), range: None, @@ -336,5 +347,5 @@ pub fn file_dbg(name: &str, content: &str) { let path = format!("/tmp/tremor_{}", name); let mut output = File::create(path).unwrap(); - write!(output, "{}", content); + write!(output, "{}", content).unwrap(); } diff --git a/src/language/prelude.rs b/src/language/prelude.rs index aea603f..f3809d5 100644 --- a/src/language/prelude.rs +++ b/src/language/prelude.rs @@ -13,6 +13,8 @@ // limitations under the License. pub use std::collections::HashMap; +pub use std::path::Path; +pub use tower_lsp::lsp_types::Url; pub use tremor_script::docs::FunctionDoc; pub use tremor_script::highlighter::Error; pub use tremor_script::registry; @@ -20,17 +22,17 @@ pub use tremor_script::registry; pub use tremor_script::lexer::{Token, TokenSpan, Tokenizer}; pub trait Language: Send + Sync { - fn parse_errors(&self, text: &str) -> Option>; + fn parse_errors(&self, uri: &Url, text: &str) -> Option>; - fn functions(&self, _module_name: &str) -> Vec { + fn functions(&self, _uri: &Url, _module_name: &str) -> Vec { vec![] } - fn function_doc(&self, _full_function_name: &str) -> Option<&FunctionDoc> { + fn function_doc(&self, _uri: &Url, _full_function_name: &str) -> Option<&FunctionDoc> { None } - fn tokenize<'input>(&self, text: &'input str) -> Option>> { + fn tokenize<'input>(&self, _uri: &Url, text: &'input str) -> Option>> { match Tokenizer::new(text).collect() { Ok(tokens) => Some(tokens), // TODO log error, or pass on as result diff --git a/src/language/query.rs b/src/language/query.rs index 8879a24..562a8c7 100644 --- a/src/language/query.rs +++ b/src/language/query.rs @@ -14,6 +14,7 @@ use crate::language::prelude::*; use crate::language::script::TremorScript; +use tremor_script::path::ModulePath; use tremor_script::query::Query; pub const LANGUAGE_NAME: &str = "tremor-query"; @@ -41,27 +42,33 @@ impl Default for TremorQuery { } impl Language for TremorQuery { - fn parse_errors(&self, text: &str) -> Option> { - match Query::parse(text, &self.registry, &self.aggr_registry) { + fn parse_errors(&self, uri: &Url, text: &str) -> Option> { + // FIXME .unwrap() should we path in something here? + let mut m = ModulePath::load(); + let file = uri.as_str().replace("file://", ""); + let p = Path::new(&file); + m.add(p.ancestors().nth(2).unwrap().to_str().unwrap().to_string()); + let cus = vec![]; + match Query::parse(&m, "", text, cus, &self.registry, &self.aggr_registry) { Ok(query) => Some(query.warnings.iter().map(|w| w.into()).collect()), Err(ref e) => Some(vec![e.into()]), } } - fn functions(&self, module_name: &str) -> Vec { + fn functions(&self, uri: &Url, module_name: &str) -> Vec { if let Some(module) = self.aggr_registry.find_module(module_name) { let mut vec: Vec = module.keys().cloned().collect(); vec.sort(); vec } else { // no agg functions found so try for script functions - self.tremor_script.functions(module_name) + self.tremor_script.functions(uri, module_name) } } - fn function_doc(&self, full_function_name: &str) -> Option<&FunctionDoc> { + fn function_doc(&self, uri: &Url, full_function_name: &str) -> Option<&FunctionDoc> { self.all_function_docs .get(full_function_name) - .or_else(|| self.tremor_script.function_doc(full_function_name)) + .or_else(|| self.tremor_script.function_doc(uri, full_function_name)) } } diff --git a/src/language/script.rs b/src/language/script.rs index dc96b10..952661b 100644 --- a/src/language/script.rs +++ b/src/language/script.rs @@ -13,6 +13,7 @@ // limitations under the License. use crate::language::prelude::*; +use tremor_script::path::ModulePath; use tremor_script::Script; pub const LANGUAGE_NAME: &str = "tremor-script"; @@ -34,14 +35,20 @@ impl Default for TremorScript { } impl Language for TremorScript { - fn parse_errors(&self, text: &str) -> Option> { - match Script::parse(text, &self.registry) { + fn parse_errors(&self, uri: &Url, text: &str) -> Option> { + // FIXME .unwrap() should we path in something here? + let mut m = ModulePath::load(); + let file = uri.as_str().replace("file://", ""); + let p = Path::new(&file); + m.add(p.ancestors().nth(2).unwrap().to_str().unwrap().to_string()); + let text = text.to_string(); + match Script::parse(&m, "", text, &self.registry) { Ok(script) => Some(script.warnings().iter().map(|w| w.into()).collect()), Err(ref e) => Some(vec![e.into()]), } } - fn functions(&self, module_name: &str) -> Vec { + fn functions(&self, _uri: &Url, module_name: &str) -> Vec { if let Some(module) = self.registry.find_module(module_name) { let mut vec: Vec = module.keys().cloned().collect(); vec.sort(); @@ -51,7 +58,7 @@ impl Language for TremorScript { } } - fn function_doc(&self, full_function_name: &str) -> Option<&FunctionDoc> { + fn function_doc(&self, _uri: &Url, full_function_name: &str) -> Option<&FunctionDoc> { self.all_function_docs.get(full_function_name) } } diff --git a/src/main.rs b/src/main.rs index 8bf8a52..d6b29e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,6 +37,14 @@ async fn main() { .possible_values(language::LANGUAGE_NAMES) .default_value(language::DEFAULT_LANGUAGE_NAME), ) + .arg( + Arg::with_name("path") + .help("TREMOR_PATH to set") + .short("p") + .long("path") + .takes_value(true) + .default_value(""), + ) .get_matches(); let language_name = matches @@ -44,6 +52,22 @@ async fn main() { // this is safe because we provide a default value for this arg above .unwrap_or_else(|| unreachable!()); + let path = matches + .value_of("path") + // this is safe because we provide a default value for this arg above + .unwrap_or_else(|| unreachable!()); + + if !path.is_empty() { + std::env::set_var( + "TREMOR_PATH", + match std::env::var("TREMOR_PATH") { + // append to existing path if it's already set + Ok(p) => format!("{}:{}", p, path), + Err(_) => path.to_string(), + }, + ); + } + match language::lookup(language_name) { Some(language) => { let stdin = tokio::io::stdin(); diff --git a/tremor-runtime b/tremor-runtime new file mode 160000 index 0000000..4a228aa --- /dev/null +++ b/tremor-runtime @@ -0,0 +1 @@ +Subproject commit 4a228aad205663f9171496def8b038132aaaf6b1