From 283d0721e89624b06a16d1d1408c90be1add32b2 Mon Sep 17 00:00:00 2001
From: jneem
Date: Fri, 8 Mar 2024 13:27:44 -0600
Subject: [PATCH] LSP evaluation in the background (#1814)

* Initial version of background eval

Doesn't support termination, which is probably not good enough

* Kill misbehaving workers
* Clean up the diagnostics
* Improve diagnostics and factor out World from Server
* Fix mistaken ranges for cross-file diagnostics
* Stray dbg
* Fix panic
* Fix rebase issues
* Remove some spurious diagnostics
* Fix link
* Workspace dependencies
* Code review comments
* Add a test
* Issue multiple diagnostics, and ignore partial configurations
* Move eval_permissive to VirtualMachine
---
 Cargo.lock                                    |  74 +++-
 Cargo.toml                                    |   3 +
 core/src/error/mod.rs                         |   2 +-
 core/src/eval/mod.rs                          |  47 +++
 lsp/lsp-harness/src/lib.rs                    |  78 +++-
 lsp/lsp-harness/src/output.rs                 |   8 +-
 lsp/nls/Cargo.toml                            |  28 +-
 lsp/nls/src/actions.rs                        |   7 +-
 lsp/nls/src/background.rs                     | 352 +++++++++++++++++
 lsp/nls/src/command.rs                        |  12 +-
 lsp/nls/src/diagnostic.rs                     | 137 +++++--
 lsp/nls/src/field_walker.rs                   |  16 +-
 lsp/nls/src/files.rs                          | 125 +-----
 lsp/nls/src/incomplete.rs                     |  24 +-
 lsp/nls/src/main.rs                           |  14 +
 lsp/nls/src/requests/completion.rs            |  31 +-
 lsp/nls/src/requests/formatting.rs            |   4 +-
 lsp/nls/src/requests/goto.rs                  |  33 +-
 lsp/nls/src/requests/hover.rs                 |  25 +-
 lsp/nls/src/requests/rename.rs                |  23 +-
 lsp/nls/src/requests/symbols.rs               |  14 +-
 lsp/nls/src/server.rs                         | 364 +++++-------------
 lsp/nls/src/utils.rs                          |  43 +++
 lsp/nls/src/world.rs                          | 354 +++++++++++++++++
 lsp/nls/tests/inputs/diagnostics-basic.ncl    |   6 +
 lsp/nls/tests/main.rs                         |  27 +-
 ..._tests__inputs__diagnostics-basic.ncl.snap |  11 +
 27 files changed, 1333 insertions(+), 529 deletions(-)
 create mode 100644 lsp/nls/src/background.rs
 create mode 100644 lsp/nls/src/utils.rs
 create mode 100644 lsp/nls/src/world.rs
 create mode 100644 lsp/nls/tests/inputs/diagnostics-basic.ncl
 create mode 100644 lsp/nls/tests/snapshots/main__lsp__nls__tests__inputs__diagnostics-basic.ncl.snap

diff --git a/Cargo.lock b/Cargo.lock
index 30be4af03b..6a42c8c67b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -274,9 +274,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.15.3"
+version = "3.15.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b"
+checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"
 
 [[package]]
 name = "bytemuck"
@@ -1241,6 +1241,25 @@ dependencies = [
  "yaml-rust",
 ]
 
+[[package]]
+name = "ipc-channel"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ab3a34c91b7e84a72643bd75d1bac3afd241f78f9859fe0b5e5b2a6a75732c2"
+dependencies = [
+ "bincode",
+ "crossbeam-channel",
+ "fnv",
+ "lazy_static",
+ "libc",
+ "mio",
+ "rand",
+ "serde",
+ "tempfile",
+ "uuid",
+ "windows",
+]
+
 [[package]]
 name = "is-terminal"
 version = "0.4.12"
@@ -1727,15 +1746,18 @@ dependencies = [
  "anyhow",
  "assert_cmd",
  "assert_matches",
+ "bincode",
  "clap 4.5.2",
  "codespan",
  "codespan-reporting",
  "criterion",
+ "crossbeam",
  "csv",
  "derive_more",
  "env_logger",
  "glob",
  "insta",
+ "ipc-channel",
  "lalrpop",
  "lalrpop-util",
  "lazy_static",
@@ -2026,6 +2048,12 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
 [[package]]
 name = "precomputed-hash"
 version = "0.1.1"
@@ -2241,6 +2269,36 @@ dependencies = [
  "nibble_vec",
 ]
 
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
 [[package]]
 name = "raw-cpuid"
 version = "10.7.0"
@@ -3168,6 +3226,9 @@ name = "uuid"
 version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
+dependencies = [
+ "getrandom",
+]
 
 [[package]]
 name = "version_check"
@@ -3335,6 +3396,15 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
+dependencies = [
+ "windows-targets 0.48.5",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
diff --git a/Cargo.toml b/Cargo.toml
index 6fe3466ee5..81209dbf9e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,12 +37,14 @@ ansi_term = "0.12"
 anyhow = "1.0"
 assert_cmd = "2.0.11"
 assert_matches = "1.5.0"
+bincode = "1.3.3"
 clap = "4.3"
 clap_complete = "4.3.2"
 codespan = { version = "0.11", features = ["serialization"] }
 codespan-reporting = { version = "0.11", features = ["serialization"] }
 comrak = "0.17.0"
 criterion = "0.4"
+crossbeam = "0.8.4"
 csv = "1"
 cxx = "1.0"
 cxx-build = "1.0"
@@ -53,6 +55,7 @@ git-version = "0.3.5"
 indexmap = "1.9.3"
 indoc = "2"
 insta = "1.29.0"
+ipc-channel = "0.18.0"
 js-sys = "0.3"
 lalrpop = "0.19.9"
 lalrpop-util = "0.19.9"
diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs
index 6ed3bc1bd5..f3929156f2 100644
--- a/core/src/error/mod.rs
+++ b/core/src/error/mod.rs
@@ -204,7 +204,7 @@ impl IllegalPolymorphicTailAction {
     }
 }
 
-const UNKNOWN_SOURCE_NAME: &str = " (generated by evaluation)";
+pub const UNKNOWN_SOURCE_NAME: &str = " (generated by evaluation)";
 
 /// An error occurring during the static typechecking phase.
 #[derive(Debug, PartialEq, Clone)]
diff --git a/core/src/eval/mod.rs b/core/src/eval/mod.rs
index 6ebd951db6..10d167ca5d 100644
--- a/core/src/eval/mod.rs
+++ b/core/src/eval/mod.rs
@@ -935,6 +935,53 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
             })
         }
     }
+
+    /// Evaluate a term, but attempt to continue on errors.
+    ///
+    /// This differs from `VirtualMachine::eval_full` in 3 ways:
+    /// - We try to accumulate errors instead of bailing out. When recursing into record
+    ///   fields and array elements, we keep evaluating subsequent elements even if one
+    ///   fails.
+    /// - We ignore missing field errors. It would be nice not to ignore them, but it's hard
+    ///   to tell when they're appropriate: the term might intentionally be a partial configuration.
+    /// - We only return the accumulated errors; we don't return the eval'ed term.
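+    ///
+    /// A sketch of intended use from a hypothetical caller (`report` is a
+    /// placeholder for the caller's error handling, not an API in this crate):
+    ///
+    /// ```ignore
+    /// let errors = vm.eval_permissive(rt);
+    /// // The list may be empty even for a partial configuration, since
+    /// // missing-field errors are deliberately swallowed.
+    /// for e in errors {
+    ///     report(e);
+    /// }
+    /// ```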
+    pub fn eval_permissive(&mut self, rt: RichTerm) -> Vec<EvalError> {
+        fn inner<R: ImportResolver, C: Cache>(
+            slf: &mut VirtualMachine<R, C>,
+            acc: &mut Vec<EvalError>,
+            rt: RichTerm,
+        ) {
+            match slf.eval(rt) {
+                Err(e) => acc.push(e),
+                Ok(t) => match t.as_ref() {
+                    Term::Array(ts, _) => {
+                        for t in ts.iter() {
+                            // After eval_closure, all the array elements are
+                            // closurized already, so we don't need to do any tracking
+                            // of the env.
+                            inner(slf, acc, t.clone());
+                        }
+                    }
+                    Term::Record(data) => {
+                        for field in data.fields.values() {
+                            if let Some(v) = &field.value {
+                                let value_with_ctr = RuntimeContract::apply_all(
+                                    v.clone(),
+                                    field.pending_contracts.iter().cloned(),
+                                    v.pos,
+                                );
+                                inner(slf, acc, value_with_ctr);
+                            }
+                        }
+                    }
+                    _ => {}
+                },
+            }
+        }
+
+        let mut ret = Vec::new();
+        inner(self, &mut ret, rt);
+        ret
+    }
 }
 
 impl<C: Cache> VirtualMachine<Cache, C> {
diff --git a/lsp/lsp-harness/src/lib.rs b/lsp/lsp-harness/src/lib.rs
index c917b1a9a7..954d21df1d 100644
--- a/lsp/lsp-harness/src/lib.rs
+++ b/lsp/lsp-harness/src/lib.rs
@@ -27,6 +27,7 @@ use serde::Deserialize;
 pub struct TestFixture {
     pub files: Vec<TestFile>,
     pub reqs: Vec<Request>,
+    pub expected_diags: Vec<Url>,
 }
 
 pub struct TestFile {
@@ -49,7 +50,11 @@ pub enum Request {
 
 #[derive(Deserialize, Debug, Default)]
 pub struct Requests {
-    request: Vec<Request>,
+    request: Option<Vec<Request>>,
+    // A list of files to compare diagnostic snapshots.
+    // TODO: once the background output has settled down a little,
+    // consider checking diagnostic snapshots for all tests
+    diagnostic: Option<Vec<Url>>,
 }
 
 impl TestFixture {
@@ -92,15 +97,18 @@ impl TestFixture {
             Some(TestFixture {
                 files,
                 reqs: Vec::new(),
+                expected_diags: Vec::new(),
             })
         } else {
             // The remaining lines at the end of the file are a toml source
-            // listing the LSP requests we need to make.
+            // listing the LSP requests we need to make and the diagnostics
+            // we expect to receive.
             let remaining = header_lines.join("\n");
             let reqs: Requests = toml::from_str(&remaining).unwrap();
             Some(TestFixture {
                 files,
-                reqs: reqs.request,
+                reqs: reqs.request.unwrap_or_default(),
+                expected_diags: reqs.diagnostic.unwrap_or_default(),
             })
         }
     }
@@ -191,14 +199,64 @@ impl TestHarness {
     }
 
     // For debug purposes, drain and print notifications.
-    pub fn drain_notifications(&mut self) {
-        // FIXME: nls doesn't report progress, so we have no way to check whether
-        // it's finished sending notifications. We just retrieve any that we've already
-        // received.
-        // We should also have a better format for printing diagnostics and other
-        // notifications.
+    pub fn drain_diagnostics(&mut self, files: impl Iterator<Item = Url>) {
+        let mut diags = self.drain_diagnostics_inner(files);
+
+        // Sort and dedup the diagnostics, for stability of the output.
+        let mut files: Vec<_> = diags.keys().cloned().collect();
+        files.sort();
+
+        for f in files {
+            let mut diags = diags.remove(&f).unwrap();
+            diags.sort_by_cached_key(|d| (d.range.start, d.range.end, d.message.clone()));
+            diags.dedup_by_key(|d| (d.range.start, d.range.end, d.message.clone()));
+            for d in diags {
+                (&f, d).debug(&mut self.out).unwrap();
+                self.out.push(b'\n');
+            }
+        }
+    }
+
+    fn drain_diagnostics_inner(
+        &mut self,
+        files: impl Iterator<Item = Url>,
+    ) -> HashMap<Url, Vec<Diagnostic>> {
+        let mut diags: HashMap<Url, Vec<Diagnostic>> = HashMap::new();
+
+        // This is pretty fragile, but I don't know of a better way to handle notifications: we
+        // expect 2 rounds of notifications from each file (one synchronously from typechecking,
+        // and one from the background eval). So we just wait until we've received both, and we
+        // concatenate their outputs.
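+        // Concretely: each file starts with a countdown of 2 in `waiting`
+        // below; every PublishDiagnostics notification for it decrements the
+        // count, and we stop listening once every tracked file reaches 0.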
+        let mut waiting: HashMap<Url, u32> = files.map(|f| (f, 2)).collect();
+
+        // Handle a single diagnostic, returning true if we have enough of them.
+        let mut handle_diag = |diag: PublishDiagnosticsParams| -> bool {
+            if let Some(remaining) = waiting.get_mut(&diag.uri) {
+                *remaining -= 1;
+                if *remaining == 0 {
+                    waiting.remove(&diag.uri);
+                }
+                diags
+                    .entry(diag.uri.clone())
+                    .or_default()
+                    .extend(diag.diagnostics);
+            }
+
+            waiting.is_empty()
+        };
+
         for msg in self.srv.pending_notifications() {
-            eprintln!("{msg:?}");
+            if msg.method == PublishDiagnostics::METHOD {
+                let diag: PublishDiagnosticsParams =
+                    serde_json::value::from_value(msg.params).unwrap();
+                if handle_diag(diag) {
+                    return diags;
+                }
+            }
         }
+
+        while !handle_diag(self.wait_for_diagnostics()) {}
+
+        diags
     }
 }
diff --git a/lsp/lsp-harness/src/output.rs b/lsp/lsp-harness/src/output.rs
index 3a76a05890..e1502d3dd4 100644
--- a/lsp/lsp-harness/src/output.rs
+++ b/lsp/lsp-harness/src/output.rs
@@ -4,7 +4,7 @@
 
 use std::io::Write;
 
-use lsp_types::{DocumentSymbolResponse, GotoDefinitionResponse, WorkspaceEdit};
+use lsp_types::{Diagnostic, DocumentSymbolResponse, GotoDefinitionResponse, WorkspaceEdit};
 
 pub trait LspDebug {
     fn debug(&self, w: impl Write) -> std::io::Result<()>;
@@ -210,3 +210,9 @@ impl LspDebug for WorkspaceEdit {
         changes.debug(w)
     }
 }
+
+impl LspDebug for Diagnostic {
+    fn debug(&self, mut w: impl Write) -> std::io::Result<()> {
+        write!(w, "{}: {}", self.range.debug_str(), self.message)
+    }
+}
diff --git a/lsp/nls/Cargo.toml b/lsp/nls/Cargo.toml
index 2b44d8343f..6d6459fcc1 100644
--- a/lsp/nls/Cargo.toml
+++ b/lsp/nls/Cargo.toml
@@ -26,23 +26,25 @@ harness = false
 lalrpop.workspace = true
 
 [dependencies]
-lalrpop-util.workspace = true
+anyhow.workspace = true
+bincode.workspace = true
 clap = { workspace = true, features = ["derive"] }
-codespan.workspace = true
 codespan-reporting.workspace = true
-serde = { workspace = true, features = ["derive"] }
-serde_json.workspace = true
-regex.workspace = true
-
-nickel-lang-core.workspace = true
-lsp-server.workspace = true
-lsp-types.workspace = true
-log.workspace = true
-env_logger.workspace = true
-anyhow.workspace = true
+codespan.workspace = true
+crossbeam.workspace = true
+csv.workspace = true
 derive_more.workspace = true
+env_logger.workspace = true
+ipc-channel.workspace = true
+lalrpop-util.workspace = true
 lazy_static.workspace = true
-csv.workspace = true
+log.workspace = true
+lsp-server.workspace = true
+lsp-types.workspace = true
+nickel-lang-core.workspace = true
+regex.workspace = true
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
 thiserror.workspace = true
 
 [dev-dependencies]
diff --git a/lsp/nls/src/actions.rs b/lsp/nls/src/actions.rs
index 53817a9607..985741f8f1 100644
--- a/lsp/nls/src/actions.rs
+++ b/lsp/nls/src/actions.rs
@@ -10,7 +10,12 @@ pub fn handle_code_action(
 ) -> Result<(), ResponseError> {
     let mut actions = Vec::new();
 
-    if server.cache.file_id(&params.text_document.uri)?.is_some() {
+    if server
+        .world
+        .cache
+        .file_id(&params.text_document.uri)?
+        .is_some()
+    {
         actions.push(CodeActionOrCommand::Command(lsp_types::Command {
             title: "evaluate term".to_owned(),
             command: "eval".to_owned(),
diff --git a/lsp/nls/src/background.rs b/lsp/nls/src/background.rs
new file mode 100644
index 0000000000..71e6e98ab2
--- /dev/null
+++ b/lsp/nls/src/background.rs
@@ -0,0 +1,352 @@
+use std::{
+    collections::{HashMap, HashSet},
+    path::PathBuf,
+    process::Child,
+    time::{Duration, Instant},
+};
+
+use crossbeam::channel::{Receiver, Select, Sender};
+use ipc_channel::ipc::{IpcOneShotServer, IpcReceiver, IpcSender};
+use log::warn;
+use lsp_types::Url;
+use nickel_lang_core::{
+    cache::SourcePath,
+    eval::{cache::CacheImpl, VirtualMachine},
+};
+use serde::{Deserialize, Serialize};
+
+use crate::{diagnostic::SerializableDiagnostic, files::uri_to_path, world::World};
+
+const EVAL_TIMEOUT: Duration = Duration::from_secs(1);
+
+#[derive(Debug, Serialize, Deserialize)]
+enum Command {
+    UpdateFile { uri: Url, text: String },
+    EvalFile { uri: Url },
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Diagnostics {
+    pub path: PathBuf,
+    pub diagnostics: Vec<SerializableDiagnostic>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub enum Response {
+    Diagnostics(Diagnostics),
+    /// The background worker sends back one of these when it's about to start
+    /// an eval job. That way, if it becomes unresponsive we know which file is
+    /// the culprit.
+    Starting {
+        uri: Url,
+    },
+}
+
+pub struct BackgroundJobs {
+    receiver: Receiver<Diagnostics>,
+    sender: Sender<Command>,
+}
+
+// The entry point of the background worker. If it fails to bootstrap the connection,
+// panic immediately (it's a subprocess anyway).
+pub fn worker_main(main_server: String) {
+    let oneshot_tx = IpcSender::connect(main_server).unwrap();
+    let (cmd_tx, cmd_rx) = ipc_channel::ipc::channel().unwrap();
+    let (response_tx, response_rx) = ipc_channel::ipc::channel().unwrap();
+    oneshot_tx.send((cmd_tx, response_rx)).unwrap();
+
+    worker(cmd_rx, response_tx);
+}
+
+fn drain_ready<T: for<'de> Deserialize<'de> + Serialize>(rx: &IpcReceiver<T>, buf: &mut Vec<T>) {
+    while let Ok(x) = rx.try_recv() {
+        buf.push(x);
+    }
+}
+
+// Returning an Option, just to let us use `?` when channels disconnect.
+fn worker(cmd_rx: IpcReceiver<Command>, response_tx: IpcSender<Response>) -> Option<()> {
+    let mut evals = Vec::new();
+    let mut cmds = Vec::new();
+    let mut world = World::default();
+
+    loop {
+        for cmd in cmds.drain(..) {
+            // Process all the file updates first, even if it's out of order with the evals.
+            // (Is there any use case for wanting the eval before updating the contents?)
+            match cmd {
+                Command::UpdateFile { uri, text } => {
+                    // Failing to update a file's contents is bad, so we terminate (and restart)
+                    // the worker.
+                    world.update_file(uri, text).unwrap();
+                }
+                Command::EvalFile { uri } => {
+                    evals.push(uri);
+                }
+            }
+        }
+
+        // Deduplicate the evals back-to-front, and then evaluate front-to-back.
+        // This means that if we get requests for `foo.ncl` and `bar.ncl`, and
+        // then another request for `foo.ncl` before we started working on the
+        // first one, then we'll throw away the first `foo.ncl` and evaluate
+        // `bar.ncl` first.
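+        // E.g. a queue of [foo, bar, foo] keeps only the later foo, giving
+        // [foo, bar]; since we pop from the back, bar is evaluated first.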
+        let mut seen = HashSet::new();
+        let mut dedup = Vec::new();
+        for path in evals.iter().rev() {
+            if seen.insert(path) {
+                dedup.push(path.clone());
+            }
+        }
+        evals = dedup;
+
+        if let Some(uri) = evals.pop() {
+            let Ok(path) = uri_to_path(&uri) else {
+                warn!("skipping invalid uri {uri}");
+                continue;
+            };
+
+            if let Some(file_id) = world.cache.id_of(&SourcePath::Path(path.clone())) {
+                response_tx
+                    .send(Response::Starting { uri: uri.clone() })
+                    .ok()?;
+
+                let mut diagnostics = world.parse_and_typecheck(file_id);
+
+                // Evaluation diagnostics (but only if there were no parse/type errors).
+                if diagnostics.is_empty() {
+                    // TODO: avoid cloning the cache.
+                    let mut vm =
+                        VirtualMachine::<_, CacheImpl>::new(world.cache.clone(), std::io::stderr());
+                    // We've already checked that parsing and typechecking are successful, so we
+                    // don't expect further errors.
+                    let rt = vm.prepare_eval(file_id).unwrap();
+                    let errors = vm.eval_permissive(rt);
+                    diagnostics.extend(
+                        errors
+                            .into_iter()
+                            .flat_map(|e| world.lsp_diagnostics(file_id, e)),
+                    );
+                }
+
+                // If there's been an update to the file, don't send back a stale response.
+                cmds.extend(cmd_rx.try_recv());
+                if !cmds.iter().any(|cmd| match cmd {
+                    Command::UpdateFile { uri, .. } => {
+                        uri_to_path(uri).map_or(false, |p| p == path)
+                    }
+                    _ => false,
+                }) {
+                    response_tx
+                        .send(Response::Diagnostics(Diagnostics { path, diagnostics }))
+                        .ok()?;
+                }
+            }
+        }
+
+        drain_ready(&cmd_rx, &mut cmds);
+        if cmds.is_empty() && evals.is_empty() {
+            // Wait for a command to be available.
+            cmds.push(cmd_rx.recv().ok()?);
+        }
+    }
+}
+
+struct SupervisorState {
+    cmd_rx: Receiver<Command>,
+    response_tx: Sender<Diagnostics>,
+    child: Child,
+    cmd_tx: IpcSender<Command>,
+    response_rx: Receiver<Response>,
+
+    contents: HashMap<Url, String>,
+
+    // If evaluating a file causes the worker to time out or crash, we blacklist that file
+    // and refuse to evaluate it anymore. This could be relaxed (e.g. maybe we're willing to
+    // try again after a certain amount of time?).
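+    // Note that the ban is keyed on the URI we were asked to evaluate, so if a
+    // slow import is the real culprit, each file that evaluates it gets banned
+    // separately.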
+    banned_files: HashSet<Url>,
+    eval_in_progress: Option<(Url, Instant)>,
+}
+
+enum SupervisorError {
+    MainExited,
+    WorkerExited,
+    WorkerTimedOut(Url),
+}
+
+impl SupervisorState {
+    fn spawn_worker() -> anyhow::Result<(Child, IpcSender<Command>, Receiver<Response>)> {
+        let path = std::env::current_exe()?;
+        let (oneshot_server, server_name) =
+            IpcOneShotServer::<(IpcSender<Command>, IpcReceiver<Response>)>::new()?;
+        let child = std::process::Command::new(path)
+            .args(["--main-server", &server_name])
+            .spawn()?;
+
+        let (_, (ipc_cmd_tx, ipc_response_rx)) = oneshot_server.accept()?;
+        let ipc_response_rx = ipc_channel::router::ROUTER
+            .route_ipc_receiver_to_new_crossbeam_receiver(ipc_response_rx);
+        Ok((child, ipc_cmd_tx, ipc_response_rx))
+    }
+
+    fn new(cmd_rx: Receiver<Command>, response_tx: Sender<Diagnostics>) -> anyhow::Result<Self> {
+        let (child, cmd_tx, response_rx) = SupervisorState::spawn_worker()?;
+        Ok(Self {
+            cmd_rx,
+            response_tx,
+            contents: HashMap::new(),
+            child,
+            cmd_tx,
+            response_rx,
+            banned_files: HashSet::new(),
+            eval_in_progress: None,
+        })
+    }
+
+    fn handle_command(&mut self, msg: Command) -> Result<(), SupervisorError> {
+        if let Command::UpdateFile { uri, text } = &msg {
+            self.contents.insert(uri.clone(), text.clone());
+        }
+        if let Command::EvalFile { uri } = &msg {
+            if self.banned_files.contains(uri) {
+                return Ok(());
+            }
+        }
+
+        self.cmd_tx
+            .send(msg)
+            .map_err(|_| SupervisorError::WorkerExited)
+    }
+
+    fn handle_response(&mut self, msg: Response) -> Result<(), SupervisorError> {
+        match msg {
+            Response::Diagnostics(d) => {
+                self.eval_in_progress = None;
+                self.response_tx
+                    .send(d)
+                    .map_err(|_| SupervisorError::MainExited)
+            }
+            Response::Starting { uri } => {
+                let timeout = Instant::now() + EVAL_TIMEOUT;
+                self.eval_in_progress = Some((uri, timeout));
+                Ok(())
+            }
+        }
+    }
+
+    // The Result return type is only there to allow the ? shortcuts. In fact,
+    // this only ever returns errors.
+    fn run_one(&mut self) -> Result<(), SupervisorError> {
+        loop {
+            let mut select = Select::new();
+            select.recv(&self.cmd_rx);
+            select.recv(&self.response_rx);
+
+            let op = if let Some((path, timeout)) = &self.eval_in_progress {
+                select
+                    .select_deadline(*timeout)
+                    .map_err(|_| SupervisorError::WorkerTimedOut(path.clone()))?
+            } else {
+                select.select()
+            };
+            match op.index() {
+                0 => {
+                    let cmd = op
+                        .recv(&self.cmd_rx)
+                        .map_err(|_| SupervisorError::MainExited)?;
+                    self.handle_command(cmd)?;
+                }
+                1 => {
+                    let resp = op
+                        .recv(&self.response_rx)
+                        .map_err(|_| SupervisorError::WorkerExited)?;
+                    self.handle_response(resp)?;
+                }
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    fn restart_worker(&mut self) -> anyhow::Result<()> {
+        let (child, cmd_tx, response_rx) = Self::spawn_worker()?;
+        self.cmd_tx = cmd_tx;
+        self.child = child;
+        self.response_rx = response_rx;
+
+        // Tell the worker about all the files we know about.
+        // Currently, we don't restart any evals that were queued up before the failure. Maybe
+        // we should?
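+        // Re-sending `UpdateFile` for every known document rebuilds the fresh
+        // worker's `World` from `contents`, so later evals see current sources.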
+        for (path, contents) in self.contents.iter() {
+            self.cmd_tx.send(Command::UpdateFile {
+                uri: path.clone(),
+                text: contents.clone(),
+            })?;
+        }
+
+        Ok(())
+    }
+
+    fn run(&mut self) {
+        loop {
+            match self.run_one() {
+                // unreachable because run_one only returns on error
+                Ok(_) => unreachable!(),
+                Err(SupervisorError::MainExited) => break,
+                Err(SupervisorError::WorkerExited) => {
+                    if let Some((path, _)) = self.eval_in_progress.take() {
+                        self.banned_files.insert(path);
+                        if let Err(e) = self.restart_worker() {
+                            warn!("failed to restart worker: {e}");
+                            break;
+                        }
+                    }
+                }
+                Err(SupervisorError::WorkerTimedOut(path)) => {
+                    self.banned_files.insert(path);
+                    self.eval_in_progress = None;
+                    let _ = self.child.kill();
+                    if let Err(e) = self.restart_worker() {
+                        warn!("failed to restart worker: {e}");
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl BackgroundJobs {
+    pub fn new() -> Self {
+        let (cmd_tx, cmd_rx) = crossbeam::channel::unbounded();
+        let (diag_tx, diag_rx) = crossbeam::channel::unbounded();
+        match SupervisorState::new(cmd_rx, diag_tx) {
+            Ok(mut sup) => {
+                std::thread::spawn(move || {
+                    sup.run();
+                });
+            }
+            Err(e) => {
+                eprintln!("failed to spawn background jobs: {e}");
+            }
+        }
+
+        Self {
+            sender: cmd_tx,
+            receiver: diag_rx,
+        }
+    }
+
+    pub fn update_file(&mut self, uri: Url, text: String) {
+        // Ignore errors here, because if we've failed to set up a background worker
+        // then we just skip doing background evaluation.
+        let _ = self.sender.send(Command::UpdateFile { uri, text });
+    }
+
+    pub fn eval_file(&mut self, uri: Url) {
+        let _ = self.sender.send(Command::EvalFile { uri });
+    }
+
+    pub fn receiver(&self) -> &Receiver<Diagnostics> {
+        &self.receiver
+    }
+}
diff --git a/lsp/nls/src/command.rs b/lsp/nls/src/command.rs
index d1e5787653..7a0a9d19fb 100644
--- a/lsp/nls/src/command.rs
+++ b/lsp/nls/src/command.rs
@@ -1,9 +1,6 @@
 use lsp_server::{RequestId, Response, ResponseError};
 use lsp_types::{ExecuteCommandParams, TextDocumentIdentifier, Url};
-use nickel_lang_core::{
-    error::IntoDiagnostics,
-    eval::{cache::CacheImpl, VirtualMachine},
-};
+use nickel_lang_core::eval::{cache::CacheImpl, VirtualMachine};
 
 use crate::{cache::CacheExt, error::Error, server::Server};
 
@@ -26,12 +23,13 @@ pub fn handle_command(
 }
 
 fn eval(server: &mut Server, uri: &Url) -> Result<(), Error> {
-    if let Some(file_id) = server.cache.file_id(uri)? {
+    if let Some(file_id) = server.world.cache.file_id(uri)? {
         // TODO: avoid cloning the cache. Maybe we can have a VM with a &mut Cache?
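         // (The clone is needed because the VM takes ownership of its cache;
         // the borrowed variant is what the TODO above envisions.)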
-        let mut vm = VirtualMachine::<_, CacheImpl>::new(server.cache.clone(), std::io::stderr());
+        let mut vm =
+            VirtualMachine::<_, CacheImpl>::new(server.world.cache.clone(), std::io::stderr());
         let rt = vm.prepare_eval(file_id)?;
         if let Err(e) = vm.eval_full(rt) {
-            let diags = e.into_diagnostics(server.cache.files_mut(), None);
+            let diags = server.world.lsp_diagnostics(file_id, e);
             server.issue_diagnostics(file_id, diags);
         }
     }
diff --git a/lsp/nls/src/diagnostic.rs b/lsp/nls/src/diagnostic.rs
index 6895bfb6f8..b0e1bb925e 100644
--- a/lsp/nls/src/diagnostic.rs
+++ b/lsp/nls/src/diagnostic.rs
@@ -2,16 +2,54 @@ use std::ops::Range;
 
 use codespan::{FileId, Files};
 use codespan_reporting::diagnostic::{self, Diagnostic};
-use lsp_types::NumberOrString;
-use nickel_lang_core::position::RawSpan;
+use lsp_types::{DiagnosticRelatedInformation, NumberOrString};
+use nickel_lang_core::{error::UNKNOWN_SOURCE_NAME, position::RawSpan};
+use serde::{Deserialize, Serialize};
 
 use crate::codespan_lsp::byte_span_to_range;
 
+/// A more serializable alternative to lsp_types::Diagnostic
+///
+/// lsp_types::Diagnostic is not serializable to bincode (and therefore not
+/// sendable across an ipc-channel channel) because it has optional fields that
+/// get skipped serializing if empty. See
+#[derive(Debug, Serialize, Deserialize)]
+pub struct SerializableDiagnostic {
+    pub range: lsp_types::Range,
+    pub severity: Option<lsp_types::DiagnosticSeverity>,
+    pub code: Option<NumberOrString>,
+    pub message: String,
+    pub related_information: Option<Vec<DiagnosticRelatedInformation>>,
+}
+
+impl From<SerializableDiagnostic> for lsp_types::Diagnostic {
+    fn from(d: SerializableDiagnostic) -> Self {
+        Self {
+            range: d.range,
+            severity: d.severity,
+            code: d.code,
+            message: d.message,
+            ..Default::default()
+        }
+    }
+}
+
 /// Convert [codespan_reporting::diagnostic::Diagnostic] into a list of another type
 /// Diagnostics tend to contain a list of labels pointing to errors in the code which
 /// we want to extract, hence a list of `Self`
 pub trait DiagnosticCompat: Sized {
-    fn from_codespan(diagnostic: Diagnostic<FileId>, files: &mut Files<String>) -> Vec<Self>;
+    // We convert a single diagnostic into a list of diagnostics: one "main" diagnostic and an
+    // additional hint for each label. LSP also has a `related_information` field that we could
+    // use for the labels, but we prefer multiple diagnostics because (1) that's what rust-analyzer
+    // does and (2) it gives a better experience in helix, at least.
+    //
+    // We do use the `related_information` field for cross-file diagnostics, because the main
+    // diagnostics notification assumes all the diagnostics are for the same file.
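+    //
+    // For example, a type error whose labels all sit in `file_id` becomes one
+    // ERROR diagnostic for the message plus one HINT diagnostic per label;
+    // labels in other files are attached as related information instead.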
+    fn from_codespan(
+        file_id: FileId,
+        diagnostic: Diagnostic<FileId>,
+        files: &mut Files<String>,
+    ) -> Vec<Self>;
 }
 
 /// Determine the position of a [codespan_reporting::diagnostic::Label] by looking it up
@@ -35,14 +73,6 @@ impl LocationCompat for lsp_types::Range {
             end: Default::default(),
         })
     }
-
-    fn from_span(span: &RawSpan, files: &Files<String>) -> Self {
-        Self::from_codespan(
-            &span.src_id,
-            &(span.start.to_usize()..span.end.to_usize()),
-            files,
-        )
-    }
 }
 
 impl LocationCompat for lsp_types::Location {
@@ -54,8 +84,12 @@ impl LocationCompat for lsp_types::Location {
     }
 }
 
-impl DiagnosticCompat for lsp_types::Diagnostic {
-    fn from_codespan(diagnostic: Diagnostic<FileId>, files: &mut Files<String>) -> Vec<Self> {
+impl DiagnosticCompat for SerializableDiagnostic {
+    fn from_codespan(
+        file_id: FileId,
+        diagnostic: Diagnostic<FileId>,
+        files: &mut Files<String>,
+    ) -> Vec<Self> {
         let severity = Some(match diagnostic.severity {
             diagnostic::Severity::Bug => lsp_types::DiagnosticSeverity::WARNING,
             diagnostic::Severity::Error => lsp_types::DiagnosticSeverity::ERROR,
@@ -64,23 +98,76 @@ impl DiagnosticCompat for lsp_types::Diagnostic {
             diagnostic::Severity::Help => lsp_types::DiagnosticSeverity::HINT,
         });
 
-        diagnostic
+        let code = diagnostic.code.clone().map(NumberOrString::String);
+
+        let mut diagnostics = Vec::new();
+
+        let within_file_labels = diagnostic
             .labels
             .iter()
-            .map(|label| {
-                let range = lsp_types::Range::from_codespan(&label.file_id, &label.range, files);
+            .filter(|label| label.file_id == file_id);
 
-                let code = diagnostic.code.clone().map(NumberOrString::String);
-                let message = format!("{}\n{}", diagnostic.message, diagnostic.notes.join("\n"));
+        let cross_file_labels = diagnostic.labels.iter().filter(|label| {
+            label.file_id != file_id
+            // When errors point to generated code, the diagnostic-formatting machinery
+            // replaces it with a generated file. This is appropriate for command line errors,
+            // but not for us. It would be nice if we could filter this out at an earlier stage.
+            && files.name(label.file_id) != UNKNOWN_SOURCE_NAME
+        });
 
-                lsp_types::Diagnostic {
+        if !diagnostic.message.is_empty() {
+            // What location should we use for the "overall" diagnostic? `Diagnostic` doesn't
+            // have an "overall" location, so we arbitrarily take the location of the first label.
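+            // If there are no within-file labels at all, we drop the overall
+            // message rather than inventing a location for it.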
+            if let Some(range) = within_file_labels
+                .clone()
+                .next()
+                .map(|label| lsp_types::Range::from_codespan(&label.file_id, &label.range, files))
+            {
+                diagnostics.push(SerializableDiagnostic {
                     range,
                     severity,
-                    code,
-                    message,
-                    ..Default::default()
-                }
-            })
-            .collect::<Vec<_>>()
+                    code: code.clone(),
+                    message: diagnostic.message,
+                    related_information: Some(
+                        cross_file_labels
+                            .map(|label| DiagnosticRelatedInformation {
+                                location: lsp_types::Location::from_codespan(
+                                    &label.file_id,
+                                    &label.range,
+                                    files,
+                                ),
+                                message: label.message.clone(),
+                            })
+                            .collect(),
+                    ),
+                });
+            }
+        }
+
+        diagnostics.extend(within_file_labels.map(|label| {
+            let range = lsp_types::Range::from_codespan(&label.file_id, &label.range, files);
+
+            SerializableDiagnostic {
+                range,
+                message: label.message.clone(),
+                severity: Some(lsp_types::DiagnosticSeverity::HINT),
+                code: code.clone(),
+                related_information: None,
+            }
+        }));
+        diagnostics
+    }
+}
+
+impl DiagnosticCompat for lsp_types::Diagnostic {
+    fn from_codespan(
+        file_id: FileId,
+        diagnostic: Diagnostic<FileId>,
+        files: &mut Files<String>,
+    ) -> Vec<Self> {
+        SerializableDiagnostic::from_codespan(file_id, diagnostic, files)
+            .into_iter()
+            .map(From::from)
+            .collect()
     }
 }
diff --git a/lsp/nls/src/field_walker.rs b/lsp/nls/src/field_walker.rs
index 505f4963cd..7bdaa8a962 100644
--- a/lsp/nls/src/field_walker.rs
+++ b/lsp/nls/src/field_walker.rs
@@ -11,7 +11,7 @@ use nickel_lang_core::{
     typ::{RecordRows, RecordRowsIteratorItem, Type, TypeF},
 };
 
-use crate::{identifier::LocIdent, requests::completion::CompletionItem, server::Server};
+use crate::{identifier::LocIdent, requests::completion::CompletionItem, world::World};
 
 /// Either a record term or a record type.
 #[derive(Clone, Debug, PartialEq)]
@@ -263,7 +263,7 @@ fn filter_records(containers: Vec<Container>) -> Vec<Record> {
 /// `({foo = {...}} & {foo = {...}})` will return both values of `foo`.
 #[derive(Clone)]
 pub struct FieldResolver<'a> {
-    server: &'a Server,
+    world: &'a World,
 
     // Most of our analysis moves "down" the AST and so can't get stuck in a loop.
 // Variable resolution is an exception, however, and so we protect against
@@ -273,9 +273,9 @@ pub struct FieldResolver<'a> {
 }
 
 impl<'a> FieldResolver<'a> {
-    pub fn new(server: &'a Server) -> Self {
+    pub fn new(world: &'a World) -> Self {
         Self {
-            server,
+            world,
             blackholed_ids: Default::default(),
         }
     }
@@ -369,7 +369,7 @@ impl<'a> FieldResolver<'a> {
     fn cousin_containers(&self, rt: &RichTerm) -> Vec<Container> {
         let mut ret = Vec::new();
-        if let Some(mut ancestors) = self.server.analysis.get_parent_chain(rt) {
+        if let Some(mut ancestors) = self.world.analysis.get_parent_chain(rt) {
             while let Some(ancestor) = ancestors.next_merge() {
                 let path = ancestors.path().unwrap_or_default();
                 ret.extend(self.containers_at_path(&ancestor, path.iter().rev().copied()));
@@ -418,7 +418,7 @@ impl<'a> FieldResolver<'a> {
                 let id = LocIdent::from(*id);
                 if self.blackholed_ids.borrow_mut().insert(id) {
                     let ret = self
-                        .server
+                        .world
                         .analysis
                         .get_def(&id)
                         .map(|def| {
@@ -438,7 +438,7 @@ impl<'a> FieldResolver<'a> {
                 }
             }
             Term::ResolvedImport(file_id) => self
-                .server
+                .world
                 .cache
                 .get_ref(*file_id)
                 .map(|term| self.resolve_container(term))
@@ -458,7 +458,7 @@ impl<'a> FieldResolver<'a> {
             _ => Default::default(),
         };
 
-        let typ_fields = if let Some(typ) = self.server.analysis.get_type(rt) {
+        let typ_fields = if let Some(typ) = self.world.analysis.get_type(rt) {
             log::info!("got inferred type {typ:?}");
             self.resolve_type(typ)
         } else {
diff --git a/lsp/nls/src/files.rs b/lsp/nls/src/files.rs
index 9305cc5b68..4103fbe94b 100644
--- a/lsp/nls/src/files.rs
+++ b/lsp/nls/src/files.rs
@@ -1,25 +1,17 @@
 use std::path::PathBuf;
 
 use anyhow::Result;
-use codespan::FileId;
-use codespan_reporting::diagnostic::Diagnostic;
-use log::trace;
 use lsp_server::RequestId;
 use lsp_types::{
     notification::{DidOpenTextDocument, Notification},
     DidChangeTextDocumentParams, DidOpenTextDocumentParams, Url,
 };
-use nickel_lang_core::{
-    cache::{CacheError, CacheOp, SourcePath},
-    error::{ImportError, IntoDiagnostics},
-};
 
 use crate::{
     error::Error,
     trace::{param::FileUpdate, Enrich, Trace},
 };
 
-use super::cache::CacheExt;
 use super::server::Server;
 
 pub(crate) fn uri_to_path(uri: &Url) -> std::result::Result<PathBuf, Error> {
@@ -45,40 +37,16 @@ pub fn handle_open(server: &mut Server, params: DidOpenTextDocumentParams) -> Re
             content: &params.text_document.text,
         },
     );
-    let path = uri_to_path(&params.text_document.uri)?;
-
-    // Invalidate the cache of every file that tried, but failed, to import a file
-    // with a name like this.
-    let mut invalid = path
-        .file_name()
-        .and_then(|name| server.failed_imports.remove(name))
-        .unwrap_or_default();
-
-    // Replace the path (as opposed to adding it): we may already have this file in the
-    // cache if it was imported by an already-open file.
-    let file_id = server
-        .cache
-        .replace_string(SourcePath::Path(path), params.text_document.text);
-
-    // Invalidate any cached inputs that imported the newly-opened file, so that any
-    // cross-file references are updated.
-    invalid.extend(server.cache.get_rev_imports_transitive(file_id));
-
-    for rev_dep in &invalid {
-        server.analysis.remove(*rev_dep);
-        // Reset the cached state (Parsed is the earliest one) so that it will
-        // re-resolve its imports.
-        server
-            .cache
-            .update_state(*rev_dep, nickel_lang_core::cache::EntryState::Parsed);
-    }
+    let (file_id, invalid) = server
+        .world
+        .add_file(params.text_document.uri.clone(), params.text_document.text)?;
 
-    server.file_uris.insert(file_id, params.text_document.uri);
-
-    parse_and_typecheck(server, file_id)?;
+    let diags = server.world.parse_and_typecheck(file_id);
+    server.issue_diagnostics(file_id, diags);
 
     for rev_dep in &invalid {
-        parse_and_typecheck(server, *rev_dep)?;
+        let diags = server.world.parse_and_typecheck(*rev_dep);
+        server.issue_diagnostics(*rev_dep, diags);
     }
     Trace::reply(id);
     Ok(())
@@ -99,83 +67,18 @@ pub fn handle_save(server: &mut Server, params: DidChangeTextDocumentParams) ->
         },
     );
 
-    let path = uri_to_path(&params.text_document.uri)?;
-    let file_id = server.cache.replace_string(
-        SourcePath::Path(path),
-        params.content_changes[0].text.to_owned(),
-    );
-
-    // Any transitive dependency of the modified file needs to be re-type-checked (but not
-    // re-parsed).
-    let invalid = server.cache.get_rev_imports_transitive(file_id);
-    for f in &invalid {
-        server.analysis.remove(*f);
-    }
+    let (file_id, invalid) = server.world.update_file(
+        params.text_document.uri.clone(),
+        params.content_changes[0].text.clone(),
+    )?;
 
-    // TODO: make this part more abstracted
-    // implement typecheck (at least) as part of a persistent AST representation
-    // for now execute the same as above for handling `open` notifications
-    parse_and_typecheck(server, file_id)?;
+    let diags = server.world.parse_and_typecheck(file_id);
+    server.issue_diagnostics(file_id, diags);
 
     for f in &invalid {
-        let errors = typecheck(server, *f).err().unwrap_or_default();
+        let errors = server.world.typecheck(*f).err().unwrap_or_default();
         server.issue_diagnostics(*f, errors);
     }
 
     Trace::reply(id);
     Ok(())
 }
-
-// Make a record of I/O errors in imports so that we can retry them when appropriate.
-fn associate_failed_import(server: &mut Server, err: &nickel_lang_core::error::Error) {
-    if let nickel_lang_core::error::Error::ImportError(ImportError::IOError(name, _, pos)) = &err {
-        if let Some((filename, pos)) = PathBuf::from(name).file_name().zip(pos.into_opt()) {
-            server
-                .failed_imports
-                .entry(filename.to_owned())
-                .or_default()
-                .insert(pos.src_id);
-        }
-    }
-}
-
-pub(crate) fn typecheck(
-    server: &mut Server,
-    file_id: FileId,
-) -> Result<CacheOp<()>, Vec<Diagnostic<FileId>>> {
-    server
-        .cache
-        .typecheck_with_analysis(
-            file_id,
-            &server.initial_ctxt,
-            &server.initial_term_env,
-            &mut server.analysis,
-        )
-        .map_err(|error| match error {
-            CacheError::Error(tc_error) => tc_error
-                .into_iter()
-                .flat_map(|err| {
-                    associate_failed_import(server, &err);
-                    err.into_diagnostics(server.cache.files_mut(), None)
-                })
-                .collect(),
-            CacheError::NotParsed => unreachable!(),
-        })
-}
-
-fn parse_and_typecheck(server: &mut Server, file_id: FileId) -> Result<()> {
-    let (parse_errs, fatal) = match server.cache.parse(file_id) {
-        Ok(errs) => (errs.inner(), false),
-        Err(errs) => (errs, true),
-    };
-    let mut diags = parse_errs.into_diagnostics(server.cache.files_mut(), None);
-
-    if !fatal {
-        trace!("Parsed, checking types");
-        if let Err(e) = typecheck(server, file_id) {
-            diags.extend_from_slice(&e);
-        }
-    }
-    server.issue_diagnostics(file_id, diags);
-
-    Ok(())
-}
diff --git a/lsp/nls/src/incomplete.rs b/lsp/nls/src/incomplete.rs
index 44f912505c..0990eab565 100644
--- a/lsp/nls/src/incomplete.rs
+++ b/lsp/nls/src/incomplete.rs
@@ -15,7 +15,7 @@ use nickel_lang_core::{
     transform::import_resolution,
 };
 
-use crate::{files::typecheck, server::Server, usage::Environment};
+use crate::{usage::Environment, world::World};
 
 // Take a bunch of tokens and the end of a possibly-delimited sequence, and return the
 // index of the beginning of the possibly-delimited sequence. The sequence might not
@@ -94,18 +94,18 @@ fn path_start(toks: &[SpannedToken]) -> Option<usize> {
     }
 }
 
-fn resolve_imports(rt: RichTerm, server: &mut Server) -> RichTerm {
+fn resolve_imports(rt: RichTerm, world: &mut World) -> RichTerm {
     let import_resolution::tolerant::ResolveResult {
         transformed_term,
         resolved_ids,
         ..
-    } = import_resolution::tolerant::resolve_imports(rt, &mut server.cache);
+    } = import_resolution::tolerant::resolve_imports(rt, &mut world.cache);
 
     for id in resolved_ids {
-        if server.cache.parse(id).is_ok() {
+        if world.cache.parse(id).is_ok() {
             // If a new input got imported in an incomplete term, try to typecheck
             // (and build lookup tables etc.) for it, but don't issue diagnostics.
-            let _ = typecheck(server, id);
+            let _ = world.typecheck(id);
         }
     }
 
@@ -121,9 +121,9 @@ pub fn parse_path_from_incomplete_input(
     range: RawSpan,
     env: &Environment,
-    server: &mut Server,
+    world: &mut World,
 ) -> Option<RichTerm> {
-    let text = server.cache.files().source(range.src_id);
+    let text = world.cache.files().source(range.src_id);
     let subtext = &text[range.start.to_usize()..range.end.to_usize()];
 
     let lexer = lexer::Lexer::new(subtext);
@@ -150,15 +150,15 @@ pub fn parse_path_from_incomplete_input(
 
     // In order to help the input resolver find relative imports, we add a fake input whose parent
     // is the same as the real file.
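     // (`SourcePath::Snippet` keys the fake input separately in the cache, so
     // it never clobbers the real file's entry.)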
-    let path = PathBuf::from(server.cache.files().name(range.src_id));
-    let file_id = server
+    let path = PathBuf::from(world.cache.files().name(range.src_id));
+    let file_id = world
         .cache
         .replace_string(SourcePath::Snippet(path), to_parse);
 
-    match server.cache.parse_nocache(file_id) {
+    match world.cache.parse_nocache(file_id) {
         Ok((rt, _errors)) if !matches!(rt.as_ref(), Term::ParseError(_)) => {
-            server.analysis.insert_usage(file_id, &rt, env);
-            Some(resolve_imports(rt, server))
+            world.analysis.insert_usage(file_id, &rt, env);
+            Some(resolve_imports(rt, world))
         }
         _ => None,
     }
 }
diff --git a/lsp/nls/src/main.rs b/lsp/nls/src/main.rs
index 54879ef5f7..a78b90b47c 100644
--- a/lsp/nls/src/main.rs
+++ b/lsp/nls/src/main.rs
@@ -7,6 +7,7 @@ use lsp_server::Connection;
 
 mod actions;
 mod analysis;
+mod background;
 mod cache;
 mod codespan_lsp;
 mod command;
@@ -24,6 +25,8 @@ mod pattern;
 mod term;
 mod trace;
 mod usage;
+mod utils;
+mod world;
 
 use crate::trace::Trace;
 
@@ -33,6 +36,12 @@ struct Opt {
     /// The trace output file, disables tracing if not given
     #[arg(short, long)]
     trace: Option<PathBuf>,
+
+    /// The main server's id, in the platform-specific format used by the `ipc-channel` crate.
+    ///
+    /// If provided, this process will connect to the provided main server and run as a background worker.
+    #[arg(long)]
+    main_server: Option<String>,
 }
 
 fn main() -> Result<()> {
@@ -42,6 +51,11 @@ fn main() -> Result<()> {
 
     let options = Opt::parse();
 
+    if let Some(main_server) = options.main_server {
+        background::worker_main(main_server);
+        return Ok(());
+    }
+
     if let Some(file) = options.trace {
         debug!("Writing trace to {:?}", file.canonicalize()?);
         Trace::set_writer(csv::Writer::from_writer(io::BufWriter::new(
diff --git a/lsp/nls/src/requests/completion.rs b/lsp/nls/src/requests/completion.rs
index fea5aeb5d2..ef3b1c15d9 100644
--- a/lsp/nls/src/requests/completion.rs
+++ b/lsp/nls/src/requests/completion.rs
@@ -20,6 +20,7 @@ use crate::{
     incomplete,
     server::Server,
     usage::Environment,
+    world::World,
 };
 
 fn remove_duplicates_and_myself(
@@ -60,11 +61,11 @@ fn extract_static_path(mut rt: RichTerm) -> (RichTerm, Vec<Ident>) {
 fn sanitize_record_path_for_completion(
     term: &RichTerm,
     cursor: RawPos,
-    server: &mut Server,
+    world: &mut World,
 ) -> Option<RichTerm> {
     if let (Term::ParseError(_), Some(range)) = (term.term.as_ref(), term.pos.as_opt_ref()) {
         let mut range = *range;
-        let env = server
+        let env = world
             .analysis
             .get_env(term)
             .cloned()
             .unwrap_or_default();
         if cursor.index < range.start {
             return None;
         }
         range.end = cursor.index;
-        incomplete::parse_path_from_incomplete_input(range, &env, server)
+        incomplete::parse_path_from_incomplete_input(range, &env, world)
     } else if let Term::Op1(UnaryOp::StaticAccess(_), parent) = term.term.as_ref() {
         // For completing record paths, we discard the last path element: if we're
         // completing `foo.bar.bla`, we only look at `foo.bar` to find the completions.
@@ -105,18 +106,18 @@ impl From<CompletionItem> for lsp_types::CompletionItem {
     }
 }
 
-fn record_path_completion(term: RichTerm, server: &Server) -> Vec<CompletionItem> {
+fn record_path_completion(term: RichTerm, world: &World) -> Vec<CompletionItem> {
     log::info!("term based completion path: {term:?}");
 
     let (start_term, path) = extract_static_path(term);
 
-    let defs = FieldResolver::new(server).resolve_path(&start_term, path.iter().copied());
+    let defs = FieldResolver::new(world).resolve_path(&start_term, path.iter().copied());
     defs.iter().flat_map(Record::completion_items).collect()
 }
 
-fn env_completion(rt: &RichTerm, server: &Server) -> Vec<CompletionItem> {
-    let env = server.analysis.get_env(rt).cloned().unwrap_or_default();
-    let resolver = FieldResolver::new(server);
+fn env_completion(rt: &RichTerm, world: &World) -> Vec<CompletionItem> {
+    let env = world.analysis.get_env(rt).cloned().unwrap_or_default();
+    let resolver = FieldResolver::new(world);
     let mut items: Vec<_> = env
         .iter_elems()
         .map(|(_, def_with_path)| def_with_path.completion_item())
@@ -160,7 +161,10 @@ pub fn handle_completion(
     // 3), which does not contain the cursor. For most purposes we're interested
     // in querying information about foo, so to do that we use the position just
     // *before* the cursor.
-    let cursor = server.cache.position(&params.text_document_position)?;
+    let cursor = server
+        .world
+        .cache
+        .position(&params.text_document_position)?;
     let pos = RawPos {
         index: (cursor.index.0.saturating_sub(1)).into(),
         ..cursor
@@ -170,7 +174,7 @@ pub fn handle_completion(
         .as_ref()
         .and_then(|context| context.trigger_character.as_deref());
 
-    let term = server.lookup_term_by_position(pos)?.cloned();
+    let term = server.world.lookup_term_by_position(pos)?.cloned();
 
     if let Some(Term::Import(import)) = term.as_ref().map(|t| t.term.as_ref()) {
         // Don't respond with anything if trigger is a `.`, as that may be the
@@ -184,12 +188,12 @@ pub fn handle_completion(
 
     let sanitized_term = term
         .as_ref()
-        .and_then(|rt| sanitize_record_path_for_completion(rt, cursor, server));
+        .and_then(|rt| sanitize_record_path_for_completion(rt, cursor, &mut server.world));
 
     #[allow(unused_mut)] // needs to be mut with feature = old-completer
     let mut completions = match (sanitized_term, term) {
-        (Some(sanitized), _) => record_path_completion(sanitized, server),
-        (_, Some(term)) => env_completion(&term, server),
+        (Some(sanitized), _) => record_path_completion(sanitized, &server.world),
+        (_, Some(term)) => env_completion(&term, &server.world),
         (None, None) => Vec::new(),
     };
 
@@ -235,6 +239,7 @@ fn handle_import_completion(
     });
 
     let cached_entries = server
+        .world
         .file_uris
         .values()
         .filter_map(|uri| uri.to_file_path().ok())
diff --git a/lsp/nls/src/requests/formatting.rs b/lsp/nls/src/requests/formatting.rs
index e4d555d120..b5f0058fca 100644
--- a/lsp/nls/src/requests/formatting.rs
+++ b/lsp/nls/src/requests/formatting.rs
@@ -13,8 +13,8 @@ pub fn handle_format_document(
     server: &mut Server,
 ) -> Result<(), ResponseError> {
     let path = uri_to_path(&params.text_document.uri)?;
-    let file_id = server.cache.id_of(&SourcePath::Path(path)).unwrap();
-    let text = server.cache.files().source(file_id).clone();
+    let file_id = server.world.cache.id_of(&SourcePath::Path(path)).unwrap();
+    let text = server.world.cache.files().source(file_id).clone();
 
     let document_length = text.lines().count() as u32;
     let mut formatted: Vec<u8> = Vec::new();
diff --git a/lsp/nls/src/requests/goto.rs b/lsp/nls/src/requests/goto.rs
index 3726e44a0b..d69aacf449 100644
--- a/lsp/nls/src/requests/goto.rs
+++ b/lsp/nls/src/requests/goto.rs
@@ -5,18 +5,18 @@ use lsp_types::{GotoDefinitionParams, GotoDefinitionResponse, Location, Referenc
 use nickel_lang_core::position::RawSpan;
 use serde_json::Value;
 
-use crate::{cache::CacheExt, diagnostic::LocationCompat, server::Server};
+use crate::{cache::CacheExt, diagnostic::LocationCompat, server::Server, world::World};
 
-fn ids_to_locations(ids: impl IntoIterator<Item = RawSpan>, server: &Server) -> Vec<Location> {
+fn ids_to_locations(ids: impl IntoIterator<Item = RawSpan>, world: &World) -> Vec<Location> {
     let mut spans: Vec<_> = ids.into_iter().collect();
 
     // The sort order of our response is a little arbitrary. But we want to deduplicate, and we
     // don't want the response to be random.
-    spans.sort_by_key(|span| (server.cache.files().name(span.src_id), span.start, span.end));
+    spans.sort_by_key(|span| (world.cache.files().name(span.src_id), span.start, span.end));
     spans.dedup();
     spans
         .iter()
-        .map(|loc| Location::from_span(loc, server.cache.files()))
+        .map(|loc| Location::from_span(loc, world.cache.files()))
         .collect()
 }
 
@@ -26,15 +26,17 @@ pub fn handle_to_definition(
     server: &mut Server,
 ) -> Result<(), ResponseError> {
     let pos = server
+        .world
         .cache
         .position(&params.text_document_position_params)?;
 
-    let ident = server.lookup_ident_by_position(pos)?;
+    let ident = server.world.lookup_ident_by_position(pos)?;
 
     let locations = server
+        .world
         .lookup_term_by_position(pos)?
-        .map(|term| server.get_defs(term, ident))
-        .map(|defs| ids_to_locations(defs, server))
+        .map(|term| server.world.get_defs(term, ident))
+        .map(|defs| ids_to_locations(defs, &server.world))
         .unwrap_or_default();
 
     let response = if locations.is_empty() {
@@ -54,14 +56,17 @@ pub fn handle_references(
     id: RequestId,
     server: &mut Server,
 ) -> Result<(), ResponseError> {
-    let pos = server.cache.position(&params.text_document_position)?;
-    let ident = server.lookup_ident_by_position(pos)?;
+    let pos = server
+        .world
+        .cache
+        .position(&params.text_document_position)?;
+    let ident = server.world.lookup_ident_by_position(pos)?;
 
     // The "references" of a symbol are all the usages of its definitions,
     // so first find the definitions and then find their usages.
-    let term = server.lookup_term_by_position(pos)?;
+    let term = server.world.lookup_term_by_position(pos)?;
     let mut def_locs = term
-        .map(|term| server.get_defs(term, ident))
+        .map(|term| server.world.get_defs(term, ident))
         .unwrap_or_default();
 
     // Maybe the position is pointing straight at the definition already.
@@ -70,7 +75,7 @@ pub fn handle_references(
 
     let mut usages: HashSet<_> = def_locs
         .iter()
-        .flat_map(|id| server.analysis.get_usages(id))
+        .flat_map(|id| server.world.analysis.get_usages(id))
         .filter_map(|id| id.pos.into_opt())
         .collect();
 
@@ -79,10 +84,10 @@ pub fn handle_references(
     }
 
     for span in def_locs {
-        usages.extend(server.get_field_refs(span));
+        usages.extend(server.world.get_field_refs(span));
     }
 
-    let locations = ids_to_locations(usages, server);
+    let locations = ids_to_locations(usages, &server.world);
 
     if locations.is_empty() {
         server.reply(Response::new_ok(id, Value::Null));
diff --git a/lsp/nls/src/requests/hover.rs b/lsp/nls/src/requests/hover.rs
index 8a5da77a01..92cb54c7e5 100644
--- a/lsp/nls/src/requests/hover.rs
+++ b/lsp/nls/src/requests/hover.rs
@@ -15,6 +15,7 @@ use crate::{
     field_walker::{FieldResolver, Record},
     identifier::LocIdent,
     server::Server,
+    world::World,
 };
 
 #[derive(Debug, Default)]
@@ -64,8 +65,8 @@ fn values_and_metadata_from_field(
     (values, metadata)
 }
 
-fn ident_hover(ident: LocIdent, server: &Server) -> Option<HoverData> {
-    let ty = server.analysis.get_type_for_ident(&ident).cloned();
+fn ident_hover(ident: LocIdent, world: &World) -> Option<HoverData> {
+    let ty = world.analysis.get_type_for_ident(&ident).cloned();
     let span = ident.pos.into_opt()?;
     let mut ret = HoverData {
         values: Vec::new(),
@@ -74,8 +75,8 @@ fn ident_hover(ident: LocIdent, world: &World) -> Option<HoverData> {
         ty,
     };
 
-    if let Some(def) = server.analysis.get_def(&ident) {
-        let resolver = FieldResolver::new(server);
+    if let Some(def) = world.analysis.get_def(&ident) {
+        let resolver = FieldResolver::new(world);
         if let Some(((last, path), val)) = def.path().split_last().zip(def.value()) {
             let parents = resolver.resolve_path(val, path.iter().copied());
             let (values, metadata) = values_and_metadata_from_field(parents, *last);
@@ -99,13 +100,13 @@ fn ident_hover(ident: LocIdent, world: &World) -> Option<HoverData> {
     Some(ret)
 }
 
-fn term_hover(rt: &RichTerm, server: &Server) -> Option<HoverData> {
-    let ty = server.analysis.get_type(rt).cloned();
+fn term_hover(rt: &RichTerm, world: &World) -> Option<HoverData> {
+    let ty = world.analysis.get_type(rt).cloned();
     let span = rt.pos.into_opt();
 
     match rt.as_ref() {
         Term::Op1(UnaryOp::StaticAccess(id), parent) => {
-            let resolver = FieldResolver::new(server);
+            let resolver = FieldResolver::new(world);
             let parents = resolver.resolve_record(parent);
             let (values, metadata) = values_and_metadata_from_field(parents, id.ident());
             Some(HoverData {
@@ -130,15 +131,17 @@ pub fn handle(
     server: &mut Server,
 ) -> Result<(), ResponseError> {
     let pos = server
+        .world
         .cache
         .position(&params.text_document_position_params)?;
 
     let ident_hover_data = server
+        .world
         .lookup_ident_by_position(pos)?
-        .and_then(|ident| ident_hover(ident, server));
+        .and_then(|ident| ident_hover(ident, &server.world));
 
-    let term = server.lookup_term_by_position(pos)?;
-    let term_hover_data = term.and_then(|rt| term_hover(rt, server));
+    let term = server.world.lookup_term_by_position(pos)?;
+    let term_hover_data = term.and_then(|rt| term_hover(rt, &server.world));
 
     // We combine the hover information from the term (which can have better type information)
     // and the ident (which can have better metadata), but only when hovering over a `Var`.
@@ -186,7 +189,7 @@ pub fn handle(
                 contents: HoverContents::Array(contents),
                 range: hover
                     .span
-                    .map(|s| Range::from_span(&s, server.cache.files())),
+                    .map(|s| Range::from_span(&s, server.world.cache.files())),
             },
         ));
     } else {
diff --git a/lsp/nls/src/requests/rename.rs b/lsp/nls/src/requests/rename.rs
index 7b1af0f8c2..8f39f3d297 100644
--- a/lsp/nls/src/requests/rename.rs
+++ b/lsp/nls/src/requests/rename.rs
@@ -12,22 +12,29 @@ pub fn handle_rename(
     id: RequestId,
     server: &mut Server,
 ) -> Result<(), ResponseError> {
-    let pos = server.cache.position(&params.text_document_position)?;
+    let pos = server
+        .world
+        .cache
+        .position(&params.text_document_position)?;
 
-    let ident = server.lookup_ident_by_position(pos)?;
-    let term = server.lookup_term_by_position(pos)?;
+    let ident = server.world.lookup_ident_by_position(pos)?;
+    let term = server.world.lookup_term_by_position(pos)?;
 
     let mut def_locs = term
-        .map(|term| server.get_defs(term, ident))
+        .map(|term| server.world.get_defs(term, ident))
         .unwrap_or_default();
 
     def_locs.extend(ident.and_then(|id| id.pos.into_opt()));
 
     let mut all_positions: Vec<_> = def_locs
         .iter()
-        .flat_map(|id| server.analysis.get_usages(id))
+        .flat_map(|id| server.world.analysis.get_usages(id))
         .filter_map(|id| id.pos.into_opt())
         .chain(def_locs.iter().copied())
-        .chain(def_locs.iter().flat_map(|def| server.get_field_refs(*def)))
+        .chain(
+            def_locs
+                .iter()
+                .flat_map(|def| server.world.get_field_refs(*def)),
+        )
         .collect();
 
     // Sort in some arbitrary order, for determinism and deduplication.
@@ -37,9 +44,9 @@ pub fn handle_rename(
     // Group edits by file
     let mut changes = HashMap::<Url, Vec<TextEdit>>::new();
     for pos in all_positions {
-        let url = Url::from_file_path(server.cache.files().name(pos.src_id)).unwrap();
+        let url = Url::from_file_path(server.world.cache.files().name(pos.src_id)).unwrap();
         changes.entry(url).or_default().push(TextEdit {
-            range: Range::from_span(&pos, server.cache.files()),
+            range: Range::from_span(&pos, server.world.cache.files()),
             new_text: params.new_name.clone(),
         });
     }
diff --git a/lsp/nls/src/requests/symbols.rs b/lsp/nls/src/requests/symbols.rs
index 5c4d22be7d..d411e6e274 100644
--- a/lsp/nls/src/requests/symbols.rs
+++ b/lsp/nls/src/requests/symbols.rs
@@ -1,31 +1,31 @@
 use lsp_server::{RequestId, Response, ResponseError};
 use lsp_types::{DocumentSymbol, DocumentSymbolParams, SymbolKind};
-use nickel_lang_core::cache::SourcePath;
 use nickel_lang_core::typ::Type;
 
+use crate::cache::CacheExt as _;
 use crate::server::Server;
-use crate::{files::uri_to_path, term::RawSpanExt};
+use crate::term::RawSpanExt;
 
 pub fn handle_document_symbols(
     params: DocumentSymbolParams,
     id: RequestId,
     server: &mut Server,
 ) -> Result<(), ResponseError> {
-    let path = uri_to_path(&params.text_document.uri)?;
     let file_id = server
+        .world
         .cache
-        .id_of(&SourcePath::Path(path))
+        .file_id(&params.text_document.uri)?
         .ok_or_else(|| crate::error::Error::FileNotFound(params.text_document.uri.clone()))?;
 
-    let usage_lookups = &server.file_analysis(file_id)?.usage_lookup;
-    let type_lookups = &server.file_analysis(file_id)?.type_lookup;
+    let usage_lookups = &server.world.file_analysis(file_id)?.usage_lookup;
+    let type_lookups = &server.world.file_analysis(file_id)?.type_lookup;
 
     let mut symbols = usage_lookups
         .symbols()
         .filter_map(|ident| {
             let (file_id, span) = ident.pos.into_opt()?.to_range();
             let range =
-                crate::codespan_lsp::byte_span_to_range(server.cache.files(), file_id, span)
+                crate::codespan_lsp::byte_span_to_range(server.world.cache.files(), file_id, span)
                     .ok()?;
             let ty = type_lookups.idents.get(&ident);
diff --git a/lsp/nls/src/server.rs b/lsp/nls/src/server.rs
index cb30ce8ec7..87ad2a4eea 100644
--- a/lsp/nls/src/server.rs
+++ b/lsp/nls/src/server.rs
@@ -1,15 +1,8 @@
-use std::{
-    collections::{HashMap, HashSet},
-    ffi::OsString,
-};
-
 use anyhow::Result;
 use codespan::FileId;
-use codespan_reporting::diagnostic::Diagnostic;
+use crossbeam::select;
 use log::{debug, trace, warn};
-use lsp_server::{
-    Connection, ErrorCode, Message, Notification, RequestId, Response, ResponseError,
-};
+use lsp_server::{Connection, ErrorCode, Message, Notification, RequestId, Response};
 use lsp_types::{
     notification::Notification as _,
     notification::{DidChangeTextDocument, DidOpenTextDocument},
@@ -22,45 +15,27 @@ use lsp_types::{
     WorkDoneProgressOptions,
 };
 
-use nickel_lang_core::{
-    cache::{Cache, ErrorTolerance},
-    position::{RawPos, RawSpan, TermPos},
-    stdlib::StdlibModule,
-    term::{record::FieldMetadata, RichTerm, Term, UnaryOp},
-};
-use nickel_lang_core::{stdlib, typecheck::Context};
-
 use crate::{
     actions,
-    analysis::{Analysis, AnalysisRegistry},
-    cache::CacheExt,
+    background::BackgroundJobs,
     command,
-    diagnostic::DiagnosticCompat,
-    field_walker::{Def, FieldResolver},
-    identifier::LocIdent,
-    pattern::Bindings,
     requests::{completion, formatting, goto, hover, rename, symbols},
     trace::Trace,
+    world::World,
 };
 
 pub const COMPLETIONS_TRIGGERS: &[&str] = &[".", "\"", "/"];
 
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum Shutdown {
+    Shutdown,
+    Continue,
+}
+
 pub struct Server {
     pub connection: Connection,
-    pub cache: Cache,
-    /// In order to return diagnostics, we store the URL of each file we know about.
-    pub file_uris: HashMap<FileId, Url>,
-    pub analysis: AnalysisRegistry,
-    pub initial_ctxt: Context,
-    pub initial_term_env: crate::usage::Environment,
-
-    /// A map associating imported files with failed imports. This allows us to
-    /// invalidate the cached version of a file when one of its imports becomes available.
-    ///
-    /// The keys in this map are the filenames (just the basename; no directory) of the
-    /// files that failed to import, and the values in this map are the file ids that tried
-    /// to import it.
-    pub failed_imports: HashMap<OsString, HashSet<FileId>>,
+    pub world: World,
+    pub background_jobs: BackgroundJobs,
 }
 
 impl Server {
@@ -99,23 +74,10 @@ impl Server {
     }
 
     pub fn new(connection: Connection) -> Server {
-        let mut cache = Cache::new(ErrorTolerance::Tolerant);
-
-        if let Ok(nickel_path) = std::env::var("NICKEL_IMPORT_PATH") {
-            cache.add_import_paths(nickel_path.split(':'));
-        }
-
-        // We don't recover from failing to load the stdlib for now.
-        cache.load_stdlib().unwrap();
-        let initial_ctxt = cache.mk_type_ctxt().unwrap();
         Server {
             connection,
-            cache,
-            file_uris: HashMap::new(),
-            analysis: AnalysisRegistry::default(),
-            initial_ctxt,
-            initial_term_env: crate::usage::Environment::new(),
-            failed_imports: HashMap::new(),
+            world: World::default(),
+            background_jobs: BackgroundJobs::new(),
         }
     }
 
@@ -153,86 +115,89 @@ impl Server {
         ));
     }
 
-    fn linearize_stdlib(&mut self) -> Result<()> {
-        self.cache.load_stdlib().unwrap();
-        let cache = &mut self.cache;
-        for module in stdlib::modules() {
-            let file_id = cache.get_submodule_file_id(module).unwrap();
-            cache
-                .typecheck_with_analysis(
-                    file_id,
-                    &self.initial_ctxt,
-                    &self.initial_term_env,
-                    &mut self.analysis,
-                )
-                .unwrap();
-
-            // Add the std module to the environment (but not `internals`, because those symbols
-            // don't get their own namespace, and we don't want to use them for completion anyway).
-            if module == StdlibModule::Std {
-                // The term should always be populated by typecheck_with_analysis.
-                let term = cache.terms().get(&file_id).unwrap();
-                let name = module.name().into();
-                let def = Def::Let {
-                    ident: crate::identifier::LocIdent {
-                        ident: name,
-                        pos: TermPos::None,
-                    },
-                    value: term.term.clone(),
-                    path: Vec::new(),
-                };
-                self.initial_term_env.insert(name, def);
-            }
-        }
-        Ok(())
-    }
-
     pub fn run(&mut self) -> Result<()> {
         trace!("Running...");
-        self.linearize_stdlib()?;
-        while let Ok(msg) = self.connection.receiver.recv() {
-            trace!("Message: {:#?}", msg);
-            match msg {
-                Message::Request(req) => {
-                    let id = req.id.clone();
-                    match self.connection.handle_shutdown(&req) {
-                        Ok(true) => break,
-                        Ok(false) => self.handle_request(req)?,
-                        Err(err) => {
-                            // This only fails if a shutdown was
-                            // requested in the first place, so it
-                            // should definitely break out of the
-                            // loop.
-                            self.err(id, err);
-                            break;
-                        }
+        loop {
+            select! {
+                recv(self.connection.receiver) -> msg => {
+                    // Failure here means the connection was closed, so exit quietly.
+                    let Ok(msg) = msg else {
+                        break;
+                    };
+                    if self.handle_message(msg)? == Shutdown::Shutdown {
+                        break;
                     }
                 }
-                Message::Notification(notification) => {
-                    let _ = self.handle_notification(notification);
+                recv(self.background_jobs.receiver()) -> msg => {
+                    // Failure here means our background thread panicked, and that's a bug.
+                    let crate::background::Diagnostics { path, diagnostics } = msg.unwrap();
+                    let uri = Url::from_file_path(path).unwrap();
+                    let diagnostics = diagnostics.into_iter().map(From::from).collect();
+                    self.publish_diagnostics(uri, diagnostics);
                 }
-                Message::Response(_) => (),
+            }
+        }
+        while let Ok(msg) = self.connection.receiver.recv() {
+            if self.handle_message(msg)? == Shutdown::Shutdown {
+                break;
             }
         }
         Ok(())
     }
 
+    fn handle_message(&mut self, msg: Message) -> Result<Shutdown> {
+        trace!("Message: {:#?}", msg);
+        match msg {
+            Message::Request(req) => {
+                let id = req.id.clone();
+                match self.connection.handle_shutdown(&req) {
+                    Ok(true) => Ok(Shutdown::Shutdown),
+                    Ok(false) => {
+                        self.handle_request(req)?;
+                        Ok(Shutdown::Continue)
+                    }
+                    Err(err) => {
+                        // This only fails if a shutdown was
+                        // requested in the first place, so it
+                        // should definitely break out of the
+                        // loop.
+                        self.err(id, err);
+                        Ok(Shutdown::Shutdown)
+                    }
+                }
+            }
+            Message::Notification(notification) => {
+                let _ = self.handle_notification(notification);
+                Ok(Shutdown::Continue)
+            }
+            Message::Response(_) => Ok(Shutdown::Continue),
+        }
+    }
+
     fn handle_notification(&mut self, notification: Notification) -> Result<()> {
         match notification.method.as_str() {
             DidOpenTextDocument::METHOD => {
                 trace!("handle open notification");
-                crate::files::handle_open(
-                    self,
-                    serde_json::from_value::<DidOpenTextDocumentParams>(notification.params)?,
-                )
+                let params =
+                    serde_json::from_value::<DidOpenTextDocumentParams>(notification.params)?;
+                let uri = params.text_document.uri.clone();
+                let contents = params.text_document.text.clone();
+                crate::files::handle_open(self, params)?;
+                self.background_jobs.update_file(uri.clone(), contents);
+                self.background_jobs.eval_file(uri);
+                Ok(())
             }
             DidChangeTextDocument::METHOD => {
                 trace!("handle save notification");
-                crate::files::handle_save(
-                    self,
-                    serde_json::from_value::<DidChangeTextDocumentParams>(notification.params)?,
-                )
+                let params =
+                    serde_json::from_value::<DidChangeTextDocumentParams>(notification.params)?;
+                let uri = params.text_document.uri.clone();
+                let contents = params.content_changes[0].text.clone();
+                crate::files::handle_save(self, params)?;
+                self.background_jobs.update_file(uri.clone(), contents);
+                self.background_jobs.eval_file(uri);
+                Ok(())
            }
             _ => Ok(()),
         }
@@ -308,45 +273,20 @@ impl Server {
         Ok(())
     }
 
-    pub fn file_analysis(&self, file: FileId) -> Result<&Analysis, ResponseError> {
-        self.analysis
-            .analysis
-            .get(&file)
-            .ok_or_else(|| ResponseError {
-                data: None,
-                message: "File has not yet been parsed or cached.".to_owned(),
-                code: ErrorCode::ParseError as i32,
-            })
-    }
-
-    pub fn lookup_term_by_position(&self, pos: RawPos) -> Result<Option<RichTerm>, ResponseError> {
-        Ok(self
-            .file_analysis(pos.src_id)?
-            .position_lookup
-            .get(pos.index))
-    }
-
-    pub fn lookup_ident_by_position(
-        &self,
-        pos: RawPos,
-    ) -> Result<Option<LocIdent>, ResponseError> {
-        Ok(self
-            .file_analysis(pos.src_id)?
-            .position_lookup
-            .get_ident(pos.index))
-    }
-
-    pub fn issue_diagnostics(&mut self, file_id: FileId, diagnostics: Vec<Diagnostic<FileId>>) {
-        let Some(uri) = self.file_uris.get(&file_id).cloned() else {
+    pub fn issue_diagnostics(
+        &mut self,
+        file_id: FileId,
+        diagnostics: impl IntoIterator<Item = impl Into<lsp_types::Diagnostic>>,
+    ) {
+        let Some(uri) = self.world.file_uris.get(&file_id).cloned() else {
             warn!("tried to issue diagnostics for unknown file id {file_id:?}");
             return;
         };
+        let diagnostics = diagnostics.into_iter().map(Into::into).collect();
+        self.publish_diagnostics(uri, diagnostics)
+    }
 
-        let diagnostics: Vec<_> = diagnostics
-            .into_iter()
-            .flat_map(|d| lsp_types::Diagnostic::from_codespan(d, self.cache.files_mut()))
-            .collect();
-
+    pub fn publish_diagnostics(&mut self, uri: Url, diagnostics: Vec<lsp_types::Diagnostic>) {
         // Issue diagnostics even if they're empty (empty diagnostics are how the editor knows
         // that any previous errors were resolved).
         self.notify(lsp_server::Notification::new(
@@ -358,130 +298,4 @@ impl Server {
             },
         ));
     }
-
-    /// Finds all the locations at which a term (or possibly an ident within a term) is "defined".
-    ///
-    /// "Ident within a term" applies when the term is a record or a pattern binding, so that we
-    /// can refer to fields in a record, or specific idents in a pattern binding.
-    ///
-    /// The return value contains all the spans of all the definition locations. It's a span instead
-    /// of a `LocIdent` because when `term` is an import, the definition location is the whole
-    /// included file. In every other case, the definition location will be the span of a LocIdent.
-    pub fn get_defs(&self, term: &RichTerm, ident: Option<LocIdent>) -> Vec<RawSpan> {
-        // The inner function returning Option is just for ?-early-return convenience.
-        fn inner(
-            server: &Server,
-            term: &RichTerm,
-            ident: Option<LocIdent>,
-        ) -> Option<Vec<RawSpan>> {
-            let resolver = FieldResolver::new(server);
-            let ret = match (term.as_ref(), ident) {
-                (Term::Var(id), _) => {
-                    let id = LocIdent::from(*id);
-                    let def = server.analysis.get_def(&id)?;
-                    let cousins = resolver.cousin_defs(def);
-                    if cousins.is_empty() {
-                        vec![def.ident().pos.unwrap()]
-                    } else {
-                        cousins
-                            .into_iter()
-                            .filter_map(|(loc, _)| loc.pos.into_opt())
-                            .collect()
-                    }
-                }
-                (Term::Op1(UnaryOp::StaticAccess(id), parent), _) => {
-                    let parents = resolver.resolve_record(parent);
-                    parents
-                        .iter()
-                        .filter_map(|parent| {
-                            parent
-                                .field_loc(id.ident())
-                                .and_then(|def| def.pos.into_opt())
-                        })
-                        .collect()
-                }
-                (Term::LetPattern(pat, value, _), Some(hovered_id)) => {
-                    let (path, _, _) = pat
-                        .bindings()
-                        .into_iter()
-                        .find(|(_path, bound_id, _)| bound_id.ident() == hovered_id.ident)?;
-
-                    let (last, path) = path.split_last()?;
-                    let path: Vec<_> = path.iter().map(|id| id.ident()).collect();
-                    let parents = resolver.resolve_path(value, path.iter().copied());
-
-                    parents
-                        .iter()
-                        .filter_map(|parent| {
-                            parent
-                                .field_loc(last.ident())
-                                .and_then(|def| def.pos.into_opt())
-                        })
-                        .collect()
-                }
-                (Term::ResolvedImport(file), _) => {
-                    let pos = server.cache.terms().get(file)?.term.pos;
-                    vec![pos.into_opt()?]
-                }
-                (Term::RecRecord(..) | Term::Record(_), Some(id)) => {
-                    let def = Def::Field {
-                        ident: id,
-                        value: None,
-                        record: term.clone(),
-                        metadata: FieldMetadata::default(),
-                    };
-                    let cousins = resolver.cousin_defs(&def);
-                    cousins
-                        .into_iter()
-                        .filter_map(|(loc, _)| loc.pos.into_opt())
-                        .collect()
-                }
-                _ => {
-                    return None;
-                }
-            };
-            Some(ret)
-        }
-
-        inner(self, term, ident).unwrap_or_default()
-    }
-
-    /// If `span` is pointing at the identifier binding a record field, returns
-    /// all the places that the record field is referenced.
-    ///
-    /// This is a sort of inverse of `get_defs`, at least when the argument to `get_defs`
-    /// is a static access: the spans returned by this function are exactly the static accesses
-    /// that, when passed to `get_defs`, return `span`.
-    ///
-    /// This function can be expensive, because it calls `get_defs` on every static access
-    /// that could potentially be referencing this field.
-    pub fn get_field_refs(&self, span: RawSpan) -> Vec<RawSpan> {
-        // The inner function returning Option is just for ?-early-return convenience.
-        fn inner(server: &Server, span: RawSpan) -> Option<Vec<RawSpan>> {
-            let ident = server.lookup_ident_by_position(span.start_pos()).ok()??;
-            let term = server.lookup_term_by_position(span.start_pos()).ok()??;
-
-            if let Term::RecRecord(..)
| Term::Record(_) = term.as_ref() {
-                let accesses = server.analysis.get_static_accesses(ident.ident);
-                Some(
-                    accesses
-                        .into_iter()
-                        .filter_map(|access| {
-                            let Term::Op1(UnaryOp::StaticAccess(id), _) = access.as_ref() else {
-                                return None;
-                            };
-                            if server.get_defs(&access, None).contains(&span) {
-                                id.pos.into_opt()
-                            } else {
-                                None
-                            }
-                        })
-                        .collect(),
-                )
-            } else {
-                None
-            }
-        }
-        inner(self, span).unwrap_or_default()
-    }
 }
diff --git a/lsp/nls/src/utils.rs b/lsp/nls/src/utils.rs
new file mode 100644
index 0000000000..1b34a84217
--- /dev/null
+++ b/lsp/nls/src/utils.rs
@@ -0,0 +1,43 @@
+use nickel_lang_core::{
+    cache::Cache,
+    environment::Environment,
+    identifier::Ident,
+    position::TermPos,
+    stdlib::{self, StdlibModule},
+};
+
+use crate::{analysis::AnalysisRegistry, cache::CacheExt as _, field_walker::Def};
+
+pub(crate) fn initialize_stdlib(
+    cache: &mut Cache,
+    analysis: &mut AnalysisRegistry,
+) -> Environment<Ident, Def> {
+    cache.load_stdlib().unwrap();
+    let initial_ctxt = cache.mk_type_ctxt().unwrap();
+    let mut initial_env = Environment::default();
+
+    for module in stdlib::modules() {
+        let file_id = cache.get_submodule_file_id(module).unwrap();
+        cache
+            .typecheck_with_analysis(file_id, &initial_ctxt, &initial_env, analysis)
+            .unwrap();
+
+        // Add the std module to the environment (but not `internals`, because those symbols
+        // don't get their own namespace, and we don't want to use them for completion anyway).
+        if module == StdlibModule::Std {
+            // The term should always be populated by typecheck_with_analysis.
+            let term = cache.terms().get(&file_id).unwrap();
+            let name = module.name().into();
+            let def = Def::Let {
+                ident: crate::identifier::LocIdent {
+                    ident: name,
+                    pos: TermPos::None,
+                },
+                value: term.term.clone(),
+                path: Vec::new(),
+            };
+            initial_env.insert(name, def);
+        }
+    }
+    initial_env
+}
diff --git a/lsp/nls/src/world.rs b/lsp/nls/src/world.rs
new file mode 100644
index 0000000000..ddf93f76dc
--- /dev/null
+++ b/lsp/nls/src/world.rs
@@ -0,0 +1,354 @@
+use std::{
+    collections::{HashMap, HashSet},
+    ffi::OsString,
+    path::PathBuf,
+};
+
+use codespan::FileId;
+use lsp_server::{ErrorCode, ResponseError};
+use lsp_types::Url;
+use nickel_lang_core::{
+    cache::{Cache, CacheError, ErrorTolerance, SourcePath},
+    error::{ImportError, IntoDiagnostics},
+    position::{RawPos, RawSpan},
+    term::{record::FieldMetadata, RichTerm, Term, UnaryOp},
+    typecheck::Context,
+};
+
+use crate::{
+    analysis::{Analysis, AnalysisRegistry},
+    cache::CacheExt as _,
+    diagnostic::{DiagnosticCompat, SerializableDiagnostic},
+    field_walker::{Def, FieldResolver},
+    files::uri_to_path,
+    identifier::LocIdent,
+    pattern::Bindings as _,
+};
+
+/// All the state associated with the files we know about.
+///
+/// Includes cached analyses, cached parse trees, etc.
+pub struct World {
+    pub cache: Cache,
+    /// In order to return diagnostics, we store the URL of each file we know about.
+    pub file_uris: HashMap<FileId, Url>,
+    pub analysis: AnalysisRegistry,
+    pub initial_ctxt: Context,
+    pub initial_term_env: crate::usage::Environment,
+
+    /// A map associating imported files with failed imports. This allows us to
+    /// invalidate the cached version of a file when one of its imports becomes available.
+    ///
+    /// The keys in this map are the filenames (just the basename; no directory) of the
+    /// files that failed to import, and the values in this map are the file ids that tried
+    /// to import it.
+    pub failed_imports: HashMap<OsString, HashSet<FileId>>,
+}
+
+impl Default for World {
+    fn default() -> Self {
+        let mut cache = Cache::new(ErrorTolerance::Tolerant);
+        if let Ok(nickel_path) = std::env::var("NICKEL_IMPORT_PATH") {
+            cache.add_import_paths(nickel_path.split(':'));
+        }
+        // We don't recover from failing to load the stdlib for now.
+        cache.load_stdlib().unwrap();
+        let initial_ctxt = cache.mk_type_ctxt().unwrap();
+
+        let mut analysis = AnalysisRegistry::default();
+        let initial_term_env = crate::utils::initialize_stdlib(&mut cache, &mut analysis);
+
+        Self {
+            cache,
+            initial_ctxt,
+            analysis,
+            initial_term_env,
+            file_uris: HashMap::default(),
+            failed_imports: HashMap::default(),
+        }
+    }
+}
+
+impl World {
+    /// Adds a new file to our world.
+    ///
+    /// Returns a list of files that were invalidated by this addition. Even though this is a new
+    /// file, it can still invalidate existing files if they were already importing this file
+    /// directly from the filesystem instead of through the editor.
+    pub fn add_file(
+        &mut self,
+        uri: Url,
+        contents: String,
+    ) -> anyhow::Result<(FileId, HashSet<FileId>)> {
+        let path = uri_to_path(&uri)?;
+
+        // Invalidate the cache of every file that tried, but failed, to import a file
+        // with a name like this.
+        let mut invalid = path
+            .file_name()
+            .and_then(|name| self.failed_imports.remove(name))
+            .unwrap_or_default();
+
+        // Replace the path (as opposed to adding it): we may already have this file in the
+        // cache if it was imported by an already-open file.
+        let file_id = self.cache.replace_string(SourcePath::Path(path), contents);
+
+        // Invalidate any cached inputs that imported the newly-opened file, so that any
+        // cross-file references are updated.
+        invalid.extend(self.cache.get_rev_imports_transitive(file_id));
+
+        for rev_dep in &invalid {
+            self.analysis.remove(*rev_dep);
+            // Reset the cached state (Parsed is the earliest one) so that it will
+            // re-resolve its imports.
+            self.cache
+                .update_state(*rev_dep, nickel_lang_core::cache::EntryState::Parsed);
+        }
+
+        self.file_uris.insert(file_id, uri);
+
+        Ok((file_id, invalid))
+    }
+
+    /// Updates a file's contents.
+    ///
+    /// Returns a list of files that were invalidated by this change.
+    pub fn update_file(
+        &mut self,
+        uri: Url,
+        contents: String,
+    ) -> anyhow::Result<(FileId, HashSet<FileId>)> {
+        let path = uri_to_path(&uri)?;
+        let file_id = self.cache.replace_string(SourcePath::Path(path), contents);
+
+        let invalid = self.cache.get_rev_imports_transitive(file_id);
+        for f in &invalid {
+            self.analysis.remove(*f);
+        }
+        Ok((file_id, invalid))
+    }
+
+    pub fn lsp_diagnostics(
+        &mut self,
+        file_id: FileId,
+        err: impl IntoDiagnostics<FileId>,
+    ) -> Vec<SerializableDiagnostic> {
+        let stdlib_ids = self.cache.get_all_stdlib_modules_file_id();
+        err.into_diagnostics(self.cache.files_mut(), stdlib_ids.as_ref())
+            .into_iter()
+            .flat_map(|d| SerializableDiagnostic::from_codespan(file_id, d, self.cache.files_mut()))
+            .collect()
+    }
+
+    // Make a record of I/O errors in imports so that we can retry them when appropriate.
+    fn associate_failed_import(&mut self, err: &nickel_lang_core::error::Error) {
+        if let nickel_lang_core::error::Error::ImportError(ImportError::IOError(name, _, pos)) =
+            &err
+        {
+            if let Some((filename, pos)) = PathBuf::from(name).file_name().zip(pos.into_opt()) {
+                self.failed_imports
+                    .entry(filename.to_owned())
+                    .or_default()
+                    .insert(pos.src_id);
+            }
+        }
+    }
+
+    /// Returns `Ok` for recoverable (or no) errors, or `Err` for fatal errors.
+    pub fn parse(
+        &mut self,
+        file_id: FileId,
+    ) -> Result<Vec<SerializableDiagnostic>, Vec<SerializableDiagnostic>> {
+        self.cache
+            .parse(file_id)
+            .map(|nonfatal| self.lsp_diagnostics(file_id, nonfatal.inner()))
+            .map_err(|fatal| self.lsp_diagnostics(file_id, fatal))
+    }
+
+    /// Typechecks a file, returning diagnostics on error.
+    ///
+    /// Panics if the file has not yet been parsed. (Use [`World::parse_and_typecheck`] if you
+    /// want to do both.)
+    pub fn typecheck(&mut self, file_id: FileId) -> Result<(), Vec<SerializableDiagnostic>> {
+        self.cache
+            .typecheck_with_analysis(
+                file_id,
+                &self.initial_ctxt,
+                &self.initial_term_env,
+                &mut self.analysis,
+            )
+            .map_err(|error| match error {
+                CacheError::Error(tc_error) => tc_error
+                    .into_iter()
+                    .flat_map(|err| {
+                        self.associate_failed_import(&err);
+                        self.lsp_diagnostics(file_id, err)
+                    })
+                    .collect::<Vec<_>>(),
+                CacheError::NotParsed => panic!("must parse first!"),
+            })?;
+
+        Ok(())
+    }
+
+    pub fn parse_and_typecheck(&mut self, file_id: FileId) -> Vec<SerializableDiagnostic> {
+        match self.parse(file_id) {
+            Ok(mut nonfatal) => {
+                if let Err(e) = self.typecheck(file_id) {
+                    nonfatal.extend(e);
+                }
+                nonfatal
+            }
+            Err(fatal) => fatal,
+        }
+    }
+
+    pub fn file_analysis(&self, file: FileId) -> Result<&Analysis, ResponseError> {
+        self.analysis
+            .analysis
+            .get(&file)
+            .ok_or_else(|| ResponseError {
+                data: None,
+                message: "File has not yet been parsed or cached.".to_owned(),
+                code: ErrorCode::ParseError as i32,
+            })
+    }
+
+    pub fn lookup_term_by_position(&self, pos: RawPos) -> Result<Option<RichTerm>, ResponseError> {
+        Ok(self
+            .file_analysis(pos.src_id)?
+            .position_lookup
+            .get(pos.index))
+    }
+
+    pub fn lookup_ident_by_position(
+        &self,
+        pos: RawPos,
+    ) -> Result<Option<LocIdent>, ResponseError> {
+        Ok(self
+            .file_analysis(pos.src_id)?
+            .position_lookup
+            .get_ident(pos.index))
+    }
+
+    /// Finds all the locations at which a term (or possibly an ident within a term) is "defined".
+    ///
+    /// "Ident within a term" applies when the term is a record or a pattern binding, so that we
+    /// can refer to fields in a record, or specific idents in a pattern binding.
+    ///
+    /// The return value contains all the spans of all the definition locations. It's a span instead
+    /// of a `LocIdent` because when `term` is an import, the definition location is the whole
+    /// included file. In every other case, the definition location will be the span of a LocIdent.
+    pub fn get_defs(&self, term: &RichTerm, ident: Option<LocIdent>) -> Vec<RawSpan> {
+        // The inner function returning Option is just for ?-early-return convenience.
+        fn inner(world: &World, term: &RichTerm, ident: Option<LocIdent>) -> Option<Vec<RawSpan>> {
+            let resolver = FieldResolver::new(world);
+            let ret = match (term.as_ref(), ident) {
+                (Term::Var(id), _) => {
+                    let id = LocIdent::from(*id);
+                    let def = world.analysis.get_def(&id)?;
+                    let cousins = resolver.cousin_defs(def);
+                    if cousins.is_empty() {
+                        vec![def.ident().pos.unwrap()]
+                    } else {
+                        cousins
+                            .into_iter()
+                            .filter_map(|(loc, _)| loc.pos.into_opt())
+                            .collect()
+                    }
+                }
+                (Term::Op1(UnaryOp::StaticAccess(id), parent), _) => {
+                    let parents = resolver.resolve_record(parent);
+                    parents
+                        .iter()
+                        .filter_map(|parent| {
+                            parent
+                                .field_loc(id.ident())
+                                .and_then(|def| def.pos.into_opt())
+                        })
+                        .collect()
+                }
+                (Term::LetPattern(pat, value, _), Some(hovered_id)) => {
+                    let (path, _, _) = pat
+                        .bindings()
+                        .into_iter()
+                        .find(|(_path, bound_id, _)| bound_id.ident() == hovered_id.ident)?;
+
+                    let (last, path) = path.split_last()?;
+                    let path: Vec<_> = path.iter().map(|id| id.ident()).collect();
+                    let parents = resolver.resolve_path(value, path.iter().copied());
+                    parents
+                        .iter()
+                        .filter_map(|parent| {
+                            parent
+                                .field_loc(last.ident())
+                                .and_then(|def| def.pos.into_opt())
+                        })
+                        .collect()
+                }
+                (Term::ResolvedImport(file), _) => {
+                    let pos = world.cache.terms().get(file)?.term.pos;
+                    vec![pos.into_opt()?]
+                }
+                (Term::RecRecord(..) | Term::Record(_), Some(id)) => {
+                    let def = Def::Field {
+                        ident: id,
+                        value: None,
+                        record: term.clone(),
+                        metadata: FieldMetadata::default(),
+                    };
+                    let cousins = resolver.cousin_defs(&def);
+                    cousins
+                        .into_iter()
+                        .filter_map(|(loc, _)| loc.pos.into_opt())
+                        .collect()
+                }
+                _ => {
+                    return None;
+                }
+            };
+            Some(ret)
+        }
+
+        inner(self, term, ident).unwrap_or_default()
+    }
+
+    /// If `span` is pointing at the identifier binding a record field, returns
+    /// all the places that the record field is referenced.
+    ///
+    /// This is a sort of inverse of `get_defs`, at least when the argument to `get_defs`
+    /// is a static access: the spans returned by this function are exactly the static accesses
+    /// that, when passed to `get_defs`, return `span`.
+    ///
+    /// This function can be expensive, because it calls `get_defs` on every static access
+    /// that could potentially be referencing this field.
+    pub fn get_field_refs(&self, span: RawSpan) -> Vec<RawSpan> {
+        // The inner function returning Option is just for ?-early-return convenience.
+        fn inner(world: &World, span: RawSpan) -> Option<Vec<RawSpan>> {
+            let ident = world.lookup_ident_by_position(span.start_pos()).ok()??;
+            let term = world.lookup_term_by_position(span.start_pos()).ok()??;
+
+            if let Term::RecRecord(..)
| Term::Record(_) = term.as_ref() { + let accesses = world.analysis.get_static_accesses(ident.ident); + Some( + accesses + .into_iter() + .filter_map(|access| { + let Term::Op1(UnaryOp::StaticAccess(id), _) = access.as_ref() else { + return None; + }; + if world.get_defs(&access, None).contains(&span) { + id.pos.into_opt() + } else { + None + } + }) + .collect(), + ) + } else { + None + } + } + inner(self, span).unwrap_or_default() + } +} diff --git a/lsp/nls/tests/inputs/diagnostics-basic.ncl b/lsp/nls/tests/inputs/diagnostics-basic.ncl new file mode 100644 index 0000000000..18a483e180 --- /dev/null +++ b/lsp/nls/tests/inputs/diagnostics-basic.ncl @@ -0,0 +1,6 @@ +### /diagnostics-basic.ncl +{ + num | Number = "a", + num2 = 'hi, +} | { num2 | Number, other | String, num } +### diagnostic = ["file:///diagnostics-basic.ncl"] diff --git a/lsp/nls/tests/main.rs b/lsp/nls/tests/main.rs index 3620f22665..71e399f049 100644 --- a/lsp/nls/tests/main.rs +++ b/lsp/nls/tests/main.rs @@ -17,7 +17,8 @@ fn check_snapshots(path: &str) { for req in fixture.reqs { harness.request_dyn(req); } - harness.drain_notifications(); + + harness.drain_diagnostics(fixture.expected_diags.iter().cloned()); let output = String::from_utf8(harness.out).unwrap(); insta::assert_snapshot!(path, output); @@ -31,7 +32,7 @@ fn refresh_missing_imports() { let url = |s: &str| lsp_types::Url::from_file_path(s).unwrap(); harness.send_file(url("/test.ncl"), "import \"dep.ncl\""); let diags = harness.wait_for_diagnostics().diagnostics; - assert_eq!(1, diags.len()); + assert_eq!(2, diags.len()); assert!(diags[0].message.contains("import of dep.ncl failed")); // Now provide the import. @@ -39,11 +40,21 @@ fn refresh_missing_imports() { // Check that we get back clean diagnostics for both files. // (LSP doesn't define the order, but we happen to know it) - let diags = harness.wait_for_diagnostics(); - assert_eq!(diags.uri.path(), "/dep.ncl"); - assert!(diags.diagnostics.is_empty()); + // Loop because we can get back the diagnostics twice from each + // file (once from synchronous typechecking, once from eval in the background). + loop { + let diags = harness.wait_for_diagnostics(); + assert!(diags.diagnostics.is_empty()); + if diags.uri.path() == "/dep.ncl" { + break; + } + } - let diags = harness.wait_for_diagnostics(); - assert_eq!(diags.uri.path(), "/test.ncl"); - assert!(diags.diagnostics.is_empty()); + loop { + let diags = harness.wait_for_diagnostics(); + assert!(diags.diagnostics.is_empty()); + if diags.uri.path() == "/test.ncl" { + break; + } + } } diff --git a/lsp/nls/tests/snapshots/main__lsp__nls__tests__inputs__diagnostics-basic.ncl.snap b/lsp/nls/tests/snapshots/main__lsp__nls__tests__inputs__diagnostics-basic.ncl.snap new file mode 100644 index 0000000000..2fcaa8fc28 --- /dev/null +++ b/lsp/nls/tests/snapshots/main__lsp__nls__tests__inputs__diagnostics-basic.ncl.snap @@ -0,0 +1,11 @@ +--- +source: lsp/nls/tests/main.rs +expression: output +--- +(file:///diagnostics-basic.ncl, 1:8-1:14: contract broken by the value of `num`) +(file:///diagnostics-basic.ncl, 1:8-1:14: expected type) +(file:///diagnostics-basic.ncl, 1:17-1:20: applied to this expression) +(file:///diagnostics-basic.ncl, 2:9-2:12: applied to this expression) +(file:///diagnostics-basic.ncl, 3:13-3:19: contract broken by the value of `num2`) +(file:///diagnostics-basic.ncl, 3:13-3:19: expected type) +
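
The new `Server::run` multiplexes two channels with crossbeam's `select!`: LSP traffic from the client and diagnostics produced by background evaluation. A minimal, self-contained sketch of that loop shape, where `Lsp` and `Diag` are hypothetical stand-ins for `lsp_server::Message` and `crate::background::Diagnostics`:

    // Two producers feed one event loop; neither can starve the other.
    use crossbeam::channel::unbounded;
    use crossbeam::select;
    use std::thread;

    enum Lsp {
        Request(u32),
        Exit,
    }

    struct Diag {
        path: String,
    }

    fn main() {
        let (lsp_tx, lsp_rx) = unbounded();
        let (diag_tx, diag_rx) = unbounded();

        // Simulated client: one request, then a clean exit.
        thread::spawn(move || {
            lsp_tx.send(Lsp::Request(1)).unwrap();
            lsp_tx.send(Lsp::Exit).unwrap();
        });
        // Simulated background evaluator reporting diagnostics.
        thread::spawn(move || {
            diag_tx.send(Diag { path: "/test.ncl".into() }).unwrap();
        });

        loop {
            select! {
                // Client traffic is handled on one arm...
                recv(lsp_rx) -> msg => match msg {
                    Ok(Lsp::Request(id)) => println!("handling request {id}"),
                    // A closed channel or an exit message ends the loop.
                    Ok(Lsp::Exit) | Err(_) => break,
                },
                // ...while background diagnostics arrive on the other, so a
                // slow evaluation never blocks request handling.
                recv(diag_rx) -> msg => {
                    if let Ok(d) = msg {
                        println!("publishing diagnostics for {}", d.path);
                    }
                }
            }
        }
    }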
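In `handle_notification`, the server thread only enqueues work: `BackgroundJobs::update_file` and `eval_file` hand the file off to a worker that owns the evaluator and reports diagnostics back on its own channel. The patch's actual `background.rs` (which also supervises and kills misbehaving workers) is not shown in this hunk; the following is a minimal sketch of that handle's API shape under those assumptions, with all types hypothetical:

    // A channel-backed job queue: the LSP thread never evaluates anything itself.
    use crossbeam::channel::{unbounded, Receiver, Sender};
    use std::collections::HashMap;
    use std::thread;

    enum Job {
        UpdateFile(String, String), // (uri, contents)
        EvalFile(String),
    }

    struct Diagnostics {
        path: String,
        messages: Vec<String>,
    }

    struct BackgroundJobs {
        sender: Sender<Job>,
        receiver: Receiver<Diagnostics>,
    }

    impl BackgroundJobs {
        fn new() -> Self {
            let (job_tx, job_rx) = unbounded::<Job>();
            let (diag_tx, diag_rx) = unbounded();
            thread::spawn(move || {
                let mut files = HashMap::new();
                for job in job_rx {
                    match job {
                        Job::UpdateFile(uri, contents) => {
                            files.insert(uri, contents);
                        }
                        Job::EvalFile(uri) => {
                            // A real worker would evaluate the file here and
                            // collect contract failures as diagnostics.
                            let n = files.get(&uri).map_or(0, |c: &String| c.len());
                            let _ = diag_tx.send(Diagnostics {
                                path: uri,
                                messages: vec![format!("evaluated {n} bytes")],
                            });
                        }
                    }
                }
            });
            BackgroundJobs { sender: job_tx, receiver: diag_rx }
        }

        fn update_file(&self, uri: String, contents: String) {
            let _ = self.sender.send(Job::UpdateFile(uri, contents));
        }

        fn eval_file(&self, uri: String) {
            let _ = self.sender.send(Job::EvalFile(uri));
        }

        fn receiver(&self) -> &Receiver<Diagnostics> {
            &self.receiver
        }
    }

    fn main() {
        let jobs = BackgroundJobs::new();
        jobs.update_file("/test.ncl".into(), "1 + 1".into());
        jobs.eval_file("/test.ncl".into());
        let diag = jobs.receiver().recv().unwrap();
        println!("{}: {:?}", diag.path, diag.messages);
    }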
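`World::add_file` and `World::update_file` both invalidate the cached analysis of every file that transitively imports the changed one (via `get_rev_imports_transitive`), which is also why the `refresh_missing_imports` test sees fresh diagnostics for both files. A sketch of that reverse-import walk over a plain map, with a hypothetical `FileId` and reverse-import table standing in for the core cache:

    use std::collections::{HashMap, HashSet};

    type FileId = u32;

    // rev_imports maps a file to the set of files that import it directly.
    fn invalidate_rev_deps(
        rev_imports: &HashMap<FileId, HashSet<FileId>>,
        changed: FileId,
    ) -> HashSet<FileId> {
        let mut invalid = HashSet::new();
        let mut stack = vec![changed];
        while let Some(file) = stack.pop() {
            for &importer in rev_imports.get(&file).into_iter().flatten() {
                // Only recurse the first time we see an importer, so import
                // cycles terminate.
                if invalid.insert(importer) {
                    stack.push(importer);
                }
            }
        }
        invalid
    }

    fn main() {
        // File 1 imports file 0; file 2 imports file 1.
        // Changing file 0 therefore invalidates files 1 and 2.
        let rev = HashMap::from([(0, HashSet::from([1])), (1, HashSet::from([2]))]);
        assert_eq!(invalidate_rev_deps(&rev, 0), HashSet::from([1, 2]));
    }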
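`World::parse` distinguishes recoverable diagnostics (`Ok`, thanks to the cache's error tolerance) from fatal ones (`Err`), and `parse_and_typecheck` merges typechecking diagnostics into the recoverable set. A toy model of that contract, with strings standing in for `SerializableDiagnostic` and trivial parse/typecheck rules invented for illustration:

    type Diagnostic = String;

    fn parse(src: &str) -> Result<Vec<Diagnostic>, Vec<Diagnostic>> {
        if src.is_empty() {
            // Fatal: nothing to analyze at all.
            Err(vec!["fatal: empty input".into()])
        } else if src.contains("??") {
            // Recoverable: we still get a usable term, but report the glitch.
            Ok(vec!["warning: stray `??`".into()])
        } else {
            Ok(Vec::new())
        }
    }

    fn typecheck(src: &str) -> Result<(), Vec<Diagnostic>> {
        if src.contains("1 + \"a\"") {
            Err(vec!["type error: Number + String".into()])
        } else {
            Ok(())
        }
    }

    // Mirrors `parse_and_typecheck`: fatal parse errors short-circuit,
    // otherwise typecheck diagnostics extend the recoverable parse ones.
    fn parse_and_typecheck(src: &str) -> Vec<Diagnostic> {
        match parse(src) {
            Ok(mut nonfatal) => {
                if let Err(e) = typecheck(src) {
                    nonfatal.extend(e);
                }
                nonfatal
            }
            Err(fatal) => fatal,
        }
    }

    fn main() {
        assert_eq!(parse_and_typecheck("1 + \"a\"").len(), 1);
        assert!(parse_and_typecheck("ok").is_empty());
    }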
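Finally, `World::get_field_refs` is documented as a partial inverse of `get_defs`: a static access belongs to the references of `span` exactly when `get_defs` maps that access back to `span`. A small sketch of just that filtering step over a precomputed table, with hypothetical `Span` and `Access` ids in place of real terms and positions:

    use std::collections::HashMap;

    type Span = u32;
    type Access = u32;

    // defs: for each static access, the spans where the accessed field is
    // defined (what `get_defs` would return for it).
    fn field_refs(defs: &HashMap<Access, Vec<Span>>, span: Span) -> Vec<Access> {
        defs.iter()
            .filter(|(_, d)| d.contains(&span))
            .map(|(a, _)| *a)
            .collect()
    }

    fn main() {
        let defs = HashMap::from([(10, vec![1]), (11, vec![1, 2]), (12, vec![3])]);
        let mut refs = field_refs(&defs, 1);
        refs.sort();
        // Accesses 10 and 11 resolve to span 1, so they are its references;
        // this is also why the real function can be expensive: it must try
        // `get_defs` on every candidate access.
        assert_eq!(refs, vec![10, 11]);
    }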