From 8e9b7382b576239bf35897d9bfdfa0783d2a19dd Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Mon, 25 Nov 2024 14:27:12 +0100 Subject: [PATCH 1/3] Detect assignments in calls to avoid false positive lints --- CHANGELOG.md | 3 +- crates/ark/src/lsp/diagnostics.rs | 89 ++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ee39f2ca..2b50f7aae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ## 2024-11 +- LSP: Assignments in function calls (e.g. `list(x <- 1)`) are now detected by the missing symbol linter to avoid annoying false positive diagnostics (https://github.com/posit-dev/positron/issues/3048). The downside is that this causes false negatives when the assignment happens in a call with local scope, e.g. in `local()` or `test_that()`. We prefer to be overly permissive than overly cautious in these matters. + - Jupyter: The following environment variables are now set in the same way that R does: - `R_SHARE_DIR` @@ -12,7 +14,6 @@ This solves a number of problems in situations that depend on these variables being defined (https://github.com/posit-dev/positron/issues/3637). - ## 2024-10 - Objects assigned at top level are now indexed, in addition to assigned functions. When a name is assigned multiple times, we now only index the first occurrence. This allows you to jump to the first "declaration" of the variable. In the future we'll improve this mechanism so that you can jump to the most recent assignment. diff --git a/crates/ark/src/lsp/diagnostics.rs b/crates/ark/src/lsp/diagnostics.rs index 7a0c1bc2b..c9d44ccb5 100644 --- a/crates/ark/src/lsp/diagnostics.rs +++ b/crates/ark/src/lsp/diagnostics.rs @@ -785,9 +785,24 @@ fn recurse_call_arguments_default( // TODO: Can we better handle NSE in things like `quote()` and // `dplyr::mutate()` so we don't have to turn off certain diagnostics when // we are inside a call's arguments? - let mut context = context.clone(); + + // We used to clone `context` here to prevent assignments from applying to + // the caller. We now purposely preserve the caller's context with its + // `document_symbols`. We explicitly want to index symbols assigned in the + // call arguments to avoid false positive lints about missing symbols, see + // https://github.com/posit-dev/positron/issues/3048. + // + // Because of the way we traverse the syntax tree, this makes the assumption + // that execution order is deterministic from left to right. This is not + // necessarily correct since arguments are lazily evaluated, and whether + // this is true depends on the function's implementation. For now we assume + // every function behaves like `list()`, which is our default model of + // strict evaluation. + + // Save `in_call` to restore it on exit. Necessary to handle nested calls + // and maintain the state to `true` until we've left the outermost call. + let in_call = context.in_call; context.in_call = true; - let context = &mut context; // Recurse into arguments. if let Some(arguments) = node.child_by_field_name("arguments") { @@ -804,6 +819,7 @@ fn recurse_call_arguments_default( } } + context.in_call = in_call; ().ok() } @@ -1311,11 +1327,7 @@ foo let document = Document::new(code, None); let diagnostics = generate_diagnostics(document.clone(), DEFAULT_STATE.clone()); - assert_eq!(diagnostics.len(), 1); - - let diagnostic = diagnostics.get(0).unwrap(); - assert_eq!(diagnostic.range.start.line, 2); - insta::assert_snapshot!(diagnostic.message); + assert_eq!(diagnostics.len(), 0); }) } @@ -1331,31 +1343,70 @@ foo let document = Document::new(code, None); let diagnostics = generate_diagnostics(document.clone(), DEFAULT_STATE.clone()); - assert_eq!(diagnostics.len(), 1); - - let diagnostic = diagnostics.get(0).unwrap(); - assert_eq!(diagnostic.range.start.line, 2); - insta::assert_snapshot!(diagnostic.message); + assert_eq!(diagnostics.len(), 0); }) } #[test] - fn test_assignment_within_function_arguments() { - // TODO: `x` should be defined in the global env and there should not be a diagnostic here + fn test_assignment_within_call() { + // https://github.com/posit-dev/positron/issues/3048 + // With our current approach we also incorrectly index symbols in calls + // with local scopes such as `local()` or `test_that()`. We prefer to be + // overly permissive than the opposite to avoid annoying false positive + // diagnostics. r_task(|| { + let code = " + list(x <- 1) + x + "; + let document = Document::new(code, None); + + assert_eq!( + generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), + 0 + ); + let code = " list({ x <- 1 }) x "; + let document = Document::new(code, None); + + assert_eq!( + generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), + 0 + ); + }) + } + #[test] + fn test_no_symbol_diagnostics_in_calls() { + // For now we never check for missing symbols inside calls because we + // don't have a good way to deal with NSE in functions like `quote()` or + // `mutate()`. + r_task(|| { + let code = " + list(x) + "; let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document.clone(), DEFAULT_STATE.clone()); - assert_eq!(diagnostics.len(), 1); + assert_eq!( + generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), + 0 + ); - let diagnostic = diagnostics.get(0).unwrap(); - assert_eq!(diagnostic.range.start.line, 2); - insta::assert_snapshot!(diagnostic.message); + // Important to test nested case. We have a dynamic stack of state + // variable to keep track of whether we are in a call. The inner + // call should restore the outer state on exit. + let code = " + list(list(), x) + "; + let document = Document::new(code, None); + + assert_eq!( + generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), + 0 + ); }) } } From 3f86ae1f4b7bee187398dd6cac304a2158cf73d8 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 27 Nov 2024 16:59:47 +0100 Subject: [PATCH 2/3] Address code review --- crates/ark/src/lsp/diagnostics.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/lsp/diagnostics.rs b/crates/ark/src/lsp/diagnostics.rs index c9d44ccb5..2f292aabb 100644 --- a/crates/ark/src/lsp/diagnostics.rs +++ b/crates/ark/src/lsp/diagnostics.rs @@ -1333,7 +1333,6 @@ foo #[test] fn test_dotty_assignment_within_native_pipe_braced_expr() { - // TODO: `apple` should be defined in the global env and there should not be a diagnostic here r_task(|| { let code = " mtcars |> list({ .[apple] <- 1; apple }) @@ -1407,6 +1406,18 @@ foo generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), 0 ); + + // `in_call` state variable is reset + let code = " + list() + x + "; + let document = Document::new(code, None); + + assert_eq!( + generate_diagnostics(document.clone(), DEFAULT_STATE.clone()).len(), + 1 + ); }) } } From 89c4b090f2b8b588959f30ec6e047bcc27f16200 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Wed, 27 Nov 2024 17:11:56 +0100 Subject: [PATCH 3/3] Protect state restoration from early returns --- crates/ark/src/lsp/diagnostics.rs | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/crates/ark/src/lsp/diagnostics.rs b/crates/ark/src/lsp/diagnostics.rs index 2f292aabb..d84778885 100644 --- a/crates/ark/src/lsp/diagnostics.rs +++ b/crates/ark/src/lsp/diagnostics.rs @@ -781,7 +781,7 @@ fn recurse_call_arguments_default( node: Node, context: &mut DiagnosticContext, diagnostics: &mut Vec, -) -> Result<()> { +) -> anyhow::Result<()> { // TODO: Can we better handle NSE in things like `quote()` and // `dplyr::mutate()` so we don't have to turn off certain diagnostics when // we are inside a call's arguments? @@ -804,23 +804,26 @@ fn recurse_call_arguments_default( let in_call = context.in_call; context.in_call = true; - // Recurse into arguments. - if let Some(arguments) = node.child_by_field_name("arguments") { - let mut cursor = arguments.walk(); - let children = arguments.children_by_field_name("argument", &mut cursor); - for child in children { - // Warn if the next sibling is neither a comma nor a closing delimiter. - check_call_next_sibling(child, context, diagnostics)?; - - // Recurse into values. - if let Some(value) = child.child_by_field_name("value") { - recurse(value, context, diagnostics)?; + let result = (|| -> anyhow::Result<()> { + // Recurse into arguments. + if let Some(arguments) = node.child_by_field_name("arguments") { + let mut cursor = arguments.walk(); + let children = arguments.children_by_field_name("argument", &mut cursor); + for child in children { + // Warn if the next sibling is neither a comma nor a closing delimiter. + check_call_next_sibling(child, context, diagnostics)?; + + // Recurse into values. + if let Some(value) = child.child_by_field_name("value") { + recurse(value, context, diagnostics)?; + } } } - } + Ok(()) + })(); context.in_call = in_call; - ().ok() + result } fn recurse_call(