From 5138f822abefe77fa76230140d2d2f51c91c17f3 Mon Sep 17 00:00:00 2001 From: Jason Foral Date: Fri, 3 Jan 2025 17:58:09 -0500 Subject: [PATCH] Refactor timeout watchdog thread into its own struct so the caller doesn't need to explicitly manage state. --- .../src/analysis/ddsa_lib.rs | 1 + .../analysis/ddsa_lib/resource_watchdog.rs | 264 ++++++++++++++++++ .../src/analysis/ddsa_lib/runtime.rs | 150 +--------- 3 files changed, 273 insertions(+), 142 deletions(-) create mode 100644 crates/static-analysis-kernel/src/analysis/ddsa_lib/resource_watchdog.rs diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib.rs index 679e062c..679ef03f 100644 --- a/crates/static-analysis-kernel/src/analysis/ddsa_lib.rs +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib.rs @@ -9,6 +9,7 @@ pub use context::*; pub mod extension; pub(crate) mod js; pub(crate) mod ops; +pub(crate) mod resource_watchdog; pub(crate) mod runtime; pub use runtime::JsRuntime; #[allow(dead_code)] diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/resource_watchdog.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/resource_watchdog.rs new file mode 100644 index 00000000..3e957ef7 --- /dev/null +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/resource_watchdog.rs @@ -0,0 +1,264 @@ +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache License, Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +use crate::analysis::ddsa_lib::common::DDSAJsRuntimeError; +use deno_core::v8; +use std::sync::{Arc, Condvar, Mutex}; +use std::time::{Duration, Instant}; + +/// A persistent thread that can track execution on a [`v8::Isolate`] and terminate it if +/// a per-execution configurable duration has been exceeded. +#[derive(Debug)] +pub(crate) struct V8ResourceWatchdog { + state: Arc>, + timeout_condvar: Arc, + isolate_handle: v8::IsolateHandle, +} + +impl V8ResourceWatchdog { + // Creates a new watchdog that can terminate execution on a v8 isolate that exceeds a resource quota. + pub fn new(isolate_handle: v8::IsolateHandle) -> Self { + let state = WatchdogState::default(); + let state = Arc::new(Mutex::new(state)); + let timeout_condvar = Arc::new(Condvar::new()); + + // Spawn the timeout thread, _ignoring_ the returned JoinHandle. It's ok if the thread + // terminates itself after this `V8ResourceWatchdog` struct is dropped. + let _ = Self::spawn_timeout_thread( + isolate_handle.clone(), + Arc::clone(&state), + Arc::clone(&timeout_condvar), + ); + + Self { + state, + timeout_condvar, + isolate_handle, + } + } + + /// Executes the provided closure, optionally enforcing a resource constraint: + /// * A `timeout_duration` can be specified to limit the time of a single execution of a [`v8::Isolate`]. + pub fn execute<'rt, 's, F, T, S>( + &'rt self, + timeout_duration: Option, + scope: &mut S, + f: F, + ) -> Result + where + 'rt: 's, + F: Fn(&mut S) -> T, + S: AsMut, + { + // Request to cancel any in-progress terminations. If there is not one + // (i.e. `v8::IsolateHandle::is_execution_terminating()` is false), this is a no-op. + self.isolate_handle.cancel_terminate_execution(); + + let mut using_timeout = false; + if let Some(duration) = timeout_duration { + using_timeout = true; + let mut state = self.state.lock().unwrap(); + let _ = state.timeout.timings.insert((Instant::now(), duration)); + drop(state); + self.timeout_condvar.notify_one(); + } + + let execution_result = f(scope); + + let mut state = self.state.lock().unwrap(); + let termination_err = state.termination_err.take(); + state.timeout.timings = None; + drop(state); + + let did_timeout = matches!( + termination_err, + Some(DDSAJsRuntimeError::JavaScriptTimeout { .. }) + ); + + // (If the watchdog timed out an execution, a wakeup isn't necessary because it's already + // at the "beginning" state and waiting for the next execution to start) + if using_timeout && !did_timeout { + self.timeout_condvar.notify_one(); + } + + match termination_err { + None => Ok(execution_result), + Some(e) => Err(e), + } + } + + /// Spawns a thread that calls [`terminate_execution`](v8::Isolate::terminate_execution) on a + /// JavaScript execution that exceeds a time quota. This thread can be communicated with via + /// the shared [`WatchdogState`] and woken up with the provided condvar. + fn spawn_timeout_thread( + isolate_handle: v8::IsolateHandle, + wd_state: Arc>, + condvar: Arc, + ) -> std::thread::JoinHandle<()> { + std::thread::spawn(move || { + let (lock, cvar) = (wd_state, condvar); + loop { + let mut state = cvar + .wait_while(lock.lock().unwrap(), |state| { + state.timeout.timings.is_none() && !state.timeout.thread_should_shut_down + }) + .expect("mutex should not be poisoned"); + + if state.timeout.thread_should_shut_down { + break; + } + + let (start_instant, timeout_duration) = + state.timeout.timings.expect("should meet cvar condition"); + + // Any instant after `timeout_threshold` will trigger the timeout + let timeout_threshold = start_instant + timeout_duration; + let now = Instant::now(); + + if now >= timeout_threshold { + // This branch represents an edge case where the OS couldn't wake this thread up + // until after the watchdog should've already triggered a timeout. + state.timeout.timings = None; + state.termination_err = Some(DDSAJsRuntimeError::JavaScriptTimeout { + timeout: timeout_duration, + }); + drop(state); + isolate_handle.terminate_execution(); + } else { + // This is guaranteed not to underflow + let additional_wait = timeout_threshold - now; + let result = cvar + .wait_timeout_while(state, additional_wait, |state| { + state.timeout.timings.is_some() + }) + .expect("mutex should not be poisoned"); + state = result.0; + + // If the condvar triggered a timeout, because of our use of `Condvar::wait_timeout_while`, + // there _must_ be an actively tracked timeout, Thus, it's always appropriate to terminate execution. + if result.1.timed_out() { + state.timeout.timings = None; + state.termination_err = Some(DDSAJsRuntimeError::JavaScriptTimeout { + timeout: timeout_duration, + }); + drop(state); + isolate_handle.terminate_execution(); + } + } + } + }) + } +} + +impl Drop for V8ResourceWatchdog { + fn drop(&mut self) { + let mut state = self.state.lock().unwrap(); + state.timeout.thread_should_shut_down = true; + drop(state); + // Wake the spawned thread so that it can terminate itself. + self.timeout_condvar.notify_one(); + } +} + +/// State for a [`V8ResourceWatchdog`] that needs to be synchronized in a thread-safe manner. +#[derive(Default, Debug)] +struct WatchdogState { + timeout: TimeoutState, + /// This will be `Some` if there was a termination. Otherwise, it will be `None`. + termination_err: Option, +} + +/// State for the timeout watchdog implemented by a [`V8ResourceWatchdog`]. This should be guarded +/// with a mutex. +#[derive(Default, Debug, Clone, Eq, PartialEq, Hash)] +struct TimeoutState { + /// Data for an active timeout watchdog. If no execution is being tracked, this will be `None`. + /// If an execution is being tracked, this will be `Some((start_instant, timeout_duration))`. + timings: Option<(Instant, Duration)>, + thread_should_shut_down: bool, +} + +#[cfg(test)] +mod tests { + use super::V8ResourceWatchdog; + use crate::analysis::ddsa_lib::common::{compile_script, DDSAJsRuntimeError}; + use crate::analysis::ddsa_lib::test_utils::cfg_test_v8; + use deno_core::v8; + use std::time::{Duration, Instant}; + + /// The watchdog's state should be properly cleared across executions. + #[test] + fn watchdog_state_cleared() { + let mut runtime = cfg_test_v8().deno_core_rt(); + let timeout = Duration::from_millis(500); + let loop_code = "while (true) {}"; + let loop_script = compile_script(&mut runtime.handle_scope(), loop_code).unwrap(); + let code = "123;"; + let normal_script = compile_script(&mut runtime.handle_scope(), code).unwrap(); + + let watchdog = V8ResourceWatchdog::new(runtime.v8_isolate().thread_safe_handle()); + + // First, ensure that the implementation isn't forcing a minimum execution time to that of the + // timeout (which could happen if we are improperly handling a mutex lock). + let now = Instant::now(); + let scope = &mut runtime.handle_scope(); + let tc_scope = &mut v8::TryCatch::new(scope); + + let res = watchdog.execute(Some(Duration::from_secs(10)), tc_scope, |sc| { + let opened = normal_script.open(sc); + let bound_script = opened.bind_to_current_context(sc); + bound_script.run(sc) + }); + assert!(res.is_ok()); + assert!(now.elapsed() < Duration::from_secs(10)); + + let transitions = [ + (Some(timeout), Some(timeout + Duration::from_millis(1))), + (None, Some(timeout)), + // After calling `TerminateExecution`, a v8 isolate cannot execute JavaScript until all frames have + // propagated the uncatchable exception (or we've manually cancelled the termination). Invoking + // a subsequent non-timing-out script execution ensures that we're handling this properly. + (Some(timeout), None), + (None, None), + ]; + // Ensure that the state for each execution is as expected, even after a previous execution + // with a potentially different timeout setting. + for (first, second) in transitions { + for timeout_duration in [first, second] { + assert!(watchdog.state.lock().unwrap().termination_err.is_none()); + assert!(watchdog.state.lock().unwrap().timeout.timings.is_none()); + let res = watchdog.execute(timeout_duration, &mut *tc_scope, |sc| { + // The timer should have been configured with the correct timeout. + if let Some((_, stored_duration)) = + watchdog.state.lock().unwrap().timeout.timings + { + assert_eq!(stored_duration, timeout_duration.unwrap()) + } else { + // If `timings` is none, the test case should be `None` as well. + assert!(timeout_duration.is_none()); + } + // Run the infinite loop if a timeout is configured, otherwise a normal script. + let opened = if timeout_duration.is_some() { + loop_script.open(sc) + } else { + normal_script.open(sc) + }; + let bound_script = opened.bind_to_current_context(sc); + bound_script.run(sc) + }); + // There should be a timeout error if we configured one, otherwise not. + if timeout_duration.is_some() { + assert!(matches!( + res.unwrap_err(), + DDSAJsRuntimeError::JavaScriptTimeout { .. } + )); + } else { + assert!(res.is_ok()); + } + assert!(watchdog.state.lock().unwrap().termination_err.is_none()); + assert!(watchdog.state.lock().unwrap().timeout.timings.is_none()); + } + } + } +} diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs index 6ad90bda..24d4d8d2 100644 --- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs @@ -11,6 +11,7 @@ use crate::analysis::ddsa_lib::common::{ }; use crate::analysis::ddsa_lib::js; use crate::analysis::ddsa_lib::js::{VisitArgCodeCompat, VisitArgFilenameCompat}; +use crate::analysis::ddsa_lib::resource_watchdog::V8ResourceWatchdog; use crate::model::common::Language; use crate::model::rule::RuleInternal; use crate::model::violation; @@ -19,7 +20,7 @@ use deno_core::v8::HandleScope; use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::Arc; use std::time::{Duration, Instant}; const BRIDGE_CONTEXT: &str = "__RUST_BRIDGE__context"; @@ -32,10 +33,7 @@ const STELLA_COMPAT_FILE_CONTENTS: &str = "STELLA_COMPAT_FILE_CONTENTS"; /// The Datadog Static Analyzer JavaScript runtime pub struct JsRuntime { runtime: deno_core::JsRuntime, - /// Each `JsRuntime` spawns a thread that lives for as long as the `JsRuntime`, and it is used - /// to manually terminate JavaScript executions that go on for too long. Synchronization between - /// the `JsRuntime` and this thread is performed through this `watchdog_pair`. - watchdog_pair: Arc<(Mutex, Condvar)>, + v8_resource_watchdog: V8ResourceWatchdog, console: Rc>, bridge_context: Rc>, bridge_query_match: QueryMatchBridge, @@ -122,11 +120,11 @@ impl JsRuntime { op_state.put(Rc::clone(&console)); let v8_isolate_handle = deno_runtime.v8_isolate().thread_safe_handle(); - let watchdog_pair = spawn_watchdog_thread(v8_isolate_handle); + let v8_resource_watchdog = V8ResourceWatchdog::new(v8_isolate_handle); Ok(Self { runtime: deno_runtime, - watchdog_pair, + v8_resource_watchdog, console, bridge_context: context, bridge_query_match: query_match, @@ -330,35 +328,9 @@ impl JsRuntime { let opened = script.open(tc_ctx_scope); let bound_script = opened.bind_to_current_context(tc_ctx_scope); - // Notify the watchdog thread that an execution is starting. - if let Some(duration) = timeout { - let (lock, cvar) = &*self.watchdog_pair; - let mut state = lock.lock().unwrap(); - let _ = state.active_timer.insert((Instant::now(), duration)); - drop(state); - cvar.notify_one(); - } - - let execution_result = bound_script.run(tc_ctx_scope); - - // If the watchdog requested termination, it should have marked `is_currently_executing` as false, - // and so we don't need to set `is_currently_executing`, nor do we need to (inefficiently) re-notify - // via the condvar. We just reset the v8 isolate's termination state and return a timeout error. - if tc_ctx_scope.is_execution_terminating() { - debug_assert!(self.watchdog_pair.0.lock().unwrap().active_timer.is_none()); - tc_ctx_scope.cancel_terminate_execution(); - tc_ctx_scope.reset(); - let timeout = timeout.expect("timeout should exist if we had v8 terminate execution"); - return Err(DDSAJsRuntimeError::JavaScriptTimeout { timeout }); - } else if timeout.is_some() { - // Otherwise, we successfully completed execution without timing out. We need to notify - // the watchdog thread to stop actively tracking a timeout. - let (lock, cvar) = &*self.watchdog_pair; - let mut state = lock.lock().unwrap(); - state.active_timer = None; - drop(state); - cvar.notify_one(); - } + let execution_result = self + .v8_resource_watchdog + .execute(timeout, tc_ctx_scope, |sc| bound_script.run(sc))?; let return_val = execution_result.ok_or_else(|| { let exception = tc_ctx_scope @@ -430,17 +402,6 @@ for (const queryMatch of globalThis.__RUST_BRIDGE__query_match) {{ } } -impl Drop for JsRuntime { - fn drop(&mut self) { - let (lock, cvar) = &*self.watchdog_pair; - let mut state = lock.lock().unwrap(); - // Tell the watchdog thread that it should shut down. - state.thread_should_shut_down = true; - drop(state); - cvar.notify_one(); - } -} - /// Creates a [`deno_core::JsRuntime`] with the provided `extensions`. /// /// # Warning @@ -497,65 +458,6 @@ pub(crate) fn inner_make_deno_core_runtime( }) } -/// Spawns a watchdog thread that invokes [`v8::Isolate::terminate_execution`] when a JavaScript execution -/// runs past a specified timeout duration. Returns a tuple containing the `JsExecutionState` used -/// to communicate state to the watchdog thread, as well as a `Condvar` to notify the watchdog. -/// -/// The spawned thread may be shut down by setting `should_shut_down` on `JsExecutionState` and -/// notifying the thread via the condvar. -fn spawn_watchdog_thread(v8_handle: v8::IsolateHandle) -> Arc<(Mutex, Condvar)> { - let state_pair = Arc::new((Mutex::new(TimeoutState::new()), Condvar::new())); - let pair_clone = Arc::clone(&state_pair); - - std::thread::spawn(move || { - let (lock, cvar) = &*pair_clone; - loop { - let mut state = cvar - .wait_while(lock.lock().unwrap(), |state| { - state.active_timer.is_none() && !state.thread_should_shut_down - }) - .expect("mutex should not be poisoned"); - - if state.thread_should_shut_down { - break; - } - - let (start_instant, timeout_duration) = - state.active_timer.expect("should meet cvar condition"); - - // Any instant after `timeout_threshold` will trigger the timeout - let timeout_threshold = start_instant + timeout_duration; - let now = Instant::now(); - - if now >= timeout_threshold { - // This branch represents an edge case where the OS couldn't wake this thread up - // until after the watchdog should've already triggered a timeout. - state.active_timer = None; - drop(state); - v8_handle.terminate_execution(); - } else { - // This is guaranteed not to underflow - let additional_wait = timeout_threshold - now; - let result = cvar - .wait_timeout_while(state, additional_wait, |state| { - state.active_timer.is_some() - }) - .expect("mutex should not be poisoned"); - state = result.0; - - // If the condvar triggered a timeout, because of our use of `Condvar::wait_timeout_while`, - // there _must_ be an actively tracked timeout, Thus, it's always appropriate to terminate execution. - if result.1.timed_out() { - state.active_timer = None; - drop(state); - v8_handle.terminate_execution(); - } - } - } - }); - state_pair -} - /// The result of a ddsa JavaScript execution. #[derive(Debug)] pub struct ExecutionResult { @@ -575,25 +477,6 @@ pub struct ExecutionTimingCompat { pub execution: Duration, } -/// State for the timeout watchdog implemented by a [`V8ResourceWatchdog`]. This should be guarded -/// with a mutex. -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -struct TimeoutState { - /// Data for an active timeout watchdog. If no execution is being tracked, this will be `None`. - /// If an execution is being tracked, this will be `Some((start_instant, timeout_duration))`. - active_timer: Option<(Instant, Duration)>, - thread_should_shut_down: bool, -} - -impl TimeoutState { - pub fn new() -> Self { - Self { - active_timer: None, - thread_should_shut_down: false, - } - } -} - /// A mutable scratch space that collects the output of the `console.log` function invoked by JavaScript code. pub(crate) struct JsConsole(Vec); @@ -974,28 +857,11 @@ function visit(captures) { let timeout = Duration::from_millis(500); let loop_code = "while (true) {}"; let loop_script = compile_script(&mut runtime.v8_handle_scope(), loop_code).unwrap(); - let code = "123;"; - let script = compile_script(&mut runtime.v8_handle_scope(), code).unwrap(); - - // First, ensure that the implementation isn't forcing a minimum execution time to that of the - // timeout (which could happen if we are improperly handling a mutex lock). - let now = Instant::now(); - runtime - .scoped_execute(&script, |_, _| (), Some(Duration::from_secs(10))) - .unwrap(); - assert!(now.elapsed() < Duration::from_secs(10)); let err = runtime .scoped_execute(&loop_script, |_, _| (), Some(timeout)) .unwrap_err(); assert!(matches!(err, DDSAJsRuntimeError::JavaScriptTimeout { .. })); - - // After calling `TerminateExecution`, a v8 isolate cannot execute JavaScript until all frames have - // propagated the uncatchable exception (or we've manually cancelled the termination). Invoking - // a subsequent script execution ensures that we're handling this properly. - let return_val = - runtime.scoped_execute(&script, |scope, value| value.uint32_value(scope), None); - assert_eq!(return_val.unwrap().unwrap(), 123); } /// `scoped_execute` should always execute with an empty console (despite a previous execution