Skip to content

Commit

Permalink
Add option for max wait before terminating interactive process (#15767)
Browse files Browse the repository at this point in the history
  • Loading branch information
thejcannon committed Jun 8, 2022
1 parent 4ff44d4 commit 97d200c
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def __init__(
remote_parallelism=execution_options.process_execution_remote_parallelism,
child_max_memory=execution_options.process_total_child_memory_usage or 0,
child_default_memory=execution_options.process_per_child_memory_usage,
graceful_shutdown_timeout=execution_options.process_execution_graceful_shutdown_timeout,
)

self._py_scheduler = native_engine.scheduler_create(
Expand Down
14 changes: 14 additions & 0 deletions src/python/pants/option/global_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ class ExecutionOptions:
process_execution_local_enable_nailgun: bool
process_execution_remote_parallelism: int
process_execution_cache_namespace: str | None
process_execution_graceful_shutdown_timeout: int

process_total_child_memory_usage: int | None
process_per_child_memory_usage: int
Expand Down Expand Up @@ -394,6 +395,7 @@ def from_options(
process_execution_local_parallelism=bootstrap_options.process_execution_local_parallelism,
process_execution_remote_parallelism=dynamic_remote_options.parallelism,
process_execution_cache_namespace=bootstrap_options.process_execution_cache_namespace,
process_execution_graceful_shutdown_timeout=bootstrap_options.process_execution_graceful_shutdown_timeout,
process_execution_local_enable_nailgun=bootstrap_options.process_execution_local_enable_nailgun,
process_total_child_memory_usage=bootstrap_options.process_total_child_memory_usage,
process_per_child_memory_usage=bootstrap_options.process_per_child_memory_usage,
Expand Down Expand Up @@ -480,6 +482,7 @@ def from_options(cls, options: OptionValueContainer) -> LocalStoreOptions:
process_cleanup=True,
local_cache=True,
process_execution_local_enable_nailgun=True,
process_execution_graceful_shutdown_timeout=3,
# Remote store setup.
remote_store_address=None,
remote_store_headers={
Expand Down Expand Up @@ -1169,6 +1172,17 @@ class BootstrapOptions:
help="Whether or not to use nailgun to run JVM requests that are marked as supporting nailgun.",
advanced=True,
)
process_execution_graceful_shutdown_timeout = IntOption(
"--process-execution-graceful-shutdown-timeout",
default=DEFAULT_EXECUTION_OPTIONS.process_execution_graceful_shutdown_timeout,
help=softwrap(
f"""
The time in seconds to wait when gracefully shutting down an interactive process (such
as one opened using `{bin_name()} run`) before killing it.
"""
),
advanced=True,
)
remote_execution = BoolOption(
"--remote-execution",
default=DEFAULT_EXECUTION_OPTIONS.remote_execution,
Expand Down
12 changes: 7 additions & 5 deletions src/rust/engine/process_execution/src/children.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ use nix::unistd::getpgid;
use nix::unistd::Pid;
use tokio::process::{Child, Command};

// We keep this relatively low to start to encourage interactivity. If users end up needing longer graceful
// shutdown periods, we can expose it as a per-process setting.
const GRACEFUL_SHUTDOWN_MAX_WAIT_TIME: time::Duration = time::Duration::from_secs(3);
const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis(50);

/// A child process running in its own PGID, with a drop implementation that will kill that
Expand All @@ -20,11 +17,15 @@ const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis(
/// signals in sequence for https://github.com/pantsbuild/pants/issues/13230.
pub struct ManagedChild {
child: Child,
graceful_shutdown_timeout: time::Duration,
killed: AtomicBool,
}

impl ManagedChild {
pub fn spawn(mut command: Command) -> Result<Self, String> {
pub fn spawn(
mut command: Command,
graceful_shutdown_timeout: time::Duration,
) -> Result<Self, String> {
// Set `kill_on_drop` to encourage `tokio` to `wait` the process via its own "reaping"
// mechanism:
// see https://docs.rs/tokio/1.14.0/tokio/process/struct.Command.html#method.kill_on_drop
Expand All @@ -49,6 +50,7 @@ impl ManagedChild {
.map_err(|e| format!("Error executing interactive process: {}", e))?;
Ok(Self {
child,
graceful_shutdown_timeout,
killed: AtomicBool::new(false),
})
}
Expand Down Expand Up @@ -115,7 +117,7 @@ impl ManagedChild {
/// This method *will* block the current thread but will do so for a bounded amount of time.
pub fn graceful_shutdown_sync(&mut self) -> Result<(), String> {
self.signal_pg(signal::Signal::SIGINT)?;
match self.wait_for_child_exit_sync(GRACEFUL_SHUTDOWN_MAX_WAIT_TIME) {
match self.wait_for_child_exit_sync(self.graceful_shutdown_timeout) {
Ok(true) => {
// process was gracefully shutdown
self.killed.store(true, Ordering::SeqCst);
Expand Down
3 changes: 3 additions & 0 deletions src/rust/engine/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub struct Core {
pub watcher: Option<Arc<InvalidationWatcher>>,
pub build_root: PathBuf,
pub local_parallelism: usize,
pub graceful_shutdown_timeout: Duration,
pub sessions: Sessions,
pub named_caches_dir: PathBuf,
}
Expand Down Expand Up @@ -106,6 +107,7 @@ pub struct ExecutionStrategyOptions {
pub remote_cache_write: bool,
pub child_max_memory: usize,
pub child_default_memory: usize,
pub graceful_shutdown_timeout: Duration,
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -497,6 +499,7 @@ impl Core {
build_root,
watcher,
local_parallelism: exec_strategy_opts.local_parallelism,
graceful_shutdown_timeout: exec_strategy_opts.graceful_shutdown_timeout,
sessions,
named_caches_dir,
})
Expand Down
2 changes: 2 additions & 0 deletions src/rust/engine/src/externs/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ impl PyExecutionStrategyOptions {
remote_cache_write: bool,
child_default_memory: usize,
child_max_memory: usize,
graceful_shutdown_timeout: usize,
) -> Self {
Self(ExecutionStrategyOptions {
local_parallelism,
Expand All @@ -259,6 +260,7 @@ impl PyExecutionStrategyOptions {
remote_cache_write,
child_default_memory,
child_max_memory,
graceful_shutdown_timeout: Duration::from_secs(graceful_shutdown_timeout.try_into().unwrap()),
})
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ fn interactive_process(
.try_clone_as_file()
.map_err(|e| format!("Couldn't clone stderr: {}", e))?,
));
let mut subprocess = ManagedChild::spawn(command)?;
let mut subprocess = ManagedChild::spawn(command, context.core.graceful_shutdown_timeout)?;
tokio::select! {
_ = session.cancelled() => {
// The Session was cancelled: attempt to kill the process group / process, and
Expand Down

0 comments on commit 97d200c

Please sign in to comment.