From 0e367c72a1cc163fd781f98b9fac809d90f4beb7 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 7 Jun 2024 08:15:06 -0500 Subject: [PATCH] [Clang] Add timeout for GPU detection utilities Summary: The utilities `nvptx-arch` and `amdgpu-arch` are used to support `--offload-arch=native` among other utilities in clang. However, these rely on the GPU drivers to query the features. In certain cases these drivers can become locked up, which will lead to indefinate hangs on any compiler jobs running in the meantime. This patch adds a ten second timeout period for these utilities before it kills the job and errors out. --- clang/include/clang/Driver/ToolChain.h | 3 ++- clang/lib/Driver/ToolChain.cpp | 8 ++++---- clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +- clang/lib/Driver/ToolChains/Cuda.cpp | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index a4f9cad98aa8b1..9789cfacafd780 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -205,7 +205,8 @@ class ToolChain { /// Executes the given \p Executable and returns the stdout. llvm::Expected> - executeToolChainProgram(StringRef Executable) const; + executeToolChainProgram(StringRef Executable, + unsigned SecondsToWait = 0) const; void setTripleEnvironment(llvm::Triple::EnvironmentType Env); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 0e86bc07e0ea2c..40ab2e91125d1e 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -104,7 +104,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, } llvm::Expected> -ToolChain::executeToolChainProgram(StringRef Executable) const { +ToolChain::executeToolChainProgram(StringRef Executable, + unsigned SecondsToWait) const { llvm::SmallString<64> OutputFile; llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); llvm::FileRemover OutputRemover(OutputFile.c_str()); @@ -115,9 +116,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const { }; std::string ErrorMessage; - if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, - /* SecondsToWait */ 0, - /*MemoryLimit*/ 0, &ErrorMessage)) + if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) return llvm::createStringError(std::error_code(), Executable + ": " + ErrorMessage); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 9ffea57b005de0..11a98a0ec314d6 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -877,7 +877,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index bbc8be91fd70b7..2dfc7457b0ac7c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -826,7 +826,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError();