From a6149e71b9c21c6b01e6ba363bfbb521963e1145 Mon Sep 17 00:00:00 2001 From: Kyle Mayes Date: Sun, 26 May 2024 23:41:27 -0400 Subject: [PATCH 1/3] Improve DLL filtering on Windows (#170) --- CHANGELOG.md | 5 +++ build/dynamic.rs | 29 +++++++++++--- build/macros.rs | 11 +++++ tests/build.rs | 102 +++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 129 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db0655a4a..a55978775 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## [1.8.1] - UNRELEASED + +### Fixed +- Improve DLL search on Windows to take target architecture into account (e.g., ARM64 vs x86-64) + ## [1.8.0] - 2024-05-26 ### Changed diff --git a/build/dynamic.rs b/build/dynamic.rs index 25e1c183c..d24490f6b 100644 --- a/build/dynamic.rs +++ b/build/dynamic.rs @@ -23,8 +23,8 @@ fn parse_elf_header(path: &Path) -> io::Result { } } -/// Extracts the magic number from the PE header in a shared library. -fn parse_pe_header(path: &Path) -> io::Result { +/// Extracts the magic number and machine type from the PE header in a shared library. +fn parse_pe_header(path: &Path) -> io::Result<(u16, u16)> { let mut file = File::open(path)?; // Extract the header offset. @@ -45,7 +45,15 @@ fn parse_pe_header(path: &Path) -> io::Result { let mut buffer = [0; 2]; file.seek(SeekFrom::Current(20))?; file.read_exact(&mut buffer)?; - Ok(u16::from_le_bytes(buffer)) + let magic_number = u16::from_le_bytes(buffer); + + // Extract the machine type. + let mut buffer = [0; 2]; + file.seek(SeekFrom::Current(-22))?; + file.read_exact(&mut buffer)?; + let machine_type = u16::from_le_bytes(buffer); + + return Ok((magic_number, machine_type)); } /// Checks that a `libclang` shared library matches the target platform. @@ -63,7 +71,7 @@ fn validate_library(path: &Path) -> Result<(), String> { Ok(()) } else if target_os!("windows") { - let magic = parse_pe_header(path).map_err(|e| e.to_string())?; + let (magic, machine_type) = parse_pe_header(path).map_err(|e| e.to_string())?; if target_pointer_width!("32") && magic != 267 { return Err("invalid DLL (64-bit)".into()); @@ -73,7 +81,18 @@ fn validate_library(path: &Path) -> Result<(), String> { return Err("invalid DLL (32-bit)".into()); } - Ok(()) + let arch_mismatch = match machine_type { + 0x014C if !target_arch!("x86") => Some("x86"), + 0x8664 if !target_arch!("x86_64") => Some("x86-64"), + 0xAA64 if !target_arch!("aarch64") => Some("ARM64"), + _ => None, + }; + + if let Some(arch) = arch_mismatch { + Err(format!("invalid DLL ({arch})")) + } else { + Ok(()) + } } else { Ok(()) } diff --git a/build/macros.rs b/build/macros.rs index 811c7c306..ff6545bf1 100644 --- a/build/macros.rs +++ b/build/macros.rs @@ -15,6 +15,17 @@ macro_rules! target_os { }; } +macro_rules! target_arch { + ($arch:expr) => { + if cfg!(test) && ::std::env::var("_CLANG_SYS_TEST").is_ok() { + let var = ::std::env::var("_CLANG_SYS_TEST_ARCH"); + var.map_or(false, |v| v == $arch) + } else { + cfg!(target_arch = $arch) + } + }; +} + macro_rules! target_pointer_width { ($pointer_width:expr) => { if cfg!(test) && ::std::env::var("_CLANG_SYS_TEST").is_ok() { diff --git a/tests/build.rs b/tests/build.rs index 94625315f..e56f3fefb 100644 --- a/tests/build.rs +++ b/tests/build.rs @@ -1,5 +1,6 @@ #![allow(dead_code)] +use core::fmt; use std::collections::HashMap; use std::env; use std::fs; @@ -26,9 +27,38 @@ struct RunCommandMock { responses: HashMap, String>, } + +#[derive(Copy, Clone, Debug)] +enum Arch { + ARM64, + X86, + X86_64, +} + +impl Arch { + fn pe_machine_type(self) -> u16 { + match self { + Arch::ARM64 => 0xAA64, + Arch::X86 => 0x014C, + Arch::X86_64 => 0x8664, + } + } +} + +impl fmt::Display for Arch { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Arch::ARM64 => write!(f, "aarch64"), + Arch::X86 => write!(f, "x86"), + Arch::X86_64 => write!(f, "x86_64"), + } + } +} + #[derive(Debug)] struct Env { os: String, + arch: Arch, pointer_width: String, env: Option, vars: HashMap, Option)>, @@ -39,9 +69,10 @@ struct Env { } impl Env { - fn new(os: &str, pointer_width: &str) -> Self { + fn new(os: &str, arch: Arch, pointer_width: &str) -> Self { Env { os: os.into(), + arch, pointer_width: pointer_width.into(), env: None, vars: HashMap::new(), @@ -84,11 +115,12 @@ impl Env { self } - fn dll(self, path: &str, pointer_width: &str) -> Self { + fn dll(self, path: &str, arch: Arch, pointer_width: &str) -> Self { // PE header. let mut contents = [0; 64]; contents[0x3C..0x3C + 4].copy_from_slice(&i32::to_le_bytes(10)); contents[10..14].copy_from_slice(&[b'P', b'E', 0, 0]); + contents[14..16].copy_from_slice(&u16::to_le_bytes(arch.pe_machine_type())); let magic = if pointer_width == "64" { 523 } else { 267 }; contents[34..36].copy_from_slice(&u16::to_le_bytes(magic)); @@ -117,6 +149,7 @@ impl Env { fn enable(self) -> Self { env::set_var("_CLANG_SYS_TEST", "yep"); env::set_var("_CLANG_SYS_TEST_OS", &self.os); + env::set_var("_CLANG_SYS_TEST_ARCH", &format!("{}", self.arch)); env::set_var("_CLANG_SYS_TEST_POINTER_WIDTH", &self.pointer_width); if let Some(env) = &self.env { env::set_var("_CLANG_SYS_TEST_ENV", env); @@ -155,6 +188,7 @@ impl Drop for Env { fn drop(&mut self) { env::remove_var("_CLANG_SYS_TEST"); env::remove_var("_CLANG_SYS_TEST_OS"); + env::remove_var("_CLANG_SYS_TEST_ARCH"); env::remove_var("_CLANG_SYS_TEST_POINTER_WIDTH"); env::remove_var("_CLANG_SYS_TEST_ENV"); @@ -185,9 +219,23 @@ fn test_all() { test_windows_bin_sibling(); test_windows_mingw_gnu(); test_windows_mingw_msvc(); + test_windows_arm64_on_x86_64(); + test_windows_x86_64_on_arm64(); } } +macro_rules! assert_error { + ($result:expr, $contents:expr $(,)?) => { + if let Err(error) = $result { + if !error.contains($contents) { + panic!("expected error to contain {:?}, received: {error:?}", $contents); + } + } else { + panic!("expected error, received: {:?}", $result); + } + }; +} + //================================================ // Dynamic //================================================ @@ -195,7 +243,7 @@ fn test_all() { // Linux ----------------------------------------- fn test_linux_directory_preference() { - let _env = Env::new("linux", "64") + let _env = Env::new("linux", Arch::X86_64, "64") .so("usr/lib/libclang.so.1", "64") .so("usr/local/lib/libclang.so.1", "64") .enable(); @@ -207,7 +255,7 @@ fn test_linux_directory_preference() { } fn test_linux_version_preference() { - let _env = Env::new("linux", "64") + let _env = Env::new("linux", Arch::X86_64, "64") .so("usr/lib/libclang-3.so", "64") .so("usr/lib/libclang-3.5.so", "64") .so("usr/lib/libclang-3.5.0.so", "64") @@ -220,7 +268,7 @@ fn test_linux_version_preference() { } fn test_linux_directory_and_version_preference() { - let _env = Env::new("linux", "64") + let _env = Env::new("linux", Arch::X86_64, "64") .so("usr/local/llvm/lib/libclang-3.so", "64") .so("usr/local/lib/libclang-3.5.so", "64") .so("usr/lib/libclang-3.5.0.so", "64") @@ -236,9 +284,9 @@ fn test_linux_directory_and_version_preference() { #[cfg(target_os = "windows")] fn test_windows_bin_sibling() { - let _env = Env::new("windows", "64") + let _env = Env::new("windows", Arch::X86_64, "64") .dir("Program Files\\LLVM\\lib") - .dll("Program Files\\LLVM\\bin\\libclang.dll", "64") + .dll("Program Files\\LLVM\\bin\\libclang.dll", Arch::X86_64, "64") .enable(); assert_eq!( @@ -249,12 +297,12 @@ fn test_windows_bin_sibling() { #[cfg(target_os = "windows")] fn test_windows_mingw_gnu() { - let _env = Env::new("windows", "64") + let _env = Env::new("windows", Arch::X86_64, "64") .env("gnu") .dir("MSYS\\MinGW\\lib") - .dll("MSYS\\MinGW\\bin\\clang.dll", "64") + .dll("MSYS\\MinGW\\bin\\clang.dll", Arch::X86_64, "64") .dir("Program Files\\LLVM\\lib") - .dll("Program Files\\LLVM\\bin\\libclang.dll", "64") + .dll("Program Files\\LLVM\\bin\\libclang.dll", Arch::X86_64, "64") .enable(); assert_eq!( @@ -265,12 +313,12 @@ fn test_windows_mingw_gnu() { #[cfg(target_os = "windows")] fn test_windows_mingw_msvc() { - let _env = Env::new("windows", "64") + let _env = Env::new("windows", Arch::X86_64, "64") .env("msvc") .dir("MSYS\\MinGW\\lib") - .dll("MSYS\\MinGW\\bin\\clang.dll", "64") + .dll("MSYS\\MinGW\\bin\\clang.dll", Arch::X86_64, "64") .dir("Program Files\\LLVM\\lib") - .dll("Program Files\\LLVM\\bin\\libclang.dll", "64") + .dll("Program Files\\LLVM\\bin\\libclang.dll", Arch::X86_64, "64") .enable(); assert_eq!( @@ -278,3 +326,31 @@ fn test_windows_mingw_msvc() { Ok(("Program Files\\LLVM\\bin".into(), "libclang.dll".into())), ); } + +#[cfg(target_os = "windows")] +fn test_windows_arm64_on_x86_64() { + let _env = Env::new("windows", Arch::X86_64, "64") + .env("msvc") + .dir("Program Files\\LLVM\\lib") + .dll("Program Files\\LLVM\\bin\\libclang.dll", Arch::ARM64, "64") + .enable(); + + assert_error!( + dynamic::find(true), + "invalid: [(Program Files\\LLVM\\bin\\libclang.dll: invalid DLL (ARM64)", + ); +} + +#[cfg(target_os = "windows")] +fn test_windows_x86_64_on_arm64() { + let _env = Env::new("windows", Arch::ARM64, "64") + .env("msvc") + .dir("Program Files\\LLVM\\lib") + .dll("Program Files\\LLVM\\bin\\libclang.dll", Arch::X86_64, "64") + .enable(); + + assert_error!( + dynamic::find(true), + "invalid: [(Program Files\\LLVM\\bin\\libclang.dll: invalid DLL (x86-64)", + ); +} From 637733c5533e4672895c55a56bf2002a7d3e65d1 Mon Sep 17 00:00:00 2001 From: Kyle Mayes Date: Tue, 28 May 2024 21:06:42 -0400 Subject: [PATCH 2/3] Improve static linking error/documentation (#174) --- README.md | 25 +++++++++++++++++++++++++ build/static.rs | 6 +++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c05ec617a..50858ebcb 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,31 @@ The availability of `llvm-config` is not optional for static linking. Ensure tha **Note:** The `libcpp` Cargo feature can be used to enable linking to `libc++` instead of `libstd++` when linking to `libclang` statically on Linux or Haiku. +#### Static Library Availability + +Linking to `libclang` statically on *nix systems requires that the `libclang.a` static library be available. +This library is usually *not* included in most distributions of LLVM and Clang (e.g., `libclang-dev` on Debian-based systems). +If you need to link to `libclang` statically then most likely the only consistent way to get your hands on `libclang.a` is to build it yourself. + +Here's an example of building the required static libraries and using them with `clang-sys`: + +```text +git clone git@github.com:llvm/llvm-project.git +cd llvm-project + +cmake -S llvm -B build -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DLIBCLANG_BUILD_STATIC=ON +ninja -C build + +cd .. +git clone git@github.com:KyleMayes/clang-sys.git +cd clang-sys + +LLVM_CONFIG_PATH=../llvm-project/build/bin/llvm-config cargo test --features static +``` + +Linking to `libclang` statically requires linking a large number of big static libraries. +Using [`rust-lld` as a linker](https://blog.rust-lang.org/2024/05/17/enabling-rust-lld-on-linux.html) can greatly reduce linking times. + ### Runtime The `clang_sys::load` function is used to load a `libclang` shared library for use in the thread in which it is called. The `clang_sys::unload` function will unload the `libclang` shared library. `clang_sys::load` searches for a `libclang` shared library in the same way one is searched for when linking to `libclang` dynamically at compiletime. diff --git a/build/static.rs b/build/static.rs index 013dfd52b..6bc2beaf9 100644 --- a/build/static.rs +++ b/build/static.rs @@ -87,7 +87,11 @@ fn find() -> PathBuf { if let Some((directory, _)) = files.into_iter().next() { directory } else { - panic!("could not find any static libraries"); + panic!( + "could not find the required `{name}` static library, see the \ + README for more information on how to link to `libclang` statically: \ + https://github.com/KyleMayes/clang-sys?tab=readme-ov-file#static" + ); } } From 919aa827ea75e73fa8d16d5fd0efd460e9464b50 Mon Sep 17 00:00:00 2001 From: Kyle Mayes Date: Tue, 28 May 2024 21:14:56 -0400 Subject: [PATCH 3/3] Improve Visual Studio detection (#166) --- CHANGELOG.md | 1 + build/common.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a55978775..ecba9bf61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Fixed - Improve DLL search on Windows to take target architecture into account (e.g., ARM64 vs x86-64) +- Improved detection of `libclang` installed with Visual Studio on Windows ## [1.8.0] - 2024-05-26 diff --git a/build/common.rs b/build/common.rs index a32e8b686..696d92339 100644 --- a/build/common.rs +++ b/build/common.rs @@ -189,7 +189,7 @@ const DIRECTORIES_WINDOWS: &[(&str, bool)] = &[ ("C:\\LLVM\\lib", true), // LLVM + Clang can be installed as a component of Visual Studio. // https://github.com/KyleMayes/clang-sys/issues/121 - ("C:\\Program Files*\\Microsoft Visual Studio\\*\\BuildTools\\VC\\Tools\\Llvm\\**\\lib", true), + ("C:\\Program Files*\\Microsoft Visual Studio\\*\\VC\\Tools\\Llvm\\**\\lib", true), ]; /// `libclang` directory patterns for illumos