diff --git a/Cargo.lock b/Cargo.lock index 883872590e0..4d7cd6200d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1546,9 +1546,9 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.6" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd42583b04998a5363558e5f9291ee5a5ff6b49944332103f251e7479a82aa7" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ "cfg-if 1.0.0", "crossbeam-utils", @@ -4316,6 +4316,7 @@ version = "0.0.0" dependencies = [ "backtrace", "cfg-if 1.0.0", + "crossbeam-queue", "enumset", "finite-wasm", "lazy_static", @@ -4327,6 +4328,7 @@ dependencies = [ "region", "rkyv", "rustc-demangle", + "rustix 0.37.20", "target-lexicon 0.12.3", "thiserror", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 80116012519..e771c34aacf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,6 +124,7 @@ cpu-time = "1.0" criterion = { version = "0.3.5", default_features = false, features = ["html_reports", "cargo_bench_support"] } crossbeam = "0.8" crossbeam-channel = "0.5" +crossbeam-queue = "0.3.8" csv = "1.2.1" curve25519-dalek = "3" delay-detector = { path = "tools/delay-detector" } @@ -273,6 +274,7 @@ runtime-tester = { path = "test-utils/runtime-tester" } rusqlite = { version = "0.27.0", features = ["bundled", "chrono", "functions"] } rustc-demangle = "0.1" rust-s3 = { version = "0.32.3", features = ["blocking"] } +rustix = "0.37" secp256k1 = { version = "0.27.0", features = ["recovery", "rand-std"] } semver = "1.0.4" serde = { version = "1.0.136", features = ["alloc", "derive", "rc"] } diff --git a/runtime/near-vm-runner/src/near_vm_runner.rs b/runtime/near-vm-runner/src/near_vm_runner.rs index 6308897572a..1132a321d6b 100644 --- a/runtime/near-vm-runner/src/near_vm_runner.rs +++ b/runtime/near-vm-runner/src/near_vm_runner.rs @@ -19,7 +19,7 @@ use near_primitives_core::runtime::fees::RuntimeFeesConfig; use near_stable_hasher::StableHasher; use near_vm_compiler_singlepass::Singlepass; use near_vm_engine::universal::{ - Universal, UniversalEngine, UniversalExecutable, UniversalExecutableRef, + LimitedMemoryPool, Universal, UniversalEngine, UniversalExecutable, UniversalExecutableRef, }; use near_vm_types::{FunctionIndex, InstanceConfig, MemoryType, Pages, WASM_PAGE_SIZE}; use near_vm_vm::{ @@ -28,7 +28,7 @@ use near_vm_vm::{ use std::borrow::Cow; use std::hash::{Hash, Hasher}; use std::mem::size_of; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; #[derive(Clone)] pub struct NearVmMemory(Arc); @@ -244,11 +244,38 @@ impl NearVM { let compiler = Singlepass::new(); // We only support universal engine at the moment. assert_eq!(VM_CONFIG.engine, NearVmEngine::Universal); + + static CODE_MEMORY_POOL_CELL: OnceLock = OnceLock::new(); + let code_memory_pool = CODE_MEMORY_POOL_CELL + .get_or_init(|| { + // FIXME: should have as many code memories as there are possible parallel + // invocations of the runtime… How do we determine that? Should we make it + // configurable for the node operators, perhaps, so that they can make an informed + // choice based on the amount of memory they have and shards they track? Should we + // actually use some sort of semaphore to enforce a parallelism limit? + // + // NB: 64MiB is a best guess as to what the maximum size a loaded artifact can + // plausibly be. 
This is not necessarily true – there may be WebAssembly + // instructions that expand by more than 4 times in terms of instruction size after + // a conversion to x86_64. In that case a re-allocation will occur and executing + // that particular function call will be slower. Not to mention there isn't a + // strong guarantee on the upper bound of the memory that the contract runtime may + // require. + LimitedMemoryPool::new(8, 64 * 1024 * 1024).unwrap_or_else(|e| { + panic!("could not pre-allocate resources for the runtime: {e}"); + }) + }) + .clone(); + let features = crate::features::WasmFeatures::from(config.limit_config.contract_prepare_version); Self { config, - engine: Universal::new(compiler).target(target).features(features.into()).engine(), + engine: Universal::new(compiler) + .target(target) + .features(features.into()) + .code_memory_pool(code_memory_pool) + .engine(), } } @@ -322,84 +349,58 @@ impl NearVM { code: &ContractCode, cache: Option<&dyn CompiledContractCache>, ) -> VMResult> { - // A bit of a tricky logic ahead! We need to deal with two levels of - // caching: - // * `cache` stores compiled machine code in the database - // * `MEM_CACHE` below holds in-memory cache of loaded contracts + // `cache` stores compiled machine code in the database // // Caches also cache _compilation_ errors, so that we don't have to // re-parse invalid code (invalid code, in a sense, is a normal // outcome). And `cache`, being a database, can fail with an `io::Error`. let _span = tracing::debug_span!(target: "vm", "NearVM::compile_and_load").entered(); - let key = get_contract_cache_key(code, VMKind::NearVm, &self.config); - - let compile_or_read_from_cache = || -> VMResult> { - let _span = - tracing::debug_span!(target: "vm", "NearVM::compile_or_read_from_cache").entered(); - let cache_record = cache - .map(|cache| cache.get(&key)) - .transpose() - .map_err(CacheError::ReadError)? - .flatten(); - - let stored_artifact: Option = match cache_record { - None => None, - Some(CompiledContract::CompileModuleError(err)) => return Ok(Err(err)), - Some(CompiledContract::Code(serialized_module)) => { - let _span = - tracing::debug_span!(target: "vm", "NearVM::read_from_cache").entered(); - unsafe { - // (UN-)SAFETY: the `serialized_module` must have been produced by a prior call to - // `serialize`. - // - // In practice this is not necessarily true. One could have forgotten to change the - // cache key when upgrading the version of the near_vm library or the database could - // have had its data corrupted while at rest. - // - // There should definitely be some validation in near_vm to ensure we load what we think - // we load. - let executable = UniversalExecutableRef::deserialize(&serialized_module) - .map_err(|_| CacheError::DeserializationError)?; - let artifact = self - .engine - .load_universal_executable_ref(&executable) - .map(Arc::new) - .map_err(|err| VMRunnerError::LoadingError(err.to_string()))?; - Some(artifact) - } - } - }; - - Ok(if let Some(it) = stored_artifact { - Ok(it) - } else { - match self.compile_and_cache(code, cache)? { - Ok(executable) => Ok(self + let cache_record = cache + .map(|cache| cache.get(&key)) + .transpose() + .map_err(CacheError::ReadError)?
+ .flatten(); + + let stored_artifact: Option = match cache_record { + None => None, + Some(CompiledContract::CompileModuleError(err)) => return Ok(Err(err)), + Some(CompiledContract::Code(serialized_module)) => { + let _span = tracing::debug_span!(target: "vm", "NearVM::read_from_cache").entered(); + unsafe { + // (UN-)SAFETY: the `serialized_module` must have been produced by a prior call to + // `serialize`. + // + // In practice this is not necessarily true. One could have forgotten to change the + // cache key when upgrading the version of the near_vm library or the database could + // have had its data corrupted while at rest. + // + // There should definitely be some validation in near_vm to ensure we load what we think + // we load. + let executable = UniversalExecutableRef::deserialize(&serialized_module) + .map_err(|_| CacheError::DeserializationError)?; + let artifact = self .engine - .load_universal_executable(&executable) + .load_universal_executable_ref(&executable) .map(Arc::new) - .map_err(|err| VMRunnerError::LoadingError(err.to_string()))?), - Err(err) => Err(err), + .map_err(|err| VMRunnerError::LoadingError(err.to_string()))?; + Some(artifact) } - }) + } }; - #[cfg(feature = "no_cache")] - return compile_or_read_from_cache(); - - #[cfg(not(feature = "no_cache"))] - return { - static MEM_CACHE: once_cell::sync::Lazy< - near_cache::SyncLruCache< - near_primitives_core::hash::CryptoHash, - Result, - >, - > = once_cell::sync::Lazy::new(|| { - near_cache::SyncLruCache::new(crate::cache::CACHE_SIZE) - }); - MEM_CACHE.get_or_try_put(key, |_key| compile_or_read_from_cache()) - }; + Ok(if let Some(it) = stored_artifact { + Ok(it) + } else { + match self.compile_and_cache(code, cache)? { + Ok(executable) => Ok(self + .engine + .load_universal_executable(&executable) + .map(Arc::new) + .map_err(|err| VMRunnerError::LoadingError(err.to_string()))?), + Err(err) => Err(err), + } + }) } fn run_method( diff --git a/runtime/near-vm/compiler/src/error.rs b/runtime/near-vm/compiler/src/error.rs index 585a7309f4e..2572312baf0 100644 --- a/runtime/near-vm/compiler/src/error.rs +++ b/runtime/near-vm/compiler/src/error.rs @@ -40,8 +40,8 @@ pub enum CompileError { Resource(String), /// Cannot downcast the engine to a specific type. - #[error("cannot downcast the engine to a specific type")] - EngineDowncast, + #[error("data offset is out of bounds")] + InvalidOffset, } impl From for CompileError { diff --git a/runtime/near-vm/engine/Cargo.toml b/runtime/near-vm/engine/Cargo.toml index 200f062f6f2..3a346cfb1e5 100644 --- a/runtime/near-vm/engine/Cargo.toml +++ b/runtime/near-vm/engine/Cargo.toml @@ -29,6 +29,8 @@ target-lexicon.workspace = true thiserror.workspace = true cfg-if.workspace = true tracing.workspace = true +crossbeam-queue.workspace = true +rustix = { workspace = true, features = ["param", "mm"] } [badges] maintenance = { status = "actively-developed" } diff --git a/runtime/near-vm/engine/src/universal/artifact.rs b/runtime/near-vm/engine/src/universal/artifact.rs index 8b019a22ee3..902b726a863 100644 --- a/runtime/near-vm/engine/src/universal/artifact.rs +++ b/runtime/near-vm/engine/src/universal/artifact.rs @@ -20,7 +20,8 @@ use std::sync::Arc; /// A compiled wasm module, containing everything necessary for instantiation. pub struct UniversalArtifact { // TODO: figure out how to allocate fewer distinct structures onto heap. Maybe have an arena…? 
- pub(crate) engine: crate::universal::UniversalEngine, + pub(crate) engine: super::UniversalEngine, + pub(crate) _code_memory: super::CodeMemory, pub(crate) import_counts: ImportCounts, pub(crate) start_function: Option, pub(crate) vmoffsets: VMOffsets, @@ -47,7 +48,7 @@ impl UniversalArtifact { } /// Return the engine instance this artifact is loaded into. - pub fn engine(&self) -> &crate::universal::UniversalEngine { + pub fn engine(&self) -> &super::UniversalEngine { &self.engine } } diff --git a/runtime/near-vm/engine/src/universal/builder.rs b/runtime/near-vm/engine/src/universal/builder.rs index d91707ca819..7c27c8e1b03 100644 --- a/runtime/near-vm/engine/src/universal/builder.rs +++ b/runtime/near-vm/engine/src/universal/builder.rs @@ -7,6 +7,7 @@ pub struct Universal { compiler_config: Option>, target: Option, features: Option, + pool: Option, } impl Universal { @@ -15,12 +16,17 @@ impl Universal { where T: Into>, { - Self { compiler_config: Some(compiler_config.into()), target: None, features: None } + Self { + compiler_config: Some(compiler_config.into()), + target: None, + features: None, + pool: None, + } } /// Create a new headless Universal pub fn headless() -> Self { - Self { compiler_config: None, target: None, features: None } + Self { compiler_config: None, target: None, features: None, pool: None } } /// Set the target @@ -35,17 +41,25 @@ impl Universal { self } + /// Set the pool of reusable code memory + pub fn code_memory_pool(mut self, pool: super::LimitedMemoryPool) -> Self { + self.pool = Some(pool); + self + } + /// Build the `UniversalEngine` for this configuration pub fn engine(self) -> UniversalEngine { let target = self.target.unwrap_or_default(); + let pool = + self.pool.unwrap_or_else(|| panic!("Universal::code_memory_pool was not set up!")); if let Some(compiler_config) = self.compiler_config { let features = self .features .unwrap_or_else(|| compiler_config.default_features_for_target(&target)); let compiler = compiler_config.compiler(); - UniversalEngine::new(compiler, target, features) + UniversalEngine::new(compiler, target, features, pool) } else { - UniversalEngine::headless() + UniversalEngine::headless(pool) } } } diff --git a/runtime/near-vm/engine/src/universal/code_memory.rs b/runtime/near-vm/engine/src/universal/code_memory.rs index 84d88f83006..f4b4c6a2407 100644 --- a/runtime/near-vm/engine/src/universal/code_memory.rs +++ b/runtime/near-vm/engine/src/universal/code_memory.rs @@ -2,170 +2,284 @@ // Attributions: https://github.com/wasmerio/wasmer/blob/master/ATTRIBUTIONS.md //! Memory management for executable code. -use near_vm_compiler::{CustomSectionRef, FunctionBodyRef}; -use near_vm_vm::{Mmap, VMFunctionBody}; +use near_vm_compiler::CompileError; +use rustix::mm::{self, MapFlags, MprotectFlags, ProtFlags}; +use std::sync::Arc; /// The optimal alignment for functions. /// /// On x86-64, this is 16 since it's what the optimizations assume. /// When we add support for other architectures, we should also figure out their /// optimal alignment values. -const ARCH_FUNCTION_ALIGNMENT: usize = 16; +pub(crate) const ARCH_FUNCTION_ALIGNMENT: u16 = 16; /// The optimal alignment for data. /// -const DATA_SECTION_ALIGNMENT: usize = 64; +pub(crate) const DATA_SECTION_ALIGNMENT: u16 = 64; -/// Memory manager for executable code. 
-pub struct CodeMemory { - mmap: Mmap, - start_of_nonexecutable_pages: usize, +fn round_up(size: usize, multiple: usize) -> usize { + debug_assert!(multiple.is_power_of_two()); + (size + (multiple - 1)) & !(multiple - 1) } -impl CodeMemory { - /// Create a new `CodeMemory` instance. - pub fn new() -> Self { - Self { mmap: Mmap::new(), start_of_nonexecutable_pages: 0 } +pub struct CodeMemoryWriter<'a> { + memory: &'a mut CodeMemory, + offset: usize, +} + +impl<'a> CodeMemoryWriter<'a> { + /// Write the contents from the provided buffer into the location of `self.memory` aligned to + /// the provided `alignment`. + /// + /// The `alignment` actually used may be greater than the specified value. This is relevant, + /// for example, when calling this function after a sequence of [`Self::write_executable`] + /// calls. + /// + /// Returns the position within the mapping at which the buffer was written. + pub fn write_data(&mut self, mut alignment: u16, input: &[u8]) -> Result { + if self.offset == self.memory.executable_end { + alignment = u16::try_from(rustix::param::page_size()).expect("page size > u16::MAX"); + } + self.write_inner(alignment, input) } - /// Allocate a single contiguous block of memory for the functions and custom sections, and copy the data in place. - pub fn allocate( + /// Write the executable code from the provided buffer into the executable portion of + /// `self.memory`. + /// + /// All executable parts must be written out before `self.write_data` is called for the first + /// time. + /// + /// Returns the position within the mapping at which the buffer was written. + pub fn write_executable( &mut self, - functions: &[FunctionBodyRef<'_>], - executable_sections: &[CustomSectionRef<'_>], - data_sections: &[CustomSectionRef<'_>], - ) -> Result<(Vec<&mut [VMFunctionBody]>, Vec<&mut [u8]>, Vec<&mut [u8]>), String> { - let mut function_result = vec![]; - let mut data_section_result = vec![]; - let mut executable_section_result = vec![]; - - let page_size = region::page::size(); - - // 1. Calculate the total size, that is: - // - function body size, including all trampolines - // -- windows unwind info - // -- padding between functions - // - executable section body - // -- padding between executable sections - // - padding until a new page to change page permissions - // - data section body size - // -- padding between data sections - - let total_len = round_up( - functions.iter().fold(0, |acc, func| { - round_up(acc + Self::function_allocation_size(*func), ARCH_FUNCTION_ALIGNMENT) - }) + executable_sections - .iter() - .fold(0, |acc, exec| round_up(acc + exec.bytes.len(), ARCH_FUNCTION_ALIGNMENT)), - page_size, - ) + data_sections - .iter() - .fold(0, |acc, data| round_up(acc + data.bytes.len(), DATA_SECTION_ALIGNMENT)); - - // 2. Allocate the pages. Mark them all read-write. - - self.mmap = Mmap::with_at_least(total_len)?; - - // 3. Determine where the pointers to each function, executable section - // or data section are. Copy the functions. Collect the addresses of each and return them.
- - let mut bytes = 0; - let mut buf = self.mmap.as_mut_slice(); - for func in functions { - let len = round_up(Self::function_allocation_size(*func), ARCH_FUNCTION_ALIGNMENT); - let (func_buf, next_buf) = buf.split_at_mut(len); - buf = next_buf; - bytes += len; - - let vmfunc = Self::copy_function(*func, func_buf); - assert_eq!(vmfunc.as_ptr() as usize % ARCH_FUNCTION_ALIGNMENT, 0); - function_result.push(vmfunc); - } - for section in executable_sections { - let section = §ion.bytes; - assert_eq!(buf.as_mut_ptr() as usize % ARCH_FUNCTION_ALIGNMENT, 0); - let len = round_up(section.len(), ARCH_FUNCTION_ALIGNMENT); - let (s, next_buf) = buf.split_at_mut(len); - buf = next_buf; - bytes += len; - s[..section.len()].copy_from_slice(*section); - executable_section_result.push(s); - } + alignment: u16, + input: &[u8], + ) -> Result { + assert_eq!( + self.memory.executable_end, self.offset, + "may not interleave executable and data in the same map" + ); + let result = self.write_inner(alignment, input); + self.memory.executable_end = self.offset; + result + } - self.start_of_nonexecutable_pages = bytes; - - if !data_sections.is_empty() { - // Data sections have different page permissions from the executable - // code that came before it, so they need to be on different pages. - let padding = round_up(bytes, page_size) - bytes; - buf = buf.split_at_mut(padding).1; - - for section in data_sections { - let section = §ion.bytes; - assert_eq!(buf.as_mut_ptr() as usize % DATA_SECTION_ALIGNMENT, 0); - let len = round_up(section.len(), DATA_SECTION_ALIGNMENT); - let (s, next_buf) = buf.split_at_mut(len); - buf = next_buf; - s[..section.len()].copy_from_slice(*section); - data_section_result.push(s); - } - } + fn write_inner(&mut self, alignment: u16, input: &[u8]) -> Result { + let entry_offset = self.offset; + let aligned_offset = round_up(entry_offset, usize::from(alignment)); + let final_offset = aligned_offset + input.len(); + let out_buffer = self.memory.as_slice_mut(); + // Fill out the padding with zeroes, if only to make sure there are no gadgets in there. + out_buffer + .get_mut(entry_offset..aligned_offset) + .ok_or_else(|| CompileError::Resource("out of code memory space".into()))? + .fill(0); + out_buffer + .get_mut(aligned_offset..final_offset) + .ok_or_else(|| CompileError::Resource("out of code memory space".into()))? + .copy_from_slice(input); + self.offset = final_offset; + Ok(aligned_offset) + } - Ok((function_result, executable_section_result, data_section_result)) + /// The current position of the writer. + pub fn position(&self) -> usize { + self.offset } +} - /// Apply the page permissions. - pub fn publish(&mut self) { - if self.mmap.is_empty() || self.start_of_nonexecutable_pages == 0 { - return; - } - assert!(self.mmap.len() >= self.start_of_nonexecutable_pages); +/// Mappings to regions of memory storing the executable JIT code. +pub struct CodeMemory { + /// Where to return this memory to when dropped. + source_pool: Option>>, + + /// The mapping + map: *mut u8, + + /// Mapping size + size: usize, + + /// Addresses `0..executable_end` contain executable memory. + /// + /// In a populated buffer rounding this up to the next page will give the address of the + /// read-write data portion of this memory. + executable_end: usize, +} + +impl CodeMemory { + fn create(size: usize) -> rustix::io::Result { + // Make sure callers don’t pass in a 0-sized map request. That is most likely a bug. 
+ assert!(size != 0); + let size = round_up(size, rustix::param::page_size()); + let map = unsafe { + mm::mmap_anonymous( + std::ptr::null_mut(), + size, + ProtFlags::WRITE | ProtFlags::READ, + MapFlags::SHARED, + )? + }; + Ok(Self { source_pool: None, map: map.cast(), executable_end: 0, size }) + } + + fn as_slice_mut(&mut self) -> &mut [u8] { unsafe { - region::protect( - self.mmap.as_mut_ptr(), - self.start_of_nonexecutable_pages, - region::Protection::READ_EXECUTE, - ) + // SAFETY: We have made sure that this is the only reference to the memory region by + // requiring a mutable self reference. + std::slice::from_raw_parts_mut(self.map, self.size) } - .expect("unable to make memory readonly and executable"); } - /// Calculates the allocation size of the given compiled function. - fn function_allocation_size(func: FunctionBodyRef<'_>) -> usize { - func.body.len() + /// Ensure this CodeMemory is at least of the requested size. + /// + /// This will invalidate any data previously written into the mapping if the mapping needs to + /// be resized. + pub fn resize(mut self, size: usize) -> rustix::io::Result { + if self.size < size { + // Ideally we would use mremap, but see + // https://bugzilla.kernel.org/show_bug.cgi?id=8691 + let source_pool = unsafe { + mm::munmap(self.map.cast(), self.size)?; + let source_pool = self.source_pool.take(); + std::mem::forget(self); + source_pool + }; + Self::create(size).map(|mut m| { + m.source_pool = source_pool; + m + }) + } else { + self.executable_end = 0; + Ok(self) + } } - /// Copies the data of the compiled function to the given buffer. + /// Write to this code memory from the beginning of the mapping. + /// + /// # Safety /// - /// This will also add the function to the current function table. - fn copy_function<'a>(func: FunctionBodyRef<'_>, buf: &'a mut [u8]) -> &'a mut [VMFunctionBody] { - assert_eq!(buf.as_ptr() as usize % ARCH_FUNCTION_ALIGNMENT, 0); + /// At the time this method is called, there should remain no dangling readable/executable + /// references to this `CodeMemory`, for the original code memory that those references point + /// to are invalidated as soon as this method is invoked. + pub unsafe fn writer(&mut self) -> CodeMemoryWriter<'_> { + self.executable_end = 0; + CodeMemoryWriter { memory: self, offset: 0 } + } - let func_len = func.body.len(); + /// Publish the specified number of bytes as executable code. + /// + /// # Safety + /// + /// Calling this requires that no mutable references to the code memory remain. + pub unsafe fn publish(&mut self) -> Result<(), CompileError> { + mm::mprotect( + self.map.cast(), + self.executable_end, + MprotectFlags::EXEC | MprotectFlags::READ, + ) + .map_err(|e| { + CompileError::Resource(format!("could not make code memory executable: {}", e)) + }) + } - let (body, _remainder) = buf.split_at_mut(func_len); - body.copy_from_slice(&func.body); - Self::view_as_mut_vmfunc_slice(body) + /// Remap the offset into an absolute address within a read-execute mapping. + /// + /// Offset must not exceed `isize::MAX`. + pub unsafe fn executable_address(&self, offset: usize) -> *const u8 { + // TODO: encapsulate offsets so that this `offset` is guaranteed to be sound. + debug_assert!(offset <= isize::MAX as usize); + self.map.offset(offset as isize) } - /// Convert mut a slice from u8 to VMFunctionBody. 
- fn view_as_mut_vmfunc_slice(slice: &mut [u8]) -> &mut [VMFunctionBody] { - let byte_ptr: *mut [u8] = slice; - let body_ptr = byte_ptr as *mut [VMFunctionBody]; - unsafe { &mut *body_ptr } + /// Remap the offset into an absolute address within a read-write mapping. + /// + /// Offset must not exceed `isize::MAX`. + pub unsafe fn writable_address(&self, offset: usize) -> *mut u8 { + // TODO: encapsulate offsets so that this `offset` is guaranteed to be sound. + debug_assert!(offset <= isize::MAX as usize); + self.map.offset(offset as isize) } } -fn round_up(size: usize, multiple: usize) -> usize { - debug_assert!(multiple.is_power_of_two()); - (size + (multiple - 1)) & !(multiple - 1) +impl Drop for CodeMemory { + fn drop(&mut self) { + if let Some(source_pool) = self.source_pool.take() { + unsafe { + let result = mm::mprotect( + self.map.cast(), + self.size, + MprotectFlags::WRITE | MprotectFlags::READ, + ); + if let Err(e) = result { + panic!( + "could not mprotect mapping before returning it to the memory pool: \ map={:?}, size={:?}, error={}", + self.map, self.size, e + ); + } + } + drop(source_pool.push(Self { + source_pool: None, + map: self.map, + size: self.size, + executable_end: 0, + })); + } else { + unsafe { + if let Err(e) = mm::munmap(self.map.cast(), self.size) { + tracing::error!( + message="could not unmap mapping", + map=?self.map, size=self.size, error=%e + ); + } + } + } + } +} + +unsafe impl Send for CodeMemory {} + +/// The pool of preallocated memory maps for storing the code. +/// +/// This pool cannot grow and will only allow up to the number of code mappings that were specified +/// at construction time. +/// +/// However it is possible for the mappings inside to grow to accommodate larger code. +#[derive(Clone)] +pub struct LimitedMemoryPool { + pool: Arc>, +} + +impl LimitedMemoryPool { + /// Create a new pool with `count` mappings initialized to `default_memory_size` each. + pub fn new(count: usize, default_memory_size: usize) -> rustix::io::Result { + let pool = Arc::new(crossbeam_queue::ArrayQueue::new(count)); + let this = Self { pool }; + for _ in 0..count { + this.pool + .push(CodeMemory::create(default_memory_size)?) + .unwrap_or_else(|_| panic!("ArrayQueue could not accommodate {count} memories!")); + } + Ok(this) + } + + /// Get a memory mapping, at least `size` bytes large. + pub fn get(&self, size: usize) -> rustix::io::Result { + let mut memory = self.pool.pop().ok_or(rustix::io::Errno::NOMEM)?; + memory.source_pool = Some(Arc::clone(&self.pool)); + if memory.size < size { + Ok(memory.resize(size)?) + } else { + Ok(memory) + } + } } #[cfg(test)] mod tests { use super::CodeMemory; fn _assert() { - fn _assert_send_sync() {} - _assert_send_sync::(); + fn _assert_send() {} + _assert_send::(); } } diff --git a/runtime/near-vm/engine/src/universal/engine.rs b/runtime/near-vm/engine/src/universal/engine.rs index 60d27ca9a42..bcd0fbb5c2f 100644 --- a/runtime/near-vm/engine/src/universal/engine.rs +++ b/runtime/near-vm/engine/src/universal/engine.rs @@ -1,7 +1,8 @@ //! Universal compilation.
-use crate::universal::executable::{unrkyv, UniversalExecutableRef}; -use crate::universal::{CodeMemory, UniversalArtifact, UniversalExecutable}; +use super::code_memory::{ARCH_FUNCTION_ALIGNMENT, DATA_SECTION_ALIGNMENT}; +use super::executable::{unrkyv, UniversalExecutableRef}; +use super::{CodeMemory, UniversalArtifact, UniversalExecutable}; use crate::EngineId; use near_vm_compiler::Compiler; use near_vm_compiler::{ @@ -16,7 +17,7 @@ use near_vm_types::{ }; use near_vm_vm::{ FuncDataRegistry, FunctionBodyPtr, SectionBodyPtr, SignatureRegistry, Tunables, - VMCallerCheckedAnyfunc, VMFuncRef, VMFunctionBody, VMImportType, VMLocalFunction, VMOffsets, + VMCallerCheckedAnyfunc, VMFuncRef, VMImportType, VMLocalFunction, VMOffsets, VMSharedSignatureIndex, VMTrampoline, }; use rkyv::de::deserializers::SharedDeserializeMap; @@ -35,11 +36,16 @@ pub struct UniversalEngine { impl UniversalEngine { /// Create a new `UniversalEngine` with the given config - pub fn new(compiler: Box, target: Target, features: Features) -> Self { + pub fn new( + compiler: Box, + target: Target, + features: Features, + memory_allocator: super::LimitedMemoryPool, + ) -> Self { Self { inner: Arc::new(Mutex::new(UniversalEngineInner { compiler: Some(compiler), - code_memory: vec![], + code_memory_pool: memory_allocator, signatures: SignatureRegistry::new(), func_data: Arc::new(FuncDataRegistry::new()), features, @@ -62,11 +68,11 @@ impl UniversalEngine { /// /// Headless engines can't compile or validate any modules, /// they just take already processed Modules (via `Module::serialize`). - pub fn headless() -> Self { + pub fn headless(memory_allocator: super::LimitedMemoryPool) -> Self { Self { inner: Arc::new(Mutex::new(UniversalEngineInner { compiler: None, - code_memory: vec![], + code_memory_pool: memory_allocator, signatures: SignatureRegistry::new(), func_data: Arc::new(FuncDataRegistry::new()), features: Features::default(), @@ -215,8 +221,8 @@ impl UniversalEngine { .map(|(_, sig)| inner_engine.signatures.register(sig.clone())) .collect::>() .into_boxed_slice(); - let (functions, trampolines, dynamic_trampolines, custom_sections) = inner_engine - .allocate( + let (functions, trampolines, dynamic_trampolines, custom_sections, mut code_memory) = + inner_engine.allocate( local_functions, function_call_trampolines.iter().map(|(_, b)| b.into()), dynamic_function_trampolines.iter().map(|(_, b)| b.into()), @@ -264,7 +270,12 @@ impl UniversalEngine { ); // Make all code loaded executable. - inner_engine.publish_compiled_code(); + unsafe { + // SAFETY: We finished relocation and linking just above. There should be no write + // access past this point, though I don’t think we have a good mechanism to ensure this + // statically at this point.. 
+ code_memory.publish()?; + } let exports = module .exports .iter() @@ -273,6 +284,7 @@ impl UniversalEngine { Ok(UniversalArtifact { engine: self.clone(), + _code_memory: code_memory, import_counts: module.import_counts, start_function: module.start_function, vmoffsets: VMOffsets::for_host().with_module_info(&*module), @@ -350,8 +362,8 @@ impl UniversalEngine { }) .collect::>() .into_boxed_slice(); - let (functions, trampolines, dynamic_trampolines, custom_sections) = inner_engine - .allocate( + let (functions, trampolines, dynamic_trampolines, custom_sections, mut code_memory) = + inner_engine.allocate( local_functions, call_trampolines.map(|(_, b)| b.into()), dynamic_trampolines.map(|(_, b)| b.into()), @@ -405,7 +417,12 @@ impl UniversalEngine { ); // Make all code compiled thus far executable. - inner_engine.publish_compiled_code(); + unsafe { + // SAFETY: We finished relocation and linking just above. There should be no write + // access past this point, though I don’t think we have a good mechanism to ensure this + // statically at this point.. + code_memory.publish()?; + } let exports = module .exports .iter() @@ -413,6 +430,7 @@ impl UniversalEngine { .collect::>(); Ok(UniversalArtifact { engine: self.clone(), + _code_memory: code_memory, import_counts, start_function: unrkyv(&module.start_function), vmoffsets: VMOffsets::for_host().with_archived_module_info(&*module), @@ -467,11 +485,10 @@ impl UniversalEngine { pub struct UniversalEngineInner { /// The compiler compiler: Option>, + /// Pool from which code memory can be allocated. + code_memory_pool: super::LimitedMemoryPool, /// The features to compile the Wasm module with features: Features, - /// The code memory is responsible of publishing the compiled - /// functions to memory. - code_memory: Vec, /// The signature registry is used mainly to operate with trampolines /// performantly. pub(crate) signatures: SignatureRegistry, @@ -515,10 +532,11 @@ impl UniversalEngineInner { PrimaryMap, PrimaryMap, PrimaryMap, + CodeMemory, ), CompileError, > { - let code_memory = &mut self.code_memory; + let code_memory_pool = &mut self.code_memory_pool; let function_count = local_functions.len(); let call_trampoline_count = call_trampolines.len(); let function_bodies = @@ -536,41 +554,79 @@ impl UniversalEngineInner { } section_types.push(section.protection); } - code_memory.push(CodeMemory::new()); - let code_memory = self.code_memory.last_mut().expect("infallible"); - - let (mut allocated_functions, allocated_executable_sections, allocated_data_sections) = - code_memory - .allocate( - function_bodies.as_slice(), - executable_sections.as_slice(), - data_sections.as_slice(), - ) - .map_err(|message| { - CompileError::Resource(format!( - "failed to allocate memory for functions: {}", - message - )) - })?; + + // 1. 
Calculate the total size, that is: + // - function body size, including all trampolines + // -- windows unwind info + // -- padding between functions + // - executable section body + // -- padding between executable sections + // - padding until a new page to change page permissions + // - data section body size + // -- padding between data sections + let page_size = rustix::param::page_size(); + let total_len = 0; + let total_len = function_bodies.iter().fold(total_len, |acc, func| { + round_up(acc, ARCH_FUNCTION_ALIGNMENT.into()) + function_allocation_size(*func) + }); + let total_len = executable_sections.iter().fold(total_len, |acc, exec| { + round_up(acc, ARCH_FUNCTION_ALIGNMENT.into()) + exec.bytes.len() + }); + let total_len = round_up(total_len, page_size); + let total_len = data_sections.iter().fold(total_len, |acc, data| { + round_up(acc, DATA_SECTION_ALIGNMENT.into()) + data.bytes.len() + }); + + let mut code_memory = code_memory_pool.get(total_len).map_err(|e| { + CompileError::Resource(format!("could not allocate code memory: {}", e)) + })?; + let mut code_writer = unsafe { + // SAFETY: We just popped out an unused code memory from an allocator pool. + code_memory.writer() + }; + + let mut allocated_functions = vec![]; + let mut allocated_data_sections = vec![]; + let mut allocated_executable_sections = vec![]; + for func in function_bodies { + let offset = code_writer + .write_executable(ARCH_FUNCTION_ALIGNMENT, func.body) + .expect("incorrectly computed code memory size"); + allocated_functions.push((offset, func.body.len())); + } + for section in executable_sections { + let offset = code_writer.write_executable(ARCH_FUNCTION_ALIGNMENT, section.bytes)?; + allocated_executable_sections.push(offset); + } + if !data_sections.is_empty() { + for section in data_sections { + let offset = code_writer + .write_data(DATA_SECTION_ALIGNMENT, section.bytes) + .expect("incorrectly computed code memory size"); + allocated_data_sections.push(offset); + } + } let mut allocated_function_call_trampolines: PrimaryMap = PrimaryMap::new(); - for ptr in allocated_functions.drain(0..call_trampoline_count).map(|slice| slice.as_ptr()) { + + for (offset, _) in allocated_functions.drain(0..call_trampoline_count) { // TODO: What in damnation have you done?! – Bannon - let trampoline = - unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(ptr) }; + let trampoline = unsafe { + std::mem::transmute::<_, VMTrampoline>(code_memory.executable_address(offset)) + }; allocated_function_call_trampolines.push(trampoline); } let allocated_functions_result = allocated_functions .drain(0..function_count) .enumerate() - .map(|(index, slice)| -> Result<_, CompileError> { + .map(|(index, (offset, length))| -> Result<_, CompileError> { let index = LocalFunctionIndex::new(index); let (sig_idx, sig) = function_signature(index); Ok(VMLocalFunction { - body: FunctionBodyPtr(slice.as_ptr()), - length: u32::try_from(slice.len()).map_err(|_| { + body: FunctionBodyPtr(unsafe { code_memory.executable_address(offset).cast() }), + length: u32::try_from(length).map_err(|_| { CompileError::Codegen("function body length exceeds 4GiB".into()) })?, signature: sig, @@ -581,7 +637,9 @@ impl UniversalEngineInner { let allocated_dynamic_function_trampolines = allocated_functions .drain(..) 
- .map(|slice| FunctionBodyPtr(slice.as_ptr())) + .map(|(offset, _)| { + FunctionBodyPtr(unsafe { code_memory.executable_address(offset).cast() }) + }) .collect::>(); let mut exec_iter = allocated_executable_sections.iter(); @@ -589,15 +647,11 @@ impl UniversalEngineInner { let allocated_custom_sections = section_types .into_iter() .map(|protection| { - SectionBodyPtr( - if protection == CustomSectionProtection::ReadExecute { - exec_iter.next() - } else { - data_iter.next() - } - .unwrap() - .as_ptr(), - ) + SectionBodyPtr(if protection == CustomSectionProtection::ReadExecute { + unsafe { code_memory.executable_address(*exec_iter.next().unwrap()).cast() } + } else { + unsafe { code_memory.writable_address(*data_iter.next().unwrap()).cast() } + }) }) .collect::>(); @@ -606,16 +660,21 @@ impl UniversalEngineInner { allocated_function_call_trampolines, allocated_dynamic_function_trampolines, allocated_custom_sections, + code_memory, )) } - /// Make memory containing compiled code executable. - pub(crate) fn publish_compiled_code(&mut self) { - self.code_memory.last_mut().unwrap().publish(); - } - /// Shared func metadata registry. pub(crate) fn func_data(&self) -> &Arc { &self.func_data } } + +fn round_up(size: usize, multiple: usize) -> usize { + debug_assert!(multiple.is_power_of_two()); + (size + (multiple - 1)) & !(multiple - 1) +} + +fn function_allocation_size(func: FunctionBodyRef<'_>) -> usize { + func.body.len() +} diff --git a/runtime/near-vm/engine/src/universal/mod.rs b/runtime/near-vm/engine/src/universal/mod.rs index 1ce0297d7f9..7cc02f49d49 100644 --- a/runtime/near-vm/engine/src/universal/mod.rs +++ b/runtime/near-vm/engine/src/universal/mod.rs @@ -7,7 +7,7 @@ mod link; pub use self::artifact::UniversalArtifact; pub use self::builder::Universal; -pub use self::code_memory::CodeMemory; +pub use self::code_memory::{CodeMemory, LimitedMemoryPool}; pub use self::engine::UniversalEngine; pub use self::executable::{UniversalExecutable, UniversalExecutableRef}; pub use self::link::link_module; diff --git a/runtime/near-vm/test-api/src/sys/instance.rs b/runtime/near-vm/test-api/src/sys/instance.rs index f9c94422a59..aad589112db 100644 --- a/runtime/near-vm/test-api/src/sys/instance.rs +++ b/runtime/near-vm/test-api/src/sys/instance.rs @@ -24,20 +24,6 @@ pub struct Instance { module: Module, } -#[cfg(test)] -mod send_test { - use super::*; - - fn is_send() -> bool { - true - } - - #[test] - fn instance_is_send() { - assert!(is_send::()); - } -} - /// An error while instantiating a module. /// /// This is not a common WebAssembly error, however diff --git a/runtime/near-vm/test-api/src/sys/store.rs b/runtime/near-vm/test-api/src/sys/store.rs index de72384f591..97993826a54 100644 --- a/runtime/near-vm/test-api/src/sys/store.rs +++ b/runtime/near-vm/test-api/src/sys/store.rs @@ -85,7 +85,9 @@ impl Default for Store { fn get_engine(mut config: impl CompilerConfig + 'static) -> UniversalEngine { cfg_if::cfg_if! 
{ if #[cfg(feature = "default-universal")] { + let pool = near_vm_engine::universal::LimitedMemoryPool::new(1, 0x10000).unwrap(); near_vm_engine::universal::Universal::new(config) + .code_memory_pool(pool) .engine() } else if #[cfg(feature = "default-dylib")] { near_vm_engine_dylib::Dylib::new(config) diff --git a/runtime/near-vm/tests/compilers/compilation.rs b/runtime/near-vm/tests/compilers/compilation.rs index 2c991409584..928b4eb2f19 100644 --- a/runtime/near-vm/tests/compilers/compilation.rs +++ b/runtime/near-vm/tests/compilers/compilation.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use near_vm_compiler::CompileError; -use near_vm_engine::universal::Universal; +use near_vm_engine::universal::{LimitedMemoryPool, Universal}; use near_vm_test_api::*; use near_vm_vm::Artifact; @@ -75,7 +75,8 @@ fn profiling() { "#; let wasm = wat2wasm(wat.as_bytes()).unwrap(); let compiler = Singlepass::default(); - let engine = Arc::new(Universal::new(compiler).engine()); + let pool = LimitedMemoryPool::new(1, 0x10000).unwrap(); + let engine = Arc::new(Universal::new(compiler).code_memory_pool(pool).engine()); let store = Store::new(Arc::clone(&engine)); match compile_uncached(&store, &engine, &wasm, false) { Ok(art) => unsafe { diff --git a/runtime/near-vm/tests/compilers/config.rs b/runtime/near-vm/tests/compilers/config.rs index b2babd21b2c..543cb998118 100644 --- a/runtime/near-vm/tests/compilers/config.rs +++ b/runtime/near-vm/tests/compilers/config.rs @@ -45,7 +45,10 @@ impl Config { } pub fn engine(&self, compiler_config: Box) -> UniversalEngine { - let mut engine = near_vm_engine::universal::Universal::new(compiler_config); + let mut engine = near_vm_engine::universal::Universal::new(compiler_config) + .code_memory_pool( + near_vm_engine::universal::LimitedMemoryPool::new(128, 16 * 4096).unwrap(), + ); if let Some(ref features) = self.features { engine = engine.features(features.clone()) } diff --git a/runtime/near-vm/tests/compilers/deterministic.rs b/runtime/near-vm/tests/compilers/deterministic.rs index dc00bb404c9..0d58205d1cc 100644 --- a/runtime/near-vm/tests/compilers/deterministic.rs +++ b/runtime/near-vm/tests/compilers/deterministic.rs @@ -1,11 +1,12 @@ use anyhow::Result; use near_vm_compiler_singlepass::Singlepass; -use near_vm_engine::universal::Universal; +use near_vm_engine::universal::{LimitedMemoryPool, Universal}; use near_vm_test_api::{wat2wasm, BaseTunables}; fn compile_and_compare(wasm: &[u8]) -> Result<()> { let compiler = Singlepass::default(); - let engine = Universal::new(compiler).engine(); + let pool = LimitedMemoryPool::new(1, 0x10000).unwrap(); + let engine = Universal::new(compiler).code_memory_pool(pool).engine(); let tunables = BaseTunables::for_target(engine.target()); // compile for first time diff --git a/runtime/near-vm/tests/compilers/stack_limiter.rs b/runtime/near-vm/tests/compilers/stack_limiter.rs index 9e83632224f..e6014f9d68b 100644 --- a/runtime/near-vm/tests/compilers/stack_limiter.rs +++ b/runtime/near-vm/tests/compilers/stack_limiter.rs @@ -1,12 +1,13 @@ use near_vm_compiler_singlepass::Singlepass; -use near_vm_engine::universal::Universal; +use near_vm_engine::universal::{LimitedMemoryPool, Universal}; use near_vm_test_api::*; use near_vm_types::InstanceConfig; use near_vm_vm::TrapCode; fn get_store() -> Store { let compiler = Singlepass::default(); - let store = Store::new(Universal::new(compiler).engine().into()); + let pool = LimitedMemoryPool::new(6, 0x100000).expect("foo"); + let store = 
Store::new(Universal::new(compiler).code_memory_pool(pool).engine().into()); store } diff --git a/runtime/near-vm/vm/src/artifact.rs b/runtime/near-vm/vm/src/artifact.rs index cf7f5c53779..d8a69dee4b8 100644 --- a/runtime/near-vm/vm/src/artifact.rs +++ b/runtime/near-vm/vm/src/artifact.rs @@ -8,7 +8,7 @@ use std::{any::Any, collections::BTreeMap, sync::Arc}; /// [`Artifact`]s that can be instantiated. pub trait Instantiatable: Artifact { /// The errors that can occur when instantiating. - type Error: std::error::Error + Send + Sync; + type Error: std::error::Error + Send; /// Crate an `Instance` from this `Artifact`. /// @@ -31,7 +31,7 @@ pub trait Instantiatable: Artifact { /// /// Some other operations such as linking, relocating and similar may also be performed during /// constructon of the Artifact, making this type particularly well suited for caching in-memory. -pub trait Artifact: Send + Sync { +pub trait Artifact: Send { /// The information about offsets into the VM context table. fn offsets(&self) -> &crate::VMOffsets;
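
Usage sketch (illustrative, not part of the patch): how an embedder might wire the new `LimitedMemoryPool` into the `Universal` builder, mirroring the calls this change adds in `near_vm_runner.rs` and in the tests. The `make_engine` helper name is hypothetical, and the pool dimensions (8 mappings of 64 MiB) simply echo the patch's best-guess defaults rather than a recommendation; note that `Universal::engine()` now panics if no pool was supplied.

    use near_vm_compiler_singlepass::Singlepass;
    use near_vm_engine::universal::{LimitedMemoryPool, Universal, UniversalEngine};

    fn make_engine() -> UniversalEngine {
        // Pre-allocate a fixed number of code mappings up front; the pool cannot
        // grow, but each CodeMemory in it may be resized by `LimitedMemoryPool::get`
        // when a compiled artifact does not fit the default mapping size.
        let pool = LimitedMemoryPool::new(8, 64 * 1024 * 1024)
            .expect("could not pre-allocate code memory for the runtime");
        // The builder requires a pool: `engine()` panics when none has been set.
        Universal::new(Singlepass::default()).code_memory_pool(pool).engine()
    }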