diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs
index df3da48aae60..27012d4bd027 100644
--- a/crates/runtime/src/instance.rs
+++ b/crates/runtime/src/instance.rs
@@ -11,7 +11,7 @@ use crate::vmcontext::{
     VMCallerCheckedAnyfunc, VMContext, VMFunctionImport, VMGlobalDefinition, VMGlobalImport,
     VMInterrupts, VMMemoryDefinition, VMMemoryImport, VMTableDefinition, VMTableImport,
 };
-use crate::{ExportFunction, ExportGlobal, ExportMemory, ExportTable, Store};
+use crate::{CompiledModuleId, ExportFunction, ExportGlobal, ExportMemory, ExportTable, Store};
 use anyhow::Error;
 use memoffset::offset_of;
 use more_asserts::assert_lt;
@@ -54,6 +54,9 @@ pub(crate) struct Instance {
     /// The `Module` this `Instance` was instantiated from.
     module: Arc<Module>,

+    /// The unique ID for the `Module` this `Instance` was instantiated from.
+    unique_id: Option<CompiledModuleId>,
+
     /// Offsets in the `vmctx` region, precomputed from the `module` above.
     offsets: VMOffsets<HostPtr>,

@@ -100,6 +103,7 @@ impl Instance {
     /// Helper for allocators; not a public API.
     pub(crate) fn create_raw(
         module: &Arc<Module>,
+        unique_id: Option<CompiledModuleId>,
         wasm_data: &'static [u8],
         memories: PrimaryMap<DefinedMemoryIndex, Memory>,
         tables: PrimaryMap<DefinedTableIndex, Table>,
@@ -107,6 +111,7 @@
     ) -> Instance {
         Instance {
             module: module.clone(),
+            unique_id,
             offsets: VMOffsets::new(HostPtr, &module),
             memories,
             tables,
diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs
index 5813a0bdc5ef..4b9b61397a34 100644
--- a/crates/runtime/src/instance/allocator.rs
+++ b/crates/runtime/src/instance/allocator.rs
@@ -7,7 +7,7 @@ use crate::vmcontext::{
     VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMGlobalDefinition, VMSharedSignatureIndex,
 };
 use crate::ModuleMemFds;
-use crate::Store;
+use crate::{CompiledModuleId, Store};
 use anyhow::Result;
 use std::alloc;
 use std::any::Any;
@@ -35,6 +35,9 @@ pub struct InstanceAllocationRequest<'a> {
     /// The module being instantiated.
     pub module: Arc<Module>,

+    /// The unique ID of the module being allocated within this engine.
+    pub unique_id: Option<CompiledModuleId>,
+
     /// The base address of where JIT functions are located.
     pub image_base: usize,

@@ -726,8 +729,14 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator {
         let host_state = std::mem::replace(&mut req.host_state, Box::new(()));

         let mut handle = {
-            let instance =
-                Instance::create_raw(&req.module, &*req.wasm_data, memories, tables, host_state);
+            let instance = Instance::create_raw(
+                &req.module,
+                req.unique_id,
+                &*req.wasm_data,
+                memories,
+                tables,
+                host_state,
+            );
             let layout = instance.alloc_layout();
             let instance_ptr = alloc::alloc(layout) as *mut Instance;
             if instance_ptr.is_null() {
diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs
index 72a21ff5ba6e..90db21fdebcb 100644
--- a/crates/runtime/src/instance/allocator/pooling.rs
+++ b/crates/runtime/src/instance/allocator/pooling.rs
@@ -15,7 +15,6 @@ use crate::MemFdSlot;
 use crate::{instance::Instance, Memory, Mmap, ModuleMemFds, Table};
 use anyhow::{anyhow, bail, Context, Result};
 use libc::c_void;
-use rand::Rng;
 use std::convert::TryFrom;
 use std::mem;
 use std::sync::Arc;
@@ -25,6 +24,9 @@ use wasmtime_environ::{
     WASM_PAGE_SIZE,
 };

+mod index_allocator;
+use index_allocator::{PoolingAllocationState, SlotId};
+
 cfg_if::cfg_if! {
     if #[cfg(windows)] {
         mod windows;
@@ -250,20 +252,19 @@ pub enum PoolingAllocationStrategy {
     NextAvailable,
     /// Allocate from a random available instance.
     Random,
+    /// Try to allocate an instance slot that was previously used for
+    /// the same module, potentially enabling faster instantiation by
+    /// reusing e.g. memory mappings.
+    ReuseAffinity,
 }
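A note on the `ReuseAffinity` variant and the `unique_id` plumbing above: affinity is only a hint. Requests carry an `Option<CompiledModuleId>`, and host-created shim instances pass `None`, so the allocator must fall back gracefully when no warm slot exists. A minimal standalone sketch of that contract (the `ModuleId`/`Pool` names here are illustrative stand-ins, not the crate's API):

```rust
use std::collections::HashMap;

// Illustrative stand-ins for CompiledModuleId and the pool's state.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct ModuleId(u64);

struct Pool {
    per_module: HashMap<ModuleId, Vec<usize>>, // warm slots, by module
    free: Vec<usize>,                          // all free slots
}

impl Pool {
    fn allocate(&mut self, unique_id: Option<ModuleId>) -> Option<usize> {
        // Affinity is a hint: `None` (e.g. host-created shim instances)
        // and modules with no warm slot both fall back to the global list.
        if let Some(slot) = unique_id
            .and_then(|id| self.per_module.get_mut(&id))
            .and_then(|slots| slots.pop())
        {
            self.free.retain(|&s| s != slot); // simplified; the real code is O(1)
            return Some(slot);
        }
        self.free.pop()
    }
}

fn main() {
    let mut pool = Pool {
        per_module: HashMap::from([(ModuleId(1), vec![7])]),
        free: vec![7, 3, 9],
    };
    assert_eq!(pool.allocate(Some(ModuleId(1))), Some(7)); // warm slot reused
    assert_eq!(pool.allocate(None), Some(9)); // no affinity: any free slot
}
```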

-impl PoolingAllocationStrategy {
-    fn next(&self, free_count: usize) -> usize {
-        debug_assert!(free_count > 0);
-
-        match self {
-            Self::NextAvailable => free_count - 1,
-            Self::Random => rand::thread_rng().gen_range(0..free_count),
-        }
+impl Default for PoolingAllocationStrategy {
+    #[cfg(feature = "memfd-allocator")]
+    fn default() -> Self {
+        Self::ReuseAffinity
     }
-}

-impl Default for PoolingAllocationStrategy {
+    #[cfg(not(feature = "memfd-allocator"))]
     fn default() -> Self {
         Self::NextAvailable
     }
@@ -283,13 +284,14 @@ struct InstancePool {
     mapping: Mmap,
     instance_size: usize,
     max_instances: usize,
-    free_list: Mutex<Vec<usize>>,
+    index_allocator: Mutex<PoolingAllocationState>,
     memories: MemoryPool,
     tables: TablePool,
 }

 impl InstancePool {
     fn new(
+        strategy: PoolingAllocationStrategy,
         module_limits: &ModuleLimits,
         instance_limits: &InstanceLimits,
         tunables: &Tunables,
@@ -330,7 +332,7 @@ impl InstancePool {
             mapping,
             instance_size,
             max_instances,
-            free_list: Mutex::new((0..max_instances).collect()),
+            index_allocator: Mutex::new(PoolingAllocationState::new(strategy, max_instances)),
             memories: MemoryPool::new(module_limits, instance_limits, tunables)?,
             tables: TablePool::new(module_limits, instance_limits)?,
         };
@@ -351,6 +353,7 @@ impl InstancePool {
         let host_state = std::mem::replace(&mut req.host_state, Box::new(()));
         let instance_data = Instance::create_raw(
             &req.module,
+            req.unique_id,
             &*req.wasm_data,
             PrimaryMap::default(),
             PrimaryMap::default(),
@@ -362,6 +365,7 @@
         // chosen slot before we do anything else with it. (This is
         // paired with a `drop_in_place` in deallocate below.)
         let instance = self.instance(index);
+
         std::ptr::write(instance as _, instance_data);

         // set_instance_memories and _tables will need the store before we can completely
@@ -393,16 +397,14 @@

     fn allocate(
         &self,
-        strategy: PoolingAllocationStrategy,
         req: InstanceAllocationRequest,
     ) -> Result<InstanceHandle, InstantiationError> {
         let index = {
-            let mut free_list = self.free_list.lock().unwrap();
-            if free_list.is_empty() {
+            let mut alloc = self.index_allocator.lock().unwrap();
+            if alloc.is_empty() {
                 return Err(InstantiationError::Limit(self.max_instances as u32));
             }
-            let free_index = strategy.next(free_list.len());
-            free_list.swap_remove(free_index)
+            alloc.alloc(req.unique_id).index()
         };

         unsafe {
@@ -497,7 +499,7 @@
         // touched again until we write a fresh Instance in-place with
         // std::ptr::write in allocate() above.

-        self.free_list.lock().unwrap().push(index);
+        self.index_allocator.lock().unwrap().free(SlotId(index));
     }

     fn set_instance_memories(
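The `std::ptr::write` above (and the matching `drop_in_place` mentioned in the comment) is the standard pattern for pool slots holding raw, possibly-uninitialized memory: `write` moves a fresh `Instance` in without dropping whatever bytes were there before, and deallocation runs the destructor in place so the slot can be recycled. A self-contained sketch of the pattern with a toy type:

```rust
use std::alloc::{alloc, dealloc, Layout};

struct Widget {
    name: String,
}

fn main() {
    let layout = Layout::new::<Widget>();
    unsafe {
        // A "slot": raw memory with no valid Widget in it yet.
        let slot = alloc(layout) as *mut Widget;
        assert!(!slot.is_null());

        // Like InstancePool's setup: move a value in WITHOUT reading or
        // dropping the garbage that was there before.
        std::ptr::write(slot, Widget { name: "instance".to_string() });

        // Use the slot...
        assert_eq!((*slot).name, "instance");

        // Like deallocate: run the destructor in place, leaving the
        // memory allocated but uninitialized again, ready for the
        // next ptr::write.
        std::ptr::drop_in_place(slot);

        dealloc(slot as *mut u8, layout);
    }
}
```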
@@ -860,7 +862,7 @@ struct StackPool {
     stack_size: usize,
     max_instances: usize,
     page_size: usize,
-    free_list: Mutex<Vec<usize>>,
+    index_allocator: Mutex<PoolingAllocationState>,
 }

 #[cfg(all(feature = "async", unix))]
@@ -903,25 +905,29 @@ impl StackPool {
             stack_size,
             max_instances,
             page_size,
-            free_list: Mutex::new((0..max_instances).collect()),
+            // We always use a `NextAvailable` strategy for stack
+            // allocation. We don't want or need an affinity policy
+            // here: stacks do not benefit from being allocated to the
+            // same compiled module with the same image (they always
+            // start zeroed just the same for everyone).
+            index_allocator: Mutex::new(PoolingAllocationState::new(
+                PoolingAllocationStrategy::NextAvailable,
+                max_instances,
+            )),
         })
     }

-    fn allocate(
-        &self,
-        strategy: PoolingAllocationStrategy,
-    ) -> Result<wasmtime_fiber::FiberStack, FiberStackError> {
+    fn allocate(&self) -> Result<wasmtime_fiber::FiberStack, FiberStackError> {
         if self.stack_size == 0 {
             return Err(FiberStackError::NotSupported);
         }

         let index = {
-            let mut free_list = self.free_list.lock().unwrap();
-            if free_list.is_empty() {
+            let mut alloc = self.index_allocator.lock().unwrap();
+            if alloc.is_empty() {
                 return Err(FiberStackError::Limit(self.max_instances as u32));
             }
-            let free_index = strategy.next(free_list.len());
-            free_list.swap_remove(free_index)
+            alloc.alloc(None).index()
         };

         debug_assert!(index < self.max_instances);
@@ -967,7 +973,7 @@

         decommit_stack_pages(bottom_of_stack as _, stack_size).unwrap();

-        self.free_list.lock().unwrap().push(index);
+        self.index_allocator.lock().unwrap().free(SlotId(index));
     }
 }

@@ -978,7 +984,6 @@
 /// Note: the resource pools are manually dropped so that the fault handler terminates correctly.
 #[derive(Debug)]
 pub struct PoolingInstanceAllocator {
-    strategy: PoolingAllocationStrategy,
     module_limits: ModuleLimits,
     // This is manually drop so that the pools unmap their memory before the page fault handler drops.
     instances: mem::ManuallyDrop<InstancePool>,
@@ -1003,7 +1008,7 @@
             bail!("the instance count limit cannot be zero");
         }

-        let instances = InstancePool::new(&module_limits, &instance_limits, tunables)?;
+        let instances = InstancePool::new(strategy, &module_limits, &instance_limits, tunables)?;

         #[cfg(all(feature = "uffd", target_os = "linux"))]
         let _fault_handler = imp::PageFaultHandler::new(&instances)?;
@@ -1011,7 +1016,6 @@
         drop(stack_size); // suppress unused warnings w/o async feature

         Ok(Self {
-            strategy,
             module_limits,
             instances: mem::ManuallyDrop::new(instances),
             #[cfg(all(feature = "async", unix))]
@@ -1050,7 +1054,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator {
         &self,
         req: InstanceAllocationRequest,
     ) -> Result<InstanceHandle, InstantiationError> {
-        self.instances.allocate(self.strategy, req)
+        self.instances.allocate(req)
     }

     unsafe fn initialize(
@@ -1097,7 +1101,7 @@
     #[cfg(all(feature = "async", unix))]
     fn allocate_fiber_stack(&self) -> Result<wasmtime_fiber::FiberStack, FiberStackError> {
-        self.stacks.allocate(self.strategy)
+        self.stacks.allocate()
     }

     #[cfg(all(feature = "async", unix))]
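With the strategy now fixed at pool construction time rather than passed to every `allocate` call, the `NextAvailable` behavior that the updated tests below depend on is plain `Vec::pop`/`Vec::push` LIFO order. A standalone illustration matching the freelist contents those tests assert:

```rust
fn main() {
    // LIFO free list, as in PoolingAllocationState::NextAvailable.
    let mut free_list: Vec<usize> = (0..3).collect(); // [0, 1, 2]

    // alloc == pop: the highest-numbered free slot comes back first.
    let a = free_list.pop().unwrap(); // 2
    let b = free_list.pop().unwrap(); // 1
    let c = free_list.pop().unwrap(); // 0
    assert_eq!((a, b, c), (2, 1, 0));
    assert!(free_list.pop().is_none()); // pool exhausted

    // free == push. Freeing in allocation order (2, 1, 0), as the
    // instance-pool test below does, leaves [2, 1, 0] -- exactly what
    // that test asserts via testing_freelist().
    free_list.push(a);
    free_list.push(b);
    free_list.push(c);
    assert_eq!(free_list, vec![2, 1, 0]);
}
```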
@@ -1417,21 +1421,6 @@ mod test {
         );
     }

-    #[test]
-    fn test_next_available_allocation_strategy() {
-        let strat = PoolingAllocationStrategy::NextAvailable;
-        assert_eq!(strat.next(10), 9);
-        assert_eq!(strat.next(5), 4);
-        assert_eq!(strat.next(1), 0);
-    }
-
-    #[test]
-    fn test_random_allocation_strategy() {
-        let strat = PoolingAllocationStrategy::Random;
-        assert!(strat.next(100) < 100);
-        assert_eq!(strat.next(1), 0);
-    }
-
     #[cfg(target_pointer_width = "64")]
     #[test]
     fn test_instance_pool() -> Result<()> {
@@ -1451,6 +1440,7 @@
         let instance_limits = InstanceLimits { count: 3 };

         let instances = InstancePool::new(
+            PoolingAllocationStrategy::NextAvailable,
             &module_limits,
             &instance_limits,
             &Tunables {
@@ -1464,7 +1454,10 @@
         assert_eq!(instances.instance_size, region::page::size());
         assert_eq!(instances.max_instances, 3);

-        assert_eq!(&*instances.free_list.lock().unwrap(), &[0, 1, 2]);
+        assert_eq!(
+            instances.index_allocator.lock().unwrap().testing_freelist(),
+            &[SlotId(0), SlotId(1), SlotId(2)]
+        );

         let mut handles = Vec::new();
         let module = Arc::new(Module::default());
@@ -1473,50 +1466,49 @@
         for _ in (0..3).rev() {
             handles.push(
                 instances
-                    .allocate(
-                        PoolingAllocationStrategy::NextAvailable,
-                        InstanceAllocationRequest {
-                            module: module.clone(),
-                            image_base: 0,
-                            functions,
-                            imports: Imports {
-                                functions: &[],
-                                tables: &[],
-                                memories: &[],
-                                globals: &[],
-                            },
-                            shared_signatures: VMSharedSignatureIndex::default().into(),
-                            host_state: Box::new(()),
-                            store: StorePtr::empty(),
-                            wasm_data: &[],
-                            memfds: None,
+                    .allocate(InstanceAllocationRequest {
+                        module: module.clone(),
+                        unique_id: None,
+                        image_base: 0,
+                        functions,
+                        imports: Imports {
+                            functions: &[],
+                            tables: &[],
+                            memories: &[],
+                            globals: &[],
                         },
-                    )
+                        shared_signatures: VMSharedSignatureIndex::default().into(),
+                        host_state: Box::new(()),
+                        store: StorePtr::empty(),
+                        wasm_data: &[],
+                        memfds: None,
+                    })
                     .expect("allocation should succeed"),
             );
         }

-        assert_eq!(&*instances.free_list.lock().unwrap(), &[]);
+        assert_eq!(
+            instances.index_allocator.lock().unwrap().testing_freelist(),
+            &[]
+        );

-        match instances.allocate(
-            PoolingAllocationStrategy::NextAvailable,
-            InstanceAllocationRequest {
-                module: module.clone(),
-                functions,
-                image_base: 0,
-                imports: Imports {
-                    functions: &[],
-                    tables: &[],
-                    memories: &[],
-                    globals: &[],
-                },
-                shared_signatures: VMSharedSignatureIndex::default().into(),
-                host_state: Box::new(()),
-                store: StorePtr::empty(),
-                wasm_data: &[],
-                memfds: None,
+        match instances.allocate(InstanceAllocationRequest {
+            module: module.clone(),
+            unique_id: None,
+            functions,
+            image_base: 0,
+            imports: Imports {
+                functions: &[],
+                tables: &[],
+                memories: &[],
+                globals: &[],
             },
-        ) {
+            shared_signatures: VMSharedSignatureIndex::default().into(),
+            host_state: Box::new(()),
+            store: StorePtr::empty(),
+            wasm_data: &[],
+            memfds: None,
+        }) {
             Err(InstantiationError::Limit(3)) => {}
             _ => panic!("unexpected error"),
         };
@@ -1525,7 +1517,10 @@
             instances.deallocate(&handle);
         }

-        assert_eq!(&*instances.free_list.lock().unwrap(), &[2, 1, 0]);
+        assert_eq!(
+            instances.index_allocator.lock().unwrap().testing_freelist(),
+            &[SlotId(2), SlotId(1), SlotId(0)]
+        );

         Ok(())
     }
@@ -1635,17 +1630,26 @@
         assert_eq!(pool.page_size, native_page_size);

         assert_eq!(
-            &*pool.free_list.lock().unwrap(),
-            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            pool.index_allocator.lock().unwrap().testing_freelist(),
+            &[
+                SlotId(0),
+                SlotId(1),
+                SlotId(2),
+                SlotId(3),
+                SlotId(4),
+                SlotId(5),
+                SlotId(6),
+                SlotId(7),
+                SlotId(8),
+                SlotId(9)
+            ],
         );

         let base = pool.mapping.as_ptr() as usize;

         let mut stacks = Vec::new();
         for i in (0..10).rev() {
-            let stack = pool
-                .allocate(PoolingAllocationStrategy::NextAvailable)
-                .expect("allocation should succeed");
+            let stack = pool.allocate().expect("allocation should succeed");
             assert_eq!(
                 ((stack.top().unwrap() as usize - base) / pool.stack_size) - 1,
                 i
             );
             stacks.push(stack);
         }

-        assert_eq!(&*pool.free_list.lock().unwrap(), &[]);
+        assert_eq!(pool.index_allocator.lock().unwrap().testing_freelist(), &[]);

-        match pool
-            .allocate(PoolingAllocationStrategy::NextAvailable)
-            .unwrap_err()
-        {
+        match pool.allocate().unwrap_err() {
             FiberStackError::Limit(10) => {}
             _ => panic!("unexpected error"),
         };
@@ -1668,8 +1669,19 @@
         }

         assert_eq!(
-            &*pool.free_list.lock().unwrap(),
-            &[9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
+            pool.index_allocator.lock().unwrap().testing_freelist(),
+            &[
+                SlotId(9),
+                SlotId(8),
+                SlotId(7),
+                SlotId(6),
+                SlotId(5),
+                SlotId(4),
+                SlotId(3),
+                SlotId(2),
+                SlotId(1),
+                SlotId(0)
+            ],
         );

         Ok(())
diff --git a/crates/runtime/src/instance/allocator/pooling/index_allocator.rs b/crates/runtime/src/instance/allocator/pooling/index_allocator.rs
new file mode 100644
index 000000000000..e2b7f13e93ee
--- /dev/null
+++ b/crates/runtime/src/instance/allocator/pooling/index_allocator.rs
@@ -0,0 +1,541 @@
+//! Index/slot allocator policies for the pooling allocator.
+
+use super::PoolingAllocationStrategy;
+use crate::CompiledModuleId;
+use rand::Rng;
+use std::collections::HashMap;
+
+/// A slot index. The job of this allocator is to hand out these
+/// indices.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct SlotId(pub usize);
+impl SlotId {
+    /// The index of this slot.
+    pub fn index(self) -> usize {
+        self.0
+    }
+}
+
+/// An index in the global freelist.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct GlobalFreeListIndex(usize);
+impl GlobalFreeListIndex {
+    /// The index of this slot.
+    fn index(self) -> usize {
+        self.0
+    }
+}
+
+/// An index in a per-module freelist.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct PerModuleFreeListIndex(usize);
+impl PerModuleFreeListIndex {
+    /// The index of this slot.
+    fn index(self) -> usize {
+        self.0
+    }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) enum PoolingAllocationState {
+    NextAvailable(Vec<SlotId>),
+    Random(Vec<SlotId>),
+    /// Reuse-affinity policy state.
+    ///
+    /// The data structures here deserve a little explanation:
+    ///
+    /// - free_list: this is a vec of slot indices that are free, no
+    ///   matter their affinities (or no affinity at all).
+    /// - per_module: this is a hashmap of vecs of slot indices that
+    ///   are free, with affinity for particular module IDs. A slot may
+    ///   appear in zero or one of these lists.
+    /// - slot_state: indicates what state each slot is in: allocated
+    ///   (Taken), only in free_list (Empty), or in free_list and a
+    ///   per_module list (Affinity).
+    ///
+    /// The slot state tracks a slot's index in the global and
+    /// per-module freelists, so it can be efficiently removed from
+    /// both. We take some care to keep these up-to-date as well.
+    ///
+    /// On allocation, we first try to find a slot with affinity for
+    /// the given module ID, if any. If not, we pick a random slot
+    /// ID. This random choice is unbiased across all free slots.
+    ReuseAffinity {
+        /// Free-list of all slots. We use this to pick a victim when
+        /// we don't have an appropriate slot with the preferred
+        /// affinity.
+        free_list: Vec<SlotId>,
+        /// Invariant: any module ID in this hashmap must have a
+        /// non-empty list of free slots (otherwise we remove it). We
+        /// remove a module's freelist when we have no more slots with
+        /// affinity for that module.
+        per_module: HashMap<CompiledModuleId, Vec<SlotId>>,
+        /// The state of any given slot. Records indices in the above
+        /// list (empty) or two lists (with affinity), and these
+        /// indices are kept up-to-date to allow fast removal.
+        slot_state: Vec<SlotState>,
+    },
+}
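One way to read the three cross-indexed structures documented above is as a machine-checkable invariant: every free slot sits in `free_list` at exactly the position its `slot_state` records, and every affine slot additionally sits in its module's `per_module` list at the recorded position. A hypothetical debug-only helper in that spirit (illustrative, not part of this patch), written against the state types defined just below:

```rust
impl PoolingAllocationState {
    /// Hypothetical debug-only invariant check; not in the actual patch.
    #[cfg(debug_assertions)]
    fn assert_invariants(&self) {
        if let PoolingAllocationState::ReuseAffinity {
            free_list,
            per_module,
            slot_state,
        } = self
        {
            // Every global-freelist entry is Free and points back at
            // its own position.
            for (pos, slot) in free_list.iter().enumerate() {
                match &slot_state[slot.index()] {
                    SlotState::Free(free) => {
                        assert_eq!(free.free_list_index().index(), pos)
                    }
                    SlotState::Taken(_) => panic!("taken slot on free list"),
                }
            }
            // Every per-module list is non-empty, and each entry's
            // affinity state points back at both lists.
            for (module, list) in per_module {
                assert!(!list.is_empty(), "empty per-module list not removed");
                for (pos, slot) in list.iter().enumerate() {
                    match &slot_state[slot.index()] {
                        SlotState::Free(FreeSlotState::Affinity {
                            module: m,
                            per_module_index,
                            ..
                        }) => {
                            assert_eq!(m, module);
                            assert_eq!(per_module_index.index(), pos);
                        }
                        _ => panic!("per-module entry without affinity state"),
                    }
                }
            }
        }
    }
}
```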
+
+#[derive(Clone, Debug)]
+pub(crate) enum SlotState {
+    /// Currently allocated.
+    ///
+    /// Invariant: no slot in this state has its index in either
+    /// `free_list` or any list in `per_module`.
+    Taken(Option<CompiledModuleId>),
+    /// Currently free. A free slot is able to be allocated for any
+    /// request, but may have affinity to a certain module that we
+    /// prefer to use it for.
+    ///
+    /// Invariant: every slot in this state has its index in at least
+    /// `free_list`, and possibly a `per_module` free-list; see
+    /// FreeSlotState.
+    Free(FreeSlotState),
+}
+
+impl SlotState {
+    fn unwrap_free(&self) -> &FreeSlotState {
+        match self {
+            &Self::Free(ref free) => free,
+            _ => panic!("Slot not free"),
+        }
+    }
+
+    fn unwrap_free_mut(&mut self) -> &mut FreeSlotState {
+        match self {
+            &mut Self::Free(ref mut free) => free,
+            _ => panic!("Slot not free"),
+        }
+    }
+
+    fn unwrap_module_id(&self) -> Option<CompiledModuleId> {
+        match self {
+            &Self::Taken(module_id) => module_id,
+            _ => panic!("Slot not in Taken state"),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) enum FreeSlotState {
+    /// The slot is free, and has no affinity.
+    ///
+    /// Invariant: every slot in this state has its index in
+    /// `free_list`. No slot in this state has its index in any other
+    /// (per-module) free-list.
+    NoAffinity {
+        /// Index in the global free list.
+        ///
+        /// Invariant: free_list[slot_state[i].free_list_index] == i.
+        free_list_index: GlobalFreeListIndex,
+    },
+    /// The slot is free, and has an affinity for some module. This
+    /// means we prefer to choose this slot (or some other one with
+    /// the same affinity) given a request to allocate a slot for this
+    /// module. It can, however, still be used for any other module if
+    /// needed.
+    ///
+    /// Invariant: every slot in this state has its index in both
+    /// `free_list` *and* exactly one list in `per_module`.
+    Affinity {
+        module: CompiledModuleId,
+        /// Index in the global free list.
+        ///
+        /// Invariant: free_list[slot_state[i].free_list_index] == i.
+        free_list_index: GlobalFreeListIndex,
+        /// Index in a per-module free list.
+        ///
+        /// Invariant: per_module[slot_state[i].module][slot_state[i].per_module_index]
+        /// == i.
+        per_module_index: PerModuleFreeListIndex,
+    },
+}
+
+impl FreeSlotState {
+    /// Get the index of this slot in the global free list.
+    fn free_list_index(&self) -> GlobalFreeListIndex {
+        match self {
+            &Self::NoAffinity { free_list_index }
+            | &Self::Affinity {
+                free_list_index, ..
+            } => free_list_index,
+        }
+    }
+
+    /// Update the index of this slot in the global free list.
+    fn update_free_list_index(&mut self, index: GlobalFreeListIndex) {
+        match self {
+            &mut Self::NoAffinity {
+                ref mut free_list_index,
+            }
+            | &mut Self::Affinity {
+                ref mut free_list_index,
+                ..
+            } => {
+                *free_list_index = index;
+            }
+        }
+    }
+
+    /// Get the index of this slot in its per-module free list.
+    fn per_module_index(&self) -> PerModuleFreeListIndex {
+        match self {
+            &Self::Affinity {
+                per_module_index, ..
+            } => per_module_index,
+            _ => panic!("per_module_index on slot with no affinity"),
+        }
+    }
+
+    /// Update the index of this slot in its per-module free list.
+    fn update_per_module_index(&mut self, index: PerModuleFreeListIndex) {
+        match self {
+            &mut Self::Affinity {
+                ref mut per_module_index,
+                ..
+            } => {
+                *per_module_index = index;
+            }
+            _ => panic!("per_module_index on slot with no affinity"),
+        }
+    }
+}
+
+/// Internal: remove a slot-index from the global free list.
+fn remove_global_free_list_item(
+    slot_state: &mut Vec<SlotState>,
+    free_list: &mut Vec<SlotId>,
+    index: SlotId,
+) {
+    let free_list_index = slot_state[index.index()].unwrap_free().free_list_index();
+    assert_eq!(index, free_list.swap_remove(free_list_index.index()));
+    if free_list_index.index() < free_list.len() {
+        let replaced = free_list[free_list_index.index()];
+        slot_state[replaced.index()]
+            .unwrap_free_mut()
+            .update_free_list_index(free_list_index);
+    }
+}
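`remove_global_free_list_item` above is an instance of a general pattern: O(1) unordered removal via `swap_remove`, plus patching the back-pointer of whichever element got swapped into the vacated position. A self-contained demonstration of why the patch-up step is required:

```rust
fn main() {
    // A free list of slot ids, and a back-pointer table:
    // positions[slot] == Some(index of `slot` within `free_list`).
    let mut free_list: Vec<usize> = vec![10, 20, 30, 40];
    let mut positions = vec![None; 41];
    for (i, &slot) in free_list.iter().enumerate() {
        positions[slot] = Some(i);
    }

    // Remove slot 20 (at position 1) in O(1): swap_remove moves the
    // last element (40) into position 1 ...
    let pos = positions[20].take().unwrap();
    assert_eq!(free_list.swap_remove(pos), 20);

    // ... so 40's back-pointer is now stale and must be patched,
    // exactly like update_free_list_index() above. (If the removed
    // element was the last one, there is nothing to patch.)
    if pos < free_list.len() {
        let moved = free_list[pos];
        positions[moved] = Some(pos);
    }

    assert_eq!(free_list, vec![10, 40, 30]);
    assert_eq!(positions[40], Some(1));
}
```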
+
+/// Internal: remove a slot-index from a per-module free list.
+fn remove_module_free_list_item(
+    slot_state: &mut Vec<SlotState>,
+    per_module: &mut HashMap<CompiledModuleId, Vec<SlotId>>,
+    id: CompiledModuleId,
+    index: SlotId,
+) {
+    debug_assert!(
+        per_module.contains_key(&id),
+        "per_module list for given module should not be empty"
+    );
+
+    let per_module_list = per_module.get_mut(&id).unwrap();
+    debug_assert!(!per_module_list.is_empty());
+
+    let per_module_index = slot_state[index.index()].unwrap_free().per_module_index();
+    assert_eq!(index, per_module_list.swap_remove(per_module_index.index()));
+    if per_module_index.index() < per_module_list.len() {
+        let replaced = per_module_list[per_module_index.index()];
+        slot_state[replaced.index()]
+            .unwrap_free_mut()
+            .update_per_module_index(per_module_index);
+    }
+    if per_module_list.is_empty() {
+        per_module.remove(&id);
+    }
+}
+
+impl PoolingAllocationState {
+    /// Create the default state for this strategy.
+    pub(crate) fn new(strategy: PoolingAllocationStrategy, max_instances: usize) -> Self {
+        let ids = (0..max_instances).map(|i| SlotId(i)).collect::<Vec<_>>();
+        match strategy {
+            PoolingAllocationStrategy::NextAvailable => PoolingAllocationState::NextAvailable(ids),
+            PoolingAllocationStrategy::Random => PoolingAllocationState::Random(ids),
+            PoolingAllocationStrategy::ReuseAffinity => PoolingAllocationState::ReuseAffinity {
+                free_list: ids,
+                per_module: HashMap::new(),
+                slot_state: (0..max_instances)
+                    .map(|i| {
+                        SlotState::Free(FreeSlotState::NoAffinity {
+                            free_list_index: GlobalFreeListIndex(i),
+                        })
+                    })
+                    .collect(),
+            },
+        }
+    }
+
+    /// Are any slots left, or is this allocator empty?
+    pub(crate) fn is_empty(&self) -> bool {
+        match self {
+            &PoolingAllocationState::NextAvailable(ref free_list)
+            | &PoolingAllocationState::Random(ref free_list) => free_list.is_empty(),
+            &PoolingAllocationState::ReuseAffinity { ref free_list, .. } => free_list.is_empty(),
+        }
+    }
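Note the contract implied by `is_empty`: `alloc` (just below) assumes at least one slot is free, so callers such as `InstancePool::allocate` and `StackPool::allocate` above perform the emptiness check and the allocation under a single lock acquisition. A sketch of that caller shape (the `PoolError` type here is hypothetical; the sequence mirrors the pools above):

```rust
use std::sync::Mutex;

// Hypothetical error type standing in for InstantiationError/FiberStackError.
enum PoolError {
    Limit(u32),
}

fn allocate_index(
    index_allocator: &Mutex<PoolingAllocationState>,
    unique_id: Option<CompiledModuleId>,
    max_instances: usize,
) -> Result<usize, PoolError> {
    // One critical section covers both the emptiness check and the
    // allocation, so another thread cannot take the last slot between
    // the two steps.
    let mut alloc = index_allocator.lock().unwrap();
    if alloc.is_empty() {
        return Err(PoolError::Limit(max_instances as u32));
    }
    Ok(alloc.alloc(unique_id).index())
}
```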
+
+    /// Allocate a new slot.
+    pub(crate) fn alloc(&mut self, id: Option<CompiledModuleId>) -> SlotId {
+        match self {
+            &mut PoolingAllocationState::NextAvailable(ref mut free_list) => {
+                debug_assert!(free_list.len() > 0);
+                free_list.pop().unwrap()
+            }
+            &mut PoolingAllocationState::Random(ref mut free_list) => {
+                debug_assert!(free_list.len() > 0);
+                let id = rand::thread_rng().gen_range(0..free_list.len());
+                free_list.swap_remove(id)
+            }
+            &mut PoolingAllocationState::ReuseAffinity {
+                ref mut free_list,
+                ref mut per_module,
+                ref mut slot_state,
+                ..
+            } => {
+                if let Some(this_module) = id.and_then(|id| per_module.get_mut(&id)) {
+                    // There is a freelist of slots with affinity for
+                    // the requested module-ID. Pick the last one; any
+                    // will do, no need for randomness here.
+                    assert!(!this_module.is_empty());
+                    let slot_id = this_module.pop().expect("List should never be empty");
+                    if this_module.is_empty() {
+                        per_module.remove(&id.unwrap());
+                    }
+                    // Make sure to remove from the global
+                    // freelist. We already removed from the
+                    // per-module list above.
+                    remove_global_free_list_item(slot_state, free_list, slot_id);
+                    slot_state[slot_id.index()] = SlotState::Taken(id);
+                    slot_id
+                } else {
+                    // Pick a random free slot ID. Note that we do
+                    // this, rather than pick a victim module first,
+                    // to maintain an unbiased stealing distribution:
+                    // we want the likelihood of our taking a slot
+                    // from some other module's freelist to be
+                    // proportional to that module's freelist
+                    // length. Or in other words, every *slot* should
+                    // be equally likely to be stolen. The
+                    // alternative, where we pick the victim module
+                    // freelist first, means that either a module with
+                    // an affinity freelist of one slot has the same
+                    // chances of losing that slot as one with a
+                    // hundred slots; or else we need a weighted
+                    // random choice among modules, which is just as
+                    // complex as this process.
+                    //
+                    // We don't bother picking an empty slot (no
+                    // established affinity) before a random slot,
+                    // because this is more complex, and in the steady
+                    // state, all slots will see at least one
+                    // instantiation very quickly, so there will never
+                    // (past an initial phase) be a slot with no
+                    // affinity.
+                    let free_list_index = rand::thread_rng().gen_range(0..free_list.len());
+                    let slot_id = free_list[free_list_index];
+                    // Remove from both the global freelist and
+                    // per-module freelist, if any.
+                    remove_global_free_list_item(slot_state, free_list, slot_id);
+                    if let &SlotState::Free(FreeSlotState::Affinity { module, .. }) =
+                        &slot_state[slot_id.index()]
+                    {
+                        remove_module_free_list_item(slot_state, per_module, module, slot_id);
+                    }
+                    slot_state[slot_id.index()] = SlotState::Taken(id);
+
+                    slot_id
+                }
+            }
+        }
+    }
+
+    pub(crate) fn free(&mut self, index: SlotId) {
+        match self {
+            &mut PoolingAllocationState::NextAvailable(ref mut free_list)
+            | &mut PoolingAllocationState::Random(ref mut free_list) => {
+                free_list.push(index);
+            }
+            &mut PoolingAllocationState::ReuseAffinity {
+                ref mut per_module,
+                ref mut free_list,
+                ref mut slot_state,
+            } => {
+                let module_id = slot_state[index.index()].unwrap_module_id();
+
+                let free_list_index = GlobalFreeListIndex(free_list.len());
+                free_list.push(index);
+                if let Some(id) = module_id {
+                    let per_module_list = per_module
+                        .entry(id)
+                        .or_insert_with(|| Vec::with_capacity(1));
+                    let per_module_index = PerModuleFreeListIndex(per_module_list.len());
+                    per_module_list.push(index);
+                    slot_state[index.index()] = SlotState::Free(FreeSlotState::Affinity {
+                        module: id,
+                        free_list_index,
+                        per_module_index,
+                    });
+                } else {
+                    slot_state[index.index()] =
+                        SlotState::Free(FreeSlotState::NoAffinity { free_list_index });
+                }
+            }
+        }
+    }
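The slot-uniform victim choice in `alloc` has a simple quantitative reading: if a module holds k of the F free slots, a steal lands on that module with probability exactly k/F. A quick standalone simulation checking that claim (illustrative, not part of the patch):

```rust
use rand::Rng; // rand is already a dependency of this module

fn main() {
    // Three "modules" hold 1, 9, and 90 of 100 free slots.
    let mut owner_of_slot: Vec<usize> = Vec::new();
    owner_of_slot.extend(std::iter::repeat(0).take(1));
    owner_of_slot.extend(std::iter::repeat(1).take(9));
    owner_of_slot.extend(std::iter::repeat(2).take(90));

    let mut rng = rand::thread_rng();
    let mut stolen_from = [0u32; 3];
    let trials = 100_000;
    for _ in 0..trials {
        // Slot-uniform choice, as in the ReuseAffinity arm of alloc().
        let slot = rng.gen_range(0..owner_of_slot.len());
        stolen_from[owner_of_slot[slot]] += 1;
    }

    // Module 0 (one warm slot) should lose it on ~1% of steals, not on
    // 1/3 of them as a module-uniform victim choice would give.
    let share = stolen_from[0] as f64 / trials as f64;
    println!("steal counts: {:?}", stolen_from);
    assert!(share < 0.05);
}
```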
+
+    /// For testing only, we want to be able to assert what is on the
+    /// single freelist, for the policies that keep just one.
+    #[cfg(test)]
+    pub(crate) fn testing_freelist(&self) -> &[SlotId] {
+        match self {
+            &PoolingAllocationState::NextAvailable(ref free_list)
+            | &PoolingAllocationState::Random(ref free_list) => &free_list[..],
+            _ => panic!("Wrong kind of state"),
+        }
+    }
+
+    /// For testing only, get the list of all modules with at least
+    /// one slot with affinity for that module.
+    #[cfg(test)]
+    pub(crate) fn testing_module_affinity_list(&self) -> Vec<CompiledModuleId> {
+        match self {
+            &PoolingAllocationState::NextAvailable(..) | &PoolingAllocationState::Random(..) => {
+                panic!("Wrong kind of state")
+            }
+            &PoolingAllocationState::ReuseAffinity { ref per_module, .. } => {
+                let mut ret = vec![];
+                for (module, list) in per_module {
+                    assert!(!list.is_empty());
+                    ret.push(*module);
+                }
+                ret
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{PoolingAllocationState, SlotId};
+    use crate::CompiledModuleIdAllocator;
+    use crate::PoolingAllocationStrategy;
+
+    #[test]
+    fn test_next_available_allocation_strategy() {
+        let strat = PoolingAllocationStrategy::NextAvailable;
+        let mut state = PoolingAllocationState::new(strat, 10);
+        assert_eq!(state.alloc(None).index(), 9);
+        let mut state = PoolingAllocationState::new(strat, 5);
+        assert_eq!(state.alloc(None).index(), 4);
+        let mut state = PoolingAllocationState::new(strat, 1);
+        assert_eq!(state.alloc(None).index(), 0);
+    }
+
+    #[test]
+    fn test_random_allocation_strategy() {
+        let strat = PoolingAllocationStrategy::Random;
+        let mut state = PoolingAllocationState::new(strat, 100);
+        assert!(state.alloc(None).index() < 100);
+        let mut state = PoolingAllocationState::new(strat, 1);
+        assert_eq!(state.alloc(None).index(), 0);
+    }
+
+    #[test]
+    fn test_affinity_allocation_strategy() {
+        let strat = PoolingAllocationStrategy::ReuseAffinity;
+        let id_alloc = CompiledModuleIdAllocator::new();
+        let id1 = id_alloc.alloc();
+        let id2 = id_alloc.alloc();
+        let mut state = PoolingAllocationState::new(strat, 100);
+
+        let index1 = state.alloc(Some(id1));
+        assert!(index1.index() < 100);
+        let index2 = state.alloc(Some(id2));
+        assert!(index2.index() < 100);
+        assert_ne!(index1, index2);
+
+        state.free(index1);
+        let index3 = state.alloc(Some(id1));
+        assert_eq!(index3, index1);
+        state.free(index3);
+
+        state.free(index2);
+
+        // Both id1 and id2 should have some slots with affinity.
+        let affinity_modules = state.testing_module_affinity_list();
+        assert_eq!(2, affinity_modules.len());
+        assert!(affinity_modules.contains(&id1));
+        assert!(affinity_modules.contains(&id2));
+
+        // Now there is 1 free instance for id2 and 1 free instance
+        // for id1, and 98 empty. Allocate 100 for id2. The first
+        // should be equal to the one we know was previously used for
+        // id2. The next 99 are arbitrary.
+
+        let mut indices = vec![];
+        for _ in 0..100 {
+            assert!(!state.is_empty());
+            indices.push(state.alloc(Some(id2)));
+        }
+        assert!(state.is_empty());
+        assert_eq!(indices[0], index2);
+
+        for i in indices {
+            state.free(i);
+        }
+
+        // Now there should be no slots left with affinity for id1.
+        let affinity_modules = state.testing_module_affinity_list();
+        assert_eq!(1, affinity_modules.len());
+        assert!(affinity_modules.contains(&id2));
+
+        // Allocate an index we know previously had an instance but
+        // now does not (list ran empty).
+        let index = state.alloc(Some(id1));
+        state.free(index);
+    }
+
+    #[test]
+    fn test_affinity_allocation_strategy_random() {
+        use rand::Rng;
+        let mut rng = rand::thread_rng();
+
+        let strat = PoolingAllocationStrategy::ReuseAffinity;
+        let id_alloc = CompiledModuleIdAllocator::new();
+        let ids = std::iter::repeat_with(|| id_alloc.alloc())
+            .take(10)
+            .collect::<Vec<_>>();
+        let mut state = PoolingAllocationState::new(strat, 1000);
+        let mut allocated: Vec<SlotId> = vec![];
+        let mut last_id = vec![None; 1000];
+
+        let mut hits = 0;
+        for _ in 0..100_000 {
+            if !allocated.is_empty() && (state.is_empty() || rng.gen_bool(0.5)) {
+                let i = rng.gen_range(0..allocated.len());
+                let to_free_idx = allocated.swap_remove(i);
+                state.free(to_free_idx);
+            } else {
+                assert!(!state.is_empty());
+                let id = ids[rng.gen_range(0..ids.len())];
+                let index = state.alloc(Some(id));
+                if last_id[index.index()] == Some(id) {
+                    hits += 1;
+                }
+                last_id[index.index()] = Some(id);
+                allocated.push(index);
+            }
+        }
+
+        // 10% reuse would be random chance (because we have 10 module
+        // IDs). Check for at least double that to ensure some sort of
+        // affinity is occurring.
+        assert!(
+            hits > 20000,
+            "expected at least 20000 (20%) ID-reuses but got {}",
+            hits
+        );
+    }
+}
diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs
index 87dd9a0c57d5..be16ca2db1ec 100644
--- a/crates/runtime/src/instance/allocator/pooling/uffd.rs
+++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs
@@ -466,8 +466,13 @@ mod test {
             ..Tunables::default()
         };

-        let instances = InstancePool::new(&module_limits, &instance_limits, &tunables)
-            .expect("should allocate");
+        let instances = InstancePool::new(
+            PoolingAllocationStrategy::Random,
+            &module_limits,
+            &instance_limits,
+            &tunables,
+        )
+        .expect("should allocate");

         let locator = FaultLocator::new(&instances);

@@ -573,25 +578,23 @@ mod test {
         for _ in 0..instances.max_instances {
             handles.push(
                 instances
-                    .allocate(
-                        PoolingAllocationStrategy::Random,
-                        InstanceAllocationRequest {
-                            module: module.clone(),
-                            memfds: None,
-                            image_base: 0,
-                            functions,
-                            imports: Imports {
-                                functions: &[],
-                                tables: &[],
-                                memories: &[],
-                                globals: &[],
-                            },
-                            shared_signatures: VMSharedSignatureIndex::default().into(),
-                            host_state: Box::new(()),
-                            store: StorePtr::new(&mut mock_store),
-                            wasm_data: &[],
+                    .allocate(InstanceAllocationRequest {
+                        module: module.clone(),
+                        memfds: None,
+                        unique_id: None,
+                        image_base: 0,
+                        functions,
+                        imports: Imports {
+                            functions: &[],
+                            tables: &[],
+                            memories: &[],
+                            globals: &[],
                         },
-                    )
+                        shared_signatures: VMSharedSignatureIndex::default().into(),
+                        host_state: Box::new(()),
+                        store: StorePtr::new(&mut mock_store),
+                        wasm_data: &[],
+                    })
                     .expect("instance should allocate"),
             );
         }
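On the 20% threshold in `test_affinity_allocation_strategy_random` above: with 10 module IDs, an allocator with no affinity at all would see roughly 10% accidental reuse, so requiring double that is a conservative signal that affinity is actually working. A hypothetical companion test (same churn loop, but run against the `Random` strategy as a baseline) would be expected to hover near chance:

```rust
#[test]
fn random_strategy_baseline_hit_rate() {
    use rand::Rng;
    let mut rng = rand::thread_rng();

    // Same churn loop as above, but Random never consults module IDs,
    // so the "hit" rate should stay near 1/10 with 10 IDs.
    let mut state = PoolingAllocationState::new(PoolingAllocationStrategy::Random, 1000);
    let mut allocated: Vec<SlotId> = vec![];
    let mut last_id = vec![None; 1000];
    let (mut hits, mut allocs) = (0u32, 0u32);

    for _ in 0..100_000 {
        if !allocated.is_empty() && (state.is_empty() || rng.gen_bool(0.5)) {
            let i = rng.gen_range(0..allocated.len());
            state.free(allocated.swap_remove(i));
        } else {
            let id = rng.gen_range(0..10);
            let index = state.alloc(None); // Random: affinity is ignored
            allocs += 1;
            if last_id[index.index()] == Some(id) {
                hits += 1;
            }
            last_id[index.index()] = Some(id);
            allocated.push(index);
        }
    }

    // Expect roughly 10%; a loose upper bound keeps this robust.
    assert!((hits as f64) < 0.15 * (allocs as f64));
}
```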
diff --git a/crates/runtime/src/module_id.rs b/crates/runtime/src/module_id.rs
index 481a63e0bd3a..3c5ed7adf193 100644
--- a/crates/runtime/src/module_id.rs
+++ b/crates/runtime/src/module_id.rs
@@ -1,11 +1,14 @@
 //! Unique IDs for modules in the runtime.

-use std::sync::atomic::{AtomicU64, Ordering};
+use std::{
+    num::NonZeroU64,
+    sync::atomic::{AtomicU64, Ordering},
+};

 /// A unique identifier (within an engine or similar) for a compiled
 /// module.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct CompiledModuleId(u64);
+pub struct CompiledModuleId(NonZeroU64);

 /// An allocator for compiled module IDs.
 pub struct CompiledModuleIdAllocator {
@@ -22,7 +25,19 @@ impl CompiledModuleIdAllocator {
     /// Allocate a new ID.
     pub fn alloc(&self) -> CompiledModuleId {
+        // Note: why is `Relaxed` OK here?
+        //
+        // The only requirement we have is that IDs are unique. We
+        // don't care how one module's ID compares to another, i.e.,
+        // what order they come in. `Relaxed` means that this
+        // `fetch_add` operation does not have any particular
+        // synchronization (ordering) with respect to any other memory
+        // access in the program. However, `fetch_add` is always
+        // atomic with respect to other accesses to this variable
+        // (`self.next`). So we will always hand out separate, unique
+        // IDs correctly, just in some possibly arbitrary order (which
+        // is fine).
         let id = self.next.fetch_add(1, Ordering::Relaxed);
-        CompiledModuleId(id)
+        CompiledModuleId(NonZeroU64::new(id).unwrap())
     }
 }
diff --git a/crates/wasmtime/src/config/pooling.rs b/crates/wasmtime/src/config/pooling.rs
index a2cbc470017e..6f8f9ae62b5f 100644
--- a/crates/wasmtime/src/config/pooling.rs
+++ b/crates/wasmtime/src/config/pooling.rs
@@ -249,6 +249,10 @@ pub enum PoolingAllocationStrategy {
     NextAvailable,
     /// Allocate from a random available instance.
     Random,
+    /// Try to allocate an instance slot that was previously used for
+    /// the same module, potentially enabling faster instantiation by
+    /// reusing e.g. memory mappings.
+    ReuseAffinity,
 }

 impl Default for PoolingAllocationStrategy {
@@ -256,6 +260,7 @@
         match wasmtime_runtime::PoolingAllocationStrategy::default() {
             wasmtime_runtime::PoolingAllocationStrategy::NextAvailable => Self::NextAvailable,
             wasmtime_runtime::PoolingAllocationStrategy::Random => Self::Random,
+            wasmtime_runtime::PoolingAllocationStrategy::ReuseAffinity => Self::ReuseAffinity,
         }
     }
 }
@@ -268,6 +273,7 @@ impl Into<wasmtime_runtime::PoolingAllocationStrategy> for PoolingAllocationStra
         match self {
             Self::NextAvailable => wasmtime_runtime::PoolingAllocationStrategy::NextAvailable,
             Self::Random => wasmtime_runtime::PoolingAllocationStrategy::Random,
+            Self::ReuseAffinity => wasmtime_runtime::PoolingAllocationStrategy::ReuseAffinity,
         }
     }
 }
diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs
index 7f5b5e823df7..99687621026a 100644
--- a/crates/wasmtime/src/instance.rs
+++ b/crates/wasmtime/src/instance.rs
@@ -707,6 +707,7 @@ impl<'a> Instantiator<'a> {
             .allocator()
             .allocate(InstanceAllocationRequest {
                 module: compiled_module.module().clone(),
+                unique_id: Some(compiled_module.unique_id()),
                 memfds: self.cur.module.memfds().clone(),
                 image_base: compiled_module.code().as_ptr() as usize,
                 functions: compiled_module.functions(),
diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs
index f07064bf0677..e6b4c8709abe 100644
--- a/crates/wasmtime/src/store.rs
+++ b/crates/wasmtime/src/store.rs
@@ -426,6 +426,7 @@ impl<T> Store<T> {
             shared_signatures: None.into(),
             imports: Default::default(),
             module: Arc::new(wasmtime_environ::Module::default()),
+            unique_id: None,
             memfds: None,
             store: StorePtr::empty(),
             wasm_data: &[],
diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs
index 790cbf9ef991..02e0b51c8130 100644
--- a/crates/wasmtime/src/trampoline.rs
+++ b/crates/wasmtime/src/trampoline.rs
@@ -41,6 +41,7 @@ fn create_handle(
     let handle = OnDemandInstanceAllocator::new(config.mem_creator.clone(), 0).allocate(
         InstanceAllocationRequest {
             module: Arc::new(module),
+            unique_id: None,
             memfds: None,
             functions,
             image_base: 0,
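A side benefit of the `NonZeroU64` change in `module_id.rs` above: `Option<CompiledModuleId>`, the type now threaded through every `InstanceAllocationRequest`, costs no more space than a bare `u64`, because `None` can be encoded as the forbidden zero value. A quick standalone check (the `Id` newtype stands in for `CompiledModuleId`):

```rust
use std::mem::size_of;
use std::num::NonZeroU64;

// Stand-in with the same shape as CompiledModuleId(NonZeroU64).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Id(NonZeroU64);

fn main() {
    // Documented guarantee: Option<NonZeroU64> has the same size as
    // u64, with None encoded as the impossible value 0.
    assert_eq!(size_of::<Option<NonZeroU64>>(), size_of::<u64>());
    // The niche propagates through the newtype in practice, so the
    // Option fields added throughout this patch are just 8 bytes.
    assert_eq!(size_of::<Option<Id>>(), size_of::<u64>());

    let id = Id(NonZeroU64::new(1).unwrap());
    assert_eq!(Some(id).map(|i| i.0.get()), Some(1));
}
```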
diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs
index 47513f83cfea..77d5f26d188d 100644
--- a/crates/wasmtime/src/trampoline/func.rs
+++ b/crates/wasmtime/src/trampoline/func.rs
@@ -161,6 +161,7 @@ pub unsafe fn create_raw_function(
     Ok(
         OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest {
             module: Arc::new(module),
+            unique_id: None,
             memfds: None,
             functions: &functions,
             image_base: (*func).as_ptr() as usize,
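Finally, from the embedder's side, the new policy is selected through the public enum mirrored in `config/pooling.rs` above. A sketch of engine setup, assuming the `Config::allocation_strategy` / `InstanceAllocationStrategy::Pooling` API as it existed around this change (limits types and field names may differ across versions):

```rust
use wasmtime::{
    Config, Engine, InstanceAllocationStrategy, InstanceLimits, ModuleLimits,
    PoolingAllocationStrategy,
};

fn main() -> anyhow::Result<()> {
    let mut config = Config::new();
    config.allocation_strategy(InstanceAllocationStrategy::Pooling {
        // Prefer slots previously used by the same module so that
        // per-slot state (e.g. memfd mappings) can be reused.
        strategy: PoolingAllocationStrategy::ReuseAffinity,
        module_limits: ModuleLimits::default(),
        instance_limits: InstanceLimits::default(),
    });
    let _engine = Engine::new(&config)?;
    Ok(())
}
```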