diff --git a/include/mimalloc.h b/include/mimalloc.h
index 26bb849de..9fc770cc3 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -262,7 +262,7 @@ typedef struct mi_heap_area_s {
 
 typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
 
-mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
+mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 
 // Experimental
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
@@ -292,9 +292,13 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 // Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them) 
 // Used for example for separate interpreter's in one process.
 typedef void* mi_subproc_id_t;
+mi_decl_export mi_subproc_id_t mi_subproc_main(void);
 mi_decl_export mi_subproc_id_t mi_subproc_new(void);
-mi_decl_export void   mi_subproc_delete(mi_subproc_id_t subproc);
-mi_decl_export void   mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
+mi_decl_export void            mi_subproc_delete(mi_subproc_id_t subproc);
+mi_decl_export void            mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
+
+// Experimental: visit abandoned heap areas (from threads that have been terminated)
+mi_decl_export bool   mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 
 // deprecated
 mi_decl_export int    mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
@@ -355,6 +359,7 @@ typedef enum mi_option_e {
   mi_option_abandoned_reclaim_on_free,  // allow to reclaim an abandoned segment on a free (=1)
   mi_option_disallow_arena_alloc,       // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,               // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
+  mi_option_visit_abandoned,            // allow visiting heap blocks from abandoned threads (=0)
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,
diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h
index 4e3250f9d..d27110197 100644
--- a/include/mimalloc/atomic.h
+++ b/include/mimalloc/atomic.h
@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define WIN32_LEAN_AND_MEAN
 #endif
 #include <windows.h>
-#elif !defined(_WIN32) && (defined(__EMSCRIPTEN_SHARED_MEMORY__) || !defined(__wasi__))
+#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))
 #define  MI_USE_PTHREADS
 #include <pthread.h>
 #endif
@@ -35,9 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
 #define  mi_atomic(name)        std::atomic_##name
 #define  mi_memory_order(name)  std::memory_order_##name
 #if (__cplusplus >= 202002L)    // c++20, see issue #571
-#define MI_ATOMIC_VAR_INIT(x)  x
+ #define MI_ATOMIC_VAR_INIT(x)  x
 #elif !defined(ATOMIC_VAR_INIT)
-#define MI_ATOMIC_VAR_INIT(x)  x
+ #define MI_ATOMIC_VAR_INIT(x)  x
 #else
  #define MI_ATOMIC_VAR_INIT(x)  ATOMIC_VAR_INIT(x)
 #endif
@@ -337,6 +337,7 @@ typedef _Atomic(uintptr_t) mi_atomic_guard_t;
 // ----------------------------------------------------------------------
 // Yield
 // ----------------------------------------------------------------------
+
 #if defined(__cplusplus)
 #include <thread>
 static inline void mi_atomic_yield(void) {
@@ -401,59 +402,73 @@ static inline void mi_atomic_yield(void) {
 
 
 // ----------------------------------------------------------------------
-// Locks are only used for abandoned segment visiting
+// Locks are only used for abandoned segment visiting in `arena.c`
 // ----------------------------------------------------------------------
+
 #if defined(_WIN32)
 
-#define mi_lock_t  CRITICAL_SECTION 
+#define mi_lock_t  CRITICAL_SECTION
 
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
+  return TryEnterCriticalSection(lock);
+}
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   EnterCriticalSection(lock);
   return true;
 }
-
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
-  return TryEnterCriticalSection(lock);
-}
-
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   LeaveCriticalSection(lock);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  InitializeCriticalSection(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  DeleteCriticalSection(lock);
+}
 
 
 #elif defined(MI_USE_PTHREADS)
 
 #define mi_lock_t  pthread_mutex_t
 
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
-  return (pthread_mutex_lock(lock) == 0);
-}
-
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   return (pthread_mutex_trylock(lock) == 0);
 }
-
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
+  return (pthread_mutex_lock(lock) == 0);
+}
+static inline void mi_lock_release(mi_lock_t* lock) {
   pthread_mutex_unlock(lock);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  (void)(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}
+
 
 #elif defined(__cplusplus)
 
 #include <mutex>
 #define mi_lock_t  std::mutex
 
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
+  return lock->lock_try_acquire();
+}
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   lock->lock();
   return true;
 }
-
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
-  return (lock->try_lock();
-}
-
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   lock->unlock();
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  (void)(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}
 
 #else
 
@@ -462,22 +477,26 @@ static inline void _mi_prim_unlock(mi_lock_t* lock) {
 
 #define mi_lock_t  _Atomic(uintptr_t)
 
-static inline bool _mi_prim_try_lock(mi_lock_t* lock) {
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   uintptr_t expected = 0;
   return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
 }
-
-static inline bool _mi_prim_lock(mi_lock_t* lock) {
+static inline bool mi_lock_acquire(mi_lock_t* lock) {
   for (int i = 0; i < 1000; i++) {  // for at most 1000 tries?
-    if (_mi_prim_try_lock(lock)) return true;
+    if (mi_lock_try_acquire(lock)) return true;
     mi_atomic_yield();
   }
   return true;
 }
-
-static inline void _mi_prim_unlock(mi_lock_t* lock) {
+static inline void mi_lock_release(mi_lock_t* lock) {
   mi_atomic_store_release(lock, (uintptr_t)0);
 }
+static inline void mi_lock_init(mi_lock_t* lock) {
+  mi_lock_release(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}
 
 #endif
 
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 9046e3ad9..89f041030 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -79,11 +79,12 @@ extern mi_decl_cache_align const mi_page_t  _mi_page_empty;
 bool       _mi_is_main_thread(void);
 size_t     _mi_current_thread_count(void);
 bool       _mi_preloading(void);           // true while the C runtime is not initialized yet
-mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
-mi_heap_t*    _mi_heap_main_get(void);     // statically allocated main backing heap
 void       _mi_thread_done(mi_heap_t* heap);
 void       _mi_thread_data_collect(void);
 void       _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
+mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
+mi_heap_t*    _mi_heap_main_get(void);     // statically allocated main backing heap
+mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
 
 // os.c
 void       _mi_os_init(void);                                            // called from process init
@@ -136,7 +137,7 @@ typedef struct mi_arena_field_cursor_s { // abstract struct
   mi_subproc_t*  subproc;
 } mi_arena_field_cursor_t;
 void          _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current);
-mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all);
 
 // "segment-map.c"
 void       _mi_segment_map_allocated_at(const mi_segment_t* segment);
@@ -158,6 +159,7 @@ void       _mi_segments_collect(bool force, mi_segments_tld_t* tld);
 void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void       _mi_abandoned_await_readers(void);
 bool       _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
+bool       _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
 
 // "page.c"
 void*      _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment)  mi_attr_noexcept mi_attr_malloc;
@@ -189,6 +191,8 @@ void       _mi_heap_set_default_direct(mi_heap_t* heap);
 bool       _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
 void       _mi_heap_unsafe_destroy_all(void);
 mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
+void       _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
+bool       _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);
 
 // "stats.c"
 void       _mi_stats_done(mi_stats_t* stats);
diff --git a/src/arena.c b/src/arena.c
index aeadd604e..595149502 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -40,23 +40,24 @@ typedef uintptr_t mi_block_info_t;
 
 // A memory arena descriptor
 typedef struct mi_arena_s {
-  mi_arena_id_t id;                       // arena id; 0 for non-specific
-  mi_memid_t memid;                       // memid of the memory area
-  _Atomic(uint8_t*) start;                // the start of the memory area
-  size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
-  size_t   field_count;                   // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
-  size_t   meta_size;                     // size of the arena structure itself (including its bitmaps)
-  mi_memid_t meta_memid;                  // memid of the arena structure itself (OS or static allocation)
-  int      numa_node;                     // associated NUMA node
-  bool     exclusive;                     // only allow allocations if specifically for this arena
-  bool     is_large;                      // memory area consists of large- or huge OS pages (always committed)
-  _Atomic(size_t) search_idx;             // optimization to start the search for free blocks
-  _Atomic(mi_msecs_t) purge_expire;       // expiration time when blocks should be decommitted from `blocks_decommit`.  
-  mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
-  mi_bitmap_field_t* blocks_committed;    // are the blocks committed? (can be NULL for memory that cannot be decommitted)
-  mi_bitmap_field_t* blocks_purge;        // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
-  mi_bitmap_field_t* blocks_abandoned;    // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
-  mi_bitmap_field_t  blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
+  mi_arena_id_t       id;                   // arena id; 0 for non-specific
+  mi_memid_t          memid;                // memid of the memory area
+  _Atomic(uint8_t*)   start;                // the start of the memory area
+  size_t              block_count;          // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  size_t              field_count;          // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
+  size_t              meta_size;            // size of the arena structure itself (including its bitmaps)
+  mi_memid_t          meta_memid;           // memid of the arena structure itself (OS or static allocation)
+  int                 numa_node;            // associated NUMA node
+  bool                exclusive;            // only allow allocations if specifically for this arena
+  bool                is_large;             // memory area consists of large- or huge OS pages (always committed)
+  mi_lock_t           abandoned_visit_lock; // lock is only used when abandoned segments are being visited
+  _Atomic(size_t)     search_idx;           // optimization to start the search for free blocks
+  _Atomic(mi_msecs_t) purge_expire;         // expiration time when blocks should be decommitted from `blocks_decommit`.  
+  mi_bitmap_field_t*  blocks_dirty;         // are the blocks potentially non-zero?
+  mi_bitmap_field_t*  blocks_committed;     // are the blocks committed? (can be NULL for memory that cannot be decommitted)
+  mi_bitmap_field_t*  blocks_purge;         // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
+  mi_bitmap_field_t*  blocks_abandoned;     // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
+  mi_bitmap_field_t   blocks_inuse[1];      // in-place bitmap of in-use blocks (of size `field_count`)
   // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
 } mi_arena_t;
 
@@ -65,7 +66,6 @@ typedef struct mi_arena_s {
 static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
 static mi_decl_cache_align _Atomic(size_t)      mi_arena_count; // = 0
 
-
 //static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;
 
 /* -----------------------------------------------------------
@@ -702,6 +702,7 @@ static void mi_arenas_unsafe_destroy(void) {
   for (size_t i = 0; i < max_arena; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL) {
+      mi_lock_done(&arena->abandoned_visit_lock);
       if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
         mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
         _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
@@ -813,9 +814,9 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
 
 // start a cursor at a randomized arena
 void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current) {
-  mi_assert_internal(heap->tld->segments.subproc == subproc);
+  mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
-  current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
+  current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
   current->count = 0;
   current->bitmap_idx = 0;  
   current->subproc = subproc;
@@ -823,7 +824,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_aren
 
 // reclaim abandoned segments 
 // this does not set the thread id (so it appears as still abandoned)
-mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous ) 
+mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all ) 
 {
   const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
   if (max_arena <= 0 || mi_atomic_load_relaxed(&previous->subproc->abandoned_count) == 0) return NULL;
@@ -831,18 +832,31 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
   int count = previous->count;
   size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
   size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
-  // visit arena's (from previous)
+  // visit arena's (from the previous cursor)
   for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
     mi_arena_id_t arena_idx = previous->start + count;
     if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
     if (arena != NULL) {
+      bool has_lock = false;      
       // visit the abandoned fields (starting at previous_idx)
-      for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
+      for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
         size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
         if mi_unlikely(field != 0) { // skip zero fields quickly
+          // we only take the arena lock if there are actually abandoned segments present
+          if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
+            has_lock = (visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
+            if (!has_lock) {
+              if (visit_all) {
+                _mi_error_message(EINVAL, "failed to visit all abandoned segments due to failure to acquire the visitor lock");
+              }
+              // skip to next arena
+              break;
+            }
+          }
+          mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
           // visit each set bit in the field  (todo: maybe use `ctz` here?)
-          for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
+          for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
             // pre-check if the bit is set
             size_t mask = ((size_t)1 << bit_idx);
             if mi_unlikely((field & mask) == mask) {
@@ -852,7 +866,10 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
                 mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
                 mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
                 mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
-                // check that belongs to our sub-process
+                // check that the segment belongs to our sub-process 
+                // note: this is the reason we need a lock in the case abandoned visiting is enabled.
+                //  without the lock an abandoned visit may otherwise fail to visit all segments.
+                //  for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the arena lock.
                 if (segment->subproc != previous->subproc) {
                   // it is from another subprocess, re-mark it and continue searching
                   const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
@@ -865,6 +882,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
                   previous->count = count;
 
                   //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
+                  if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
                   return segment;
                 }
               }
@@ -872,6 +890,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
           }
         }
       }
+      if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
     }
   }
   // no more found
@@ -881,6 +900,29 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
 }
 
 
+static bool mi_arena_visit_abandoned_blocks(mi_subproc_t* subproc, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  mi_arena_field_cursor_t current;
+  _mi_arena_field_cursor_init(NULL, subproc, &current);
+  mi_segment_t* segment;
+  while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* visit all */)) != NULL) {
+    if (!_mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg)) return false;
+  }
+  return true;
+}
+
+bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  // (unfortunately) the visit_abandoned option must be enabled from the start.
+  // This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
+  if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
+    mi_assert(false);
+    _mi_error_message(EINVAL, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
+    return false;
+  }
+  // visit abandoned segments in the arena's  
+  return mi_arena_visit_abandoned_blocks(_mi_subproc_from_id(subproc_id), heap_tag, visit_blocks, visitor, arg);
+}
+
+
 /* -----------------------------------------------------------
   Add an arena.
 ----------------------------------------------------------- */
@@ -934,6 +976,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->is_large     = is_large;
   arena->purge_expire = 0;
   arena->search_idx   = 0;
+  mi_lock_init(&arena->abandoned_visit_lock);
   // consecutive bitmaps
   arena->blocks_dirty     = &arena->blocks_inuse[fields];     // just after inuse bitmap
   arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
diff --git a/src/heap.c b/src/heap.c
index f6f235491..2cde5fb08 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -137,6 +137,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   {
     // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
     // if all memory is freed by now, all segments should be freed.
+    // note: this only collects in the current subprocess
     _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
   }
 
@@ -515,17 +516,21 @@ bool mi_check_owned(const void* p) {
         enable visiting all blocks of all heaps across threads
 ----------------------------------------------------------- */
 
-// Separate struct to keep `mi_page_t` out of the public interface
-typedef struct mi_heap_area_ex_s {
-  mi_heap_area_t area;
-  mi_page_t*     page;
-} mi_heap_area_ex_t;
+void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
+  const size_t bsize = mi_page_block_size(page);
+  const size_t ubsize = mi_page_usable_block_size(page);
+  area->reserved = page->reserved * bsize;
+  area->committed = page->capacity * bsize;
+  area->blocks = mi_page_start(page);
+  area->used = page->used;   // number of blocks in use (#553)
+  area->block_size = ubsize;
+  area->full_block_size = bsize;
+}
 
-static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
-  mi_assert(xarea != NULL);
-  if (xarea==NULL) return true;
-  const mi_heap_area_t* area = &xarea->area;
-  mi_page_t* page = xarea->page;
+
+bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
+  mi_assert(area != NULL);
+  if (area==NULL) return true;
   mi_assert(page != NULL);
   if (page == NULL) return true;
 
@@ -590,23 +595,23 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
   return true;
 }
 
-typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
 
 
+// Separate struct to keep `mi_page_t` out of the public interface
+typedef struct mi_heap_area_ex_s {
+  mi_heap_area_t area;
+  mi_page_t* page;
+} mi_heap_area_ex_t;
+
+typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
+
 static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
   MI_UNUSED(heap);
   MI_UNUSED(pq);
   mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
   mi_heap_area_ex_t xarea;
-  const size_t bsize = mi_page_block_size(page);
-  const size_t ubsize = mi_page_usable_block_size(page);
   xarea.page = page;
-  xarea.area.reserved = page->reserved * bsize;
-  xarea.area.committed = page->capacity * bsize;
-  xarea.area.blocks = mi_page_start(page);
-  xarea.area.used = page->used;   // number of blocks in use (#553)
-  xarea.area.block_size = ubsize;
-  xarea.area.full_block_size = bsize;
+  _mi_heap_area_init(&xarea.area, page);  
   return fun(heap, &xarea, arg);
 }
 
@@ -627,7 +632,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
   mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
   if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
   if (args->visit_blocks) {
-    return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
+    return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
   }
   else {
     return true;
diff --git a/src/init.c b/src/init.c
index 01625891f..be8c16de1 100644
--- a/src/init.c
+++ b/src/init.c
@@ -185,22 +185,30 @@ mi_heap_t* _mi_heap_main_get(void) {
   Sub process
 ----------------------------------------------------------- */
 
+static mi_decl_cache_align _Atomic(uintptr_t)  mi_subproc_count;
+
+mi_subproc_id_t mi_subproc_main(void) {
+  return NULL;
+}
+
 mi_subproc_id_t mi_subproc_new(void) {
   mi_memid_t memid = _mi_memid_none();
   mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
   if (subproc == NULL) return NULL;
+  mi_atomic_increment_relaxed(&mi_subproc_count);
   subproc->memid = memid;
   return subproc;
 }
 
-static mi_subproc_t* mi_subproc_from_id(mi_subproc_id_t subproc_id) {
+mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
   return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
 }
 
 void mi_subproc_delete(mi_subproc_id_t subproc_id) {
   if (subproc_id == NULL) return;
-  mi_subproc_t* subproc = mi_subproc_from_id(subproc_id);
+  mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
   _mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
+  mi_atomic_decrement_relaxed(&mi_subproc_count);
 }
 
 void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
@@ -208,7 +216,7 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
   if (heap == NULL) return;
   mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
   if (heap->tld->segments.subproc != &mi_subproc_default) return;
-  heap->tld->segments.subproc = mi_subproc_from_id(subproc_id);
+  heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
 }
 
 
diff --git a/src/options.c b/src/options.c
index db6e040fe..32fa212b5 100644
--- a/src/options.c
+++ b/src/options.c
@@ -94,6 +94,11 @@ static mi_option_desc_t options[_mi_option_last] =
   { 1,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
   { 0,   UNINIT, MI_OPTION(disallow_arena_alloc) },     // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400, UNINIT, MI_OPTION(retry_on_oom) },             // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
+#if defined(MI_VISIT_ABANDONED)  
+  { 1,   INITIALIZED, MI_OPTION(visit_abandoned) },     // allow visiting heap blocks in abandonded segments; requires taking locks during reclaim.
+#else
+  { 0,   UNINIT, MI_OPTION(visit_abandoned) },          
+#endif
 };
 
 static void mi_option_init(mi_option_desc_t* desc);
diff --git a/src/segment.c b/src/segment.c
index 205d8753d..dc82b89d2 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -962,7 +962,7 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
   mi_segment_t* segment;
   mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
-  while ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
+  while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* blocking */)) != NULL) {
     mi_segment_reclaim(segment, heap, 0, NULL, tld);
   }
 }
@@ -987,7 +987,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
 
   mi_segment_t* segment;
   mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
-  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
+  while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current, false /* non-blocking */)) != NULL))
   {
     mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
     segment->abandoned_visits++;
@@ -1240,3 +1240,32 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
   mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc);
   return page;
 }
+
+
+/* -----------------------------------------------------------
+   Visit blocks in a segment (only used for abandoned segments)
+----------------------------------------------------------- */
+
+static bool mi_segment_visit_page(mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
+  mi_heap_area_t area;
+  _mi_heap_area_init(&area, page);
+  if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false;
+  if (visit_blocks) {
+    return _mi_heap_area_visit_blocks(&area, page, visitor, arg);
+  }
+  else {
+    return true;
+  }
+}
+
+bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {  
+  for (size_t i = 0; i < segment->capacity; i++) {
+    mi_page_t* const page = &segment->pages[i];
+    if (page->segment_in_use) {
+      if (heap_tag < 0 || (int)page->heap_tag == heap_tag) {
+        if (!mi_segment_visit_page(page, visit_blocks, visitor, arg)) return false;
+      }
+    }
+  }
+  return true;
+}