Skip to content

Commit

Permalink
optimize heap walks, by Sam Gross, upstream of python/cpython#114133
Browse files Browse the repository at this point in the history
  • Loading branch information
daanx committed Jun 2, 2024
1 parent 855e3b2 commit f7fe5bf
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 25 deletions.
98 changes: 73 additions & 25 deletions src/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,46 +528,83 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
}


static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
*shift = 64 - mi_clz(divisor - 1);
*magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}

static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
mi_assert_internal(n <= UINT32_MAX);
return ((((uint64_t)n * magic) >> 32) + n) >> shift;
}

bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
mi_assert(area != NULL);
if (area==NULL) return true;
mi_assert(page != NULL);
if (page == NULL) return true;

_mi_page_free_collect(page,true);
_mi_page_free_collect(page,true); // collect both thread_delayed and local_free
mi_assert_internal(page->local_free == NULL);
if (page->used == 0) return true;

const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding
size_t psize;
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
size_t psize;
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
mi_heap_t* const heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding

// optimize page with one block
if (page->capacity == 1) {
// optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
}
mi_assert(bsize <= UINT32_MAX);

// optimize full pages
if (page->used == page->capacity) {
uint8_t* block = pstart;
for (size_t i = 0; i < page->capacity; i++) {
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
return true;
}

// create a bitmap of free blocks.
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
memset(free_map, 0, sizeof(free_map));
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
memset(free_map, 0, bmapsize * sizeof(intptr_t));
if (page->capacity % MI_INTPTR_BITS != 0) {
// mark left-over bits at the end as free
size_t shift = (page->capacity % MI_INTPTR_BITS);
uintptr_t mask = (UINTPTR_MAX << shift);
free_map[bmapsize - 1] = mask;
}

// fast repeated division by the block size
uint64_t magic;
size_t shift;
mi_get_fast_divisor(bsize, &magic, &shift);

#if MI_DEBUG>1
size_t free_count = 0;
#endif
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
#if MI_DEBUG>1
free_count++;
#endif
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % bsize == 0);
size_t blockidx = offset / bsize; // Todo: avoid division?
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / sizeof(uintptr_t));
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
mi_assert_internal(offset <= UINT32_MAX);
size_t blockidx = mi_fast_divide(offset, magic, shift);
mi_assert_internal(blockidx == offset / bsize);
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / MI_INTPTR_BITS);
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
free_map[bitidx] |= ((uintptr_t)1 << bit);
}
mi_assert_internal(page->capacity == (free_count + page->used));
Expand All @@ -576,19 +613,30 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
#if MI_DEBUG>1
size_t used_count = 0;
#endif
for (size_t i = 0; i < page->capacity; i++) {
size_t bitidx = (i / sizeof(uintptr_t));
size_t bit = i - (bitidx * sizeof(uintptr_t));
uintptr_t m = free_map[bitidx];
if (bit == 0 && m == UINTPTR_MAX) {
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
uint8_t* block = pstart;
for (size_t i = 0; i < bmapsize; i++) {
if (free_map[i] == 0) {
// every block is in use
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
#if MI_DEBUG>1
used_count++;
#endif
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
}
else if ((m & ((uintptr_t)1 << bit)) == 0) {
#if MI_DEBUG>1
used_count++;
#endif
uint8_t* block = pstart + (i * bsize);
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
else {
// visit the used blocks in the mask
uintptr_t m = ~free_map[i];
while (m != 0) {
#if MI_DEBUG>1
used_count++;
#endif
size_t bitidx = mi_ctz(m);
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
m &= m - 1; // clear least significant bit
}
block += bsize * MI_INTPTR_BITS;
}
}
mi_assert_internal(page->used == used_count);
Expand Down
14 changes: 14 additions & 0 deletions test/test-stress.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ static void free_items(void* p) {
custom_free(p);
}

/*
static bool visit_blocks(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
(void)(heap); (void)(area);
size_t* total = (size_t*)arg;
if (block != NULL) {
total += block_size;
}
return true;
}
*/

static void stress(intptr_t tid) {
//bench_start_thread();
Expand Down Expand Up @@ -173,6 +183,10 @@ static void stress(intptr_t tid) {
data[data_idx] = q;
}
}
// walk the heap
// size_t total = 0;
// mi_heap_visit_blocks(mi_heap_get_default(), true, visit_blocks, &total);

// free everything that is left
for (size_t i = 0; i < retain_top; i++) {
free_items(retained[i]);
Expand Down

0 comments on commit f7fe5bf

Please sign in to comment.