Skip to content

Commit

Permalink
fast bit set
Browse files Browse the repository at this point in the history
  • Loading branch information
erincatto committed Jul 8, 2023
1 parent 22f30ca commit 35f7a7e
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 19 deletions.
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ set(BOX2D_SOURCE_FILES
array.c
array.h
atomic.inl
bitset.c
bitset.h
block_allocator.c
block_allocator.h
body.c
Expand Down
128 changes: 128 additions & 0 deletions src/bitset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// SPDX-FileCopyrightText: 2023 Erin Catto
// SPDX-License-Identifier: MIT

#include "box2d/allocate.h"

#include "bitset.h"

#include <assert.h>
#include <string.h>

// Build a bit set whose storage can hold at least bitCapacity bits.
// The requested capacity is rounded up to a whole number of 64-bit words.
// The returned set has a word count of zero and its storage is not cleared here.
b2BitSet b2CreateBitSet(uint32_t bitCapacity)
{
	// Number of bits held by one storage word
	const size_t bitsPerWord = sizeof(uint64_t) * 8;

	b2BitSet result = {0};
	result.wordCapacity = (bitCapacity + bitsPerWord - 1) / bitsPerWord;
	result.wordCount = 0;
	result.bits = b2Alloc(result.wordCapacity * sizeof(uint64_t));
	return result;
}

// Release the word storage and return the bit set to an empty state
// (null storage pointer, zero word count, zero capacity).
void b2DestroyBitSet(b2BitSet* bitSet)
{
	b2Free(bitSet->bits);

	// Reset every field so the set no longer refers to the freed storage
	bitSet->bits = NULL;
	bitSet->wordCount = 0;
	bitSet->wordCapacity = 0;
}

// Size the bit set to hold bitCount bits and clear every bit in use.
// If the current capacity is too small, the old storage is destroyed and a new
// allocation with 1.5x the requested bit count is created. Old contents are not
// copied because all bits are cleared anyway.
void b2SetBitCountAndClear(b2BitSet* bitSet, uint32_t bitCount)
{
	uint32_t wordCount = (bitCount + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8);
	if (bitSet->wordCapacity < wordCount)
	{
		b2DestroyBitSet(bitSet);
		uint32_t newBitCapacity = bitCount + (bitCount >> 1);
		*bitSet = b2CreateBitSet(newBitCapacity);
	}

	// This must also run after a reallocation: b2CreateBitSet leaves the word count
	// at zero and does not clear the storage, so returning early would leave the set
	// unusable (b2SetBit asserts on the word count) and uninitialized.
	bitSet->wordCount = wordCount;
	memset(bitSet->bits, 0, bitSet->wordCount * sizeof(uint64_t));
}

// Bitwise OR every word of setB into setA, leaving the result in setA.
// Both sets must have the same word count (checked with an assert).
void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB)
{
	assert(setA->wordCount == setB->wordCount);

	uint64_t* dst = setA->bits;
	const uint64_t* src = setB->bits;
	const uint32_t count = setA->wordCount;

	for (uint32_t k = 0; k < count; ++k)
	{
		dst[k] |= src[k];
	}
}


#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>

// Count trailing zeros: index of the lowest set bit of a 64-bit word.
// https://en.wikipedia.org/wiki/Find_first_set
// A zero word is not handled here; callers in this file only pass non-zero words.
static inline uint32_t b2CTZ(uint64_t word)
{
	unsigned long index;

#ifdef _WIN64
	_BitScanForward64(&index, word);
#else
	// 32-bit fall back: scan the low half first, then the high half
	const uint32_t lowHalf = (uint32_t)word;
	if (lowHalf == 0)
	{
		_BitScanForward(&index, (uint32_t)(word >> 32));
		index += 32;
	}
	else
	{
		_BitScanForward(&index, lowHalf);
	}
#endif

	return (uint32_t)index;
}

#else

// Count trailing zeros: index of the lowest set bit of a 64-bit word.
// A zero word is not handled here; callers in this file only pass non-zero words.
static inline uint32_t b2CTZ(uint64_t word)
{
	const int trailingZeros = __builtin_ctzll(word);
	return (uint32_t)trailingZeros;
}

#endif


// Find the next set bit at or after the index stored in *bitIndexPtr.
// Returns true and writes the index of that bit to *bitIndexPtr when one exists.
// Returns false and leaves *bitIndexPtr untouched when no set bit remains.
// https://lemire.me/blog/2018/02/21/iterating-over-set-bits-quickly/
bool b2GetNextSetBitIndex(const b2BitSet* bitset, uint32_t* bitIndexPtr)
{
	const uint32_t startBit = *bitIndexPtr;
	const uint32_t wordCount = bitset->wordCount;

	uint32_t w = startBit / 64;
	if (w >= wordCount)
	{
		return false;
	}

	// Drop the bits below the starting position in the first word
	const uint64_t remaining = bitset->bits[w] >> (startBit & 63);
	if (remaining != 0)
	{
		*bitIndexPtr = startBit + b2CTZ(remaining);
		return true;
	}

	// Scan the following words for the first one that is not empty
	for (w += 1; w < wordCount; ++w)
	{
		const uint64_t candidate = bitset->bits[w];
		if (candidate != 0)
		{
			*bitIndexPtr = 64 * w + b2CTZ(candidate);
			return true;
		}
	}

	return false;
}

29 changes: 29 additions & 0 deletions src/bitset.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: 2023 Erin Catto
// SPDX-License-Identifier: MIT

#pragma once

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

// Bit set provides fast operations on large arrays of bits
// Bits are packed into 64-bit words.
typedef struct b2BitSet
{
// Word storage. Bit i is stored in word i / 64 at bit position i % 64.
uint64_t* bits;
// Number of 64-bit words allocated for bits
uint32_t wordCapacity;
// Number of words currently in use (always within the capacity)
uint32_t wordCount;
} b2BitSet;

// Allocate a bit set with room for at least bitCapacity bits. The word count starts at zero.
b2BitSet b2CreateBitSet(uint32_t bitCapacity);
// Free the word storage and reset all fields of the bit set.
void b2DestroyBitSet(b2BitSet* bitSet);
// Size the set for bitCount bits, replacing the storage when the capacity is too small.
void b2SetBitCountAndClear(b2BitSet* bitset, uint32_t bitCount);
// OR the words of setB into setA. Both sets must have the same word count.
void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB);
// Search for a set bit starting at *bitIndexPtr. Returns true and stores the index of
// the bit in *bitIndexPtr when one is found, otherwise returns false.
bool b2GetNextSetBitIndex(const b2BitSet* bitset, uint32_t* bitIndexPtr);

// Turn on the bit at bitIndex. The bit must lie within the words currently
// in use (checked with an assert).
static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex)
{
	const uint32_t word = bitIndex >> 6;
	assert(word < bitSet->wordCount);

	const uint64_t mask = (uint64_t)1 << (bitIndex & 63);
	bitSet->bits[word] |= mask;
}
30 changes: 12 additions & 18 deletions src/world.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "box2d/timer.h"

#include "array.h"
#include "bitset.h"
#include "block_allocator.h"
#include "body.h"
#include "contact.h"
Expand Down Expand Up @@ -35,7 +36,7 @@ typedef struct b2TaskContext
{
// These bits align with the awake contact array and signal change in contact status
// that affects the island graph.
bool* contactBitArray;
b2BitSet contactBitSet;
} b2TaskContext;

b2World* b2GetWorldFromId(b2WorldId id)
Expand Down Expand Up @@ -250,7 +251,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def)
world->taskContextArray = b2CreateArray(sizeof(b2TaskContext), world->workerCount);
for (uint32_t i = 0; i < world->workerCount; ++i)
{
world->taskContextArray[i].contactBitArray = b2CreateArray(sizeof(bool), def->contactCapacity);
world->taskContextArray[i].contactBitSet = b2CreateBitSet(def->contactCapacity);
}

return id;
Expand All @@ -262,7 +263,7 @@ void b2DestroyWorld(b2WorldId id)

for (uint32_t i = 0; i < world->workerCount; ++i)
{
b2DestroyArray(world->taskContextArray[i].contactBitArray);
b2DestroyBitSet(&world->taskContextArray[i].contactBitSet);
}
b2DestroyArray(world->taskContextArray);

Expand Down Expand Up @@ -334,7 +335,7 @@ static void b2CollideTask(int32_t startIndex, int32_t endIndex, uint32_t threadI
if (overlap == false)
{
contact->flags |= b2_contactDisjoint;
taskContext->contactBitArray[awakeIndex] = true;
b2SetBit(&taskContext->contactBitSet, awakeIndex);
}
else
{
Expand All @@ -350,12 +351,12 @@ static void b2CollideTask(int32_t startIndex, int32_t endIndex, uint32_t threadI
if (touching == true && wasTouching == false)
{
contact->flags |= b2_contactStartedTouching;
taskContext->contactBitArray[awakeIndex] = true;
b2SetBit(&taskContext->contactBitSet, awakeIndex);
}
else if (touching == false && wasTouching == true)
{
contact->flags |= b2_contactStoppedTouching;
taskContext->contactBitArray[awakeIndex] = true;
b2SetBit(&taskContext->contactBitSet, awakeIndex);
}
}
}
Expand All @@ -378,7 +379,7 @@ static void b2Collide(b2World* world)

for (uint32_t i = 0; i < world->workerCount; ++i)
{
memset(world->taskContextArray[i].contactBitArray, 0, awakeContactCount * sizeof(bool));
b2SetBitCountAndClear(&world->taskContextArray[i].contactBitSet, awakeContactCount);
}

if (g_parallel)
Expand All @@ -396,23 +397,16 @@ static void b2Collide(b2World* world)
b2TracyCZoneNC(contact_state, "Contact State", b2_colorCoral, true);

// Bitwise OR all contact bits
bool* bitArray = world->taskContextArray[0].contactBitArray;
b2BitSet* bitSet = &world->taskContextArray[0].contactBitSet;
for (uint32_t i = 1; i < world->workerCount; ++i)
{
bool* threadBits = world->taskContextArray[i].contactBitArray;
for (int32_t j = 0; j < awakeContactCount; ++j)
{
bitArray[j] |= threadBits[j];
}
b2InPlaceUnion(bitSet, &world->taskContextArray[i].contactBitSet);
}

// Process contact state changes
for (int32_t i = 0; i < awakeContactCount; ++i)

for (uint32_t i = 0; b2GetNextSetBitIndex(bitSet, &i); ++i)
{
if (bitArray[i] == false)
{
continue;
}

int32_t index = world->awakeContactArray[i];
b2Contact* contact = world->contacts + index;
Expand Down
4 changes: 3 additions & 1 deletion src/world.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ typedef struct b2World
uint16_t revision;

b2Profile profile;
B2_ATOMIC long contactPointCount;

// TODO_ERIN not used
_Atomic long contactPointCount;

b2PreSolveFcn* preSolveFcn;
void* preSolveContext;
Expand Down

0 comments on commit 35f7a7e

Please sign in to comment.