From 0a58afb83ecb374c5e4caa95580bc3c6b738764c Mon Sep 17 00:00:00 2001 From: Richard Biely Date: Fri, 22 Sep 2023 14:17:39 +0200 Subject: [PATCH] Tweaked: Bitset flip performance Added: More bitset unit tests --- include/gaia/containers/bitset.h | 37 ++++- include/gaia/containers/dbitset.h | 73 +++++---- single_include/gaia.h | 110 +++++++++----- src/test/src/main.cpp | 236 ++++++++++++++++++++++++++---- 4 files changed, 356 insertions(+), 100 deletions(-) diff --git a/include/gaia/containers/bitset.h b/include/gaia/containers/bitset.h index 0a61b909..4fb51990 100644 --- a/include/gaia/containers/bitset.h +++ b/include/gaia/containers/bitset.h @@ -235,21 +235,42 @@ namespace gaia { //! Flips the bit at the postion \param pos constexpr void flip(uint32_t pos) { GAIA_ASSERT(pos < NBits); - m_data[pos / BitsPerItem] ^= ((size_type)1 << (pos % BitsPerItem)); + const auto wordIdx = pos / BitsPerItem; + const auto bitIdx = pos % BitsPerItem; + m_data[wordIdx] ^= ((size_type)1 << bitIdx); } //! 
Flips all bits from \param bitFrom to \param bitTo (including) constexpr bitset& flip(uint32_t bitFrom, uint32_t bitTo) { GAIA_ASSERT(bitFrom <= bitTo); - GAIA_ASSERT(bitFrom < size()); + GAIA_ASSERT(bitTo < size()); - if GAIA_UNLIKELY (size() == 0) - return *this; + // The following can't happen because we always have at least 1 bit + // if GAIA_UNLIKELY (size() == 0) + // return *this; + + const uint32_t wordIdxFrom = bitFrom / BitsPerItem; + const uint32_t wordIdxTo = bitTo / BitsPerItem; + + auto getMask = [](uint32_t from, uint32_t to) -> size_type { + const auto diff = to - from; + // Set all bits when asking for the full range + if (diff == BitsPerItem - 1) + return (size_type)-1; - for (uint32_t i = bitFrom; i <= bitTo; i++) { - uint32_t wordIdx = i / BitsPerItem; - uint32_t bitOffset = i % BitsPerItem; - m_data[wordIdx] ^= ((size_type)1 << bitOffset); + return ((size_type(1) << (diff + 1)) - 1) << from; + }; + + if (wordIdxFrom == wordIdxTo) { + m_data[wordIdxTo] ^= getMask(bitFrom % BitsPerItem, bitTo % BitsPerItem); + } else { + // First word + m_data[wordIdxFrom] ^= getMask(bitFrom % BitsPerItem, BitsPerItem - 1); + // Middle + for (uint32_t i = wordIdxFrom + 1; i <= wordIdxTo - 1; i++) + m_data[i] = ~m_data[i]; + // Last word + m_data[wordIdxTo] ^= getMask(0, bitTo % BitsPerItem); } return *this; diff --git a/include/gaia/containers/dbitset.h b/include/gaia/containers/dbitset.h index 233a3106..0c2bbc0f 100644 --- a/include/gaia/containers/dbitset.h +++ b/include/gaia/containers/dbitset.h @@ -85,36 +85,33 @@ namespace gaia { uint32_t find_next_set_bit(uint32_t pos) const { value_type wordIndex = pos / dbitset::BitsPerItem; - GAIA_ASSERT(wordIndex < m_bitset.Items()); + const auto items = m_bitset.Items(); + GAIA_ASSERT(wordIndex < items); size_type word = 0; - const size_type posInWord = pos % dbitset::BitsPerItem; - if (posInWord < dbitset::BitsPerItem - 1) { - const size_type mask = (size_type(1) << (posInWord + 1)) - 1; - const size_type maskInv = 
~mask; - word = m_bitset.m_pData[wordIndex] & maskInv; + const size_type posInWord = pos % dbitset::BitsPerItem + 1; + if GAIA_LIKELY (posInWord < dbitset::BitsPerItem) { + const size_type mask = (size_type(1) << posInWord) - 1; + word = m_bitset.m_pData[wordIndex] & (~mask); } - // No set bit in the current word, move to the next one - while (word == 0) { - if (wordIndex >= m_bitset.Items() - 1) + GAIA_MSVC_WARNING_PUSH() + GAIA_MSVC_WARNING_DISABLE(4244) + while (true) { + if (word != 0) { + if constexpr (dbitset::BitsPerItem == 32) + return wordIndex * dbitset::BitsPerItem + GAIA_FFS(word) - 1; + else + return wordIndex * dbitset::BitsPerItem + GAIA_FFS64(word) - 1; + } + + // No set bit in the current word, move to the next one + if (++wordIndex >= items) return pos; - word = m_bitset.m_pData[++wordIndex]; + word = m_bitset.m_pData[wordIndex]; } - - // Process the word - uint32_t fwd = 0; - - GAIA_MSVC_WARNING_PUSH() - GAIA_MSVC_WARNING_DISABLE(4244) - if constexpr (dbitset::BitsPerItem == 32) - fwd = GAIA_FFS(word) - 1; - else - fwd = GAIA_FFS64(word) - 1; GAIA_MSVC_WARNING_POP() - - return wordIndex * dbitset::BitsPerItem + fwd; } uint32_t find_prev_set_bit(uint32_t pos) const { @@ -407,15 +404,33 @@ namespace gaia { //! 
Flips all bits from \param bitFrom to \param bitTo (including) dbitset& flip(uint32_t bitFrom, uint32_t bitTo) { GAIA_ASSERT(bitFrom <= bitTo); - GAIA_ASSERT(bitFrom < size()); + GAIA_ASSERT(bitTo < size()); if GAIA_UNLIKELY (size() == 0) return *this; - for (uint32_t i = bitFrom; i <= bitTo; i++) { - uint32_t wordIdx = i / BitsPerItem; - uint32_t bitOffset = i % BitsPerItem; - m_pData[wordIdx] ^= ((size_type)1 << bitOffset); + const uint32_t wordIdxFrom = bitFrom / BitsPerItem; + const uint32_t wordIdxTo = bitTo / BitsPerItem; + + auto getMask = [](uint32_t from, uint32_t to) -> size_type { + const auto diff = to - from; + // Set all bits when asking for the full range + if (diff == BitsPerItem - 1) + return (size_type)-1; + + return ((size_type(1) << (diff + 1)) - 1) << from; + }; + + if (wordIdxFrom == wordIdxTo) { + m_pData[wordIdxTo] ^= getMask(bitFrom % BitsPerItem, bitTo % BitsPerItem); + } else { + // First word + m_pData[wordIdxFrom] ^= getMask(bitFrom % BitsPerItem, BitsPerItem - 1); + // Middle + for (uint32_t i = wordIdxFrom + 1; i <= wordIdxTo - 1; i++) + m_pData[i] = ~m_pData[i]; + // Last word + m_pData[wordIdxTo] ^= getMask(0, bitTo % BitsPerItem); } return *this; @@ -451,8 +466,8 @@ namespace gaia { if (HasTrailingBits()) return (m_pData[items] & lastItemMask) == lastItemMask; - else - return m_pData[items] == (size_type)-1; + + return m_pData[items] == (size_type)-1; } //! Checks if any bit is set diff --git a/single_include/gaia.h b/single_include/gaia.h index 8a695cd7..fa583862 100644 --- a/single_include/gaia.h +++ b/single_include/gaia.h @@ -4647,21 +4647,42 @@ namespace gaia { //! Flips the bit at the postion \param pos constexpr void flip(uint32_t pos) { GAIA_ASSERT(pos < NBits); - m_data[pos / BitsPerItem] ^= ((size_type)1 << (pos % BitsPerItem)); + const auto wordIdx = pos / BitsPerItem; + const auto bitIdx = pos % BitsPerItem; + m_data[wordIdx] ^= ((size_type)1 << bitIdx); } //! 
Flips all bits from \param bitFrom to \param bitTo (including) constexpr bitset& flip(uint32_t bitFrom, uint32_t bitTo) { GAIA_ASSERT(bitFrom <= bitTo); - GAIA_ASSERT(bitFrom < size()); + GAIA_ASSERT(bitTo < size()); - if GAIA_UNLIKELY (size() == 0) - return *this; + // The following can't happen because we always have at least 1 bit + // if GAIA_UNLIKELY (size() == 0) + // return *this; + + const uint32_t wordIdxFrom = bitFrom / BitsPerItem; + const uint32_t wordIdxTo = bitTo / BitsPerItem; - for (uint32_t i = bitFrom; i <= bitTo; i++) { - uint32_t wordIdx = i / BitsPerItem; - uint32_t bitOffset = i % BitsPerItem; - m_data[wordIdx] ^= ((size_type)1 << bitOffset); + auto getMask = [](uint32_t from, uint32_t to) -> size_type { + const auto diff = to - from; + // Set all bits when asking for the full range + if (diff == BitsPerItem - 1) + return (size_type)-1; + + return ((size_type(1) << (diff + 1)) - 1) << from; + }; + + if (wordIdxFrom == wordIdxTo) { + m_data[wordIdxTo] ^= getMask(bitFrom % BitsPerItem, bitTo % BitsPerItem); + } else { + // First word + m_data[wordIdxFrom] ^= getMask(bitFrom % BitsPerItem, BitsPerItem - 1); + // Middle + for (uint32_t i = wordIdxFrom + 1; i <= wordIdxTo - 1; i++) + m_data[i] = ~m_data[i]; + // Last word + m_data[wordIdxTo] ^= getMask(0, bitTo % BitsPerItem); } return *this; @@ -5347,36 +5368,33 @@ namespace gaia { uint32_t find_next_set_bit(uint32_t pos) const { value_type wordIndex = pos / dbitset::BitsPerItem; - GAIA_ASSERT(wordIndex < m_bitset.Items()); + const auto items = m_bitset.Items(); + GAIA_ASSERT(wordIndex < items); size_type word = 0; - const size_type posInWord = pos % dbitset::BitsPerItem; - if (posInWord < dbitset::BitsPerItem - 1) { - const size_type mask = (size_type(1) << (posInWord + 1)) - 1; - const size_type maskInv = ~mask; - word = m_bitset.m_pData[wordIndex] & maskInv; + const size_type posInWord = pos % dbitset::BitsPerItem + 1; + if GAIA_LIKELY (posInWord < dbitset::BitsPerItem) { + const size_type mask 
= (size_type(1) << posInWord) - 1; + word = m_bitset.m_pData[wordIndex] & (~mask); } - // No set bit in the current word, move to the next one - while (word == 0) { - if (wordIndex >= m_bitset.Items() - 1) + GAIA_MSVC_WARNING_PUSH() + GAIA_MSVC_WARNING_DISABLE(4244) + while (true) { + if (word != 0) { + if constexpr (dbitset::BitsPerItem == 32) + return wordIndex * dbitset::BitsPerItem + GAIA_FFS(word) - 1; + else + return wordIndex * dbitset::BitsPerItem + GAIA_FFS64(word) - 1; + } + + // No set bit in the current word, move to the next one + if (++wordIndex >= items) return pos; - word = m_bitset.m_pData[++wordIndex]; + word = m_bitset.m_pData[wordIndex]; } - - // Process the word - uint32_t fwd = 0; - - GAIA_MSVC_WARNING_PUSH() - GAIA_MSVC_WARNING_DISABLE(4244) - if constexpr (dbitset::BitsPerItem == 32) - fwd = GAIA_FFS(word) - 1; - else - fwd = GAIA_FFS64(word) - 1; GAIA_MSVC_WARNING_POP() - - return wordIndex * dbitset::BitsPerItem + fwd; } uint32_t find_prev_set_bit(uint32_t pos) const { @@ -5669,15 +5687,33 @@ namespace gaia { //! 
Flips all bits from \param bitFrom to \param bitTo (including) dbitset& flip(uint32_t bitFrom, uint32_t bitTo) { GAIA_ASSERT(bitFrom <= bitTo); - GAIA_ASSERT(bitFrom < size()); + GAIA_ASSERT(bitTo < size()); if GAIA_UNLIKELY (size() == 0) return *this; - for (uint32_t i = bitFrom; i <= bitTo; i++) { - uint32_t wordIdx = i / BitsPerItem; - uint32_t bitOffset = i % BitsPerItem; - m_pData[wordIdx] ^= ((size_type)1 << bitOffset); + const uint32_t wordIdxFrom = bitFrom / BitsPerItem; + const uint32_t wordIdxTo = bitTo / BitsPerItem; + + auto getMask = [](uint32_t from, uint32_t to) -> size_type { + const auto diff = to - from; + // Set all bits when asking for the full range + if (diff == BitsPerItem - 1) + return (size_type)-1; + + return ((size_type(1) << (diff + 1)) - 1) << from; + }; + + if (wordIdxFrom == wordIdxTo) { + m_pData[wordIdxTo] ^= getMask(bitFrom % BitsPerItem, bitTo % BitsPerItem); + } else { + // First word + m_pData[wordIdxFrom] ^= getMask(bitFrom % BitsPerItem, BitsPerItem - 1); + // Middle + for (uint32_t i = wordIdxFrom + 1; i <= wordIdxTo - 1; i++) + m_pData[i] = ~m_pData[i]; + // Last word + m_pData[wordIdxTo] ^= getMask(0, bitTo % BitsPerItem); } return *this; @@ -5713,8 +5749,8 @@ namespace gaia { if (HasTrailingBits()) return (m_pData[items] & lastItemMask) == lastItemMask; - else - return m_pData[items] == (size_type)-1; + + return m_pData[items] == (size_type)-1; } //! 
Checks if any bit is set diff --git a/src/test/src/main.cpp b/src/test/src/main.cpp index 66a31e1d..5bade19a 100644 --- a/src/test/src/main.cpp +++ b/src/test/src/main.cpp @@ -538,19 +538,6 @@ void test_bitset() { REQUIRE(bs.all() == false); REQUIRE(bs.none() == true); } - SECTION("Ranges") { - containers::bitset<11> bs; - bs.set(1); - bs.set(10); - bs.flip(2, 9); - for (uint32_t i = 2; i <= 10; ++i) - REQUIRE(bs.test(i) == true); - bs.flip(2, 9); - for (uint32_t i = 2; i < 10; ++i) - REQUIRE(bs.test(i) == false); - REQUIRE(bs.test(1)); - REQUIRE(bs.test(10)); - } SECTION("Iteration") { { containers::bitset bs; @@ -628,6 +615,111 @@ TEST_CASE("Containers - bitset") { SECTION("512 bits") { test_bitset<512>(); } + SECTION("Ranges 11 bits") { + containers::bitset<11> bs; + bs.set(1); + bs.set(10); + bs.flip(2, 9); + for (uint32_t i = 1; i <= 10; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 9); + for (uint32_t i = 2; i < 10; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(10)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(10, 10); + REQUIRE(bs.test(10)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 10); + REQUIRE(bs.count() == 11); + REQUIRE(bs.all() == true); + bs.flip(0, 10); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } + SECTION("Ranges 64 bits") { + containers::bitset<64> bs; + bs.set(1); + bs.set(10); + bs.flip(2, 9); + for (uint32_t i = 1; i <= 10; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 9); + for (uint32_t i = 2; i < 10; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(10)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(63, 63); + REQUIRE(bs.test(63)); + 
REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 63); + REQUIRE(bs.count() == 64); + REQUIRE(bs.all() == true); + bs.flip(0, 63); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } + SECTION("Ranges 101 bits") { + containers::bitset<101> bs; + bs.set(1); + bs.set(100); + bs.flip(2, 99); + for (uint32_t i = 1; i <= 100; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 99); + for (uint32_t i = 2; i < 100; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(100)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(100, 100); + REQUIRE(bs.test(100)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 100); + REQUIRE(bs.count() == 101); + REQUIRE(bs.all() == true); + bs.flip(0, 100); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } } template @@ -709,19 +801,6 @@ void test_dbitset() { REQUIRE(bs.all() == false); REQUIRE(bs.none() == true); } - SECTION("Ranges") { - containers::dbitset bs; - bs.set(1); - bs.set(10); - bs.flip(2, 9); - for (uint32_t i = 2; i <= 10; ++i) - REQUIRE(bs.test(i) == true); - bs.flip(2, 9); - for (uint32_t i = 2; i < 10; ++i) - REQUIRE(bs.test(i) == false); - REQUIRE(bs.test(1)); - REQUIRE(bs.test(10)); - } SECTION("Iteration") { { containers::dbitset bs; @@ -799,6 +878,111 @@ TEST_CASE("Containers - dbitset") { SECTION("512 bits") { test_dbitset<512>(); } + SECTION("Ranges 11 bits") { + containers::dbitset bs; + bs.set(1); + bs.set(10); + bs.flip(2, 9); + for (uint32_t i = 1; i <= 10; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 9); + for (uint32_t i = 2; i < 10; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(10)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() 
== true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(10, 10); + REQUIRE(bs.test(10)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 10); + REQUIRE(bs.count() == 11); + REQUIRE(bs.all() == true); + bs.flip(0, 10); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } + SECTION("Ranges 64 bits") { + containers::dbitset bs; + bs.set(1); + bs.set(10); + bs.flip(2, 9); + for (uint32_t i = 1; i <= 10; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 9); + for (uint32_t i = 2; i < 10; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(10)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(63, 63); + REQUIRE(bs.test(63)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 63); + REQUIRE(bs.count() == 64); + REQUIRE(bs.all() == true); + bs.flip(0, 63); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } + SECTION("Ranges 101 bits") { + containers::dbitset bs; + bs.set(1); + bs.set(100); + bs.flip(2, 99); + for (uint32_t i = 1; i <= 100; ++i) + REQUIRE(bs.test(i) == true); + bs.flip(2, 99); + for (uint32_t i = 2; i < 100; ++i) + REQUIRE(bs.test(i) == false); + REQUIRE(bs.test(1)); + REQUIRE(bs.test(100)); + + bs.reset(); + bs.flip(0, 0); + REQUIRE(bs.test(0)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(100, 100); + REQUIRE(bs.test(100)); + REQUIRE(bs.count() == 1); + REQUIRE(bs.any() == true); + REQUIRE(bs.all() == false); + + bs.reset(); + bs.flip(0, 100); + REQUIRE(bs.count() == 101); + REQUIRE(bs.all() == true); + bs.flip(0, 100); + REQUIRE(bs.count() == 0); + REQUIRE(bs.none() == true); + } } TEST_CASE("for_each") {