Skip to content

Commit

Permalink
unordered_map,unordered_set: Extend support for custom execution policies
Browse files Browse the repository at this point in the history
  • Loading branch information
stotko committed Nov 20, 2024
1 parent 4907180 commit c9ec551
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 36 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ jobs:
path: build

- name: Upload coverage report
uses: codecov/codecov-action@v5
uses: codecov/codecov-action@v5.0.2
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: build/stdgpu_coverage.info
Expand All @@ -90,6 +90,6 @@ jobs:
runs-on: ubuntu-24.04

steps:
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
8 changes: 4 additions & 4 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
runs-on: ubuntu-24.04

steps:
- uses: re-actors/alls-green@release/v1
with:
allowed-skips: publish
jobs: ${{ toJSON(needs) }}
- uses: re-actors/alls-green@release/v1
with:
allowed-skips: publish
jobs: ${{ toJSON(needs) }}
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,6 @@ jobs:
runs-on: ubuntu-24.04

steps:
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
10 changes: 5 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
ubuntu:
strategy:
matrix:
os: ['ubuntu-22.04', 'ubuntu-24.04']
os: ["ubuntu-22.04", "ubuntu-24.04"]
build_type: [Debug, Release]
shared_libs: [ON, OFF]
use_32bit_index: [ON, OFF]
Expand Down Expand Up @@ -86,7 +86,7 @@ jobs:
windows:
strategy:
matrix:
os: ['windows-2019', 'windows-2022']
os: ["windows-2019", "windows-2022"]
build_type: [Debug, Release]
shared_libs: [ON, OFF]
use_32bit_index: [ON, OFF]
Expand Down Expand Up @@ -148,6 +148,6 @@ jobs:
runs-on: ubuntu-24.04

steps:
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
- uses: re-actors/alls-green@release/v1
with:
jobs: ${{ toJSON(needs) }}
44 changes: 44 additions & 0 deletions src/stdgpu/impl/unordered_base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -408,20 +408,53 @@ public:
[[nodiscard]] STDGPU_HOST_DEVICE bool
empty() const;

/**
* \brief Checks if the object is empty
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return True if the object is empty, false otherwise
* \note Overload taking an explicit execution policy; the result is true exactly when size(policy) is 0
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
[[nodiscard]] bool
empty(ExecutionPolicy&& policy) const;

/**
* \brief Checks if the object is full
* \return True if the object is full, false otherwise
* \note The result is true exactly when size() equals total_count()
*/
STDGPU_HOST_DEVICE bool
full() const;

/**
* \brief Checks if the object is full
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return True if the object is full, false otherwise
* \note Overload taking an explicit execution policy; the result is true exactly when size(policy) equals
* total_count()
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
bool
full(ExecutionPolicy&& policy) const;

/**
* \brief The size
* \return The size of the object
*/
STDGPU_HOST_DEVICE index_t
size() const;

/**
* \brief The size
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return The size of the object
* \note Overload taking an explicit execution policy; the value is obtained by loading the occupied-element
* counter with the given policy and is checked via STDGPU_ENSURES to lie in [0, total_count()]
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
index_t
size(ExecutionPolicy&& policy) const;

/**
* \brief The maximum size
* \return The maximum size
Expand All @@ -443,6 +476,17 @@ public:
STDGPU_HOST_DEVICE float
load_factor() const;

/**
* \brief The average number of elements per bucket
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return The average number of elements per bucket
* \note Overload taking an explicit execution policy; computed as size(policy) divided by bucket_count(),
* with both operands converted to float
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
float
load_factor(ExecutionPolicy&& policy) const;

/**
* \brief The maximum number of elements per bucket
* \return The maximum number of elements per bucket
Expand Down
76 changes: 64 additions & 12 deletions src/stdgpu/impl/unordered_base_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,16 @@ template <typename ExecutionPolicy,
device_indexed_range<typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::value_type>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::device_range(ExecutionPolicy&& policy)
{
_range_indices_end.store(0);
_range_indices_end.store(std::forward<ExecutionPolicy>(policy), 0);

for_each_index(std::forward<ExecutionPolicy>(policy),
total_count(),
unordered_base_collect_positions<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>(*this));

return device_indexed_range<value_type>(stdgpu::device_range<index_t>(_range_indices, _range_indices_end.load()),
_values);
return device_indexed_range<value_type>(
stdgpu::device_range<index_t>(_range_indices,
_range_indices_end.load(std::forward<ExecutionPolicy>(policy))),
_values);
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
Expand All @@ -191,14 +193,15 @@ template <typename ExecutionPolicy,
device_indexed_range<const typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::value_type>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::device_range(ExecutionPolicy&& policy) const
{
_range_indices_end.store(0);
_range_indices_end.store(std::forward<ExecutionPolicy>(policy), 0);

for_each_index(std::forward<ExecutionPolicy>(policy),
total_count(),
unordered_base_collect_positions<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>(*this));

return device_indexed_range<const value_type>(
stdgpu::device_range<index_t>(_range_indices, _range_indices_end.load()),
stdgpu::device_range<index_t>(_range_indices,
_range_indices_end.load(std::forward<ExecutionPolicy>(policy))),
_values);
}

Expand Down Expand Up @@ -318,7 +321,13 @@ template <typename ExecutionPolicy,
inline bool
loop_free(ExecutionPolicy&& policy, const unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>& base)
{
int* flags = createDeviceArray<int>(base.total_count(), 0);
using flags_allocator_type = typename stdgpu::allocator_traits<Allocator>::template rebind_alloc<int>;
flags_allocator_type flags_allocator = flags_allocator_type(base.get_allocator());

int* flags = allocator_traits<flags_allocator_type>::allocate_filled(std::forward<ExecutionPolicy>(policy),
flags_allocator,
base.total_count(),
0);

for_each_index(std::forward<ExecutionPolicy>(policy),
base.bucket_count(),
Expand All @@ -330,7 +339,10 @@ loop_free(ExecutionPolicy&& policy, const unordered_base<Key, Value, KeyFromValu
logical_and<>(),
less_equal_one(flags));

destroyDeviceArray<int>(flags);
allocator_traits<flags_allocator_type>::deallocate_filled(std::forward<ExecutionPolicy>(policy),
flags_allocator,
flags,
base.total_count());

return result;
}
Expand Down Expand Up @@ -447,7 +459,7 @@ inline bool
occupied_count_valid(ExecutionPolicy&& policy,
const unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>& base)
{
index_t size_count = base.size();
index_t size_count = base.size(std::forward<ExecutionPolicy>(policy));
index_t size_sum = base._occupied.count(std::forward<ExecutionPolicy>(policy));

return (size_count == size_sum);
Expand Down Expand Up @@ -1059,13 +1071,31 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::empty() con
return (size() == 0);
}

// Policy-aware emptiness check: queries the size through the supplied execution policy
// and reports whether it is zero.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::empty(ExecutionPolicy&& policy) const
{
    const auto element_count = size(std::forward<ExecutionPolicy>(policy));
    return element_count == 0;
}

// Fullness check: the container is full when its size has reached total_count().
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::full() const
{
    const auto element_count = size();
    return element_count == total_count();
}

// Policy-aware fullness check: queries the size through the supplied execution policy
// and compares it against total_count().
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::full(ExecutionPolicy&& policy) const
{
    const auto element_count = size(std::forward<ExecutionPolicy>(policy));
    return element_count == total_count();
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size() const
Expand All @@ -1077,6 +1107,19 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size() cons
return current_size;
}

// Policy-aware size query: loads the occupied-element counter through the supplied execution policy
// and returns it after the contract checks below.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size(ExecutionPolicy&& policy) const
{
index_t current_size = _occupied_count.load(std::forward<ExecutionPolicy>(policy));

// Contract checks on the loaded value: it must be non-negative and must not exceed total_count()
STDGPU_ENSURES(0 <= current_size);
STDGPU_ENSURES(current_size <= total_count());
return current_size;
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::max_size() const noexcept
Expand Down Expand Up @@ -1105,6 +1148,15 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::load_factor
return static_cast<float>(size()) / static_cast<float>(bucket_count());
}

// Policy-aware load factor: the size (queried through the supplied execution policy)
// divided by the number of buckets, both taken as float.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline float
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::load_factor(ExecutionPolicy&& policy) const
{
    const float element_count = static_cast<float>(size(std::forward<ExecutionPolicy>(policy)));
    const float buckets = static_cast<float>(bucket_count());
    return element_count / buckets;
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE float
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::max_load_factor() const
Expand Down Expand Up @@ -1167,7 +1219,7 @@ template <typename ExecutionPolicy,
void
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear(ExecutionPolicy&& policy)
{
if (empty())
if (empty(std::forward<ExecutionPolicy>(policy)))
{
return;
}
Expand All @@ -1183,7 +1235,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear(Execu

_occupied.reset(std::forward<ExecutionPolicy>(policy));

_occupied_count.store(0);
_occupied_count.store(std::forward<ExecutionPolicy>(policy), 0);

detail::vector_clear_iota(std::forward<ExecutionPolicy>(policy), _excess_list_positions, bucket_count());
}
Expand Down Expand Up @@ -1239,7 +1291,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::createDevic

detail::vector_clear_iota(std::forward<ExecutionPolicy>(policy), result._excess_list_positions, bucket_count);

STDGPU_ENSURES(result._excess_list_positions.full());
STDGPU_ENSURES(result._excess_list_positions.full(std::forward<ExecutionPolicy>(policy)));

return result;
}
Expand All @@ -1254,7 +1306,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::destroyDevi
{
if (!detail::is_destroy_optimizable<value_type>())
{
device_object.clear();
device_object.clear(std::forward<ExecutionPolicy>(policy));
}

device_object._bucket_count = 0;
Expand Down
36 changes: 36 additions & 0 deletions src/stdgpu/impl/unordered_map_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -270,20 +270,47 @@ unordered_map<Key, T, Hash, KeyEqual, Allocator>::empty() const
return _base.empty();
}

// Policy-aware emptiness check; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::empty(ExecutionPolicy&& policy) const
{
    const bool base_is_empty = _base.empty(std::forward<ExecutionPolicy>(policy));
    return base_is_empty;
}

// Fullness check; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::full() const
{
    const bool base_is_full = _base.full();
    return base_is_full;
}

// Policy-aware fullness check; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::full(ExecutionPolicy&& policy) const
{
    const bool base_is_full = _base.full(std::forward<ExecutionPolicy>(policy));
    return base_is_full;
}

// Size query; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::size() const
{
    const auto element_count = _base.size();
    return element_count;
}

// Policy-aware size query; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::size(ExecutionPolicy&& policy) const
{
    const auto element_count = _base.size(std::forward<ExecutionPolicy>(policy));
    return element_count;
}

template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::max_size() const noexcept
Expand All @@ -305,6 +332,15 @@ unordered_map<Key, T, Hash, KeyEqual, Allocator>::load_factor() const
return _base.load_factor();
}

// Policy-aware load factor; delegates to the underlying base container.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline float
unordered_map<Key, T, Hash, KeyEqual, Allocator>::load_factor(ExecutionPolicy&& policy) const
{
    const float base_load_factor = _base.load_factor(std::forward<ExecutionPolicy>(policy));
    return base_load_factor;
}

template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE float
unordered_map<Key, T, Hash, KeyEqual, Allocator>::max_load_factor() const
Expand Down
Loading

0 comments on commit c9ec551

Please sign in to comment.