Skip to content

Commit

Permalink
Improve search/find_end perf by dropping memcmp (#4654)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
AlexGuteniev and StephanTLavavej authored Jun 18, 2024
1 parent a1f8a58 commit e3ed206
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 39 deletions.
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(search src/search.cpp)
add_benchmark(std_copy src/std_copy.cpp)
add_benchmark(swap_ranges src/swap_ranges.cpp)

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const char src[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquet "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
Expand All @@ -34,7 +34,7 @@ const char src[] =
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquam malesuada est at dignissim. Pellentesque finibus "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

template <class T>
Expand Down
114 changes: 114 additions & 0 deletions benchmarks/src/search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include <vector>

// Text that every benchmark in this file searches through.
// The needle defined below ("aliquet") occurs only in the last few lines of this literal;
// the earlier lines contain the near-miss "aliquam" instead.
const char src_haystack[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
"accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci "
"elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec "
"volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam "
"iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit "
"rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin "
"vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec "
"nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec "
"metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis "
"nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor "
"venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, "
"aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis "
"nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, "
"laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. "
"Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum "
"feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum "
"lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. "
"Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient "
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

// Pattern that the benchmarks look for inside src_haystack.
// As a char array it carries a terminating '\0' after the seven visible characters.
const char src_needle[] = "aliquet";

// Baseline benchmark: times the C library's strstr on the shared haystack and needle.
//
// state: Google Benchmark iteration state; each iteration performs one strstr call.
//
// Both strings are built from the full arrays, so each holds the array's terminating '\0' as its
// last character. strstr only reads up to the first '\0' reached through c_str().
void c_strstr(benchmark::State& state) {
    const std::string text(src_haystack, std::size(src_haystack));
    const std::string pattern(src_needle, std::size(src_needle));

    for (auto _ : state) {
        benchmark::DoNotOptimize(text);
        benchmark::DoNotOptimize(pattern);

        const char* const text_cstr    = text.c_str();
        const char* const pattern_cstr = pattern.c_str();
        auto found                     = std::strstr(text_cstr, pattern_cstr);
        benchmark::DoNotOptimize(found);
    }
}

// Benchmarks the iterator-pair overload of std::search over elements of type T.
//
// state: Google Benchmark iteration state; each iteration performs one search.
//
// The haystack and needle are copies of src_haystack and src_needle converted to T.
// std::end() of a char array points one past the terminating '\0', so both ranges stop one
// element short of it. Otherwise the needle would be "aliquet\0", which never occurs in the
// haystack, and this benchmark would time only the no-match scan rather than the same lookup
// that the c_strstr baseline performs.
template <class T>
void classic_search(benchmark::State& state) {
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Keep the inputs and the result observable so the search is not optimized away.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end());
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks std::ranges::search over whole-container ranges of element type T.
//
// state: Google Benchmark iteration state; each iteration performs one search.
//
// The haystack and needle are copies of src_haystack and src_needle converted to T.
// std::end() of a char array points one past the terminating '\0', so both ranges stop one
// element short of it. Otherwise the needle would be "aliquet\0", which never occurs in the
// haystack, and this benchmark would time only the no-match scan rather than the same lookup
// that the c_strstr baseline performs.
template <class T>
void ranges_search(benchmark::State& state) {
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Keep the inputs and the result observable so the search is not optimized away.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::ranges::search(haystack, needle);
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks the searcher overload of std::search, using std::default_searcher, over elements of
// type T.
//
// state: Google Benchmark iteration state; each iteration builds one searcher and runs one search.
//
// The haystack and needle are copies of src_haystack and src_needle converted to T.
// std::end() of a char array points one past the terminating '\0', so both ranges stop one
// element short of it. Otherwise the needle would be "aliquet\0", which never occurs in the
// haystack, and this benchmark would time only the no-match scan rather than the same lookup
// that the c_strstr baseline performs.
template <class T>
void search_default_searcher(benchmark::State& state) {
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Keep the inputs and the result observable so the search is not optimized away.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::search(haystack.begin(), haystack.end(), std::default_searcher{needle.begin(), needle.end()});
        benchmark::DoNotOptimize(res);
    }
}

// C library baseline.
BENCHMARK(c_strstr);

// Each algorithm below is registered once per element width: 1, 2, 4 and 8 bytes.

// std::search, iterator-pair overload.
BENCHMARK(classic_search<std::uint8_t>);
BENCHMARK(classic_search<std::uint16_t>);
BENCHMARK(classic_search<std::uint32_t>);
BENCHMARK(classic_search<std::uint64_t>);

// std::ranges::search.
BENCHMARK(ranges_search<std::uint8_t>);
BENCHMARK(ranges_search<std::uint16_t>);
BENCHMARK(ranges_search<std::uint32_t>);
BENCHMARK(ranges_search<std::uint64_t>);

// std::search with std::default_searcher.
BENCHMARK(search_default_searcher<std::uint8_t>);
BENCHMARK(search_default_searcher<std::uint16_t>);
BENCHMARK(search_default_searcher<std::uint32_t>);
BENCHMARK(search_default_searcher<std::uint64_t>);


// Expands to the program entry point supplied by Google Benchmark.
BENCHMARK_MAIN();
10 changes: 1 addition & 9 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val
template <class _Ty1, class _Ty2>
_Ty1* _Find_first_of_vectorized(
_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
if constexpr (sizeof(_Ty1) == 1) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_1(_First1, _Last1, _First2, _Last2)));
Expand Down Expand Up @@ -2119,15 +2120,6 @@ namespace ranges {
template <class _InIt1, class _InIt2, class _Pr>
_NODISCARD _CONSTEXPR20 bool _Equal_rev_pred_unchecked(_InIt1 _First1, _InIt2 _First2, const _InIt2 _Last2, _Pr _Pred) {
// compare [_First1, ...) to [_First2, _Last2)
if constexpr (_Equal_memcmp_is_safe<_InIt1, _InIt2, _Pr>) {
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
return _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
}
}

for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
if (!_Pred(*_First1, *_First2)) {
return false;
Expand Down
25 changes: 1 addition & 24 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -5356,7 +5356,7 @@ constexpr bool _Equal_memcmp_is_safe_helper =

template <class _Iter1, class _Iter2, class _Pr>
constexpr bool _Equal_memcmp_is_safe =
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, _Pr>;
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, remove_const_t<_Pr>>;

template <class _CtgIt1, class _CtgIt2>
_NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) {
Expand Down Expand Up @@ -6648,35 +6648,12 @@ namespace ranges {

_EXPORT_STD inline constexpr _Adjacent_find_fn adjacent_find;

template <class _It1, class _It2, class _Se2, class _Pr, class _Pj1, class _Pj2>
concept _Equal_rev_pred_can_memcmp = is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>
&& sized_sentinel_for<_Se2, _It2> && _Equal_memcmp_is_safe<_It1, _It2, _Pr>;

template <forward_iterator _It1, input_iterator _It2, sentinel_for<_It2> _Se2, class _Pr, class _Pj1, class _Pj2>
requires indirectly_comparable<_It1, _It2, _Pr, _Pj1, _Pj2>
_NODISCARD constexpr pair<bool, _It1> _Equal_rev_pred(
_It1 _First1, _It2 _First2, const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
// Returns {true, _First1 + (_Last2 - _First2)} if [_First1, ...) equals [_First2, _Last2), and {false, {}}
// otherwise.
constexpr bool _Optimize = _Equal_rev_pred_can_memcmp<_It1, _It2, _Se2, _Pr, _Pj1, _Pj2>;
if constexpr (_Optimize) {
if (!_STD is_constant_evaluated()) {
bool _Ans;
if constexpr (same_as<_It2, _Se2>) {
_Ans = _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
} else {
_Ans = _STD _Memcmp_count(_First1, _First2, static_cast<size_t>(_Last2 - _First2)) == 0;
}

if (_Ans) {
_First1 += (_Last2 - _First2);
return {true, _STD move(_First1)};
} else {
return {false, _It1 {}};
}
}
}

for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) {
return {false, _It1 {}};
Expand Down
82 changes: 78 additions & 4 deletions tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,19 @@ auto last_known_good_find_first_of(FwdItH h_first, FwdItH h_last, FwdItN n_first
return h_first;
}

// Reference implementation of subsequence search, used to check the library's results.
//
// Tries every haystack position that still has at least as many elements left as the needle and
// returns the first one where the needle matches element-for-element. Returns h_last when no
// position matches. An empty needle matches at h_first.
template <class RanItH, class RanItN>
auto last_known_good_search(RanItH h_first, RanItH h_last, RanItN n_first, RanItN n_last) {
    const auto needle_size = n_last - n_first;

    while (h_last - h_first >= needle_size) {
        const bool matches_here = equal(h_first, h_first + needle_size, n_first, n_last);
        if (matches_here) {
            return h_first;
        }

        ++h_first;
    }

    return h_last;
}

template <class T>
void test_case_find(const vector<T>& input, T v) {
auto expected = last_known_good_find(input.begin(), input.end(), v);
Expand Down Expand Up @@ -275,12 +288,13 @@ void test_case_find_first_of(const vector<T>& input_haystack, const vector<T>& i

template <class T>
void test_find_first_of(mt19937_64& gen) {
constexpr size_t needleDataCount = 50;
using TD = conditional_t<sizeof(T) == 1, int, T>;
constexpr size_t haystackDataCount = 200;
constexpr size_t needleDataCount = 35;
using TD = conditional_t<sizeof(T) == 1, int, T>;
uniform_int_distribution<TD> dis('a', 'z');
vector<T> input_haystack;
vector<T> input_needle;
input_haystack.reserve(dataCount);
input_haystack.reserve(haystackDataCount);
input_needle.reserve(needleDataCount);

for (;;) {
Expand All @@ -292,7 +306,7 @@ void test_find_first_of(mt19937_64& gen) {
test_case_find_first_of(input_haystack, input_needle);
}

if (input_haystack.size() == dataCount) {
if (input_haystack.size() == haystackDataCount) {
break;
}

Expand All @@ -312,6 +326,56 @@ void test_find_first_of_containers() {
#endif // _HAS_CXX20
}

// Checks one haystack/needle pair: every available flavor of search must agree with
// last_known_good_search.
//
// Always checked: search with two iterator pairs.
// C++17 and later: search with a default_searcher.
// C++20 and later: ranges::search, whose result must begin at the expected position and end
// needle-size elements later when found, or be empty at the haystack's end when not found.
template <class T>
void test_case_search(const vector<T>& input_haystack, const vector<T>& input_needle) {
    const auto hay_first    = input_haystack.begin();
    const auto hay_last     = input_haystack.end();
    const auto needle_first = input_needle.begin();
    const auto needle_last  = input_needle.end();

    auto expected = last_known_good_search(hay_first, hay_last, needle_first, needle_last);

    auto actual = search(hay_first, hay_last, needle_first, needle_last);
    assert(expected == actual);
#if _HAS_CXX17
    auto searcher_actual = search(hay_first, hay_last, default_searcher{needle_first, needle_last});
    assert(expected == searcher_actual);
#endif // _HAS_CXX17
#if _HAS_CXX20
    auto ranges_actual = ranges::search(input_haystack, input_needle);
    assert(expected == begin(ranges_actual));

    // A hit spans exactly the needle's length; a miss is the empty range at the haystack's end.
    const auto expected_end =
        expected == input_haystack.end() ? expected : expected + static_cast<ptrdiff_t>(input_needle.size());
    assert(expected_end == end(ranges_actual));
#endif // _HAS_CXX20
}

// Randomized test driver for search over element type T.
//
// Grows the haystack one random digit character ('0'..'9') at a time, from empty up to 200
// elements. For every haystack size, the needle is rebuilt from empty up to 35 random digits,
// and test_case_search runs after each change to the needle (including the empty needle).
//
// gen: random engine that supplies all haystack and needle elements.
template <class T>
void test_search(mt19937_64& gen) {
    constexpr size_t max_haystack_size = 200;
    constexpr size_t max_needle_size   = 35;

    // uniform_int_distribution is not instantiated with single-byte types; draw ints instead.
    using TD = conditional_t<sizeof(T) == 1, int, T>;
    uniform_int_distribution<TD> dis('0', '9');

    const auto next_element = [&] { return static_cast<T>(dis(gen)); };

    vector<T> input_haystack;
    vector<T> input_needle;
    input_haystack.reserve(max_haystack_size);
    input_needle.reserve(max_needle_size);

    while (true) {
        input_needle.clear();
        test_case_search(input_haystack, input_needle);

        while (input_needle.size() != max_needle_size) {
            input_needle.push_back(next_element());
            test_case_search(input_haystack, input_needle);
        }

        if (input_haystack.size() == max_haystack_size) {
            break;
        }

        input_haystack.push_back(next_element());
    }
}

template <class T>
void test_min_max_element(mt19937_64& gen) {
using Limits = numeric_limits<T>;
Expand Down Expand Up @@ -817,6 +881,16 @@ void test_vector_algorithms(mt19937_64& gen) {
test_find_first_of_containers<const vector<wchar_t>, vector<wchar_t>>();
test_find_first_of_containers<vector<char>, vector<int>>();

test_search<char>(gen);
test_search<signed char>(gen);
test_search<unsigned char>(gen);
test_search<short>(gen);
test_search<unsigned short>(gen);
test_search<int>(gen);
test_search<unsigned int>(gen);
test_search<long long>(gen);
test_search<unsigned long long>(gen);

test_min_max_element<char>(gen);
test_min_max_element<signed char>(gen);
test_min_max_element<unsigned char>(gen);
Expand Down

0 comments on commit e3ed206

Please sign in to comment.