diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index ad026b994f..4540860c92 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -12,8 +12,9 @@ #include #include "lorem.hpp" -using namespace std::string_view_literals; +#include "skewed_allocator.hpp" +using namespace std::string_view_literals; template constexpr auto make_fill_pattern_array() { @@ -48,12 +49,18 @@ constexpr data_and_pattern patterns[] = { /* 5. Large, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<20, true>}, }; +template +using not_highly_aligned_basic_string = std::basic_string, not_highly_aligned_allocator>; + +using not_highly_aligned_string = not_highly_aligned_basic_string; +using not_highly_aligned_wstring = not_highly_aligned_basic_string; + void c_strstr(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::string haystack(src_haystack); - const std::string needle(src_needle); + const not_highly_aligned_string haystack(src_haystack); + const not_highly_aligned_string needle(src_needle); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -68,8 +75,8 @@ void classic_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -84,8 +91,8 @@ void ranges_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -100,8 +107,8 @@ void search_default_searcher(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -132,8 +139,8 @@ void classic_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -148,8 +155,8 @@ void ranges_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -159,6 +166,22 @@ void ranges_find_end(benchmark::State& state) { } } +template +void member_rfind(benchmark::State& state) { + const auto& src_haystack = patterns[static_cast(state.range())].data; + const auto& src_needle = patterns[static_cast(state.range())].pattern; + + const T haystack(src_haystack.begin(), src_haystack.end()); + const T needle(src_needle.begin(), src_needle.end()); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = haystack.rfind(needle); + benchmark::DoNotOptimize(res); + } +} + void common_args(auto bm) { bm->DenseRange(0, std::size(patterns) - 1, 1); } @@ -174,8 +197,8 @@ BENCHMARK(ranges_search)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); -BENCHMARK(member_find)->Apply(common_args); -BENCHMARK(member_find)->Apply(common_args); +BENCHMARK(member_find)->Apply(common_args); +BENCHMARK(member_find)->Apply(common_args); BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(classic_find_end)->Apply(common_args); @@ -183,4 +206,7 @@ BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); + BENCHMARK_MAIN(); diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index a0a4d6f009..75ac8b6249 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -683,7 +683,28 @@ constexpr size_t _Traits_rfind(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits return static_cast(-1); } - for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - _Needle_size);; --_Match_try) { + const size_t _Actual_start_at = (_STD min)(_Start_at, _Hay_size - _Needle_size); + +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(typename _Traits::char_type) <= 2) { + if (!_STD _Is_constant_evaluated()) { + // _Find_end_vectorized takes into account the needle length when locating the search start. + // As a potentially earlier start position can be specified, we need to take it into account, + // and pick between the maximum possible start position and the specified one, + // and then add _Needle_size, so that it is subtracted back in _Find_end_vectorized. + const auto _End = _Haystack + _Actual_start_at + _Needle_size; + const auto _Ptr = _STD _Find_end_vectorized(_Haystack, _End, _Needle, _Needle_size); + + if (_Ptr != _End) { + return static_cast(_Ptr - _Haystack); + } else { + return static_cast(-1); + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + + for (auto _Match_try = _Haystack + _Actual_start_at;; --_Match_try) { if (_Traits::eq(*_Match_try, *_Needle) && _Traits::compare(_Match_try, _Needle, _Needle_size) == 0) { return static_cast(_Match_try - _Haystack); // found a match } diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 23d74b52f6..40f3f965d2 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -59,11 +59,6 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -const void* __stdcall __std_find_end_1( - const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; -const void* __stdcall __std_find_end_2( - const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; - __declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept; @@ -194,19 +189,6 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val } } -template -_Ty1* _Find_end_vectorized( - _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept { - _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); - if constexpr (sizeof(_Ty1) == 1) { - return const_cast<_Ty1*>(static_cast(::__std_find_end_1(_First1, _Last1, _First2, _Count2))); - } else if constexpr (sizeof(_Ty1) == 2) { - return const_cast<_Ty1*>(static_cast(::__std_find_end_2(_First1, _Last1, _First2, _Count2))); - } else { - _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size - } -} - template __declspec(noalias) void _Replace_vectorized( _Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept { diff --git a/stl/inc/xutility b/stl/inc/xutility index f9793535d6..04bdd476df 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -107,6 +107,11 @@ const void* __stdcall __std_search_1( const void* __stdcall __std_search_2( const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; +const void* __stdcall __std_find_end_1( + const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; +const void* __stdcall __std_find_end_2( + const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; + const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; @@ -248,6 +253,19 @@ _Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _F } } +template +_Ty1* _Find_end_vectorized( + _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept { + _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); + if constexpr (sizeof(_Ty1) == 1) { + return const_cast<_Ty1*>(static_cast(::__std_find_end_1(_First1, _Last1, _First2, _Count2))); + } else if constexpr (sizeof(_Ty1) == 2) { + return const_cast<_Ty1*>(static_cast(::__std_find_end_2(_First1, _Last1, _First2, _Count2))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + template _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { constexpr bool _Signed = is_signed_v<_Ty>; diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 54876e3849..e7aec69bea 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1109,6 +1109,25 @@ void test_case_string_find_str(const basic_string& input_haystack, const basi assert(expected == actual); } +template +void test_case_string_rfind_str(const basic_string& input_haystack, const basic_string& input_needle) { + ptrdiff_t expected; + if (input_needle.empty()) { + expected = static_cast(input_haystack.size()); + } else { + const auto expected_iter = last_known_good_find_end( + input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + + if (expected_iter != input_haystack.end()) { + expected = expected_iter - input_haystack.begin(); + } else { + expected = -1; + } + } + const auto actual = static_cast(input_haystack.rfind(input_needle)); + assert(expected == actual); +} + template void test_basic_string_dis(mt19937_64& gen, D& dis) { basic_string input_haystack; @@ -1124,12 +1143,14 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); for (size_t attempts = 0; attempts < needleDataCount; ++attempts) { input_needle.push_back(static_cast(dis(gen))); test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); // For large needles the chance of a match is low, so test a guaranteed match if (input_haystack.size() > input_needle.size() * 2) { @@ -1139,6 +1160,7 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { temp.assign(overwritten_first, overwritten_first + static_cast(input_needle.size())); copy(input_needle.begin(), input_needle.end(), overwritten_first); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); copy(temp.begin(), temp.end(), overwritten_first); } }