Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ESIMD] Optimize the simd stride constructor #12553

Merged
merged 3 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions sycl/include/sycl/ext/intel/esimd/detail/simd_obj_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,23 @@ constexpr vector_type_t<T, N> make_vector(const T (&&Arr)[N]) {
}

template <class T, int N, size_t... Is>
constexpr vector_type_t<T, N> make_vector_impl(T Base, T Stride,
std::index_sequence<Is...>) {
return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
constexpr auto make_vector_impl(T Base, T Stride, std::index_sequence<Is...>) {
if constexpr (std::is_integral_v<T> && N <= 3) {
// This sequence is a bit more efficient for integral types and N <= 3.
return vector_type_t<T, N>{(T)(Base + ((T)Is) * Stride)...};
} else {
using CppT = typename element_type_traits<T>::EnclosingCppT;
CppT BaseCpp = Base;
CppT StrideCpp = Stride;
vector_type_t<CppT, N> VBase = BaseCpp;
vector_type_t<CppT, N> VStride = StrideCpp;
vector_type_t<CppT, N> VStrideCoef{(CppT)(Is)...};
vector_type_t<CppT, N> Result{VBase + VStride * VStrideCoef};
return wrapper_type_converter<T>::template to_vector<N>(Result);
}
}

template <class T, int N>
constexpr vector_type_t<T, N> make_vector(T Base, T Stride) {
template <class T, int N> constexpr auto make_vector(T Base, T Stride) {
return make_vector_impl<T, N>(Base, Stride, std::make_index_sequence<N>{});
}

Expand Down Expand Up @@ -265,18 +275,13 @@ class [[__sycl_detail__::__uses_aspects__(
/// are initialized with the arithmetic progression defined by the arguments.
/// For example, <code>simd<int, 4> x(1, 3)</code> will initialize x to the
/// <code>{1, 4, 7, 10}</code> sequence.
/// @param Val The start of the progression.
/// If Ty is a floating-point type and \p Base or \p Step is +/-inf or nan,
/// then this constructor has undefined behavior.
/// @param Base The start of the progression.
/// @param Step The step of the progression.
simd_obj_impl(Ty Val, Ty Step) noexcept {
__esimd_dbg_print(simd_obj_impl(Ty Val, Ty Step));
if constexpr (is_wrapper_elem_type_v<Ty> || !std::is_integral_v<Ty>) {
for (int i = 0; i < N; ++i) {
M_data[i] = bitcast_to_raw_type(Val);
Val = binary_op<BinOp::add, Ty>(Val, Step);
}
} else {
M_data = make_vector<Ty, N>(Val, Step);
}
simd_obj_impl(Ty Base, Ty Step) noexcept {
__esimd_dbg_print(simd_obj_impl(Ty Base, Ty Step));
M_data = make_vector<Ty, N>(Base, Step);
}

/// Broadcast constructor. Given value is type-converted to the
Expand Down
16 changes: 3 additions & 13 deletions sycl/test-e2e/ESIMD/api/functional/ctors/ctor_fill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,8 @@ class run_test {
});
queue.wait_and_throw();

// Verify the base value was passed as-is
if (!are_bitwise_equal(result[0], base_value)) {
passed = false;
log::fail(TestDescriptionT(data_type, BaseVal, Step),
"Unexpected value at index 0, retrieved: ", result[0],
", expected: ", base_value);
}

// Verify the step value works as expected being passed to the fill
// constructor.
DataT expected_value = base_value;
for (size_t i = 1; i < result.size(); ++i) {
// Verify the fill constructor.
for (size_t i = 0; i < result.size(); ++i) {
if constexpr (BaseVal == init_val::nan || Step == init_val::nan) {

if (!std::isnan(result[i])) {
Expand All @@ -268,7 +258,7 @@ class run_test {
}
} else {

expected_value += step_value;
DataT expected_value = base_value + (DataT)i * step_value;
if (!are_bitwise_equal(result[i], expected_value)) {
passed = false;
log::fail(TestDescriptionT(data_type, BaseVal, Step),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
// The test verifies that simd fill constructor has no precision differences.
// The test performs the following actions:
// - call simd with predefined base and step values
// - bitwise comparing that output[0] value is equal to base value and
// output[i] is equal to output[i -1] + step_value
// - bitwise comparing that output[i] is equal to base + i * step_value.

#include "ctor_fill.hpp"

Expand Down
14 changes: 13 additions & 1 deletion sycl/test-e2e/ESIMD/api/functional/ctors/ctor_fill_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,22 @@ int main(int, char **) {
}
{
const auto types = get_tested_types<tested_types::fp>();
{
const auto base_values =
ctors::get_init_values_pack<init_val::negative>();
const auto step_values =
ctors::get_init_values_pack<init_val::positive>();
passed &= for_all_combinations<ctors::run_test>(
types, sizes, contexts, base_values, step_values, queue);
}
// The test cases below have never been guaranteed to behave in any particular
// way when the base or step value is inf or nan. They may or may not
// work as expected by the checks in this test.
{
const auto base_values =
ctors::get_init_values_pack<init_val::neg_inf>();
const auto step_values = ctors::get_init_values_pack<init_val::max>();
const auto step_values =
ctors::get_init_values_pack<init_val::positive>();
passed &= for_all_combinations<ctors::run_test>(
types, sizes, contexts, base_values, step_values, queue);
}
Expand Down
58 changes: 49 additions & 9 deletions sycl/test/esimd/ctor_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,62 @@ SYCL_EXTERNAL auto foo(double i) SYCL_ESIMD_FUNCTION {
// CHECK-NEXT: }
}

// Base + step constructor, FP element type, loops exected - don't check.
SYCL_EXTERNAL auto bar() SYCL_ESIMD_FUNCTION {
simd<double, 2> val(17, 3);
return val;
// Const base + step constructor, FP element type.
// Both arguments are compile-time constants, so the checks below require the
// whole 64-element progression {1.0, 4.0, ..., 190.0} to be emitted as one
// store of a constant vector, immediately followed by the return.
SYCL_EXTERNAL auto double_base_step_const() SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z22double_base_step_constv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} {
return simd<double, 64>{1.0, 3.0};
// CHECK: store <64 x double> <double 1.000000e+00, double 4.000000e+00, double 7.000000e+00, double 1.000000e+01, double 1.300000e+01, double 1.600000e+01, double 1.900000e+01, double 2.200000e+01, double 2.500000e+01, double 2.800000e+01, double 3.100000e+01, double 3.400000e+01, double 3.700000e+01, double 4.000000e+01, double 4.300000e+01, double 4.600000e+01, double 4.900000e+01, double 5.200000e+01, double 5.500000e+01, double 5.800000e+01, double 6.100000e+01, double 6.400000e+01, double 6.700000e+01, double 7.000000e+01, double 7.300000e+01, double 7.600000e+01, double 7.900000e+01, double 8.200000e+01, double 8.500000e+01, double 8.800000e+01, double 9.100000e+01, double 9.400000e+01, double 9.700000e+01, double 1.000000e+02, double 1.030000e+02, double 1.060000e+02, double 1.090000e+02, double 1.120000e+02, double 1.150000e+02, double 1.180000e+02, double 1.210000e+02, double 1.240000e+02, double 1.270000e+02, double 1.300000e+02, double 1.330000e+02, double 1.360000e+02, double 1.390000e+02, double 1.420000e+02, double 1.450000e+02, double 1.480000e+02, double 1.510000e+02, double 1.540000e+02, double 1.570000e+02, double 1.600000e+02, double 1.630000e+02, double 1.660000e+02, double 1.690000e+02, double 1.720000e+02, double 1.750000e+02, double 1.780000e+02, double 1.810000e+02, double 1.840000e+02, double 1.870000e+02, double 1.900000e+02>, ptr addrspace(4) %[[RES]]
// CHECK-NEXT: ret void
}

// Variable base + step constructor, FP element type.
// With run-time arguments the checks below require base and step to be
// broadcast to <32 x double> vectors and combined with the constant
// {0.0, 1.0, ..., 31.0} coefficient vector by a single llvm.fmuladd call whose
// result is stored directly to the return slot.
SYCL_EXTERNAL auto double_base_step_var(double base, double step) SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z20double_base_step_vardd({{.*}} %[[RES:[a-zA-Z0-9_\.]+]], double noundef %[[BASE:[a-zA-Z0-9_\.]+]], double noundef %[[STEP:[a-zA-Z0-9_\.]+]]){{.*}} {
return simd<double, 32>{base, step};
// CHECK: %[[BASE_VEC_TMP:[a-zA-Z0-9_\.]+]] = insertelement <32 x double> poison, double %[[BASE]], i64 0
// CHECK: %[[BASE_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x double> %[[BASE_VEC_TMP]], <32 x double> poison, <32 x i32> zeroinitializer
// CHECK: %[[STEP_VEC_TMP:[a-zA-Z0-9_\.]+]] = insertelement <32 x double> poison, double %[[STEP]], i64 0
// CHECK: %[[STEP_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x double> %[[STEP_VEC_TMP]], <32 x double> poison, <32 x i32> zeroinitializer
// CHECK: %[[FMA_VEC:[a-zA-Z0-9_\.]+]] = tail call noundef <32 x double> @llvm.fmuladd.v32f64(<32 x double> %[[STEP_VEC]], <32 x double> <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00, double 9.000000e+00, double 1.000000e+01, double 1.100000e+01, double 1.200000e+01, double 1.300000e+01, double 1.400000e+01, double 1.500000e+01, double 1.600000e+01, double 1.700000e+01, double 1.800000e+01, double 1.900000e+01, double 2.000000e+01, double 2.100000e+01, double 2.200000e+01, double 2.300000e+01, double 2.400000e+01, double 2.500000e+01, double 2.600000e+01, double 2.700000e+01, double 2.800000e+01, double 2.900000e+01, double 3.000000e+01, double 3.100000e+01>, <32 x double> %[[BASE_VEC]])
// CHECK: store <32 x double> %[[FMA_VEC]], ptr addrspace(4) %[[RES]]
// CHECK-NEXT: ret void
}

// Base + step constructor, integer element type, no loops exected - check.
SYCL_EXTERNAL auto baz() SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z3bazv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} {
simd<int, 2> val(17, 3);
// Const base + step constructor, integer element type.
SYCL_EXTERNAL auto int_base_step_const() SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z19int_base_step_constv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} {
simd<int, 16> val(17, 3);
return val;
// CHECK: store <2 x i32> <i32 17, i32 20>, ptr addrspace(4) %[[RES]]
// CHECK: store <16 x i32> <i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47, i32 50, i32 53, i32 56, i32 59, i32 62>, ptr addrspace(4) %[[RES]]
// CHECK-NEXT: ret void
// CHECK-NEXT: }
}

// Variable base + step constructor, integer element type, N = 32.
// The checks below require base and step to be broadcast to <32 x i32>
// vectors, the step vector to be multiplied by the constant {0, 1, ..., 31}
// coefficient vector, and the product to be added to the base vector before
// the result is stored to the return slot.
SYCL_EXTERNAL auto int_base_step_var(int base, int step) SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z17int_base_step_varii({{.*}} %[[RES:[a-zA-Z0-9_\.]+]], i32 noundef %[[BASE:[a-zA-Z0-9_\.]+]], i32 noundef %[[STEP:[a-zA-Z0-9_\.]+]]){{.*}} {
return simd<int, 32>{base, step};
// CHECK: %[[BASE_VEC_TMP:[a-zA-Z0-9_\.]+]] = insertelement <32 x i32> poison, i32 %[[BASE]], i64 0
// CHECK: %[[BASE_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x i32> %[[BASE_VEC_TMP]], <32 x i32> poison, <32 x i32> zeroinitializer
// CHECK: %[[STEP_VEC_TMP:[a-zA-Z0-9_\.]+]] = insertelement <32 x i32> poison, i32 %[[STEP]], i64 0
// CHECK: %[[STEP_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x i32> %[[STEP_VEC_TMP]], <32 x i32> poison, <32 x i32> zeroinitializer
// CHECK: %[[MUL_VEC:[a-zA-Z0-9_\.]+]] = mul <32 x i32> %[[STEP_VEC]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
// CHECK: %[[ADD_VEC:[a-zA-Z0-9_\.]+]] = add <32 x i32> %[[BASE_VEC]], %[[MUL_VEC]]
// CHECK: store <32 x i32> %[[ADD_VEC]], ptr addrspace(4) %[[RES]]
// CHECK-NEXT: ret void
}

// Variable base + step constructor, integer element type, small vector (N = 2).
// Unlike the N = 32 case, the checks below require the result to be built
// element by element: base is inserted at index 0, a scalar base + step is
// inserted at index 1, and no broadcast or vector multiply is matched.
SYCL_EXTERNAL auto int_base_step_var_n2(int base, int step) SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z20int_base_step_var_n2ii({{.*}} %[[RES:[a-zA-Z0-9_\.]+]], i32 noundef %[[BASE:[a-zA-Z0-9_\.]+]], i32 noundef %[[STEP:[a-zA-Z0-9_\.]+]]){{.*}} {
return simd<int, 2>{base, step};
// CHECK: %[[BASE_VEC_TMP1:[a-zA-Z0-9_\.]+]] = insertelement <2 x i32> poison, i32 %[[BASE]], i64 0
// CHECK: %[[BASE_INC:[a-zA-Z0-9_\.]+]] = add nsw i32 %[[BASE]], %[[STEP]]
// CHECK: %[[RESULT_VEC:[a-zA-Z0-9_\.]+]] = insertelement <2 x i32> %[[BASE_VEC_TMP1]], i32 %[[BASE_INC]], i64 1
// CHECK: store <2 x i32> %[[RESULT_VEC]], ptr addrspace(4) %[[RES]]
// CHECK-NEXT: ret void
}

// Broadcast constructor, FP element type, no loops expected - check.
SYCL_EXTERNAL auto gee() SYCL_ESIMD_FUNCTION {
// CHECK: define dso_local spir_func void @_Z3geev({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} {
Expand Down
Loading