From a599c44d4e70dee88eea6eec50b1e76c46303f01 Mon Sep 17 00:00:00 2001 From: DenisBakhvalov <61807338+DenisBakhvalov@users.noreply.github.com> Date: Thu, 21 Jan 2021 14:36:03 -0800 Subject: [PATCH 1/3] [sycl-post-link] Split SYCL and ESIMD kernels into separate modules (#3044) * [sycl-post-link] Split SYCL and ESIMD kernels into separate modules Enabled via a new option - '-split-esimd'. For now this change doesn't have any effect on existing programs since we don't allow mixing SYCL and ESIMD kernels in one source or in one program. But this is an essential step towards this goal since ESIMD kernels require specific processing as opposed to usual SYCL kernels. --- .../tools/sycl-post-link/ir-output-only.ll | 36 ++++ .../sycl-esimd/basic-sycl-esimd-split.ll | 45 +++++ .../sycl-esimd/no-sycl-esimd-split.ll | 55 ++++++ .../sycl-esimd/sycl-esimd-split-per-kernel.ll | 70 ++++++++ .../sycl-esimd/sycl-esimd-split-per-source.ll | 86 ++++++++++ .../sycl-esimd/sycl-esimd-split-symbols.ll | 58 +++++++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 160 +++++++++++++----- 7 files changed, 471 insertions(+), 39 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/ir-output-only.ll create mode 100644 llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll create mode 100644 llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll create mode 100644 llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll create mode 100644 llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll create mode 100644 llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll diff --git a/llvm/test/tools/sycl-post-link/ir-output-only.ll b/llvm/test/tools/sycl-post-link/ir-output-only.ll new file mode 100644 index 000000000000..11e72debb78d --- /dev/null +++ b/llvm/test/tools/sycl-post-link/ir-output-only.ll @@ -0,0 +1,36 @@ +; RUN: sycl-post-link --ir-output-only -split=auto -S %s -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll 
+ +; This test checks that the --ir-output-only option writes an LLVM IR +; file instead of a table. In comparison with other tests, this one +; checks that the option works OK with -split=auto. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @kernel1() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @kernel2() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} + +; CHECK: define dso_local spir_kernel void @kernel1() +; CHECK: define dso_local spir_kernel void @kernel2() diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll new file mode 100644 index 000000000000..94f52ff519d0 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll @@ -0,0 +1,45 @@ +; RUN: sycl-post-link -split-esimd -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-IR + +; This is a basic test of splitting SYCL and ESIMD kernels into separate +; modules. 
+ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @ESIMD_kernel() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} +!3 = !{} + +; CHECK: [Code|Properties] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop + +; CHECK-SYCL-IR-DAG: define dso_local spir_kernel void @SYCL_kernel() +; CHECK-SYCL-IR-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-ESIMD-IR-DAG: define dso_local spir_kernel void @ESIMD_kernel() +; CHECK-ESIMD-IR-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll new file mode 100644 index 000000000000..854f600b2bd8 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll @@ -0,0 +1,55 @@ +; RUN: sycl-post-link -split=source -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-0 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-1 + +; This test checks that if no '-split-esimd' is provided, there is no +; splitting of SYCL and ESIMD kernels into separate modules. +; However, the rest of the splitting still happens according to +; the '-split=' option. 
+ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @ESIMD_kernel() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel1() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel2() #1 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="b.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} +!3 = !{} + +; CHECK: [Code|Properties] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop + +; CHECK-IR-0-DAG: define dso_local spir_kernel void @SYCL_kernel1() +; CHECK-IR-0-DAG: define dso_local spir_kernel void @ESIMD_kernel() +; CHECK-IR-0-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-IR-1-DAG: define dso_local spir_kernel void @SYCL_kernel2() +; CHECK-IR-1-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll new file mode 100644 index 000000000000..9e27cfc50898 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll @@ -0,0 +1,70 @@ +; RUN: sycl-post-link -split-esimd -split=kernel -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR-0 +; RUN: FileCheck %s 
-input-file=%t_1.ll --check-prefixes CHECK-SYCL-IR-1 +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-IR-0 +; RUN: FileCheck %s -input-file=%t_esimd_1.ll --check-prefixes CHECK-ESIMD-IR-1 + +; This test checks that after we split SYCL and ESIMD kernels into +; separate modules, we split those two modules further according to +; -split option. In this case we have 2 SYCL and 2 ESIMD kernels, which +; are split into a total of 4 separate modules. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @ESIMD_kernel1() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @ESIMD_kernel2() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel1() #1 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel2() #1 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="a.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} +!3 = !{} + +; CHECK: [Code|Properties] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop +; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop + +; CHECK-SYCL-IR-0-DAG: define dso_local spir_kernel void @SYCL_kernel1() +; CHECK-SYCL-IR-0-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() 
+ +; CHECK-SYCL-IR-1-DAG: define dso_local spir_kernel void @SYCL_kernel2() +; CHECK-SYCL-IR-1-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-ESIMD-IR-0-DAG: define dso_local spir_kernel void @ESIMD_kernel1() +; CHECK-ESIMD-IR-0-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-ESIMD-IR-1-DAG: define dso_local spir_kernel void @ESIMD_kernel2() +; CHECK-ESIMD-IR-1-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll new file mode 100644 index 000000000000..87b99efa01fe --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll @@ -0,0 +1,86 @@ +; RUN: sycl-post-link -split-esimd -split=source -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR-0 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-SYCL-IR-1 +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-IR-0 +; RUN: FileCheck %s -input-file=%t_esimd_1.ll --check-prefixes CHECK-ESIMD-IR-1 + +; This test checks that after we split SYCL and ESIMD kernels into +; separate modules, we split those two modules further according to +; -split option. In this case we have: +; - 3 SYCL kernels: 2 in a.cpp, 1 in b.cpp +; - 3 ESIMD kernels: 2 in a.cpp, 1 in b.cpp +; The module will be split into a total of 4 separate modules. 
+ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @ESIMD_kernel1() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @ESIMD_kernel2() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @ESIMD_kernel3() #1 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel1() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel2() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel3() #1 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="b.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} +!3 = !{} + +; CHECK: [Code|Properties] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop +; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop + +; CHECK-SYCL-IR-0-DAG: define dso_local spir_kernel void @SYCL_kernel1() +; CHECK-SYCL-IR-0-DAG: define dso_local spir_kernel void @SYCL_kernel2() +; CHECK-SYCL-IR-0-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-SYCL-IR-1-DAG: define dso_local spir_kernel void @SYCL_kernel3() 
+; CHECK-SYCL-IR-1-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-ESIMD-IR-0-DAG: define dso_local spir_kernel void @ESIMD_kernel1() +; CHECK-ESIMD-IR-0-DAG: define dso_local spir_kernel void @ESIMD_kernel2() +; CHECK-ESIMD-IR-0-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +; CHECK-ESIMD-IR-1-DAG: define dso_local spir_kernel void @ESIMD_kernel3() +; CHECK-ESIMD-IR-1-DAG: declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll new file mode 100644 index 000000000000..605c814d756e --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll @@ -0,0 +1,58 @@ +; RUN: sycl-post-link -split-esimd -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM + +; This test checks symbols generation when we split SYCL and ESIMD kernels into +; separate modules. 
+ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux-sycldevice" + +declare dso_local spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + +define dso_local spir_kernel void @ESIMD_kernel1() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @ESIMD_kernel2() #0 !sycl_explicit_simd !3{ +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel1() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +define dso_local spir_kernel void @SYCL_kernel2() #0 { +entry: + %call = tail call spir_func i64 @_Z28__spirv_GlobalInvocationId_xv() + ret void +} + +attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="b.cpp" } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 0, i32 100000} +!3 = !{} + +; CHECK: [Code|Properties|Symbols] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop|{{.*}}_esimd_0.sym + +; CHECK-SYCL-SYM: SYCL_kernel1 +; CHECK-SYCL-SYM: SYCL_kernel2 + +; CHECK-ESIMD-SYM: ESIMD_kernel1 +; CHECK-ESIMD-SYM: ESIMD_kernel2 diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 475d86e4dc4a..2e16ed561253 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -88,6 +88,10 @@ static cl::opt OutputAssembly{"S", cl::desc("Write output as LLVM assembly"), cl::Hidden, cl::cat(PostLinkCat)}; +static cl::opt SplitEsimd{"split-esimd", + cl::desc("Split SYCL and ESIMD kernels"), + cl::cat(PostLinkCat)}; + enum IRSplitMode { SPLIT_PER_TU, // one module per 
translation unit SPLIT_PER_KERNEL, // one module per kernel @@ -130,6 +134,7 @@ struct ImagePropSaveInfo { bool SetSpecConstAtRT; bool SpecConstsMet; bool EmitKernelParamInfo; + bool IsEsimdKernel; }; // Please update DeviceLibFuncMap if any item is added to or removed from // fallback device libraries in libdevice. @@ -445,7 +450,7 @@ splitModule(Module &M, } } -static std::string makeResultFileName(Twine Ext, int I) { +static std::string makeResultFileName(Twine Ext, int I, StringRef Suffix) { const StringRef Dir0 = OutputDir.getNumOccurrences() > 0 ? OutputDir : sys::path::parent_path(OutputFilename); @@ -453,9 +458,8 @@ static std::string makeResultFileName(Twine Ext, int I) { std::string Dir = Dir0.str(); if (!Dir0.empty() && !Dir0.endswith(Sep)) Dir += Sep.str(); - return (Dir + Twine(sys::path::stem(OutputFilename)) + "_" + - std::to_string(I) + Ext) - .str(); + return Dir + sys::path::stem(OutputFilename).str() + "_" + Suffix.str() + + std::to_string(I) + Ext.str(); } static void saveModule(Module &M, StringRef OutFilename) { @@ -476,13 +480,14 @@ static void saveModule(Module &M, StringRef OutFilename) { // Saves specified collection of llvm IR modules to files. // Saves file list if user specified corresponding filename. static string_vector -saveResultModules(std::vector> &ResModules) { +saveResultModules(std::vector> &ResModules, + StringRef Suffix) { string_vector Res; for (size_t I = 0; I < ResModules.size(); ++I) { std::error_code EC; StringRef FileExt = (OutputAssembly) ? ".ll" : ".bc"; - std::string CurOutFileName = makeResultFileName(FileExt, I); + std::string CurOutFileName = makeResultFileName(FileExt, I, Suffix); saveModule(*ResModules[I].get(), CurOutFileName); Res.emplace_back(std::move(CurOutFileName)); } @@ -582,7 +587,8 @@ static string_vector saveDeviceImageProperty( } } std::error_code EC; - std::string SCFile = makeResultFileName(".prop", I); + std::string SCFile = + makeResultFileName(".prop", I, ImgPSInfo.IsEsimdKernel ? 
"esimd_" : ""); raw_fd_ostream SCOut(SCFile, EC); PropSet.write(SCOut); Res.emplace_back(std::move(SCFile)); @@ -593,12 +599,13 @@ static string_vector saveDeviceImageProperty( // Saves specified collection of symbols lists to files. // Saves file list if user specified corresponding filename. -static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists) { +static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists, + StringRef Suffix) { string_vector Res; std::string TxtFilesList; for (size_t I = 0; I < ResSymbolsLists.size(); ++I) { - std::string CurOutFileName = makeResultFileName(".sym", I); + std::string CurOutFileName = makeResultFileName(".sym", I, Suffix); writeToFile(CurOutFileName, ResSymbolsLists[I]); Res.emplace_back(std::move(CurOutFileName)); } @@ -616,8 +623,12 @@ static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists) { using TableFiles = std::map; -static TableFiles processOneModule(std::unique_ptr M) { +static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, + bool SyclAndEsimdKernels) { TableFiles TblFiles; + if (!M) + return TblFiles; + std::map> GlobalsSet; bool DoSplit = SplitMode.getNumOccurrences() > 0; @@ -671,17 +682,19 @@ static TableFiles processOneModule(std::unique_ptr M) { { // reuse input module if there were no spec constants and no splitting - string_vector Files = SpecConstsMet || (ResultModules.size() > 1) - ? saveResultModules(ResultModules) - : string_vector{InputFilename}; + string_vector Files = + SpecConstsMet || (ResultModules.size() > 1) || SyclAndEsimdKernels + ? saveResultModules(ResultModules, IsEsimd ? 
"esimd_" : "") + : string_vector{InputFilename}; // "Code" column is always output std::copy(Files.begin(), Files.end(), std::back_inserter(TblFiles[COL_CODE])); } { - ImagePropSaveInfo ImgPSInfo = {true, DoSpecConst, SetSpecConstAtRT, - SpecConstsMet, EmitKernelParamInfo}; + ImagePropSaveInfo ImgPSInfo = { + true, DoSpecConst, SetSpecConstAtRT, + SpecConstsMet, EmitKernelParamInfo, IsEsimd}; string_vector Files = saveDeviceImageProperty(ResultModules, ImgPSInfo); std::copy(Files.begin(), Files.end(), std::back_inserter(TblFiles[COL_PROPS])); @@ -694,13 +707,81 @@ static TableFiles processOneModule(std::unique_ptr M) { assert(ResultModules.size() == 1); ResultSymbolsLists.push_back(""); } - string_vector Files = saveResultSymbolsLists(ResultSymbolsLists); + string_vector Files = + saveResultSymbolsLists(ResultSymbolsLists, IsEsimd ? "esimd_" : ""); std::copy(Files.begin(), Files.end(), std::back_inserter(TblFiles[COL_SYM])); } return TblFiles; } +using ModulePair = std::pair, std::unique_ptr>; + +// This function splits a module with a mix of SYCL and ESIMD kernels +// into two separate modules. +static ModulePair splitSyclEsimd(std::unique_ptr M) { + // Collect information about the SYCL and ESIMD kernels in the module. + std::vector SyclKernels; + std::vector EsimdKernels; + for (auto &F : M->functions()) { + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { + if (F.getMetadata("sycl_explicit_simd")) + EsimdKernels.push_back(&F); + else + SyclKernels.push_back(&F); + } + } + + // If only SYCL kernels or only ESIMD kernels, no splitting needed. + if (EsimdKernels.empty()) + return std::make_pair(std::move(M), std::unique_ptr(nullptr)); + + if (SyclKernels.empty()) + return std::make_pair(std::unique_ptr(nullptr), std::move(M)); + + // Key values in KernelModuleMap are not significant, but they define the + // order, in which kernels are processed in the splitModule function. 
The + // caller of the splitSyclEsimd function expects a pair of 1-Sycl and 2-Esimd + // modules, hence the string names below. + std::map> KernelModuleMap( + {{"1-SYCL", SyclKernels}, {"2-ESIMD", EsimdKernels}}); + std::vector> ResultModules; + splitModule(*M, KernelModuleMap, ResultModules); + assert(ResultModules.size() == 2); + return std::make_pair(std::move(ResultModules[0]), + std::move(ResultModules[1])); +} + +static TableFiles processInputModule(std::unique_ptr M) { + if (!SplitEsimd) + return processOneModule(std::move(M), false, false); + + std::unique_ptr SyclModule; + std::unique_ptr EsimdModule; + std::tie(SyclModule, EsimdModule) = splitSyclEsimd(std::move(M)); + + // Do we have both Sycl and Esimd kernels? + bool SyclAndEsimdKernels = SyclModule && EsimdModule; + + TableFiles SyclTblFiles = + processOneModule(std::move(SyclModule), false, SyclAndEsimdKernels); + TableFiles EsimdTblFiles = + processOneModule(std::move(EsimdModule), true, SyclAndEsimdKernels); + + // Merge the two resulting file maps + TableFiles MergedTblFiles; + for (auto &ColumnStr : {COL_CODE, COL_PROPS, COL_SYM}) { + auto &SyclFiles = SyclTblFiles[ColumnStr]; + auto &EsimdFiles = EsimdTblFiles[ColumnStr]; + auto &MergedFiles = MergedTblFiles[ColumnStr]; + std::copy(SyclFiles.begin(), SyclFiles.end(), + std::back_inserter(MergedFiles)); + std::copy(EsimdFiles.begin(), EsimdFiles.end(), + std::back_inserter(MergedFiles)); + } + return MergedTblFiles; +} + int main(int argc, char **argv) { InitLLVM X{argc, argv}; @@ -712,6 +793,10 @@ int main(int argc, char **argv) { "This is a collection of utilities run on device code's LLVM IR before\n" "handing off to back-end for further compilation or emitting SPIRV.\n" "The utilities are:\n" + "- SYCL and ESIMD kernels can be split into separate modules with\n" + " '-split-esimd' option. The option has no effect when there is only\n" + " one type of kernels in the input module. 
Functions unreachable from\n" + " any kernel are dropped from the resulting module(s).\n" "- Module splitter to split a big input module into smaller ones.\n" " Groups kernels using function attribute 'sycl-module-id', i.e.\n" " kernels with the same values of the 'sycl-module-id' attribute will\n" @@ -719,6 +804,9 @@ int main(int argc, char **argv) { " one module per kernel will be emitted.\n" " '-split=auto' mode automatically selects the best way of splitting\n" " kernels into modules based on some heuristic.\n" + " The '-split' option is compatible with '-split-esimd'. In this case,\n" + " first input module will be split into SYCL and ESIMD modules. Then\n" + " both modules will be further split according to the '-split' option.\n" "- If -symbols options is also specified, then for each produced module\n" " a text file containing names of all spir kernels in it is generated.\n" "- Specialization constant intrinsic transformer. Replaces symbolic\n" @@ -743,10 +831,11 @@ int main(int argc, char **argv) { "than 'auto'.\n"); bool DoSplit = SplitMode.getNumOccurrences() > 0; + bool DoSplitEsimd = SplitEsimd.getNumOccurrences() > 0; bool DoSpecConst = SpecConstLower.getNumOccurrences() > 0; bool DoParamInfo = EmitKernelParamInfo.getNumOccurrences() > 0; - if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo) { + if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo && !DoSplitEsimd) { errs() << "no actions specified; try --help for usage info\n"; return 1; } @@ -755,6 +844,11 @@ int main(int argc, char **argv) { << " can't be used with -" << IROutputOnly.ArgStr << "\n"; return 1; } + if (IROutputOnly && DoSplitEsimd) { + errs() << "error: -" << SplitEsimd.ArgStr << " can't be used with -" + << IROutputOnly.ArgStr << "\n"; + return 1; + } if (IROutputOnly && DoSymGen) { errs() << "error: -" << DoSymGen.ArgStr << " can't be used with -" << IROutputOnly.ArgStr << "\n"; @@ -791,34 +885,22 @@ int main(int argc, char **argv) { if 
(OutputFilename.getNumOccurrences() == 0) OutputFilename = (Twine(sys::path::stem(InputFilename)) + ".files").str(); - TableFiles TblFiles = processOneModule(std::move(M)); + TableFiles TblFiles = processInputModule(std::move(M)); + // Input module was processed and a single output file was requested. if (IROutputOnly) return 0; + // Populate and emit the resulting table util::SimpleTable Table; - auto addTableColumn = [&Table, &TblFiles](std::string Str) { - auto &Files = TblFiles[Str]; - if (Files.empty()) - return 0; - Error Err = Table.addColumn(Str, Files); - CHECK_AND_EXIT(Err); - return 0; - }; + for (auto &ColumnStr : {COL_CODE, COL_PROPS, COL_SYM}) + if (!TblFiles[ColumnStr].empty()) + CHECK_AND_EXIT(Table.addColumn(ColumnStr, TblFiles[ColumnStr])); - int Res; - if ((Res = addTableColumn(COL_CODE)) != 0) - return Res; - if ((Res = addTableColumn(COL_PROPS)) != 0) - return Res; - if ((Res = addTableColumn(COL_SYM)) != 0) - return Res; + std::error_code EC; + raw_fd_ostream Out{OutputFilename, EC, sys::fs::OF_None}; + checkError(EC, "error opening file '" + OutputFilename + "'"); + Table.write(Out); - { - std::error_code EC; - raw_fd_ostream Out{OutputFilename, EC, sys::fs::OF_None}; - checkError(EC, "error opening file '" + OutputFilename + "'"); - Table.write(Out); - } return 0; } From 83c897e48196fdc23989410b89b31d9590498bfc Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Fri, 22 Jan 2021 11:21:18 +0300 Subject: [PATCH 2/3] [NFC][SYCL] Rename poorly named diagnostic (#3056) --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Sema/SemaDeclAttr.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 32bf65d7a4c5..3e6113ba064e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11202,7 +11202,7 @@ def 
err_sycl_function_attribute_mismatch : Error< "SYCL kernel without %0 attribute can't call a function with this attribute">; def err_sycl_x_y_z_arguments_must_be_one : Error< "%0 X-, Y- and Z- sizes must be 1 when %1 attribute is used with value 0">; -def err_sycl_attibute_cannot_be_applied_here +def err_sycl_attribute_internal_function : Error<"%0 attribute cannot be applied to a " "static function or function in an anonymous namespace">; def err_sycl_compiletime_property_duplication : Error< diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 6df08ee25ae4..9a7b1a903003 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4833,7 +4833,7 @@ static void handleOptimizeNoneAttr(Sema &S, Decl *D, const ParsedAttr &AL) { static void handleSYCLDeviceAttr(Sema &S, Decl *D, const ParsedAttr &AL) { auto *FD = cast(D); if (!FD->isExternallyVisible()) { - S.Diag(AL.getLoc(), diag::err_sycl_attibute_cannot_be_applied_here) << AL; + S.Diag(AL.getLoc(), diag::err_sycl_attribute_internal_function) << AL; return; } @@ -4844,7 +4844,7 @@ static void handleSYCLDeviceIndirectlyCallableAttr(Sema &S, Decl *D, const ParsedAttr &AL) { auto *FD = cast(D); if (!FD->isExternallyVisible()) { - S.Diag(AL.getLoc(), diag::err_sycl_attibute_cannot_be_applied_here) << AL; + S.Diag(AL.getLoc(), diag::err_sycl_attribute_internal_function) << AL; return; } @@ -4854,11 +4854,6 @@ static void handleSYCLDeviceIndirectlyCallableAttr(Sema &S, Decl *D, static void handleSYCLRegisterNumAttr(Sema &S, Decl *D, const ParsedAttr &AL) { auto *VD = cast(D); - if (!VD->hasGlobalStorage()) { - S.Diag(AL.getLoc(), diag::err_sycl_attibute_cannot_be_applied_here) - << AL << 0; - return; - } if (!checkAttributeNumArgs(S, AL, 1)) return; uint32_t RegNo = 0; From 1040b941a902c4bcf883211b8e11bc83b3df5769 Mon Sep 17 00:00:00 2001 From: jinge90 <43599496+jinge90@users.noreply.github.com> Date: Fri, 22 Jan 2021 16:31:48 +0800 Subject: [PATCH 3/3] [SYCL] 
Enable more clang builtins for SYCL device compiler (#3060) This patch enables math builtins supported by LLVM-SPIRV-Translator: fmax/f (depends on llvm.maxnum) fmin/f (depends on llvm.minnum) isinf (depends on llvm.fabs) isfinite (depends on llvm.fabs) isnormal (depends on llvm.fabs) fpclassify (depends on llvm.fabs) Signed-off-by: gejin --- clang/lib/Sema/SemaSYCL.cpp | 12 ---- clang/test/SemaSYCL/supported_math.cpp | 83 ++++++++++++++++---------- 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1909c1d81a87..5b66f5a2f6d7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -178,20 +178,8 @@ static bool IsSyclMathFunc(unsigned BuiltinID) { case Builtin::BI__builtin_truncl: case Builtin::BIlroundl: case Builtin::BI__builtin_lroundl: - case Builtin::BIfmax: - case Builtin::BI__builtin_fmax: - case Builtin::BIfmin: - case Builtin::BI__builtin_fmin: - case Builtin::BIfmaxf: - case Builtin::BI__builtin_fmaxf: - case Builtin::BIfminf: - case Builtin::BI__builtin_fminf: case Builtin::BIlroundf: case Builtin::BI__builtin_lroundf: - case Builtin::BI__builtin_fpclassify: - case Builtin::BI__builtin_isfinite: - case Builtin::BI__builtin_isinf: - case Builtin::BI__builtin_isnormal: return false; default: break; diff --git a/clang/test/SemaSYCL/supported_math.cpp b/clang/test/SemaSYCL/supported_math.cpp index aada7829ef78..86c4c792d599 100644 --- a/clang/test/SemaSYCL/supported_math.cpp +++ b/clang/test/SemaSYCL/supported_math.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-strict -verify %s +// expected-no-diagnostics extern "C" float sinf(float); extern "C" float cosf(float); extern "C" float floorf(float); @@ -8,6 +9,8 @@ extern "C" float rintf(float); extern "C" float roundf(float); extern "C" float truncf(float); extern "C" float copysignf(float, float); +extern "C" float fminf(float, float); +extern "C" float fmaxf(float, float); 
extern "C" double sin(double); extern "C" double cos(double); extern "C" double floor(double); @@ -17,6 +20,8 @@ extern "C" double rint(double); extern "C" double round(double); extern "C" double trunc(double); extern "C" double copysign(double, double); +extern "C" double fmin(double, double); +extern "C" double fmax(double, double); template __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { kernelFunc(); @@ -26,39 +31,51 @@ int main() { kernel([=]() { int acc[1] = {5}; acc[0] *= 2; - acc[0] += (int)truncf(1.0f); // expected-no-diagnostics - acc[0] += (int)trunc(1.0); // expected-no-diagnostics - acc[0] += (int)roundf(1.0f); // expected-no-diagnostics - acc[0] += (int)round(1.0); // expected-no-diagnostics - acc[0] += (int)rintf(1.0f); // expected-no-diagnostics - acc[0] += (int)rint(1.0); // expected-no-diagnostics - acc[0] += (int)nearbyintf(0.5f); // expected-no-diagnostics - acc[0] += (int)nearbyint(0.5); // expected-no-diagnostics - acc[0] += (int)floorf(0.5f); // expected-no-diagnostics - acc[0] += (int)floor(0.5); // expected-no-diagnostics - acc[0] += (int)copysignf(1.0f, -0.5f); // expected-no-diagnostics - acc[0] += (int)copysign(1.0, -0.5); // expected-no-diagnostics - acc[0] += (int)sinf(1.0f); // expected-no-diagnostics - acc[0] += (int)sin(1.0); // expected-no-diagnostics - acc[0] += (int)__builtin_sinf(1.0f); // expected-no-diagnostics - acc[0] += (int)__builtin_sin(1.0); // expected-no-diagnostics - acc[0] += (int)cosf(1.0f); // expected-no-diagnostics - acc[0] += (int)cos(1.0); // expected-no-diagnostics - acc[0] += (int)__builtin_cosf(1.0f); // expected-no-diagnostics - acc[0] += (int)__builtin_cos(1.0); // expected-no-diagnostics - acc[0] += (int)logf(1.0f); // expected-no-diagnostics - acc[0] += (int)log(1.0); // expected-no-diagnostics - acc[0] += (int)__builtin_truncf(1.0f); // expected-no-diagnostics - acc[0] += (int)__builtin_trunc(1.0); // expected-no-diagnostics - acc[0] += (int)__builtin_rintf(1.0f); // 
expected-no-diagnostics - acc[0] += (int)__builtin_rint(1.0); // expected-no-diagnostics - acc[0] += (int)__builtin_nearbyintf(0.5f); // expected-no-diagnostics - acc[0] += (int)__builtin_nearbyint(0.5); // expected-no-diagnostics - acc[0] += (int)__builtin_floorf(0.5f); // expected-no-diagnostics - acc[0] += (int)__builtin_floor(0.5); // expected-no-diagnostics - acc[0] += (int)__builtin_copysignf(1.0f, -0.5f); // expected-no-diagnostics - acc[0] += (int)__builtin_logf(1.0f); // expected-no-diagnostics - acc[0] += (int)__builtin_log(1.0); // expected-no-diagnostics + acc[0] += (int)truncf(1.0f); + acc[0] += (int)trunc(1.0); + acc[0] += (int)roundf(1.0f); + acc[0] += (int)round(1.0); + acc[0] += (int)rintf(1.0f); + acc[0] += (int)rint(1.0); + acc[0] += (int)nearbyintf(0.5f); + acc[0] += (int)nearbyint(0.5); + acc[0] += (int)floorf(0.5f); + acc[0] += (int)floor(0.5); + acc[0] += (int)copysignf(1.0f, -0.5f); + acc[0] += (int)copysign(1.0, -0.5); + acc[0] += (int)fminf(1.5f, 0.5f); + acc[0] += (int)fmin(1.5, 0.5); + acc[0] += (int)fmaxf(1.5f, 0.5f); + acc[0] += (int)fmax(1.5, 0.5); + acc[0] += (int)sinf(1.0f); + acc[0] += (int)sin(1.0); + acc[0] += (int)__builtin_sinf(1.0f); + acc[0] += (int)__builtin_sin(1.0); + acc[0] += (int)cosf(1.0f); + acc[0] += (int)cos(1.0); + acc[0] += (int)__builtin_cosf(1.0f); + acc[0] += (int)__builtin_cos(1.0); + acc[0] += (int)logf(1.0f); + acc[0] += (int)log(1.0); + acc[0] += (int)__builtin_truncf(1.0f); + acc[0] += (int)__builtin_trunc(1.0); + acc[0] += (int)__builtin_rintf(1.0f); + acc[0] += (int)__builtin_rint(1.0); + acc[0] += (int)__builtin_nearbyintf(0.5f); + acc[0] += (int)__builtin_nearbyint(0.5); + acc[0] += (int)__builtin_floorf(0.5f); + acc[0] += (int)__builtin_floor(0.5); + acc[0] += (int)__builtin_copysignf(1.0f, -0.5f); + acc[0] += (int)__builtin_fminf(1.5f, 0.5f); + acc[0] += (int)__builtin_fmin(1.5, 0.5); + acc[0] += (int)__builtin_fmaxf(1.5f, 0.5f); + acc[0] += (int)__builtin_fmax(1.5, 0.5); + acc[0] += 
(int)__builtin_logf(1.0f); + acc[0] += (int)__builtin_log(1.0); + acc[0] += __builtin_isinf(1.0); + acc[0] += __builtin_isfinite(1.0); + acc[0] += __builtin_isnormal(1.0); + acc[0] += __builtin_fpclassify(0, 1, 4, 3, 2, 1.0); }); return 0; }